# WINE Dataset - Gaussian Naive Bayes(NB)

# 1. Importing libraries

In [1]:
#Loading required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# 2. Importing dataset

In [2]:
wines = pd.read_csv('wines.csv')
wines.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,wine_type,quality_label
0,7.0,0.17,0.74,12.8,0.045,24.0,126.0,0.9942,3.26,0.38,12.2,8,white,high
1,7.7,0.64,0.21,2.2,0.077,32.0,133.0,0.9956,3.27,0.45,9.9,5,red,low
2,6.8,0.39,0.34,7.4,0.02,38.0,133.0,0.99212,3.18,0.44,12.0,7,white,medium
3,6.3,0.28,0.47,11.2,0.04,61.0,183.0,0.99592,3.12,0.51,9.5,6,white,medium
4,7.4,0.35,0.2,13.9,0.054,63.0,229.0,0.99888,3.11,0.5,8.9,6,white,medium


# 3. Setting independent and response variables

In [3]:
x=wines.iloc[:,:-2]
y=wines.iloc[:,13]

# 4. Train-test split

In [29]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.3)

# 5. Naive Bayes Modelling

In [30]:
#Seleection of model
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()

In [31]:
#Fitting the training dataset into the model
model.fit(x_train,y_train)

GaussianNB()

In [32]:
#Predictions
predict = model.predict(x_test)

In [33]:
#Performance Evaluation
from sklearn.metrics import classification_report, confusion_matrix
report = classification_report(y_test,predict)
print(report)

              precision    recall  f1-score   support

        high       1.00      0.88      0.94        50
         low       0.96      0.98      0.97       725
      medium       0.98      0.98      0.98      1175

    accuracy                           0.97      1950
   macro avg       0.98      0.94      0.96      1950
weighted avg       0.97      0.97      0.97      1950



In [34]:
matrix = confusion_matrix(y_test,predict)
print(matrix)

[[  44    0    6]
 [   0  709   16]
 [   0   29 1146]]


# 6. Rebuild the model - NaiveBayes using GridSearchCV

In [35]:
#Selection of GridSearchCV
from sklearn.model_selection import GridSearchCV
paam_grid = {'C':[0.1,1,10,100,1000],
             'gamma':[1,0.1,0.01,0.001,0.0001]}

In [36]:
#Assigning the parameters
from sklearn.model_selection import StratifiedKFold 
params = {}
nb = GaussianNB()
gs = GridSearchCV(nb, cv=skf, param_grid=params, return_train_score=True)

In [37]:
#Fitting the training dataset
gs.fit(x_train,y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=None, shuffle=False),
             estimator=GaussianNB(), param_grid={}, return_train_score=True)

In [38]:
gs.cv_results_

{'mean_fit_time': array([0.01999936]),
 'std_fit_time': array([0.00407799]),
 'mean_score_time': array([0.00649774]),
 'std_score_time': array([0.00066753]),
 'params': [{}],
 'split0_test_score': array([0.97582418]),
 'split1_test_score': array([0.97802198]),
 'split2_test_score': array([0.98461538]),
 'split3_test_score': array([0.98461538]),
 'split4_test_score': array([0.97582418]),
 'split5_test_score': array([0.97142857]),
 'split6_test_score': array([0.98681319]),
 'split7_test_score': array([0.97577093]),
 'split8_test_score': array([0.98017621]),
 'split9_test_score': array([0.98237885]),
 'mean_test_score': array([0.97954688]),
 'std_test_score': array([0.00472019]),
 'rank_test_score': array([1]),
 'split0_train_score': array([0.98191593]),
 'split1_train_score': array([0.9811828]),
 'split2_train_score': array([0.98020528]),
 'split3_train_score': array([0.97922776]),
 'split4_train_score': array([0.9811828]),
 'split5_train_score': array([0.9799609]),
 'split6_train_score'

In [40]:
#Predictions
grid_predictions = gs.predict(x_test)

In [41]:
pd.crosstab(y_test,grid_predictions)

col_0,high,low,medium
quality_label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
high,44,0,6
low,0,709,16
medium,0,29,1146


In [42]:
#Performance evaluation
report = classification_report(y_test,grid_predictions)
print(report)

              precision    recall  f1-score   support

        high       1.00      0.88      0.94        50
         low       0.96      0.98      0.97       725
      medium       0.98      0.98      0.98      1175

    accuracy                           0.97      1950
   macro avg       0.98      0.94      0.96      1950
weighted avg       0.97      0.97      0.97      1950



ANALYSIS -
1.Accuracy - 97% after and before GridSearchCV using Naive Bayes