In [None]:
import numpy as np 
import pandas as pd
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import CategoricalNB
from sklearn.naive_bayes import ComplementNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score

## Import Data

In [None]:
# Every processed artifact lives in the same Kaggle input folder; keeping the
# folder in one constant avoids repeating the long path and makes it easy to
# point the notebook at a different location.
DATA_DIR = "/kaggle/input/hit-prediction-processed-data/Hit Prediction"

# Feature tables (CSV).
X = pd.read_csv(f"{DATA_DIR}/X.csv")
X_train = pd.read_csv(f"{DATA_DIR}/X_train.csv")
X_test = pd.read_csv(f"{DATA_DIR}/X_test.csv")
X_selected = pd.read_csv(f"{DATA_DIR}/X_selected.csv")
X_train_selected = pd.read_csv(f"{DATA_DIR}/X_train_selected.csv")
X_test_selected = pd.read_csv(f"{DATA_DIR}/X_test_selected.csv")

# Target arrays (NumPy .npy), used as `y` when fitting and scoring below.
y = np.load(f"{DATA_DIR}/y.npy")
y_train = np.load(f"{DATA_DIR}/y_train.npy")
y_test = np.load(f"{DATA_DIR}/y_test.npy")
y_selected = np.load(f"{DATA_DIR}/y_selected.npy")

## Different Types of Naive Bayes Classifier which are being used

* **BernoulliNB**
* **GaussianNB**

## BernoulliNB

In [None]:
# Baseline: BernoulliNB with default hyperparameters, trained on the
# training split and scored on the test split.
bernoulli_nb = BernoulliNB().fit(X_train_selected, y_train)
bernoulli_nb.score(X_test_selected, y_test)

## Cross Validation Score for Base Model 

In [None]:
# 5-fold cross-validation of the baseline BernoulliNB on the selected data.
scores = cross_val_score(
    estimator=bernoulli_nb,
    X=X_selected,
    y=y_selected,
    cv=5,
)
scores

In [None]:
# Summarise the fold scores as mean and standard deviation.
mean_acc = scores.mean()
std_acc = scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (mean_acc, std_acc))

### Hyperparameter Tuning Using Grid Search CV (Bernoulli NB)

In [None]:
# Search space for BernoulliNB: additive smoothing strength, whether to
# learn class priors, and the threshold used to binarize the features
# (None disables binarization).
params = dict(
    alpha=[0.01, 0.1, 0.5, 1.0, 10.0],
    fit_prior=[True, False],
    binarize=[None, 0.0, 8.5, 10.0],
)

In [None]:
# Exhaustive 5-fold grid search over `params`, parallelised across all cores.
bernoulli_nb_grid = GridSearchCV(
    estimator=BernoulliNB(),
    param_grid=params,
    cv=5,
    n_jobs=-1,
    verbose=False,
)

In [None]:
# Run the grid search on the training split only; the test split stays
# untouched for the evaluation further down.
bernoulli_nb_grid.fit(X_train_selected,y_train)

## Best Parameters

In [None]:
# Display the estimator that the grid search selected as best.
bernoulli_nb_grid.best_estimator_

In [None]:
# Display the winning hyperparameter combination as a dict.
bernoulli_nb_grid.best_params_

## Model Fit with best Parameters

In [None]:
# GridSearchCV refits the best parameter combination on the full training
# data by default (refit=True), so best_estimator_ is already fitted on
# (X_train_selected, y_train) and needs no second fit() call.
clf = bernoulli_nb_grid.best_estimator_
clf.score(X_test_selected, y_test)

### Confusion Matrix

In [None]:
# Predict on the test split and tabulate true vs. predicted labels.
pred = clf.predict(X_test_selected)
cm = confusion_matrix(y_true=y_test, y_pred=pred)
print(cm)

### Classification Report

In [None]:
# Text summary of the test-split predictions.
report = classification_report(y_true=y_test, y_pred=pred)
print(report)

### Cross Validation Score for Hyperparameter Tuned Model

In [None]:
# 5-fold cross-validation of the tuned classifier on the selected data.
scores = cross_val_score(
    estimator=clf,
    X=X_selected,
    y=y_selected,
    cv=5,
)
scores

In [None]:
# Summarise the fold scores as mean and standard deviation.
mean_acc = scores.mean()
std_acc = scores.std()
print("%0.2f accuracy with a standard deviation of %0.2f" % (mean_acc, std_acc))

**Hence there is no significant difference in the cross-validation score before and after tuning the hyperparameters.**

## GaussianNB

### Fitting Basic Model with default Parameters

In [None]:
# Baseline: GaussianNB with default hyperparameters, trained on the
# training split and scored on the test split.
gaussian_nb = GaussianNB().fit(X_train_selected, y_train)
gaussian_nb.score(X_test_selected, y_test)

In [None]:
# 5-fold cross-validation of the baseline GaussianNB on the selected data.
scores = cross_val_score(
    estimator=gaussian_nb,
    X=X_selected,
    y=y_selected,
    cv=5,
)
scores

In [None]:
# Summarise the fold scores as mean and standard deviation.
mean_acc = scores.mean()
std_acc = scores.std()
print("%0.5f accuracy with a standard deviation of %0.2f" % (mean_acc, std_acc))

### Hyper Parameter Tuning Using Grid Search CV(Gaussian NB)

In [None]:
# Search space for GaussianNB.
# `priors` must be a valid probability distribution: GaussianNB raises
# "The sum of the priors should be 1." otherwise, so the previous
# [0.1, 0.1] candidate made every fit for that half of the grid fail.
# Uniform priors are used instead (assumes a binary target, as the original
# two-element list did); None lets the model estimate priors from the data.
params = {
            'var_smoothing': [1e-9, 1e-6, 1e-12],
            'priors': [None, [0.5, 0.5]],
         }

In [None]:
# Exhaustive 5-fold grid search over `params`, parallelised across all
# cores, run on the training split only.
gaussian_nb_grid = GridSearchCV(
    estimator=GaussianNB(),
    param_grid=params,
    cv=5,
    n_jobs=-1,
    verbose=False,
)
gaussian_nb_grid.fit(X_train_selected, y_train)

### Model Fit with best Parameters

In [None]:
# Evaluate the tuned grid-search model on the test split.
gaussian_nb_grid.score(X_test_selected,y_test)

In [None]:
# Display the winning hyperparameter combination as a dict.
gaussian_nb_grid.best_params_

In [None]:
# Keep a handle on the best GaussianNB found by the grid search.
# NOTE: this rebinds `clf`, which earlier referred to the tuned BernoulliNB.
clf=gaussian_nb_grid.best_estimator_

### Confusion Matrix

In [None]:
# Predict on the test split and tabulate true vs. predicted labels.
pred = clf.predict(X_test_selected)
cm = confusion_matrix(y_true=y_test, y_pred=pred)
print(cm)

### Classification Report

In [None]:
# Text summary of the test-split predictions.
report = classification_report(y_true=y_test, y_pred=pred)
print(report)

### Cross Validation Score

In [None]:
# 5-fold cross-validation of the tuned classifier on the selected data.
scores = cross_val_score(
    estimator=clf,
    X=X_selected,
    y=y_selected,
    cv=5,
)
scores

In [None]:
# Summarise the fold scores as mean and standard deviation.
mean_acc = scores.mean()
std_acc = scores.std()
print("%0.5f accuracy with a standard deviation of %0.2f" % (mean_acc, std_acc))

**Here, too, there is no significant difference between the scores before and after tuning the model.**

## Conclusion
*The cross-validation score for Bernoulli Naive Bayes is 68% and for Gaussian Naive Bayes is 71%, so we select **Gaussian Naive Bayes** for classification.*