# Naive Bayes
## Hyper-parameter Tunning

In [1]:
import warnings
import pandas as pd
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold
warnings.filterwarnings('ignore')

df_train = pd.read_csv('Final_Train_dataset.csv')
X_train  = df_train.iloc[:,:-1]
y_train  = df_train.VirusDetected

In [2]:
# Specify different values for the tunning process
StratifiedKFold = RepeatedStratifiedKFold(n_splits     = 5, 
                                          n_repeats    = 3, 
                                          random_state = 42)

var_smoothing   = [int(x) for x in np.logspace(0,-9, num=100)] 
                                          
#Create parameter grid
bayes_grid      = {'var_smoothing':var_smoothing} # Portion of the largest variance of all features that is added to variances for calculation stability

#Create GaussianNB object
bayes_model = GaussianNB()

bayes_search    = GridSearchCV(estimator  = bayes_model,
                               param_grid = bayes_grid,
                               cv         = StratifiedKFold,
                               verbose    = 1, 
                               scoring    = 'accuracy')


In [3]:
from datetime import datetime

# This function returns the time 
def timer(start_time=None):
    if not start_time:
        start_time = datetime.now()
        return start_time
    elif start_time:
        thour, temp_sec = divmod((datetime.now() - start_time).total_seconds(), 3600)
        tmin, tsec = divmod(temp_sec, 60)
        print('\n Time taken: %i hours %i minutes and %s seconds.' % (thour, tmin, round(tsec, 2)))
        
start_time = timer(None) # timing starts from this point for "start_time" variable
bayes_search.fit(X_train,y_train)
timer(start_time) # timing ends here for "start_time" variable

Fitting 15 folds for each of 100 candidates, totalling 1500 fits

 Time taken: 0 hours 0 minutes and 3.75 seconds.


In [4]:
bayes_search.best_params_

{'var_smoothing': 0}

* Specify optimal model 

In [5]:
optimal_model = GaussianNB().fit(X_train, y_train)