

```
# This is formatted as code
```

# COMP551 Group101 Naive Bayes, Evaluation, and Experiment
 Eric Shen 260798146
 
 Modified for use with Hybrid Naive Bayes by Edwin


## Useful Imports

In [0]:
import numpy as np
import pandas as pd
import csv
from tqdm import tqdm
import math
import seaborn as sns
import matplotlib.pyplot as plt

## Gaussian Naive Bayes


In [0]:
import numpy as np

#============================================================================================================================================================================================================================================================================
#
# GAUSSIAN NAIVE-BAYES MODEL CLASS
#
#   @Author: Edwin Pan of Group 101 of Winter 2020 COMP551 at McGill University
#
#============================================================================================================================================================================================================================================================================

class GaussianNaiveBayesModel:

  #Constructor and Instance Variables
  #@takes no_of_features, the amount of features that will be taken in
  #@takes no_of_classes, which is how many outputs are possible and being read in in integers (ie 2 indicates boolean, 3 indicates ternary, etc.)
  def __init__(self,no_of_features,no_of_classes):
    if(no_of_classes==1):
      print("Will not create model with one possible output.")
      return None
    self.class_prior = np.zeros( no_of_classes )
    self.mean = np.zeros( (no_of_features, no_of_classes) )
    self.variance = np.zeros( (no_of_features, no_of_classes) )
    return

  #helper binarizeGBClasses function
  #Takes a one-dimensional matrix and replaces elements of "g" with 1 and elements of "b" with 0.
  def __binarizeGBClasses__(self,matrix):
    for i in range(len(matrix)):
      if matrix[i]=="g":
        matrix[i] = 1
      elif matrix[i]=="b":
        matrix[i] = 0
    return matrix

  #helper normal function
  #Gives the probability of x occuring in normal function given the mean and variance
  def __normal__(self,x,m,v):
    if(v==0):
      return 0
    pi = 3.1415926535898535
    base = 1
    power = 1
    numerator = 1
    denumerator = (2*pi)**0.5*v
    e_numerator = 0-(x-m)**2
    e_denumerator = v**2*2
    base = numerator/denumerator
    power = e_numerator/e_denumerator
    return base*np.exp(power)

  #Fit function
  #@Takes DataFrame of training data, Double of learningRate, and Integer of gradientDescentIteration
  #@Returns nothing
  #Fits the model
  def fit(self, trainingDataFeatures, trainingDataClasses):
    trainingDataClasses = self.__binarizeGBClasses__(trainingDataClasses)
    lists_by_class = []
    #Calculate Class Prior first
    for c in range( len(self.class_prior) ):
      lists_by_class.append( [] )
      for i in range( trainingDataClasses.size ):
        if( trainingDataClasses[i] == c ):
          self.class_prior[c] = self.class_prior[c] + 1
          lists_by_class[c].append(i)
      self.class_prior[c] /= len(trainingDataClasses)
    #Now calculate the means and variances for all feature and class pairs
    for c in range( len(self.class_prior) ):
      #We first first calculate the Mean for each feature's classes:
      #Save the sum of the features for each class in each mean[f,c].
      for i in lists_by_class[c]:
        for f in range( len(self.mean) ):
          self.mean[f][c] += trainingDataFeatures[i][f]*1
      #Now divide each sum of features by the amount of features they were to obtain the mean.
      for f in range( len(self.mean) ):
        self.mean[f][c] /= ( len(lists_by_class[c]) )
      #Now that we know the mean for this class of value, we can now calculate the variances:
      #Save the sum the squared errors per instance of feature per class in variance[f][c].
      for i in lists_by_class[c]:
        for f in range( len(self.variance) ):
          self.variance[f][c] += (trainingDataFeatures[i][f]*1 - self.mean[f][c])**2
      #Now that we have the sum, we now take the average error squared and then squareroot that average
      for f in range( len(self.variance) ):
        self.variance[f][c] = ( self.variance[f][c]/(len(lists_by_class[c])) )**0.5
    return #Thus we now have our class priors as well as the means and variances values for calculating probabilities.

  #Predict function
  #@Takes in an input Series of input datapoint
  #@Returns the classification of the input datapoint classifications
  def predict(self, x):
    #Get an array in which we will put the probabilities of each class into
    output_probabilities = np.zeros( len(self.class_prior) )
    #Calculate the probability of each classification type
    for c in range( len(output_probabilities) ):
      prior_class_probability = 0
      conditional_probability = 1
      marginal_probability = 0
      #Prior
      prior_class_probability = self.class_prior[c]
      #Conditional of X given the c currently observed
      conditional_probability = 1
      for f in range( len(self.mean) ):
        if( self.variance[f][c] == 0):
          continue
        conditional_probability *= self.__normal__(x[f], self.mean[f][c], self.variance[f][c])
      #Marginal of X over all classes
      for k in range( len(output_probabilities) ):
        marginal_partial = 1;
        for f in range( len(self.mean) ):
          if( self.variance[f][k] == 0):
            continue
          marginal_partial *= self.__normal__(x[f], self.mean[f][k], self.variance[f][k])
        marginal_probability += marginal_partial
      #Calculate the output probability of this class c
      output_probabilities[c] = prior_class_probability*conditional_probability/marginal_probability
    greater_class = 0;
    for c in range(1,len(output_probabilities)):
      if( output_probabilities[greater_class] < output_probabilities[c] ):
        greater_class = c
    return greater_class

  #evaluate_acc method.
  #@Takes in Training Data (NxD Matrix) and Classifications (Nx1 Matrix);
  #@Returns the accuracy of the current weights
  def evaluate_acc(self, X, Y):
    everythingRight = 0
    everything = 0
    for i in range( len(X) ):  #For each instance
      if( self.predict( X[i] ) == Y[i] ):  #Use the model, predict the output with obtained features, and tally the result.
        everythingRight += 1
      everything += 1                 #don't forget the total count.
    return everythingRight/everything

## Normaliztion from Edwin's code


In [0]:
import numpy as np

#==============================================================================================================
#
#   Vector Normalizer
#
#     Takes an input vector of numbers and normalizes its values between 0 and 1.
#
#==============================================================================================================
def normalize_vector(vector):
  #Obtain Normalization Values
  min_value = vector[0]
  max_value = vector[0]
  for i in range(len(vector)):
    if vector[i] < min_value:
      min_value = vector[i]
    elif vector[i] > max_value:
      max_value = vector[i]
  #Normalize all vector elements
  for i in range(len(vector)):
    vector[i] = ( vector[i] - min_value )/(max_value-min_value)
  return vector

## Processes for four datasets

### train_test_split

In [0]:
def train_test_split(mydataset: np.ndarray, k: int, Normalize: bool):

    #Normalize all feature input columns
    if(Normalize):
      for column in range( len(mydataset[0]) - 1 ):
        vector = []
        for instance in range(len(mydataset)):
          vector.append( mydataset[instance][column] )
        vector = normalize_vector(vector)
        for instance in range(len(mydataset)):
          mydataset[instance][column] = vector[instance]

    np.random.shuffle(mydataset)
    rows = mydataset.shape[0]
    mydataset_train = mydataset[: (int)(k * rows/10), :]
    mydataset_test = mydataset[(int)(k * rows/10):, :]
    
    mydataset_train_x = mydataset_train[:, :-1]
    mydataset_train_y = mydataset_train[:, -1]
    mydataset_test_x = mydataset_test[:, :-1]
    mydataset_test_y = mydataset_test[:, -1]

    return mydataset_train_x, mydataset_train_y, mydataset_test_x, mydataset_test_y


### Process Ionosphere

In [0]:
def process_ionosphere():

    ionosphere = pd.read_csv("ionosphere.csv", header=None)
    ionosphere          = ionosphere.drop([1],axis=1).to_numpy()

    classIndex = len(ionosphere[0]) -1
    for i in range(len(ionosphere)): 
      if( ionosphere[i][classIndex] == "g" ): 
          ionosphere[i][classIndex] = 1
      else:
          ionosphere[i][classIndex] = 0


    ionosphere = np.array(ionosphere[0:])

    return train_test_split(ionosphere, 9, True)

### One Hot Encoding for adult data

In [0]:
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder


def clean(data):
    data = data.dropna(axis='index')
    return data.reset_index(drop=True)


def preprocess(data):
    le = preprocessing.LabelEncoder()
    le.fit(data['workclass'])
    data['workclass'] = le.transform(data['workclass'])
    le.fit(data['education'])
    data['education'] = le.transform(data['education'])
    le.fit(data['marital-status'])
    data['marital-status'] = le.transform(data['marital-status'])
    le.fit(data['occupation'])
    data['occupation'] = le.transform(data['occupation'])
    le.fit(data['relationship'])
    data['relationship'] = le.transform(data['relationship'])
    le.fit(data['race'])
    data['race'] = le.transform(data['race'])
    le.fit(data['sex'])
    data['sex'] = le.transform(data['sex'])
    le.fit(data['native-country'])
    data['native-country'] = le.transform(data['native-country'])
    le.fit(data['id'])
    data['id'] = le.transform(data['id'])
    temp = data.drop(columns='id').copy()
    data = data[temp.apply(lambda x: np.abs(x - x.mean()) / x.std() < 3).all(axis=1)]
    return data.reset_index(drop=True)


def one_hot_encoder(data):
    # values = np.array(data)
    values = data.to_numpy()
    # integer encode
    label_encoder = LabelEncoder()
    integer_encoded = label_encoder.fit_transform(values)
    # binary encode
    onehot_encoder = OneHotEncoder(sparse=False)
    integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
    onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
    return onehot_encoded


def one_hot_encoder_without_sklean(data):
    temp = data.drop(columns='id')
    id = data.id
    data = pd.get_dummies(temp, prefix_sep='_', drop_first=True)
    data['id'] = id
    data.head()
    data = data.replace(['<=50K', '>50K'], [0, 1])
    temp = data.drop(columns='id').copy()
    data = data[temp.apply(lambda x: np.abs(x - x.mean()) / x.std() < 3).all(axis=1)]
    return data.reset_index(drop=True)


# show the distribution of the positive vs. negative classes
def show_povsneg(data):
    sumid = [(data.id == 0).sum(), (data.id == 1).sum()]
    xl = ['<=50', '>50']
    plt.bar(x=xl, height=sumid)
    for i, v in zip(xl, sumid):
        plt.annotate(str(v), xy=(i, v), color='black', va='center', size=11)
    plt.ylabel('amount')
    plt.xlabel('wage')
    plt.title('distribution of different wages(adult)')
    plt.show()

### Process Adult

In [0]:
def process_Adult():
  data = pd.read_csv('adult.data', engine='python', sep=',\s', na_values=['?'],
                    names=['age', 'workclass', 'fnlwgt', 'education', 'education-num', 
                           'marital-status', 'occupation','relationship', 'race', 'sex', 
                           'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'id'])
  target = data['id']

  data = clean(data)
  data = preprocess(data)

  workclass = one_hot_encoder(data['workclass'])
  # print(workclass.shape[1])
  data = pd.concat([data, pd.DataFrame(workclass)], axis=1)
  education = one_hot_encoder(data['education'])
  # print(education.shape[1])
  data = pd.concat([data, pd.DataFrame(education)], axis=1)
  marital_status = one_hot_encoder(data['marital-status'])
  # print(marital_status.shape[1])
  data = pd.concat([data, pd.DataFrame(marital_status)], axis=1)
  occupation = one_hot_encoder(data['occupation'])
  # print(occupation.shape[1])
  data = pd.concat([data, pd.DataFrame(occupation)], axis=1)
  relationship = one_hot_encoder(data['relationship'])
  # print(relationship.shape[1])
  data = pd.concat([data, pd.DataFrame(relationship)], axis=1)
  race = one_hot_encoder(data['race'])
  # print(race.shape[1])
  data = pd.concat([data, pd.DataFrame(race)], axis=1)
  native_country = one_hot_encoder(data['native-country'])
  # print(native_country.shape[1])
  data = pd.concat([data, pd.DataFrame(native_country)], axis=1)

  # data=data.replace(['Male','Female'],[1,0])
  # data=data.replace(['<=50K','>50K'],[0,1])

  del data['workclass']
  del data['education']
  del data['marital-status']
  del data['occupation']
  del data['race']
  del data['relationship']
  del data['native-country']
  ids = data['id'].copy()
  del data["id"]
  data.insert(data.shape[1], "id", ids)

  adult_dataset_result = data.to_numpy()

  return train_test_split(adult_dataset_result, 9, False)

### Process wines


In [0]:
def process_wines():
    with open("winequality-white.csv", 'r') as f:
        wines = list(csv.reader(f, delimiter=";"))
    global wines_header
    wines_header = np.array(wines[0])  # with label header
    wines = np.array(wines[1:], dtype=np.float)  # with label

    # clean malinformed values by deleting the rows they inhabit
    invalid_index = []
    for i in range(len(wines)):
        for number in wines[i]:
            if math.isnan(number):
                np.delete(wines, i, 0)

    # differentiate labels
    for i in tqdm(range(len(wines[:, -1]))):
        if wines[:, -1][i] > 5:
            wines[:, -1][i] = 1
        else:
            wines[:, -1][i] = 0

    return train_test_split(wines, 9, True)


### Process Breast Cancer

In [0]:
def process_tumors():
    with open("breast-cancer-wisconsin.csv", 'r') as f:
        tumors = list(csv.reader(f, delimiter=";"))

    global tumors_header
    tumors_header = ["clump thickness", "cell size", "cell shape", "marginal adhesion", \
                     "single epithelial cell size", "number of bare nuclei", "bland chromatin", \
                     "number of normal nuclei", "mitosis", "label"]  # with label header but no IDs

    # highlight malinformed values
    invalid_index = []
    for i in tqdm(range(len(tumors))):
        tumors[i] = tumors[i][0].split(",")
        for j in range(len(tumors[i])):
            if tumors[i][j].isnumeric() == False:
                invalid_index.append(i)  # the whole row
        # differentiate labels
        if int(tumors[i][-1]) <= 2:
            tumors[i][-1] = '0'
        else:
            tumors[i][-1] = '1'

    # clean malinformed values by deleting the rows they inhabit
    invalid_index.sort(reverse=True)
    for i in invalid_index:
        tumors.remove(tumors[i])

    tumors = np.array(tumors[0:], dtype=np.float)
    tumors = tumors[:, 1:]

    return train_test_split(tumors, 9, False)

## Evaluation

In [0]:
def evaluation(prediction: np.ndarray, groundtruth: np.ndarray):
    # sanity check
    if len(prediction) != len(groundtruth):
        raise TypeError
    
    tn,fp,fn,tp = 0,0,0,0 #true negative, false positive, false negative, true positive
    
    for i in range(len(prediction)):
        if prediction[i] == 0 and groundtruth[i] == 0:
            tn += 1
        if prediction[i] == 1 and groundtruth[i] == 0:
            fp += 1
        if prediction[i] == 0 and groundtruth[i] == 1:
            fn += 1
        if prediction[i] == 1 and groundtruth[i] == 1:
            tp += 1
    return tn,fp,fn,tp

In [0]:
def merge_chunks(data_split,indices):
    indices = list(indices).sort()
    if len([indices]) < 2:
        return data_split[0]
    data_merged = data_split[indices[0]]
    indices.remove(indices[0]) #remove the first element so that it does not get re-merged
    for i in indices:
        data_merged = np.concatenate(data_merged,data_split[i],axis=0)
        
    return data_merged

## Cross Validation

In [0]:
import numpy as np
import copy

#============================================================================================================================================================================================================================================================================
#
# K-CROSS VALIDATION
#
#   @Author: Edwin Pan of Group 101 of Winter 2020 COMP551 at McGill University
#
#   This section is a script for applying K-Cross Validation 
#
#============================================================================================================================================================================================================================================================================

def kCrossValidate(k,cleanModel,featuresDataset,classificationsDataset,learningRate=None,gradientDescentIterations=None):

  #Create a list ready to keep track of all the models ever made.
  allKModels = []

  #First, create all of the dataset partitions needed. 
  features_test_block =             []
  classifications_test_block  =     []
  features_training_block =         []
  classifications_training_block =  []
  dataset_size = classificationsDataset.size  #Total number of instances
  for fold in range(k):
    #Get the subsets
    lower_index = dataset_size*fold//k        #Get the lower bound index from the full dataset for the test dataset
    upper_index = dataset_size*(fold+1)//k    #Get the upper bound idnex from the full dataset for the test dataset
    #The Test Block of size 1/k per k
    features_test_block.append(         featuresDataset[(lower_index):(upper_index)]                )
    classifications_test_block.append(      classificationsDataset[(lower_index):(upper_index)]                 )
    #The Training Block of size (k-1)k per k
    if(lower_index==0):   #If we're dealing with the first fold
      features_training_block.append(                featuresDataset[upper_index:] )
      classifications_training_block.append(  classificationsDataset[upper_index:]   ) 
    elif(upper_index == dataset_size):    #If we're dealing with the last fold
      features_training_block.append(                featuresDataset[:lower_index] )  
      classifications_training_block.append(  classificationsDataset[:lower_index]  )
    else:
      #Note that these two following operations only work if lower_index is not 0 and upper_index is not the upper bound such that we get empty matrices.
      new_block_size = dataset_size - upper_index + lower_index;
      no_of_features = len(featuresDataset[0])
      #Before making the block, we need to make sure our new array will contain the right datatype. If we find string, make our array hold objects.
      datatype = type(float)
      for f in range(no_of_features):
        if not isinstance( featuresDataset[0][f], float ) and not isinstance( featuresDataset[0][f], int):
          datatype = object
          break
      #Make the new block
      new_features_block = np.zeros( (new_block_size,no_of_features), dtype=datatype )
      new_classes_block = np.zeros( new_block_size )
      j = 0 #Row being checked
      for i in range( lower_index ):
        for f in range(no_of_features):
          new_features_block[j][f] = featuresDataset[i][f]
        new_classes_block[j] = classificationsDataset[i]
        j += 1
      for i in range( upper_index, dataset_size ):
        for f in range(no_of_features):
          new_features_block[j][f] = featuresDataset[i][f]
        new_classes_block[j] = classificationsDataset[i]
        j += 1
      features_training_block.append( new_features_block )
      classifications_training_block.append( new_classes_block )
      #features_training_block.append(         np.concatenate(featuresDataset[:lower_index],(featuresDataset[upper_index:])) )   
      #classifications_training_block.append(  np.concatenate(classificationsDataset[:lower_index],(classificationsDataset[upper_index:]))   ) 
      
  #Now we start training and testing the model.
  average_accuracy = 0;
  for fold in range(k):
    model = copy.deepcopy(cleanModel)
    if(learningRate==None or gradientDescentIterations==None):
      model.fit(trainingDataFeatures= features_training_block[fold], 
                trainingDataClasses=  classifications_training_block[fold]
                )
    else:
      model.fit(trainingDataFeatures=     features_training_block[fold], 
                trainingDataClasses=      classifications_training_block[fold], 
                learningRate=             learningRate, 
                gradientDescentIterations=gradientDescentIterations,
                beVerbose= False
                )
    fold_accuracy = model.evaluate_acc( X=(features_test_block[fold]), Y=(classifications_test_block[fold]) )
    average_accuracy += fold_accuracy
    #print("Accuracy of fold number " + str(fold+1) + " is " + str(fold_accuracy) + ".")
    allKModels.append(model)
    continue
  average_accuracy /= k
  #Return the average accuracy obtained
  return average_accuracy, allKModels

## Model Averager

Given enough time, we would like to combine all of the k models obtained through the k-fold cross validation to create a new model for use on predicting the classifications on the test dataset. Combining of the models would come down to essentially averaging each weight among the models.

In [0]:
import copy

def averageHNBModels(models):
  numberOfModels = len(models)
  newModel = copy.deepcopy(model[0])
  for k, v in newModel.feature_models.values():
    if newModel.feature_models[k].__type__ == "BINARY":
      average_featuremodel_mean = 0
      average_featuremodel_variance = 0
      for m in models:
        m.feature_models[k].mean
    elif newModel.feature_models[k].__type__ == "CATEGORICAL":
      pass
    elif newModel.feature_models[k].__type__ == "GAUSSIAN":
      pass
      #INCOMPLETE CODE



## Four Test

In [0]:
import time
ionosphere_train_x, ionosphere_train_y, ionosphere_test_x, ionosphere_test_y = process_ionosphere()
start = time.time()
ionosphere_HNBModel = GaussianNaiveBayesModel( len(ionosphere_train_x[0]), 2 )
average_accuracy, allModels = kCrossValidate(5,ionosphere_HNBModel,ionosphere_train_x,ionosphere_train_y)
ionosphere_HNBModel.fit(ionosphere_train_x,ionosphere_train_y)
final_test_accuracy = ionosphere_HNBModel.evaluate_acc(ionosphere_test_x,ionosphere_test_y)
end = time.time()
print("Gaussian Naive Bayes K-Cross Validated (K=5) accuracy of our model is " + str(average_accuracy) +".")
print("Final accuracy on Ionosphere's testing set: ", final_test_accuracy)
print("Gaussian Naive Bayes on Ionosphere train set time: ", (end - start)/5)

Hybrid Naive Bayes K-Cross Validated (K=5) accuracy of our model is 0.8222222222222222.
Final accuracy on Ionosphere's testing set:  0.8333333333333334
Hybrid Naive Bayes on Ionosphere train set time:  0.11877923011779785


In [0]:
import time
cancer_train_x, cancer_train_y, cancer_test_x, cancer_test_y = process_tumors()
start = time.time()
cancer_HNBModel = GaussianNaiveBayesModel( len(cancer_train_x[0]), 2 )
average_accuracy, allModels = kCrossValidate(5,cancer_HNBModel,cancer_train_x,cancer_train_y)
cancer_HNBModel.fit(cancer_train_x,cancer_train_y)
final_test_accuracy = cancer_HNBModel.evaluate_acc(cancer_test_x,cancer_test_y)
end = time.time()
print("Gaussian Naive Bayes K-Cross Validated (K=5) accuracy of our model is " + str(average_accuracy) +".")
print("Final accuracy on Cancer's testing set: ", final_test_accuracy)
print("Gaussian Naive Bayes on Cancer train set time: ", (end - start)/5)

100%|██████████| 699/699 [00:00<00:00, 170543.80it/s]


Hybrid Naive Bayes K-Cross Validated (K=5) accuracy of our model is 0.964121018259363.
Final accuracy on Cancer's testing set:  0.9420289855072463
Hybrid Naive Bayes on Cancer train set time:  0.07529220581054688


In [0]:
import time
Adult_train_x, Adult_train_y, Adult_test_x, Adult_test_y  = process_Adult()
start = time.time()
adult_HNBModel = GaussianNaiveBayesModel( len(Adult_train_x[0]), 2 )
average_accuracy, allModels = kCrossValidate(5,adult_HNBModel,Adult_train_x,Adult_train_y)
adult_HNBModel.fit(Adult_train_x,Adult_train_y)
final_test_accuracy = adult_HNBModel.evaluate_acc(Adult_test_x,Adult_test_y)
end = time.time()
print("Gaussian Naive Bayes K-Cross Validated (K=5) accuracy of our model is " + str(average_accuracy) +".")
print("Final accuracy on Adult's testing set: ", final_test_accuracy)
print("Gaussian Naive Bayes on Adult train set time: ", (end - start)/5)



Hybrid Naive Bayes K-Cross Validated (K=5) accuracy of our model is 0.7952909439268457.
Final accuracy on Adult's testing set:  0.7895962732919255
Hybrid Naive Bayes on Adult train set time:  20.904246473312377


In [0]:
import time
x_wines_train, y_wines_train, x_wines_test, y_wines_test = process_wines()
start = time.time()
wine_HNBModel = GaussianNaiveBayesModel( len(x_wines_train[0]), 2 )
average_accuracy, allModels = kCrossValidate(5,wine_HNBModel,x_wines_train,y_wines_train)
wine_HNBModel.fit(x_wines_train,y_wines_train)
final_test_accuracy = wine_HNBModel.evaluate_acc(x_wines_test,y_wines_test)
end = time.time()
print("Gaussian Naive Bayes K-Cross Validated (K=5) accuracy of our model is " + str(average_accuracy) +".")
print("Final accuracy on Wines testing set: ", final_test_accuracy)
print("Gaussian Naive Bayes on Wines train set time: ", (end - start)/5)

100%|██████████| 4898/4898 [00:00<00:00, 495721.76it/s]


Hybrid Naive Bayes K-Cross Validated (K=5) accuracy of our model is 0.7019080049727041.
Final accuracy on Wines testing set:  0.7448979591836735
Hybrid Naive Bayes on Wines train set time:  0.6234371185302734
