In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

# Load the apple-quality dataset; the task is to predict whether an apple is
# of good or bad quality.
appleData = pd.read_csv('apple_quality.csv')
appleData = appleData.iloc[:1000]  # work on the first 1000 rows only

# Features: every column except the first and the last one.
# NOTE(review): presumably the first column is an identifier and the last one
# is the 'Quality' target -- confirm against the CSV header.
data = appleData.iloc[:, 1:-1]

# Encode the target as integers (good -> 1, bad -> 0). `map` plus an explicit
# cast is used instead of `Series.replace`, whose implicit object -> int
# downcast is deprecated since pandas 2.2. Any value other than 'good'/'bad'
# becomes NaN and makes the integer cast raise, instead of silently leaking a
# string into the label vector.
labels = appleData['Quality'].map({'good': 1, 'bad': 0}).astype('int64')

xtrain, xtest, ytrain, ytest = train_test_split(data, labels, test_size=0.2, random_state=42)

In [5]:
def naiveBayes(data, labels, min_std=1e-9):
    """Estimate the parameters of a Gaussian naive Bayes classifier.

    For every distinct value in ``labels`` the per-feature mean, the
    per-feature population standard deviation (``ddof=0``) and the class
    prior (fraction of rows belonging to the class) are computed.

    Parameters
    ----------
    data : pandas.DataFrame or numpy.ndarray
        Training samples, one row per sample.
    labels : pandas.Series or numpy.ndarray
        Class label of each row of ``data``.
    min_std : float, optional
        Lower bound applied to every standard deviation. A feature that is
        constant within a class would otherwise get a standard deviation of
        exactly 0, which makes the Gaussian density divide by zero and
        produce nan/inf likelihoods. Pass ``0`` to disable the floor.

    Returns
    -------
    means : dict
        Maps each class to its per-feature means.
    stds : dict
        Maps each class to its per-feature standard deviations (floored at
        ``min_std``).
    priors : dict
        Maps each class to its relative frequency in ``labels``.
    classes : numpy.ndarray
        Sorted unique class labels, as returned by ``np.unique``.
    """
    n_samples = data.shape[0]
    classes = np.unique(labels)
    means = {}
    stds = {}
    priors = {}

    for c in classes:
        mask = labels == c
        members = data[mask]  # rows belonging to class c
        priors[c] = np.count_nonzero(mask) / n_samples
        means[c] = np.mean(members, axis=0)
        # np.maximum keeps the container type (Series stays Series) while
        # guarding against zero-variance features.
        stds[c] = np.maximum(np.std(members, axis=0), min_std)

    return means, stds, priors, classes

def pVal(mean, std, data):
    """Evaluate the Gaussian probability density N(mean, std**2) at ``data``.

    All arguments may be scalars or array-likes; the computation is
    element-wise and follows numpy broadcasting rules.

    Parameters
    ----------
    mean : float or array-like
        Mean of the distribution.
    std : float or array-like
        Standard deviation of the distribution. Must be non-zero; a zero
        value leads to a division by zero.
    data : float or array-like
        Point(s) at which the density is evaluated.

    Returns
    -------
    float or array-like
        Density value(s) ``1 / (std * sqrt(2*pi)) * exp(-(x - mean)^2 / (2 * std^2))``.
    """
    # Use the exact value of pi rather than the 3.14 approximation so the
    # density is correctly normalised.
    const = 1.0 / (std * np.sqrt(2 * np.pi))
    exponent = - (data - mean) ** 2 / (2 * std ** 2)
    return const * np.exp(exponent)
    
def predict(means, stds, priors, classes, data):
    """Predict the most probable class for every row of ``data``.

    Each class is scored with the Gaussian naive Bayes log-posterior

        log P(c) + sum_j log N(x_j | mean_cj, std_cj)

    and the class with the highest score wins. Working in log space avoids
    the floating-point underflow that a product of many small densities
    suffers from. The term ``-0.5 * log(2*pi)`` per feature is identical for
    every class, so it is omitted without affecting the ranking.

    Parameters
    ----------
    means, stds, priors : dict
        Per-class parameters keyed by class label, as returned by
        ``naiveBayes``. ``means[c]`` and ``stds[c]`` hold one value per
        feature, in the same positional order as the columns of ``data``.
    classes : array-like
        Class labels to choose from. They may be arbitrary labels and do not
        have to be the integers ``0..k-1``.
    data : pandas.DataFrame or numpy.ndarray
        Samples to classify, one row per sample.

    Returns
    -------
    list
        Predicted class label for each row of ``data``, in row order.
    """
    samples = np.asarray(data, dtype=float)
    class_labels = np.asarray(classes)

    # scores[i, k] is the (unnormalised) log-posterior of class k for row i.
    scores = np.empty((samples.shape[0], len(class_labels)))
    for k, c in enumerate(class_labels):
        mean = np.asarray(means[c], dtype=float)
        std = np.asarray(stds[c], dtype=float)
        log_likelihood = (
            -np.sum(np.log(std))
            - np.sum((samples - mean) ** 2 / (2 * std ** 2), axis=1)
        )
        scores[:, k] = np.log(priors[c]) + log_likelihood

    # Translate the winning column index back into the actual class label.
    return list(class_labels[np.argmax(scores, axis=1)])
    
# Fit the hand-written Gaussian naive Bayes on the training split and measure
# its accuracy on the held-out test split.
means, stds, priors, classes = naiveBayes(xtrain, ytrain)
pred = predict(means, stds, priors, classes, xtest)
# scikit-learn metrics take (y_true, y_pred) in that order.
acc = accuracy_score(ytest, pred)
print(acc)

0.765


In [6]:
# Reference check: train scikit-learn's GaussianNB on the same split so its
# accuracy can be compared with the hand-written implementation.
model = GaussianNB()
model.fit(xtrain, ytrain)
pred = model.predict(xtest)
# scikit-learn metrics take (y_true, y_pred) in that order.
acc = accuracy_score(ytest, pred)
print(acc)

0.765
