In [5]:
import pandas as pd, numpy as np, seaborn as sns

In [4]:
iris = sns.load_dataset('iris')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [7]:
x = iris[['sepal_length','petal_length','sepal_width','petal_width']]
y = iris['species']

In [8]:
from sklearn.model_selection import train_test_split

xtrain,xtest,ytrain,ytest = train_test_split(x,y,
                                            test_size = 30)

## User-defined functions to fit and predict as GaussianNB

In [52]:
def gaussian_NB_fit(x_train,y_train):
    samples,features = x_train.shape   # Storing total num of rows as sample and columns as features
    classes = np.unique(y_train)
    n_classes = len(classes)

    # Used dictionary to store the entries classwise
    cls_priors = {}
    cls_means = {}
    cls_var = {}

    for i in classes:
        x_cls = x_train[y_train == i]  #Storing Xtrain columns unique classwise of ytrain
        cls_priors[i] = len(x_cls)/samples  #Finding the probability of each unique class
        cls_means[i] = np.mean(x_cls, axis =0)  #Calculating the mean of each column claswise
        cls_var[i] = np.var(x_cls, axis =0)   #Claculating variance in the same way

# Creting a dictionary of all class outputs inside single key later it will be 
# easy to access in the prediction part using the keys.
# each key stores the claculated values of each unique class together
    output = {'class_priors': cls_priors, 'class_means':cls_means,
              'class_variance':cls_var}
    
    return output


# Creating a function to predict the values using the parameter
# model as object of gaussian_NB_fit function and the xtest value 
def gaussian_NB_predict(model,x_test):
    cp = model['class_priors']    # Finding the values using keys of the object
    cm = model['class_means']
    cv = model['class_variance']

    # then we have to find samples and cls length of xtest
    t_sample = x_test.shape[0]
    t_cls = len(cp)

    prediction = []
    
    for i in range(t_sample):
        posterior_probs = []

        for j in cp:
            prior = cp[j]  # Creating a loop and finding the prior,mean & var classwise
            mean = cm[j]
            var = cv[j]

            # from the pdf formula Finding the exponent and likelihood value
            exponent = -0.5 * np.sum((x_test.iloc[i] - mean) ** 2 / var)
            likelihood = np.exp(exponent) / np.sqrt(2 * np.pi * var).prod()

            posterior_prob = prior * likelihood
            posterior_probs.append(posterior_prob)

        prediction.append(list(cp.keys())[np.argmax(posterior_probs)])

    return pd.Series(prediction)


## Fitting and predicting values

In [53]:
gnb_fit = gaussian_NB_fit(xtrain,ytrain)

In [54]:
ytrainpred = gaussian_NB_predict(gnb_fit,xtrain)
ytestpred = gaussian_NB_predict(gnb_fit,xtest)


In [55]:
from sklearn.metrics import accuracy_score, confusion_matrix

accuracy_score(ytrain,ytrainpred), accuracy_score(ytest,ytestpred)

(0.9666666666666667, 0.9333333333333333)

In [56]:
confusion_matrix(ytrain,ytrainpred),confusion_matrix(ytest,ytestpred)

(array([[37,  0,  0],
        [ 0, 40,  2],
        [ 0,  2, 39]], dtype=int64),
 array([[13,  0,  0],
        [ 0,  8,  0],
        [ 0,  2,  7]], dtype=int64))

## User definrd function for classification_report

In [57]:
def Classification_Report(actuals,predicteds):
    
    f1_scores = []
    recalls = []
    accuracys = []
    precisions = []
    
    for i in actuals.unique():
        tp = 0
        fp = 0
        fn = 0
        
        for actual,predicted in zip(actuals,predicteds):
            if actual == i and predicted == i:
                tp += 1
                
            elif actual != i and predicted == i:
                fp += 1
                
            elif actual == i and predicted != i:
                fn += 1
            
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        accuracy = tp/(tp+fp+fn) if (tp+fp+fn) > 0 else 0
        
        precisions.append(precision)
        accuracys.append(accuracy)
        recalls.append(recall)
        f1_scores.append(f1_score)
    
    return pd.DataFrame({'Precision':precisions,'Recall':recalls,'f1_score':f1_scores,'Accuracys':accuracys},
                       index = actuals.unique())
   
        

In [58]:
Classification_Report(ytest,ytestpred)

Unnamed: 0,Precision,Recall,f1_score,Accuracys
setosa,1.0,1.0,1.0,1.0
versicolor,0.8,1.0,0.888889,0.8
virginica,1.0,0.777778,0.875,0.777778


In [59]:
Classification_Report(ytrain,ytrainpred)

Unnamed: 0,Precision,Recall,f1_score,Accuracys
versicolor,0.952381,0.952381,0.952381,0.909091
virginica,0.95122,0.95122,0.95122,0.906977
setosa,1.0,1.0,1.0,1.0
