In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report 

In [27]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [28]:
def chng(x):
    if x==-1:
        return 0
    return x

In [218]:
class NaiveBayespro:

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self._classes = np.unique(y)
        n_classes = len(self._classes)
        

        # calculate mean, variance, and prior for each class
        self._mean = np.zeros((n_classes, n_features), dtype=np.float64)
        self._var = np.zeros((n_classes, n_features), dtype=np.float64)
        self._priors =  np.zeros(n_classes, dtype=np.float64)

        for idx, c in enumerate(self._classes):
            X_c = X[y==c]
            self._mean[idx, :] = X_c.mean(axis=0)
            self._var[idx, :] = X_c.var(axis=0)
            self._priors[idx] = X_c.shape[0] / float(n_samples)

    def predict(self, X):
        X = np.array(X)
        y_pred = [self._predict(x) for x in X]
        return np.array(y_pred)

    def get_parameter(self):
        print("Class Probabilties : ",self._priors)
        
    def _predict(self, x):
        posteriors = []

        # calculate posterior probability for each class
        for idx, c in enumerate(self._classes):
            prior = np.log(self._priors[idx])
            posterior = np.sum(np.log(self._pdf(idx, x)))
            posterior = prior + posterior
            posteriors.append(posterior)
            
        # return class with highest posterior probability
        return self._classes[np.argmax(posteriors)]          

    def _pdf(self, class_idx, x):
        mean = self._mean[class_idx]
        var = self._var[class_idx]
        numerator = np.exp(- (x-mean)**2 / (2 * var))
        denominator = np.sqrt(2 * np.pi * var)
        return numerator / denominator
    
    def score(self,x_test,y_test):
        y_pred = self.predict(x_test)
        return sum([1 for i,j in zip(y_pred,y_test) if i==j]) / len(y_test)    
    
    def accuracy(self,x_test,y_test):
        y_pred = self.predict(x_test)
        return np.sum(y_pred == y_test)/len(x_test)
    
    def confusion_mat(self,x_test,y_test):
        y_pred = self.predict(x_test)
        self.cm = confusion_matrix(y_test,y_pred)
        return self.cm
        
    def performance(self,x_test,y_test):
        cm = self.confusion_mat(x_test,y_test)
        tn = cm[0][0]
        fp = cm[0][1]
        fn = cm[1][0]
        tp = cm[1][1]
        sn = tp/(tp+fn)
        sp = tn/(tn+fp)
        pr = tp/(tp+fp)
        fm = 2*(pr*sn)/(pr+sn)
        print(f"Sensitivity is {sn}")
        print(f"Specificity is {sp}")
        print(f"Precission is {pr}")
        print(f"F_measur is {fm}")

In [30]:
df3= pd.read_csv('data3.csv', header= None)
df3.head()

Unnamed: 0,0,1,2,3
0,-9.8032,-7.7544,1.472,1
1,-3.0264,-4.5213,2.1924,1
2,-1.4833,-6.6244,3.1342,1
3,-4.969,-0.78348,-3.5189,1
4,-6.1224,-5.0951,0.99638,1


In [31]:
df3.shape

(300, 4)

In [32]:
df3[3].unique()

array([1, 2, 3], dtype=int64)

In [50]:
x = dict(df3[3].value_counts())
print(x)

{1: 100, 2: 100, 3: 100}


In [35]:
X = df3.drop(df3[3], axis=1)
y = df3[3]

In [93]:
X_train3, X_test3, y_train3, y_test3 = train_test_split(X,y,test_size=0.30,shuffle=True)

In [124]:
modelproplus = NaiveBayespro()

In [125]:
modelproplus.fit(X_train3, y_train3)

In [126]:
predicted_yproplus = modelproplus.predict(X_test3)

In [127]:
modelproplus.score(X_test3,y_test3)

0.6111111111111112

In [128]:
modelproplus.accuracy(X_test,y_test)

0.6777777777777778

In [129]:
modelproplus.performance(X_test,y_test)

Sensitivity is 1.0
Specificity is 0.9130434782608695
Precission is 0.9166666666666666
F_measur is 0.9565217391304348


In [130]:
modelproplus.get_parameter()

Class Probabilties :  [0.31428571 0.36666667 0.31904762]
