In [7]:
import csv
import numpy as np
from sklearn.metrics import confusion_matrix, f1_score, roc_curve, auc
import matplotlib.pyplot as plt
from itertools import cycle
from scipy import interp
import random
import math
       
# Example of Naive Bayes implemented from Scratch in Python

#calculating mean of column values belonging to one class
def mean(columnvalues):
    s=0
    n=float(len(columnvalues))
    for i in range(len(columnvalues)):
        s=s+float(columnvalues[i])
    return s/n

#calculating standard deviation of column values belonging to one class
def stdev(columnvalues):
    avg = mean(columnvalues)
    s=0.0
    num=len(columnvalues)
    for i in range(num):
        s=s+pow(float(columnvalues[i])-avg,2)
    variance = s/(float(num-1))
    return math.sqrt(variance)


# Reading CSV file
filename = 'heart_disease_data.csv'
lines = csv.reader(open(filename, "r"))
dataset = list(lines)
for i in range(len(dataset)-1):
    dataset[i] = [float(x) for x in dataset[i+1]]


for z in range(5):
    print("\n\n\nTest Train Split no. ",z+1,"\n\n\n")
    trainsize = int(len(dataset) * 0.75)
    trainset = []
    testset = list(dataset)
    for i in range(trainsize):
        index = random.randrange(len(testset))
        trainset.append(testset.pop(index))

    # separate list according to class
    classlist = {}
    for i in range(len(dataset)):
        class_num = float(dataset[i][-1])
        row=dataset[i]
        if (class_num not in classlist):
            classlist[class_num] = []
        classlist[class_num].append(row)
    
    # preparing data class wise
    class_data = {}
    for class_num, row in classlist.items():
        class_datarow = [(mean(columnvalues), stdev(columnvalues)) for columnvalues in zip(*row)]
        class_datrow=class_datarow[0:13]
        class_data[class_num] =class_datarow 
     
    # Getting test vector
    y_test=[]
    for j in range(len(testset)):
        y_test.append(testset[j][-1])    
        
    # Getting prediction vector
    y_pred = []
    for i in range(len(testset)):
        class_probability = {}
        for class_num, row in class_data.items():
            class_probability[class_num] = 1
            for j in range(len(row)):
                calculated_mean, calculated_dev = row[j]
                x = float(testset[i][j])
                if(calculated_dev!=0):
                    power =math.exp(-(math.pow(x-calculated_mean,2)/(2*math.pow(calculated_dev,2))))
                    probability= (1 / (math.sqrt(2*math.pi) *calculated_dev)) * power
                class_probability[class_num] *= probability

        resultant_class, max_prob = -1, -1
        for class_num, probability in class_probability.items():
            if resultant_class == -1 or probability > max_prob:
                max_prob = probability
                resultant_class = class_num 
        
        y_pred.append(resultant_class)
    
    # Getting Accuracy
    count = 0
    for i in range(len(testset)):
        if testset[i][-1] == y_pred[i]:
            count += 1
    accuracy=(count/float(len(testset))) * 100.0
    print("\n\n Accuracy: ",accuracy,"%")

    y1=[float(k) for k in y_test]
    y_pred1=[float(k) for k in y_pred]
    
    print("\n\n\n\nF1 Score")
    f_score = f1_score(y1,y_pred1,average='weighted')
    print(f_score)
    
 




Test Train Split no.  1 





 Accuracy:  88.1578947368421 %




F1 Score
0.8811634349030472



Test Train Split no.  2 





 Accuracy:  90.78947368421053 %




F1 Score
0.9074477107153441



Test Train Split no.  3 





 Accuracy:  88.1578947368421 %




F1 Score
0.8813941390671061



Test Train Split no.  4 





 Accuracy:  89.47368421052632 %




F1 Score
0.8948830409356726



Test Train Split no.  5 





 Accuracy:  85.52631578947368 %




F1 Score
0.8556542680787721
