# In [20]:
import csv
import math
import random

def loadcsv(filename):
    """Read a numeric CSV file into a list of rows of floats.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per non-blank line of the file; each entry is
        the list of that line's fields converted with ``float``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float`` (for
            example a textual header row).
    """
    # The context manager closes the file even when a field fails to parse.
    # newline="" lets the csv module do its own line-ending handling.
    with open(filename, "r", newline="") as handle:
        # csv.reader yields an empty list for a blank line; skip those so
        # code that indexes row[-1] never receives an empty row.
        return [
            [float(field) for field in row]
            for row in csv.reader(handle)
            if row
        ]

def split(dataset, splitratio):
    """Cut *dataset* into a leading training part and a trailing test part.

    The rows keep their existing order; nothing is shuffled.

    Args:
        dataset: Sliceable sequence of rows.
        splitratio: Fraction of the rows assigned to the training part.

    Returns:
        A two-element list ``[trainset, testset]`` where ``trainset`` holds
        the first ``int(len(dataset) * splitratio)`` rows and ``testset``
        holds the remaining ones.
    """
    boundary = int(len(dataset) * splitratio)
    head = dataset[:boundary]
    tail = dataset[boundary:]
    return [head, tail]

def summarizeByClass(dataset):
    """Compute per-class ``(mean, stdev)`` pairs for every attribute column.

    Rows are grouped by their last element, which serves as the class
    label. Within each group the rows are transposed into columns and each
    column is summarised with ``mean`` and ``stdev``.

    Args:
        dataset: Iterable of rows; the last element of each row is the
            class label.

    Returns:
        A dict mapping each class label to a list of ``(mean, stdev)``
        tuples, one per column except the last (label) column.
    """
    by_label = {}
    for row in dataset:
        by_label.setdefault(row[-1], []).append(row)

    summaries = {}
    for label, rows in by_label.items():
        column_stats = []
        for column in zip(*rows):
            column_stats.append((mean(column), stdev(column)))
        # The final column is the class label itself; drop its statistics.
        summaries[label] = column_stats[:-1]
    return summaries

def mean(num):
    """Return the arithmetic mean of the values in *num*.

    Args:
        num: Sized collection of numbers.

    Returns:
        ``sum(num) / len(num)``.

    Raises:
        ZeroDivisionError: If *num* is empty.
    """
    total = sum(num)
    count = len(num)
    return total / count

def stdev(num):
    """Return the sample standard deviation of the values in *num*.

    The squared deviations from the mean are summed and divided by
    ``len(num) - 1`` before taking the square root.

    Args:
        num: Sized collection of numbers.

    Raises:
        ZeroDivisionError: If *num* has exactly one element (the divisor
            ``len(num) - 1`` is zero) or is empty (raised by ``mean``).
    """
    center = mean(num)
    squared_dev = 0
    for value in num:
        squared_dev += (value - center) ** 2
    degrees_of_freedom = len(num) - 1
    return math.sqrt(squared_dev / degrees_of_freedom)

def calcprob(x, mean, stdev):
    """Evaluate the Gaussian probability density at *x*.

    Args:
        x: Point at which the density is evaluated.
        mean: Mean of the distribution.
        stdev: Standard deviation of the distribution.

    Returns:
        ``exp(-(x - mean)^2 / (2 * stdev^2)) / sqrt(2 * pi * stdev^2)``.

    Raises:
        ZeroDivisionError: If *stdev* is zero.
    """
    variance = stdev ** 2
    exponent = (-(x - mean) ** 2) / (2 * variance)
    bell = math.exp(exponent)
    normaliser = 1 / math.sqrt(2 * math.pi * variance)
    return normaliser * bell

def predict(summaries, invec):
    """Return the class label whose attribute densities best fit *invec*.

    For each class, the Gaussian densities of the attributes (computed by
    ``calcprob``) are multiplied together, starting from 1. The label with
    the largest product is returned; on a tie the class seen first wins.
    No class prior is included in the product.

    Args:
        summaries: Mapping of class label to a sequence of
            ``(mean, stdev)`` pairs, one pair per attribute.
        invec: Indexable sequence of attribute values. Only the first
            ``len(pairs)`` entries are read, so any trailing elements
            (such as a class label) are ignored.

    Returns:
        The best-scoring class label, or ``None`` if *summaries* is empty.
    """
    bestlabel, bestprob = None, -1
    for classvalue, classsummaries in summaries.items():
        likelihood = 1
        # mu/sigma avoid shadowing the module-level mean() and stdev().
        for i, (mu, sigma) in enumerate(classsummaries):
            likelihood *= calcprob(invec[i], mu, sigma)
        # Identity check against the None sentinel; strict ">" keeps the
        # first class on ties.
        if bestlabel is None or likelihood > bestprob:
            bestlabel, bestprob = classvalue, likelihood
    return bestlabel

def getPrediction(summaries, testset):
    """Predict a class label for every row of *testset*.

    Args:
        summaries: Per-class attribute summaries, passed unchanged to
            ``predict``.
        testset: Sequence of rows to classify.

    Returns:
        A list with the ``predict`` result for each row, in row order.
    """
    return [predict(summaries, row) for row in testset]
    
def getAccuracy(prediction, testset):
    """Return the percentage of test rows whose label was predicted correctly.

    Args:
        prediction: Sequence of predicted labels; ``prediction[i]`` is
            compared with the row at ``testset[i]``.
        testset: Sequence of rows whose last element is the true label.

    Returns:
        A float between 0.0 and 100.0. An empty *testset* yields 0.0
        instead of raising ``ZeroDivisionError``.

    Raises:
        IndexError: If *prediction* has fewer entries than *testset*.
    """
    total = len(testset)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = sum(
        1 for i, row in enumerate(testset) if row[-1] == prediction[i]
    )
    return (correct / total) * 100.0
    
# Load the data set; loadcsv converts every field to float.
filename = "medical.csv"
dataset = loadcsv(filename)
# The first 60% of the rows (in file order, no shuffling) are used for
# training and the remaining rows for testing.
splitratio = 0.6
trainset,testset = split(dataset,splitratio)
# Build the per-class (mean, stdev) summaries from the training rows only.
summaries = summarizeByClass(trainset)
# Classify every test row and print the percentage predicted correctly.
prediction = getPrediction(summaries,testset)
accuracy = getAccuracy(prediction,testset)
print(accuracy)

# Output: 57.14285714285714
