In [1]:
import math
import pandas as pd
import numpy as np

df_data=pd.read_csv('pima-indians-diabetes.data.csv',header=None).astype('float')
size=int(len(df_data)*0.67)
train_df=df_data.iloc[:size].copy()
test_df=df_data.iloc[size:].copy()

def summarizeByClass(train_df):
    summary={}
    for key,value in train_df.groupby(train_df.columns[-1]):
        summary[key]=summarize(value)
    return summary

def summarize(data):
    summaries=[(mean(data[attribute]),stdev(data[attribute])) for attribute in data.columns[0:-1]]
    return summaries

def mean(numbers):
    return sum(numbers)/float(len(numbers))

def stdev(numbers):
    avg=mean(numbers)
    variance=sum([pow(x-avg,2) for x in numbers])/float(len(numbers)-1)
    return math.sqrt(variance)

def getPrediction(summaries,testset):
    predictions=[]
    for i in range(len(testset)):
        result=predict(summaries,testset.iloc[i:i+1])
        predictions.append(result)
    return predictions

def predict(summaries,inputVector):
    probabilities=calcbyval(summaries,inputVector)
    bestVal,bestprob=None,-1
    for classValue,probability in probabilities.items():
        if bestVal is None or probability > bestprob:
            bestprob=probability
            bestVal=classValue
    return bestVal

def calcbyval(summaries,inputVector):
    probabilities={}
    for classValue,classSummaries in summaries.items():
        probabilities[classValue]=1
        for i in range(len(classSummaries)):
            mean,stdev=classSummaries[i]
            x=inputVector[i]
            probabilities[classValue]*=calcprob(x,mean,stdev)
    return probabilities

def calcprob(x,mean,stdev):
    exponent=math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2))))
    return (1/(math.sqrt(2*math.pi)*stdev))*exponent

def getAccuracy(testset,predictions):
    correct=1
    for i in range(len(testset)):
        if testset.iloc[i:i+1][8].values==predictions[i]:
            correct+=1
    return (correct/float(len(testset)))*100.0

summaries=summarizeByClass(train_df)
predictions=getPrediction(summaries,test_df)
accuracy=getAccuracy(test_df,predictions)
print('Split {0} rows into train={1} and test={2} rows'.format(len(df_data), len(train_df), len(test_df)))
print("The Accuracy is "+str(accuracy))

Split 768 rows into train=514 and test=254 rows
The Accuracy is 77.16535433070865
