In [3]:
import pandas as pd
import random
import math
import operator
import numpy as np

def safe_div(x,y):
    if y == 0:
        return 0
    return x / y

def splitDataset(dataset, splitRatio):
	split = int(len(dataset)*splitRatio)
	return dataset.iloc[split:], dataset.iloc[:split]
   
      
def calculateProbability(x, mean, stdev):	
	exponent = math.exp(-safe_div(math.pow(x-mean,2),(2*math.pow(stdev,2))))
	final = safe_div(1 , (math.sqrt(2*math.pi) * stdev)) * exponent
	return final
 
def calculateClassProbabilities(summaries, inputVector):
	pro={}
	for index in summaries.index:
		pro[index]=1
		for x in inputVector.index :
			pro[index] *= calculateProbability(inputVector[x] , summaries[x]["mean"][index] ,summaries[x]["std"][index] )
	return pro

def predict(summaries, inputVector):
	probabilities = calculateClassProbabilities(summaries, inputVector)
	bestLabel, bestProb = None, -1
	for classValue, probability in probabilities.items():
		if probability > bestProb:
			bestProb = probability
			bestLabel = classValue
	return bestLabel

def getPredictions(summaries, testSet):
	predictions = [];
	for i in range(len(testSet)):
		result = predict(summaries, testSet.iloc[i])
		predictions.append(result);
	return predictions


def getAccuracy(actual, predictions):
    return 100 * (np.array(actual)==np.array(predictions)).sum() / len(actual)
    
filename = 'ConceptLearning.csv'
dataset = pd.read_csv(filename,names=["OUTLOOK","TEMPERATURE","HUMIDITY","WIND","TARGET"])
trainingSet, testSet = splitDataset(dataset, splitRatio=0.5)

summaries = dataset.groupby(by="TARGET").aggregate([np.mean,np.std])
# test model
predictions = getPredictions(summaries, testSet.iloc[:,:-1])
actual = testSet.iloc[:,-1].values
print('Actual values: {0}'.format(actual))
print('Predictions: {0}'.format(predictions))
print("accuracy is {0} %".format(getAccuracy(actual,predictions)))


Actual values: [ 5  5 10 10 10]
Predictions: [5, 5, 5, 10, 10]
accuracy is 80.0 %
