In [12]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import math 
import random as rand

In [13]:
iris= load_iris()
iris_data = iris.data
len(iris_data)

150

In [48]:
def separate_by_class(dataset):
    separated = dict()
    for i in range(len(dataset)):
        vector = dataset[i]
        class_value = vector[-1]
        if (class_value not in separated):
            separated[class_value] = list()
        separated[class_value].append(vector)
    return separated

def mean(numbers):
    return sum(numbers)/float(len(numbers))

def standarddeviation(numbers):
    average = mean(numbers)
    powers = [pow(x-average,2) for x in numbers] 
    variance = sum(powers)/float(len(numbers)-1)
    return math.sqrt(variance)

def summarize(dataset):
    summation = [(mean(row),standarddeviation(row)) for row in zip(*dataset)]
    del(summation[-1])
    return summation

def summarizeByClass(dataset):
    separated = separate_by_class(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries    

def calculateProbability(x,mean,stdev):
    exponent = math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2))))
    return (1/(math.sqrt(2*math.pi)*stdev))*exponent

def claculateProbabilities(summaries,inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x,mean,stdev)
        return probabilities      
    
def predict(summaries, inputVector):
    probabilities = claculateProbabilities(summaries,inputVector)
    bestLabel , bestProb = None , -1
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel      

def getPredictions(summaries,testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries,testSet[i])
        predictions.append(result)
    return predictions     

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct/float(len(testSet)))*100.0   

def splitDataSet(dataSet, splitRatio):
    trainSize = int(len(dataSet)*splitRatio)
    trainSet = []
    copy = list(dataSet)
    while len(trainSet) < trainSize:
        index = rand.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]    

In [49]:
data = np.concatenate((iris_data, np.array([iris.target]).T), axis=1)
data

array([[5.1, 3.5, 1.4, 0.2, 0. ],
       [4.9, 3. , 1.4, 0.2, 0. ],
       [4.7, 3.2, 1.3, 0.2, 0. ],
       [4.6, 3.1, 1.5, 0.2, 0. ],
       [5. , 3.6, 1.4, 0.2, 0. ],
       [5.4, 3.9, 1.7, 0.4, 0. ],
       [4.6, 3.4, 1.4, 0.3, 0. ],
       [5. , 3.4, 1.5, 0.2, 0. ],
       [4.4, 2.9, 1.4, 0.2, 0. ],
       [4.9, 3.1, 1.5, 0.1, 0. ],
       [5.4, 3.7, 1.5, 0.2, 0. ],
       [4.8, 3.4, 1.6, 0.2, 0. ],
       [4.8, 3. , 1.4, 0.1, 0. ],
       [4.3, 3. , 1.1, 0.1, 0. ],
       [5.8, 4. , 1.2, 0.2, 0. ],
       [5.7, 4.4, 1.5, 0.4, 0. ],
       [5.4, 3.9, 1.3, 0.4, 0. ],
       [5.1, 3.5, 1.4, 0.3, 0. ],
       [5.7, 3.8, 1.7, 0.3, 0. ],
       [5.1, 3.8, 1.5, 0.3, 0. ],
       [5.4, 3.4, 1.7, 0.2, 0. ],
       [5.1, 3.7, 1.5, 0.4, 0. ],
       [4.6, 3.6, 1. , 0.2, 0. ],
       [5.1, 3.3, 1.7, 0.5, 0. ],
       [4.8, 3.4, 1.9, 0.2, 0. ],
       [5. , 3. , 1.6, 0.2, 0. ],
       [5. , 3.4, 1.6, 0.4, 0. ],
       [5.2, 3.5, 1.5, 0.2, 0. ],
       [5.2, 3.4, 1.4, 0.2, 0. ],
       [4.7, 3

In [50]:
trainingSet, testingSet = splitDataSet(data, 0.80)
print('Split {0} dataset into train ={1} and test = {2} dataset',format(len(data)) , len(trainingSet),len(testingSet))
summaries = summarizeByClass(trainingSet)
predictions = getPredictions(summaries, testingSet)
accuracy = getAccuracy(testingSet, predictions)
print('Accuracy:', accuracy)

Split {0} dataset into train ={1} and test = {2} dataset 150 120 30
Accuracy: 20.0
