In [1]:
import pandas as pd
import numpy as np
import random as rm
import math
import matplotlib.pyplot as plt

#define Function
def discretize(X, rn):
    [m,n] = np.shape(X)
    for i in range(0,m):
        for j in range(0,n):
            for k in range(0, (np.shape(rn)[0] - 1)):
                if X[i,j] > rn[k] and X[i,j] < rn[k+1]:
                    X[i,j] = k
    return X

def featureNormalize(X):
    X_Norm = X.astype(float)                                     
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)                                        
    columns = np.shape(X)[1]
    for i in range(0,columns):
        X_Norm[:,i] = (X[:,i] - mu[i]) / (sigma[i])              
    return X_Norm

def learn(X, y, pattern, data_class):
    events = sum(pattern)
    [m,n] = np.shape(X)
    count = 0;
    prob = np.zeros(events)
    for i in range(0,n):
        for j in range(0,pattern[i]):
            prob[count] = (np.sum(X[np.nonzero(np.array(y[y==data_class]))]==j-1)+1) / (np.shape(np.array(y[y== data_class]))[0]+pattern[i])
            count = count + 1
    return prob

def predict(X_test, probability, pattern, prob_s):
    count = 1
    prob = np.zeros(np.shape(X_test)[0])
    for i in range(0, np.shape(pattern)[0]):                    
        prob[i] = probability[count + (X_test[i])]
        count = count + pattern[i]
    prob_sum = np.exp(np.sum(np.log(np.transpose(prob))), dtype=np.float64)       
    prediction = prob_sum * prob_s
    return prediction

#import Data
dataSet = np.asmatrix(pd.read_csv("iris.csv", header=None))                                   

X = np.array(dataSet[: , 0:4])
y = np.array(dataSet[:, 4])

[m,n] = np.shape(X)

l = list(range(m))             
rm.shuffle(l)
X = X[l, :]
y = y[l]

#training set/cross validation/test
training = math.ceil(m * 0.6)
crossvalidation = math.ceil(m * 0.2)

X_training = X[0:training, :]
X_crossvalidation = X[training: training+crossvalidation, :]
X_test = X[training+crossvalidation:m, :]

y_training = y[0:training]
y_crossvalidation = y[training: training+crossvalidation]
y_test = (y[training+crossvalidation:m]).astype(int)

#features normalization

X_training = featureNormalize(X_training)
X_crossvalidation = featureNormalize(X_crossvalidation)
X_test = featureNormalize(X_test)

#discretization of value

columns = 30
minv = -3.2
maxv = 3.2
step = (abs(minv) + abs(maxv)) / columns
range_array = np.array(list(np.arange(minv, maxv+0.1, step)))

X_training = discretize(X_training, range_array).astype(int)
X_crossvalidation = discretize(X_crossvalidation, range_array).astype(int)
X_test = discretize(X_test, range_array).astype(int)

pattern = np.array([30,30,30,30])

#learning
prob1 = learn(X_training, y_training, pattern, 1)
prob2 = learn(X_training, y_training, pattern, 2)
prob3 = learn(X_training, y_training, pattern, 3)

#smoothing
prob1_s = (np.shape(np.array(y[y==1]))[0]) / (m+3)
prob2_s = (np.shape(np.array(y[y==2]))[0]) / (m+3)
prob3_s = (np.shape(np.array(y[y==3]))[0]) / (m+3)


#prediction
prediction1 = np.zeros(30)
prediction2 = np.zeros(30)
prediction3 = np.zeros(30)

for i in range (0,30):
    prediction1[i] = predict(X_test[i,:], prob1, pattern, prob1_s)
    prediction2[i] = predict(X_test[i,:], prob2, pattern, prob2_s)
    prediction3[i] = predict(X_test[i,:], prob3, pattern, prob3_s)

predictions = np.array([np.transpose(prediction1), np.transpose(prediction2), np.transpose(prediction3)])

#max probability
prob_max1 = (np.max(predictions, axis=0) == (prediction1)).astype(int).dot(1)
prob_max2 = (np.max(predictions, axis=0) == (prediction2)).astype(int).dot(2)
prob_max3 = (np.max(predictions, axis=0) == (prediction3)).astype(int).dot(3)

result = np.transpose(np.array([np.transpose(prob_max1) + np.transpose(prob_max2) + np.transpose(prob_max3)]))
print('Results:', np.transpose(result))

accuracy = np.shape(np.array(y_test[y_test == result]))[0] / (np.shape(y_test)[0])
print('Accuracy:', accuracy * 100, '%')

Results: [[2 2 2 3 3 1 3 2 2 2 3 2 3 2 2 2 3 1 2 1 2 2 2 2 1 2 2 3 2 2]]
Accuracy: 56.666666666666664 %
