# Import Libraries

In [None]:
import csv
import math
import numpy as np
import pandas as pd
from math import sqrt
from math import pi
from math import exp
from numpy import array
from random import seed
from random import randrange
from sklearn.model_selection import KFold #For tallying results with self-implimented Kfold
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

# Load Dataset

In [None]:
def load_dataset():
    """Read ``./fetal_health.csv`` and build the modelling matrix.

    The first CSV row is treated as the header and the last column as the
    class label.  The 11 highest-scoring feature columns (``f_classif``) are
    kept, standardised with ``StandardScaler`` and re-joined with the label
    column.

    Returns:
        tuple: ``(new_data, features)`` where ``new_data`` holds the 11 scaled
        feature columns followed by the label column, and ``features`` is the
        array of the selected column names.
    """
    with open('./fetal_health.csv') as file:
        rows = list(csv.reader(file))

    header, records = rows[0], rows[1:]
    features = np.array(header[:-1])
    data = np.array(records).astype("float")
    inputs, labels = data[:, :-1], data[:, -1]

    # Rank features against the label and keep the best eleven.
    kbest = SelectKBest(f_classif, k=11)
    kbest.fit(inputs, labels)
    features = features[kbest.get_support()]

    x_new = StandardScaler().fit_transform(kbest.transform(inputs))
    # Re-attach the label as the last column.
    new_data = np.concatenate((x_new, labels[:, np.newaxis]), axis=1)
    return new_data, features

# Divide Classes

In [None]:
def know_classes(data):
    """Group the rows of ``data`` by their class label.

    The label is the last element of each row.

    Args:
        data: Sequence of rows (lists or a 2-D array).

    Returns:
        dict: Maps each label to the list of rows carrying that label, in
        their original order.  Every input row appears exactly once.
    """
    sep_class = dict()
    for row in data:
        # setdefault creates the bucket AND lets us append the row that
        # triggered its creation; the previous version dropped that first row.
        sep_class.setdefault(row[-1], []).append(row)
    return sep_class

# Mean and SD of classes

In [None]:
def summary_dataset(data):
    """Return ``[mean, sd]`` for every feature column of ``data``.

    The last column is treated as the label and skipped.

    Args:
        data: 2-D NumPy array with at least one row.

    Returns:
        list: One ``[mean, sd]`` pair per feature column, where ``sd`` is the
        sample standard deviation (``n - 1`` denominator).  With a single row
        the deviation is reported as ``0.0``.
    """
    size = len(data[0])
    tot = len(data)
    summ_data = list()
    for i in range(size - 1):
        column = data[:, i]
        mean = sum(column) / tot
        if tot > 1:
            # The original ``sum(sd)/tot-1`` subtracted 1 from the variance
            # (operator precedence) and never took the square root.
            sd = sqrt(sum((column - mean) ** 2) / (tot - 1))
        else:
            sd = 0.0
        summ_data.append([mean, sd])
    return summ_data

def summary_class(sep_class):
    """Compute per-class ``[mean, sd]`` statistics for every feature column.

    Args:
        sep_class: Mapping of class label to the list of rows of that class;
            the last element of each row is the label and is skipped.

    Returns:
        tuple: ``(summ_class, classes)``.  ``classes`` lists the labels in the
        mapping's iteration order and ``summ_class[k]`` is the list of
        ``[mean, sd]`` pairs (``np.std``) for ``classes[k]``.
    """
    classes = list()
    summ_class = list()
    for label, members in sep_class.items():
        rows = np.array(members)
        n_cols = len(rows[0])
        n_rows = len(rows)
        classes.append(label)
        stats = [
            [sum(rows[:, col]) / n_rows, np.std(rows[:, col])]
            for col in range(n_cols - 1)
        ]
        summ_class.append(stats)
    return summ_class, classes

# Likelihood Estimation with Gaussian Distribution

In [None]:
def probal(x, mn, sd):
    """Gaussian density of ``x`` for mean ``mn`` and standard deviation ``sd``.

    A small constant (``1e-05``) is added to both the exponent's denominator
    and the normalising ``sd`` so a zero deviation does not divide by zero.
    """
    exponent = -((x - mn) ** 2 / (2 * sd ** 2 + 1e-05))
    normaliser = 1 / (sqrt(2 * pi) * (sd + 1e-05))
    return normaliser * exp(exponent)

# Calculate Posterior Probabilities

In [None]:
def calculate(summ_class,row,classes,sep_class):
    """Score ``row`` against every class: prior times per-feature likelihoods.

    Args:
        summ_class: ``summ_class[k][j]`` is the ``(mean, sd)`` pair of feature
            ``j`` for class ``classes[k]``.
        row: Feature values; elements beyond the number of summarised
            features (e.g. a trailing label) are ignored.
        classes: Class labels, aligned with ``summ_class``.
        sep_class: Mapping of class label to its training rows; used to
            derive the class priors.

    Returns:
        dict: Maps each class label to its unnormalised score
        ``P(class) * prod_j p(row[j] | class)``.
    """
    # Derive the prior from the rows actually supplied instead of a
    # hard-coded dataset size, so it stays correct for any training subset.
    total = sum(len(rows) for rows in sep_class.values())
    prob = dict()
    for i in range(len(summ_class)):
        label = classes[i]
        prob[label] = len(sep_class[label]) / float(total)
        for j in range(len(summ_class[0])):
            m, s = summ_class[i][j]
            prob[label] *= probal(row[j], m, s)
    return prob

def test_model(summ_class,test_data,classes,sep_class):
    """Classify every row of ``test_data`` and print the accuracy.

    Each row is scored with ``calculate`` and assigned the class with the
    highest score; the last column of ``test_data`` holds the true label.

    Returns:
        dict: ``{"true_Labels": <label column>, "pred": <predicted labels>}``.
    """
    labels = test_data[:, -1]
    s = len(test_data)

    pred = []
    for idx in range(int(s)):
        scores = calculate(summ_class, test_data[idx], classes, sep_class)
        pred.append(max(scores, key=scores.get))

    # Count rows whose predicted class matches the true label.
    acc = 0
    for truth, guess in zip(labels, pred):
        if truth == guess:
            acc += 1

    res = (acc / s) * 100
    print("Accuracy with on training " + str(res) + "%")
    return {"true_Labels": labels, "pred": pred}

# Training using KFold

In [None]:
# Load the scaled feature matrix (label in the last column) and the names of
# the selected feature columns.
data,features = load_dataset()
def cross_validation_split(dataset, fold):
    """Shuffle ``dataset``, split it into ``fold`` parts and evaluate each split.

    For every fold the model statistics are computed on the remaining rows
    only, then ``test_model`` prints the accuracy on the full (unshuffled)
    dataset followed by the accuracy on the held-out fold.

    Args:
        dataset: 2-D array whose last column is the class label.
        fold: Number of folds.

    Returns:
        numpy.ndarray: Object array holding the held-out rows of each fold.
    """
    data = dataset
    dataset = pd.DataFrame(dataset)
    dataset = dataset.sample(frac=1).reset_index(drop=True)
    n_rows = len(dataset)
    div = math.ceil(n_rows / fold)
    folds = []
    for i in range(0, fold):
        print("Fold---{0}".format(i))
        start = i * div
        # Clip so the last fold never addresses rows past the end.
        stop = min(start + div, n_rows)

        test_data = dataset.iloc[start:stop].to_numpy()
        folds.append(test_data)
        # Drop the WHOLE held-out fold; the previous range stopped
        # ``fold + 1`` rows early and leaked test rows into training.
        train_data = dataset.drop(range(start, stop), axis=0, inplace=False)
        train_data = train_data.to_numpy()

        sep_class = know_classes(train_data)
        summ_class, classes = summary_class(sep_class)
        # NOTE(review): the result of this full-dataset evaluation (and
        # summ_class / classes) stays local to this function; later cells
        # that read `confusion`, `summ_class` or `classes` at module level
        # will not see them - confirm how those names are meant to be set.
        test_model(summ_class, data, classes, sep_class)
        test_model(summ_class, test_data, classes, sep_class)
    return np.array(folds, dtype=object)

# Seed both generators: ``random.seed`` alone does not affect the shuffle,
# because ``DataFrame.sample`` draws from NumPy's global random state.
seed(1)
np.random.seed(1)
folds = cross_validation_split(data, 5)

Fold---0
Accuracy with on training 84.24270931326434%
Accuracy with on training 80.04694835680752%
Fold---1
Accuracy with on training 84.05456255879587%
Accuracy with on training 83.80281690140845%
Fold---2
Accuracy with on training 84.1486359360301%
Accuracy with on training 84.27230046948357%
Fold---3
Accuracy with on training 84.47789275634995%
Accuracy with on training 85.91549295774648%
Fold---4
Accuracy with on training 84.24270931326434%
Accuracy with on training 87.44075829383885%


# Confusion Matrix

In [None]:
def confusion_mat(confusion):
    """Build and print a 3x3 confusion matrix.

    Args:
        confusion: Dict with ``"true_Labels"`` and ``"pred"`` sequences whose
            values are the class labels 1, 2 and 3.

    Returns:
        numpy.ndarray: ``matrix[t - 1, p - 1]`` counts the samples with true
        label ``t`` that were predicted as ``p``.
    """
    actual = np.array(confusion["true_Labels"]).astype(int)
    guessed = np.array(confusion["pred"]).astype(int)
    matrix = np.zeros((3, 3))
    # Labels are 1-based, matrix indices are 0-based.
    for row, col in zip(actual - 1, guessed - 1):
        matrix[row, col] += 1
    print(matrix)
    return matrix

In [None]:
# Replace the {"true_Labels", "pred"} dict with its 3x3 confusion matrix.
# NOTE(review): in the visible code `confusion` is only assigned as a local
# inside cross_validation_split - confirm it exists at module level before
# this cell runs. Re-running this cell would also pass the matrix back in.
confusion = confusion_mat(confusion)

[[1428.  172.   55.]
 [  32.  245.   18.]
 [   6.   50.  120.]]


# Precision, Recall, F1-Score

In [None]:
#Precision
def precision0(cm):
    """Precision of the first class: ``cm[0][0]`` over the sum of column 0.

    Returns ``0.00`` when nothing was predicted as this class (zero column
    sum), otherwise the ratio rounded to two decimals.
    """
    predicted = cm[1][0] + cm[2][0] + cm[0][0]
    # Test the denominator directly instead of string-matching 'nan'; this
    # also avoids ZeroDivisionError for plain Python numbers.
    if predicted == 0:
        return 0.00
    p = cm[0][0] / predicted
    if math.isnan(p):
        return 0.00
    return round(p, 2)

def precision1(cm):
    """Precision of the second class: ``cm[1][1]`` over the sum of column 1.

    Returns ``0.00`` when nothing was predicted as this class (zero column
    sum), otherwise the ratio rounded to two decimals.
    """
    predicted = cm[0][1] + cm[1][1] + cm[2][1]
    # Test the denominator directly instead of string-matching 'nan'; this
    # also avoids ZeroDivisionError for plain Python numbers.
    if predicted == 0:
        return 0.00
    p1 = cm[1][1] / predicted
    if math.isnan(p1):
        return 0.00
    return round(p1, 2)

def precision2(cm):
    """Precision of the third class: ``cm[2][2]`` over the sum of column 2.

    Returns ``0.00`` when nothing was predicted as this class (zero column
    sum), otherwise the ratio rounded to two decimals.
    """
    predicted = cm[1][2] + cm[0][2] + cm[2][2]
    # Test the denominator directly instead of string-matching 'nan'; this
    # also avoids ZeroDivisionError for plain Python numbers.
    if predicted == 0:
        return 0.00
    p2 = cm[2][2] / predicted
    if math.isnan(p2):
        return 0.00
    return round(p2, 2)

#recall
def recall0(cm):
    """Recall of the first class: ``cm[0][0]`` over the sum of row 0.

    Returns ``0.00`` when the class has no true samples (zero row sum),
    otherwise the ratio rounded to two decimals.
    """
    actual = cm[0][1] + cm[0][2] + cm[0][0]
    # Test the denominator directly instead of string-matching 'nan'; this
    # also avoids ZeroDivisionError for plain Python numbers.
    if actual == 0:
        return 0.00
    p = cm[0][0] / actual
    if math.isnan(p):
        return 0.00
    return round(p, 2)

def recall1(cm):
    """Recall of the second class: ``cm[1][1]`` over the sum of row 1.

    Returns ``0.00`` when the class has no true samples (zero row sum),
    otherwise the ratio rounded to two decimals.
    """
    actual = cm[1][0] + cm[1][1] + cm[1][2]
    # Test the denominator directly instead of string-matching 'nan'; this
    # also avoids ZeroDivisionError for plain Python numbers.
    if actual == 0:
        return 0.00
    p1 = cm[1][1] / actual
    if math.isnan(p1):
        return 0.00
    return round(p1, 2)

def recall2(cm):
    """Recall of the third class: ``cm[2][2]`` over the sum of row 2.

    Returns ``0.00`` when the class has no true samples (zero row sum),
    otherwise the ratio rounded to two decimals.
    """
    actual = cm[2][1] + cm[2][0] + cm[2][2]
    # Test the denominator directly instead of string-matching 'nan'; this
    # also avoids ZeroDivisionError for plain Python numbers.
    if actual == 0:
        return 0.00
    p2 = cm[2][2] / actual
    if math.isnan(p2):
        return 0.00
    return round(p2, 2)

# f1 score
def f1_score(precision,recall):
    """Harmonic mean of ``precision`` and ``recall``.

    Returns ``0.0`` when both inputs are zero (their sum is zero) instead of
    dividing by zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return (2 * precision * recall) / denominator

# Per-class precision and recall from the confusion matrix.
p0, p1, p2 = precision0(confusion), precision1(confusion), precision2(confusion)
r0, r1, r2 = recall0(confusion), recall1(confusion), recall2(confusion)

# F1 combines each class's (already rounded) precision and recall.
f0, f1, f2 = f1_score(p0, r0), f1_score(p1, r1), f1_score(p2, r2)

print('\nfirst class :\nprecision: ', p0, 'recall: ', r0, 'f1-score: ', f0)
print('\nsecond class :\nprecision: ', p1, 'recall: ', r1, 'f1-score: ', f1)
print('\nthird class :\nprecision: ', p2, 'recall: ', r2, 'f1-score: ', f2)


first class :
precision:  0.97 recall:  0.86 f1-score:  0.9116939890710382

second class :
precision:  0.52 recall:  0.83 f1-score:  0.6394074074074073

third class :
precision:  0.62 recall:  0.68 f1-score:  0.6486153846153846


# Prior Probability

In [None]:
#Prior Probability
def prior_probability_function(sep_class):
    """Print the prior probability of classes 1, 2 and 3.

    Args:
        sep_class: Mapping with keys ``1.0``, ``2.0`` and ``3.0``, each
            holding the rows of that class.  The prior of a class is its row
            count divided by the combined row count of the three classes.
    """
    n_first = len(sep_class[1.0])
    n_second = len(sep_class[2.0])
    n_third = len(sep_class[3.0])
    total = n_first + n_second + n_third
    print("Prior probability of class 1:", n_first / total)
    print("Prior probability of class 2:", n_second / total)
    print("Prior probability of class 3:", n_third / total)

# Reload the full dataset, group its rows by class label and print the
# class priors computed from those groups.
data,features = load_dataset()
sep_class = know_classes(data)
prior_probability_function(sep_class)

Prior probability of class 1: 0.7790861987753179
Prior probability of class 2: 0.13848327837965144
Prior probability of class 3: 0.08243052284503062


# Posterior Probability

In [None]:
#Posterior Probability
# For every row, tabulate the per-class score returned by `calculate`
# (class prior multiplied by the per-feature Gaussian likelihoods).
print("Posterior Probabilities")
# Build the class statistics here from the module-level `sep_class`, so the
# cell does not depend on names that only exist inside another function.
# NOTE(review): the captured output may have come from different statistics
# (e.g. a cross-validation fold) - confirm which were intended.
summ_class, classes = summary_class(sep_class)
posterior_list = []
for row in data:
    # `calculate` takes four arguments; `sep_class` supplies the priors.
    posterior_dict = calculate(summ_class, row, classes, sep_class)
    posterior_list.append([posterior_dict[1], posterior_dict[2], posterior_dict[3]])
print(tabulate(posterior_list, headers=[1, 2, 3]))

Posterior Probabilities
          1             2            3
-----------  ------------  -----------
5.9013e-11   3.37153e-12   1.48464e-08
3.68154e-05  4.84264e-28   9.0123e-19
2.03017e-05  1.27122e-14   5.31195e-13
1.41282e-05  3.46249e-15   5.67025e-13
1.10135e-05  2.84326e-36   5.00706e-22
1.31379e-40  8.01061e-60   4.56923e-14
3.04947e-66  1.57353e-82   3.04606e-16
4.43683e-08  6.20522e-10   1.24489e-08
1.06723e-07  7.47542e-10   1.68935e-08
7.20732e-08  4.90348e-10   1.25464e-08
6.66786e-07  2.26769e-05   3.63188e-10
7.88332e-07  1.87272e-05   4.44246e-10
7.0358e-05   3.87618e-21   3.70617e-15
6.22243e-05  3.61236e-50   4.58262e-28
1.38571e-09  7.04349e-33   7.92886e-18
1.76593e-08  8.12015e-31   7.28214e-18
4.58566e-06  1.89255e-30   9.11902e-18
3.41995e-18  5.64417e-18   5.80549e-11
3.61616e-08  3.97152e-16   2.43595e-11
1.29746e-07  1.89953e-22   2.20914e-14
5.61628e-41  1.06853e-35   1.24708e-09
1.69256e-17  6.13792e-27   1.34396e-14
1.45841e-48  8.47063e-50   3.63381e-10
1.

# Contingency

In [None]:
def contingency(data):
    """Build one value-by-class count table per feature column.

    Args:
        data: 2-D NumPy array; the last column is the class label (expected
            to be 1, 2 or 3), all other columns are features.

    Returns:
        list: One array per feature column with a row per distinct feature
        value (ascending).  Column 0 holds the value itself and columns 1-3
        hold how many rows with that value belong to class 1, 2 and 3.
    """
    x = data[:, :-1]
    labels = data[:, -1].astype(int)

    counter = []
    # Iterate over the columns actually present rather than a fixed 11.
    for col in range(x.shape[1]):
        # `inverse[r]` is the table row of the value found in data row r.
        values, inverse = np.unique(x[:, col], return_inverse=True)
        table = np.zeros((len(values), 4))
        # Unbuffered add so repeated (value, class) pairs are all counted.
        np.add.at(table, (inverse, labels), 1)
        # Column 0 carries the feature value, written last as before.
        table[:, 0] = values
        counter.append(table)
    return counter

# Print one contingency table (feature value vs. class counts) per feature.
counter = contingency(data)
for position, feature_name in enumerate(features):
    table = counter[position]
    print("contengency table for " + feature_name + " and  classes is:\n")
    print(tabulate(list(table), headers=[feature_name, 1, 2, 3]))
    print("")

contengency table for baseline value and  classes is:

  baseline value    1    2    3
----------------  ---  ---  ---
      -2.7752       7    0    0
      -2.36863     15    0    6
      -2.16535     16    0    0
      -1.96207     11    0    0
      -1.86043     28    0    0
      -1.75878      5    0    0
      -1.65714      2    0    0
      -1.5555       9    0    0
      -1.45386     17    0    0
      -1.35222     60    2   16
      -1.25058     37    1    6
      -1.14894    106    0    3
      -1.0473      41    7    8
      -0.945655     7    0    3
      -0.844014    86    4    1
      -0.742373    38    0    0
      -0.640732    58    2    0
      -0.53909     65    7   13
      -0.437449    41    4   12
      -0.335808    93    5   13
      -0.234167    50    1    5
      -0.132526    67    4    5
      -0.0308844   99   11   26
       0.0707568   50    1   16
       0.172398    52    7    6
       0.274039    69    2    1
       0.37568     41   17    1
       0.477322  