# Advanced Predictive Analysis 
# Theory Digital Assessment - 4
# NAME: PRATHIBAN V
# REG NO: 19MID0010
# Use Decision Tree, ANN, Naive Bayers and KNN algorithms to TCS, infosys, Reliance dataset and compare their accuracy 

### Import Libraries

In [70]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

from sklearn.neighbors import KNeighborsClassifier

from sklearn.naive_bayes import GaussianNB

### Load TCS, Reliance, Infosys Datasets

In [80]:
def load_data():
    tcs_data=pd.read_csv("TCS.NS.csv")
    reliance_data=pd.read_csv("RELIANCE.NS.csv")
    infosys_data=pd.read_csv("INFY.NS.csv")
    
    return tcs_data,reliance_data,infosys_data

### Split Dataset

In [81]:
def split_data(data):
    X=data.values[:,0:5]
    y=data.values[:,5]
    
    return X,y

In [82]:
def train_test(X,y):
    
    X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=42)
    
    return X_train,X_test,y_train,y_test

# Algorithms 

### (i)Decision Tree

In [83]:
def DecisionTree(X_train,y_train,X_test):
    dt=DecisionTreeClassifier(criterion="entropy",random_state=100,max_depth=3,min_samples_leaf=5)
    dt.fit(X_train,y_train)
    y_pred=dt.predict(X_test)
    
    return (accuracy_score(y_test,y_pred)*100)

### (ii)Aritificial Neural Networks

In [91]:
def ANN(data):
    
    label=LabelEncoder()
    data['class']=label.fit_transform(data['class'])
    
    X=data.values[:,0:5]
    y=data.values[:,5]
    
    model = Sequential()
    model.add(Dense(12, input_shape=(5,), activation='sigmoid'))
    model.add(Dense(5, activation='sigmoid'))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    model.fit(X, y, epochs=100, batch_size=10)
    
    _, accuracy = model.evaluate(X, y)
    return (accuracy*100) 

### (iii)K-Nearest Neighbours

In [92]:
def KNN(X_train,y_train,X_test,y_test):
    knn = KNeighborsClassifier(n_neighbors=7)
  
    knn.fit(X_train, y_train)
    
    return (knn.score(X_test, y_test)*100)

### (iv)Naive Bayes

In [93]:
def NB(X_train,y_train,X_test,y_test):
    
    gnb = GaussianNB()
    gnb.fit(X_train, y_train)
    
    y_pred = gnb.predict(X_test)
    
    return (accuracy_score(y_test,y_pred)*100)

### Load Datasets

In [94]:
tcs_data,reliance_data,infosys_data=load_data()

# TCS Data

In [95]:
X,y=split_data(tcs_data)
X_train,X_test,y_train,y_test=train_test(X,y)

tcs_dt_acc=DecisionTree(X_train,y_train,X_test)
print("Accuracy of tcs data using DT :",tcs_dt_acc) 

tcs_knn_acc=KNN(X_train,y_train,X_test,y_test)
print("Accuracy of tcs data using KNN :",tcs_knn_acc)

tcs_nb_acc=NB(X_train,y_train,X_test,y_test)
print("Accuracy of tcs data using NB :",tcs_nb_acc)

tcs_ann_acc=ANN(tcs_data)
print("Accuracy of tcs data using ANN : %.2f"%tcs_ann_acc)

Accuracy of tcs data using DT : 43.66576819407008
Accuracy of tcs data using KNN : 48.24797843665768
Accuracy of tcs data using NB : 54.71698113207547
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100


# Reliance Data

In [96]:
X,y=split_data(reliance_data)
X_train,X_test,y_train,y_test=train_test(X,y)

rel_dt_acc=DecisionTree(X_train,y_train,X_test)
print("Accuracy of tcs data using DT :",rel_dt_acc)

rel_knn_acc=KNN(X_train,y_train,X_test,y_test)
print("Accuracy of Reliance data using KNN :",rel_knn_acc)                                                                                                                   

rel_nb_acc=NB(X_train,y_train,X_test,y_test)
print("Accuracy of Reliance data using NB :",rel_nb_acc)

rel_ann_acc=ANN(reliance_data)
print("Accuracy of Reliance data using ANN : %.2f"%rel_ann_acc)

Accuracy of tcs data using DT : 52.2911051212938
Accuracy of Reliance data using KNN : 52.83018867924528
Accuracy of Reliance data using NB : 52.56064690026954
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoc

# Infosys Data

In [97]:
X,y=split_data(infosys_data)
X_train,X_test,y_train,y_test=train_test(X,y)

info_dt_acc=DecisionTree(X_train,y_train,X_test)
print("Accuracy of tcs data using DT :",info_dt_acc)

info_knn_acc=KNN(X_train,y_train,X_test,y_test)
print("Accuracy of Infosys data using KNN :",info_knn_acc)                                                                                                                   

info_nb_acc=NB(X_train,y_train,X_test,y_test)
print("Accuracy of Infosys data using NB :",info_nb_acc)                  

info_ann_acc=ANN(infosys_data)
print("Accuracy of Infosys data using ANN : %.2f"%info_ann_acc)

Accuracy of tcs data using DT : 57.14285714285714
Accuracy of Infosys data using KNN : 49.865229110512125
Accuracy of Infosys data using NB : 53.09973045822103
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoc

### Accuracy

In [98]:
df=pd.DataFrame()
df['Decision Tree']=[tcs_dt_acc,rel_dt_acc,info_dt_acc]
df['ANN']=[tcs_ann_acc,rel_ann_acc,info_ann_acc]
df['KNN']=[tcs_knn_acc,rel_knn_acc,info_knn_acc]
df['Naive Bayes']=[tcs_nb_acc,rel_nb_acc,info_nb_acc]

In [102]:
df.index=['TCS','Reliance','Infosys']

In [103]:
df

Unnamed: 0,Decision Tree,ANN,KNN,Naive Bayes
TCS,43.665768,52.550608,48.247978,54.716981
Reliance,52.291105,52.307695,52.830189,52.560647
Infosys,57.142857,54.089069,49.865229,53.09973


# Displaying data with algorithm providing maximum accuracy

In [104]:
df.idxmax(axis=1)

TCS           Naive Bayes
Reliance              KNN
Infosys     Decision Tree
dtype: object

# By comparing the accuracy of the algorithms with three different dataset we can see that Naive Bayers gives a steady good accuracy 