In [1]:
import numpy as np
import pandas as pd

In [3]:
#Reading the dataset
df = pd.read_csv("Classified Data",index_col=0)
df

Unnamed: 0,WTT,PTI,EQW,SBI,LQE,QWG,FDJ,PJF,HQE,NXJ,TARGET CLASS
0,0.913917,1.162073,0.567946,0.755464,0.780862,0.352608,0.759697,0.643798,0.879422,1.231409,1
1,0.635632,1.003722,0.535342,0.825645,0.924109,0.648450,0.675334,1.013546,0.621552,1.492702,0
2,0.721360,1.201493,0.921990,0.855595,1.526629,0.720781,1.626351,1.154483,0.957877,1.285597,0
3,1.234204,1.386726,0.653046,0.825624,1.142504,0.875128,1.409708,1.380003,1.522692,1.153093,1
4,1.279491,0.949750,0.627280,0.668976,1.232537,0.703727,1.115596,0.646691,1.463812,1.419167,1
5,0.833928,1.523302,1.104743,1.021139,1.107377,1.010930,1.279538,1.280677,0.510350,1.528044,0
6,0.944705,1.251761,1.074885,0.286473,0.996440,0.428860,0.910805,0.755305,1.111800,1.110842,0
7,0.816174,1.088392,0.895343,0.243860,0.943123,1.045131,1.146536,1.341886,1.225324,1.425784,0
8,0.776551,1.463812,0.783825,0.337278,0.742215,1.072756,0.880300,1.312951,1.118165,1.225922,0
9,0.772280,0.515111,0.891596,0.940862,1.430568,0.885876,1.205231,0.596858,1.542580,0.981879,1


In [4]:
#Scaling the data so as to apply PCA
#Since Naive Bayes works well with independent attributes and PCA gives us independent attributes we apply PCA before
#applying Gaussian Naive Bayes
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(df.drop('TARGET CLASS',axis=1))
scaled_features = scaler.transform(df.drop('TARGET CLASS',axis=1))

In [7]:
df_feat = pd.DataFrame(scaled_features,columns=df.columns[:-1])
df_feat

Unnamed: 0,WTT,PTI,EQW,SBI,LQE,QWG,FDJ,PJF,HQE,NXJ
0,-0.123542,0.185907,-0.913431,0.319629,-1.033637,-2.308375,-0.798951,-1.482368,-0.949719,-0.643314
1,-1.084836,-0.430348,-1.025313,0.625388,-0.444847,-1.152706,-1.129797,-0.202240,-1.828051,0.636759
2,-0.788702,0.339318,0.301511,0.755873,2.031693,-0.870156,2.599818,0.285707,-0.682494,-0.377850
3,0.982841,1.060193,-0.621399,0.625299,0.452820,-0.267220,1.750208,1.066491,1.241325,-1.026987
4,1.139275,-0.640392,-0.709819,-0.057175,0.822886,-0.936773,0.596782,-1.472352,1.040772,0.276510
5,-0.399853,1.591707,0.928649,1.477102,0.308440,0.263270,1.239716,0.722608,-2.206816,0.809900
6,-0.017189,0.534949,0.826189,-1.723636,-0.147547,-2.010505,-0.206348,-1.096313,-0.158215,-1.233974
7,-0.461182,-0.100835,0.210071,-1.909291,-0.366695,0.396875,0.718122,0.934523,0.228458,0.308929
8,-0.598054,1.360189,-0.172618,-1.502292,-1.192485,0.504787,-0.325981,0.834346,-0.136536,-0.670199
9,-0.612806,-2.331876,0.197211,1.127356,1.636853,-0.225233,0.948308,-1.644881,1.309064,-1.865764


In [8]:
#Applying PCA to get independent attributes
from sklearn.decomposition import PCA
pca = PCA(n_components=8)
pca.fit(df_feat)
x_pca=pca.transform(df_feat)

In [9]:
#Creating a training,test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x_pca,df['TARGET CLASS'],
                                                    test_size=0.30)

In [10]:
#importing Gaussian Naive Bayes from sklearn
from sklearn.naive_bayes import GaussianNB
#Training a classifier
clf = GaussianNB()
clf.fit(X_train, y_train)
#Testing using Classifier
pred = clf.predict(X_test)


In [11]:
#Finding the different accuracy measures
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,pred))


[[140  12]
 [  7 141]]


In [12]:
print(classification_report(y_test,pred))

             precision    recall  f1-score   support

          0       0.95      0.92      0.94       152
          1       0.92      0.95      0.94       148

avg / total       0.94      0.94      0.94       300



In [None]:
#Even though PCA gives us independent attributes which are very useful while applying Naive Bayes yet sometimes PCA 
#results in a lot of information loss which may not give us desired results.So it is always better to apply Naive Bayes 
#with and without PCA.

In [13]:
#Without PCA
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_feat,df['TARGET CLASS'],
                                                    test_size=0.30)
from sklearn.naive_bayes import GaussianNB
#Training a classifier
clf = GaussianNB()
clf.fit(X_train, y_train)
#Testing using Classifier
pred = clf.predict(X_test)
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,pred))

print(classification_report(y_test,pred))


[[149   9]
 [ 10 132]]
             precision    recall  f1-score   support

          0       0.94      0.94      0.94       158
          1       0.94      0.93      0.93       142

avg / total       0.94      0.94      0.94       300

