In [128]:
import pandas as pd
import numpy as np
import random
import tensorflow as tf
import keras
from keras.models import Sequential,Model,load_model
from keras.layers import Dense, Activation, Dropout



In [129]:
class NSL_KDD_handler:
    def dense_to_one_hot(self,labels_dense, num_classes=10):
        """Convert class labels from scalars to one-hot vectors."""
        num_labels = labels_dense.shape[0]
        index_offset = np.arange(num_labels) * num_classes
        labels_one_hot = np.zeros((num_labels, num_classes))
        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
        return labels_one_hot

    
    def dataReaderAndEncoder(self,name_of_file):
        print('reading',name_of_file)
        data=pd.read_csv(name_of_file,sep=',',names=['duration','protocol_type','service','flag','src_bytes',
                                                                    'dst_bytes','land','wrong_fragment','urgent','hot',
                                                                    'num_failed_logins','logged_in','num_compromised',
                                                                    'root_shell','su_attempted','num_root','num_file_creations',
                                                                    'num_shells','num_access_files','num_outbound_cmds',
                                                                    'is_host_logi','is_guest_login','count','srv_count',
                                                                    'serror_rate','srv_serror_rate','rerror_rate',
                                                                    'srv_rerror_rate','same_srv_rate','diff_srv_rate',
                                                                    'srv_diff_host_rate','dst_host_count','dst_host_srv_count',
                                                                    'dst_host_same_srv_rate','dst_host_diff_srv_rate',
                                                                    'dst_host_same_src_port_rate','dst_host_srv_diff_host_rate',
                                                                    'dst_host_serror_rate','dst_host_srv_serror_rate',
                                                                    'dst_host_rerror_rate','dst_host_srv_rerror_rate',
                                                                    'class','diff'])

        data['protocol_type']=data['protocol_type'].map({'tcp':1,'udp':2, 'icmp':3})
        data['protocol_type']=NSL_KDD_handler.dense_to_one_hot(self,data['protocol_type'], num_classes=3)
        data['service']=data['service'].map({'ftp_data':1,'other':2,'private':3,'http':4,'remote_job':5,'name':6,
                                                 'netbios_ns':7,'eco_i':8,'mtp':9,'telnet':10,'finger':11,'domain_u':12,
                                                 'supdup':13,'uucp_path':14,'Z39_50':15,'smtp':16,'csnet_ns':17, 'uucp':18,
                                                 'netbios_dgm':19,'urp_i':20,'auth':21, 'domain':22, 'ftp':23, 'bgp':24,
                                                 'ldap':25, 'ecr_i':26,'gopher':27, 'vmnet':28, 'systat':29, 'http_443':30,
                                                 'efs':31, 'whois':32, 'imap4':33,'iso_tsap':34, 'echo':35, 'klogin':36,
                                                 'link':37, 'sunrpc':38, 'login':39,'kshell':40,'sql_net':41, 'time':42,
                                                 'hostnames':43 ,'exec':44, 'ntp_u':45, 'discard':46,'nntp':47, 'courier':48,
                                                 'ctf':49, 'ssh':50,'daytime':51, 'shell':52,'netstat':53,'pop_3':54,
                                                 'nnsp':55,'IRC':56, 'pop_2':57,'printer':58,'tim_i':59,'pm_dump':60,
                                                 'red_i':61, 'netbios_ssn':62, 'rje':63, 'X11':64, 'urh_i':65,'http_8001':66,
                                                 'aol':67,'http_2784':68, 'tftp_u':69,'harvest':70})
        data['flag']=data['flag'].map({'SF':1,'S0':2,'REJ':3,'RSTR':4,'SH':5,'RSTO':6,'S1':7,'RSTOS0':8,'S3':9,'S2':10,'OTH':11})
    
        return data
    
    
    def classEncoder(self,data,number_of_class,file):
        print("1-N Encoding done")
    #dos=1,probe=2,r2l=3,u2r=4
        if (file == 'train' and number_of_class == 23):
            data=data.map({'normal':0,'neptune':1,'warezclient':2,'ipsweep':3,'portsweep':4,'teardrop':5,
                                         'nmap':6,'satan':7,'smurf':8,'pod':9,'back':10,'guess_passwd':11,'ftp_write':12,
                                         'multihop':13,'rootkit':14,'buffer_overflow':15,'imap':16,'warezmaster':17,
                                         'phf':18,'land':19,'loadmodule':20,'spy':21,'perl':22})
    
        
        if (file=='train' and number_of_class==5):
   
            data=data.map({'normal':0,'neptune':1,'warezclient':3,'ipsweep':2,'portsweep':2,'teardrop':1,
                                             'nmap':2,'satan':2,'smurf':1,'pod':1,'back':1,'guess_passwd':3,'ftp_write':3,
                                             'multihop':3,'rootkit':4,'buffer_overflow':4,'imap':3,'warezmaster':3,
                                             'phf':3,'land':1,'loadmodule':4,'spy':3,'perl':4})

        if (file=='train' and number_of_class==2):

            data=data.map({'normal':0,'neptune':1,'warezclient':1,'ipsweep':1,'portsweep':1,'teardrop':1,
                                             'nmap':1,'satan':1,'smurf':1,'pod':1,'back':1,'guess_passwd':1,'ftp_write':1,
                                             'multihop':1,'rootkit':1,'buffer_overflow':1,'imap':1,'warezmaster':1,
                                             'phf':1,'land':1,'loadmodule':1,'spy':1,'perl':1})

        if (file=='test' and number_of_class==5):

            data=data.map({'guess_passwd':3,'snmpguess':3,'processtable':1,'normal':0,'nmap':2,
                                             'back':1,'neptune':1,'satan':2,'saint':2,'mscan':2,'apache2':1,'httptunnel':3,
                                             'warezmaster':3,'ipsweep':2,'smurf':1,'mailbomb':3,'teardrop':1,'portsweep':2,
                                             'snmpgetattack':3, 'multihop':3,'worm':1,'land':1,'sendmail':3,'buffer_overflow':4,
                                             'pod':1,'rootkit':4,'xlock':3,'xterm':4,'xsnoop':3,'ps':4,'named':3,
                                             'ftp_write':3,'loadmodule':4,'phf':3,'udpstorm':1,'perl':4,'sqlattack':4,'imap':3})
        if (file=='test' and number_of_class==2):
   
            data=data.map({'guess_passwd':1,'snmpguess':1,'processtable':1,'normal':0,'nmap':1,
                                             'back':1,'neptune':1,'satan':1,'saint':1,'mscan':1,'apache2':1,'httptunnel':1,
                                             'warezmaster':1,'ipsweep':1,'smurf':1,'mailbomb':1,'teardrop':1,'portsweep':1,
                                             'snmpgetattack':1, 'multihop':1,'worm':1,'land':1,'sendmail':1,'buffer_overflow':1,
                                             'pod':1,'rootkit':1,'xlock':1,'xterm':1,'xsnoop':1,'ps':1,'named':1,
                                             'ftp_write':1,'loadmodule':1,'phf':1,'udpstorm':1,'perl':1,'sqlattack':1,'imap':1})
        return data
    
    
    def getData(self,percent_of_data,number_of_class):
    #reading training data
        if(percent_of_data!=20):
            data_training=NSL_KDD_handler.dataReaderAndEncoder(self,'training_data_for_handler.txt')
        else:
            data_training=NSL_KDD_handler.dataReaderAndEncoder(self,'20%for_handler.txt')
        #reading testing data
        data_testing=NSL_KDD_handler.dataReaderAndEncoder(self,'testing_data_for_handler.txt')
        #splitting data in labels and features
        training_labels=data_training['class']
        training_labels=NSL_KDD_handler.classEncoder(self,training_labels,number_of_class,'train')
        training_features=data_training.drop('class',1)
        training_features=training_features.drop('diff',1)
        

        #splitting testing data
        testing_labels=data_testing['class']
        testing_labels=NSL_KDD_handler.classEncoder(self,testing_labels,number_of_class,'test')
        testing_features=data_testing.drop('class',1)
        testing_features=testing_features.drop('diff',1)


        return training_features,training_labels,testing_features,testing_labels

In [131]:
datasetObj=NSL_KDD_handler()
n_classes=5
xtrain,ytrain,xtest,ytest=datasetObj.getData(100,n_classes)



reading training_data_for_handler.txt
reading testing_data_for_handler.txt
1-N Encoding done
1-N Encoding done


In [132]:
class nids:
    def modelcreator(self,n_inputs,n_hidden_units):
        model=Sequential()
        model.add(Dense(input_dim=n_inputs, output_dim=n_hidden_units,bias=True))
        model.add(Activation("relu"))
        model.add(Dense(output_dim=n_hidden_units,bias=True))
        model.add(Activation("relu"))
        model.add(Dense(output_dim=n_hidden_units,bias=True))
        model.add(Activation("relu"))
        model.add(Dense(output_dim=1))
        model.add(Activation("sigmoid"))
        model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        print("Model created")
        print("Input units: ",n_inputs)
        print("Hidden units: ",n_hidden_units)
        return model
    
    def GetData(self,attack,names,datasetObj):
        data_training=datasetObj.dataReaderAndEncoder('training_data_for_handler.txt')
        data_testing=datasetObj.dataReaderAndEncoder('testing_data_for_handler.txt')
        if(attack=='dos'):
            print("DoS label encoding done")
            data_training['class']=data_training['class'].map({'normal':0,'neptune':1,'warezclient':0,'ipsweep':0,'portsweep':0,'teardrop':1,
                                                 'nmap':0,'satan':0,'smurf':1,'pod':1,'back':1,'guess_passwd':0,'ftp_write':0,
                                                 'multihop':0,'rootkit':0,'buffer_overflow':0,'imap':0,'warezmaster':0,
                                                 'phf':0,'land':1,'loadmodule':0,'spy':0,'perl':0})
            data_testing['class']=data_testing['class'].map({'guess_passwd':0,'snmpguess':0,'processtable':1,'normal':0,'nmap':0,
                                                 'back':1,'neptune':1,'satan':0,'saint':0,'mscan':0,'apache2':1,'httptunnel':0,
                                                 'warezmaster':0,'ipsweep':0,'smurf':1,'mailbomb':0,'teardrop':1,'portsweep':0,
                                                 'snmpgetattack':0, 'multihop':0,'worm':1,'land':1,'sendmail':0,'buffer_overflow':0,
                                                 'pod':1,'rootkit':0,'xlock':0,'xterm':0,'xsnoop':0,'ps':0,'named':0,
                                                 'ftp_write':0,'loadmodule':0,'phf':0,'udpstorm':1,'perl':0,'sqlattack':0,'imap':0})

        if(attack=='probe'):
            print("Probe label encoding done")
            data_training['class']=data_training['class'].map({'normal':0,'neptune':0,'warezclient':0,'ipsweep':1,'portsweep':1,'teardrop':0,
                                                 'nmap':1,'satan':1,'smurf':0,'pod':0,'back':0,'guess_passwd':0,'ftp_write':0,
                                                 'multihop':0,'rootkit':0,'buffer_overflow':0,'imap':0,'warezmaster':0,
                                                 'phf':0,'land':0,'loadmodule':0,'spy':0,'perl':0})
            data_testing['class']=data_testing['class'].map({'guess_passwd':0,'snmpguess':0,'processtable':0,'normal':0,'nmap':1,
                                                 'back':0,'neptune':0,'satan':1,'saint':1,'mscan':1,'apache2':0,'httptunnel':0,
                                                 'warezmaster':0,'ipsweep':1,'smurf':0,'mailbomb':0,'teardrop':0,'portsweep':1,
                                                 'snmpgetattack':0, 'multihop':0,'worm':0,'land':0,'sendmail':0,'buffer_overflow':0,
                                                 'pod':0,'rootkit':0,'xlock':0,'xterm':0,'xsnoop':0,'ps':0,'named':0,
                                                 'ftp_write':0,'loadmodule':0,'phf':0,'udpstorm':0,'perl':0,'sqlattack':0,'imap':0})
        if(attack=='r2l'):
            print("R2L label encoding done ")
            data_training['class']=data_training['class'].map({'normal':0,'neptune':0,'warezclient':1,'ipsweep':0,'portsweep':0,'teardrop':0,
                                                 'nmap':0,'satan':0,'smurf':0,'pod':0,'back':0,'guess_passwd':1,'ftp_write':1,
                                                 'multihop':1,'rootkit':0,'buffer_overflow':0,'imap':1,'warezmaster':1,
                                                 'phf':1,'land':0,'loadmodule':0,'spy':1,'perl':0})
            data_testing['class']=data_testing['class'].map({'guess_passwd':1,'snmpguess':1,'processtable':0,'normal':0,'nmap':0,
                                                 'back':0,'neptune':0,'satan':0,'saint':0,'mscan':0,'apache2':0,'httptunnel':1,
                                                 'warezmaster':1,'ipsweep':0,'smurf':0,'mailbomb':1,'teardrop':0,'portsweep':0,
                                                 'snmpgetattack':1, 'multihop':1,'worm':0,'land':0,'sendmail':1,'buffer_overflow':0,
                                                 'pod':0,'rootkit':0,'xlock':1,'xterm':0,'xsnoop':1,'ps':0,'named':1,
                                                 'ftp_write':1,'loadmodule':0,'phf':3,'udpstorm':0,'perl':0,'sqlattack':0,'imap':1})

        if(attack=='u2r'):
            print("U2R label encoding done")
            data_training['class']=data_training['class'].map({'normal':0,'neptune':0,'warezclient':0,'ipsweep':0,'portsweep':0,'teardrop':0,
                                                 'nmap':0,'satan':0,'smurf':0,'pod':0,'back':0,'guess_passwd':0,'ftp_write':0,
                                                 'multihop':0,'rootkit':1,'buffer_overflow':1,'imap':0,'warezmaster':0,
                                                 'phf':0,'land':0,'loadmodule':1,'spy':0,'perl':1})
            data_testing['class']=data_testing['class'].map({'guess_passwd':0,'snmpguess':0,'processtable':0,'normal':0,'nmap':0,
                                                 'back':0,'neptune':0,'satan':0,'saint':0,'mscan':0,'apache2':0,'httptunnel':0,
                                                 'warezmaster':0,'ipsweep':0,'smurf':0,'mailbomb':0,'teardrop':0,'portsweep':0,
                                                 'snmpgetattack':0, 'multihop':0,'worm':0,'land':0,'sendmail':0,'buffer_overflow':0,
                                                 'pod':0,'rootkit':1,'xlock':0,'xterm':1,'xsnoop':0,'ps':1,'named':0,
                                                 'ftp_write':0,'loadmodule':1,'phf':0,'udpstorm':0,'perl':1,'sqlattack':1,'imap':0})
        if(attack=='normal'):
            print("Normal label encoding done")
            data_training["class"]=data_training['class'].map({'normal':0,'neptune':1,'warezclient':1,'ipsweep':1,'portsweep':1,'teardrop':1,
                                                 'nmap':1,'satan':1,'smurf':1,'pod':1,'back':1,'guess_passwd':1,'ftp_write':1,
                                                 'multihop':1,'rootkit':1,'buffer_overflow':1,'imap':1,'warezmaster':1,
                                                 'phf':1,'land':1,'loadmodule':1,'spy':1,'perl':1})

            data_testing['class']=data_testing['class'].map({'guess_passwd':1,'snmpguess':1,'processtable':1,'normal':0,'nmap':1,
                                                 'back':1,'neptune':1,'satan':1,'saint':1,'mscan':1,'apache2':1,'httptunnel':1,
                                                 'warezmaster':1,'ipsweep':1,'smurf':1,'mailbomb':1,'teardrop':1,'portsweep':1,
                                                 'snmpgetattack':1, 'multihop':1,'worm':1,'land':1,'sendmail':1,'buffer_overflow':1,
                                                 'pod':1,'rootkit':1,'xlock':1,'xterm':1,'xsnoop':1,'ps':1,'named':1,
                                                 'ftp_write':1,'loadmodule':1,'phf':1,'udpstorm':1,'perl':1,'sqlattack':1,'imap':1})
        xtrain=data_training
        xtest=data_testing
        ytrain=xtrain['class']
        xtrain=xtrain[names]
        xtest=data_testing[names]
        ytest=data_testing['class']

        return xtrain,ytrain,xtest,ytest

In [133]:
nidsObj=nids()

#                                          SYSTEM ARCHITECTURE
![alt text](systemarchitecture.png "SYSTEM ARCHITECTURE")

# PHASE 1 TRAINING

In [134]:
#dos
dosnames=['duration','protocol_type','flag','src_bytes','count','dst_host_same_srv_rate','dst_host_serror_rate',
          'dst_host_srv_serror_rate','dst_host_rerror_rate']
xtrain,ytrain,xtest,ytest=nidsObj.GetData(attack="dos",names=dosnames,datasetObj=datasetObj)
dosDetector=nidsObj.modelcreator(n_inputs=9,n_hidden_units=7)

reading training_data_for_handler.txt
reading testing_data_for_handler.txt
DoS label encoding done
Model created
Input units:  9
Hidden units:  7


In [135]:
#probe
probenames=['duration','protocol_type','service','flag','src_bytes']
xtrain,ytrain,xtest,ytest=nidsObj.GetData(attack="probe",names=probenames,datasetObj=datasetObj)
ProbeDetector=nidsObj.modelcreator(5,3)


reading training_data_for_handler.txt
reading testing_data_for_handler.txt
Probe label encoding done
Model created
Input units:  5
Hidden units:  3


In [136]:
#u2r
u2rnames=['hot','num_compromised','root_shell','num_root','num_file_creations','num_shells','num_access_files','is_host_logi']
xtrain,ytrain,xtest,ytest=nidsObj.GetData(attack="u2r",names=u2rnames,datasetObj=datasetObj)
u2rdetector=nidsObj.modelcreator(8,6)
u2rdetector.fit(xtrain.as_matrix(),ytrain,nb_epoch=2)

reading training_data_for_handler.txt
reading testing_data_for_handler.txt
U2R label encoding done
Model created
Input units:  8
Hidden units:  6
Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0xad477e8898>

In [137]:
#r2l
r2lnames=['duration','protocol_type','service','flag','src_bytes','hot','num_failed_logins','logged_in','num_compromised'
         ,'num_file_creations','num_shells','num_access_files','is_host_logi','is_guest_login']
xtrain,ytrain,xtest,ytest=nidsObj.GetData(attack="r2l",names=r2lnames,datasetObj=datasetObj)
r2lDetector=nidsObj.modelcreator(14,10)


reading training_data_for_handler.txt
reading testing_data_for_handler.txt
R2L label encoding done 
Model created
Input units:  14
Hidden units:  10


In [138]:
#normal
normalnames=['duration','protocol_type','service','flag','src_bytes','hot','num_failed_logins','logged_in','num_compromised'
         ,'num_file_creations','num_shells','num_access_files','is_host_logi','is_guest_login','dst_host_same_srv_rate','dst_host_serror_rate',
          'dst_host_srv_serror_rate','dst_host_rerror_rate','root_shell','num_root','count']
xtrain,ytrain,xtest,ytest=nidsObj.GetData("normal",normalnames,datasetObj=datasetObj)
normalDetector=nidsObj.modelcreator(21,20)

reading training_data_for_handler.txt
reading testing_data_for_handler.txt
Normal label encoding done
Model created
Input units:  21
Hidden units:  20


In [139]:
dosDetector=load_model('all_dos.h5')
ProbeDetector=load_model('all_probe.h5')
r2lDetector=load_model('all_r2l.h5')
normalDetector=load_model('best_acc_normal_detector.h5')

# PHASE 2 Training

In [140]:
dos_xtrain,dos_ytrain,dos_xtest,dos_ytest=nidsObj.GetData(attack="dos",names=dosnames,datasetObj=datasetObj)
probe_xtrain,probe_ytrain,probe_xtest,probe_ytest=nidsObj.GetData(attack="probe",names=probenames,datasetObj=datasetObj)
u2r_xtrain,u2r_ytrain,u2r_xtest,u2r_ytest=nidsObj.GetData(attack="u2r",names=u2rnames,datasetObj=datasetObj)
r2l_xtrain,r2l_ytrain,r2l_xtest,r2l_ytest=nidsObj.GetData(attack="r2l",names=r2lnames,datasetObj=datasetObj)
normal_xtrain,normal_ytrain,normal_xtest,normal_ytest=nidsObj.GetData(attack="normal",names=normalnames,datasetObj=datasetObj)


finaltrainingProb=np.concatenate((normalDetector.predict_proba(normal_xtrain.as_matrix()),
                                 dosDetector.predict_proba(dos_xtrain.as_matrix()),
                                 ProbeDetector.predict_proba(probe_xtrain.as_matrix()),
                                 r2lDetector.predict_proba(r2l_xtrain.as_matrix()),
                                 u2rdetector.predict_proba(u2r_xtrain.as_matrix()),
                                 ),axis=1)

finaltestingProb=np.concatenate((normalDetector.predict_proba(normal_xtest.as_matrix()),
                                 dosDetector.predict_proba(dos_xtest.as_matrix()),
                                 ProbeDetector.predict_proba(probe_xtest.as_matrix()),
                                 r2lDetector.predict_proba(r2l_xtest.as_matrix()),
                                 u2rdetector.predict_proba(u2r_xtest.as_matrix()),
                                 ),axis=1)

reading training_data_for_handler.txt
reading testing_data_for_handler.txt
DoS label encoding done
reading training_data_for_handler.txt
reading testing_data_for_handler.txt
Probe label encoding done
reading training_data_for_handler.txt
reading testing_data_for_handler.txt
U2R label encoding done
reading training_data_for_handler.txt
reading testing_data_for_handler.txt
R2L label encoding done 
reading training_data_for_handler.txt
reading testing_data_for_handler.txt
Normal label encoding done

In [141]:
n_classes=5
xtrain,ytrain,xtest,ytest=datasetObj.getData(100,n_classes)
ytrain=datasetObj.dense_to_one_hot(ytrain,n_classes)
ytest=datasetObj.dense_to_one_hot(ytest,n_classes)
ensamble_model=Sequential()
ensamble_model.add(Dense(output_dim=1,input_dim=5,bias=True,activation='sigmoid'))
ensamble_model.add(Dense(15,activation='sigmoid'))
ensamble_model.add(Dropout(0.2))
ensamble_model.add(Dense(15,activation='sigmoid',))
ensamble_model.add(Dropout(0.2))
ensamble_model.add(Dense(n_classes,activation='sigmoid'))
ensamble_model.compile(metrics=["accuracy"],optimizer="adam",loss="categorical_crossentropy")

reading training_data_for_handler.txt
reading testing_data_for_handler.txt
1-N Encoding done
1-N Encoding done


In [142]:
ensamble_model.fit(finaltrainingProb,ytrain,nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0xad4a9c57b8>

In [143]:
ensamble_model.evaluate(finaltrainingProb,ytrain)




[0.30154189813270293, 0.89016694053487655]

In [144]:
ensamble_model.evaluate(finaltestingProb,ytest)



[1.4780082116742943, 0.66376863023420862]

In [145]:
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

# BINARY CLASSIFIER

In [146]:
n_classes=2
xtrain,ytrain,xtest,ytest=datasetObj.getData(100,n_classes)

reading training_data_for_handler.txt
reading testing_data_for_handler.txt
1-N Encoding done
1-N Encoding done


In [147]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(finaltrainingProb,ytrain)
print("Training accuaracy: ",accuracy_score(gnb.predict(finaltrainingProb),ytrain))
print("Testing accuracy: ",accuracy_score(gnb.predict(finaltestingProb),ytest))

Training accuaracy:  0.990196311908
Testing accuracy:  0.813742015614


In [148]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression(max_iter=1000)
lr.fit(finaltrainingProb,ytrain)
print("Training accuaracy: ",accuracy_score(lr.predict(finaltrainingProb),ytrain))
print("Testing accuaracy: ",accuracy_score(lr.predict(finaltestingProb),ytest))

Training accuaracy:  0.991823644749
Testing accuaracy:  0.811834634493


In [149]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf.fit(finaltrainingProb,ytrain)
print("Training accuaracy: ",accuracy_score(clf.predict(finaltrainingProb),ytrain))
print("Testing accuaracy: ",accuracy_score(clf.predict(finaltestingProb),ytest))

Training accuaracy:  0.999023600295
Testing accuaracy:  0.791297019163


In [150]:
a=classification_report(y_pred=lr.predict(finaltrainingProb),y_true=ytrain)
print(a)

             precision    recall  f1-score   support

          0       1.00      0.99      0.99     67343
          1       0.99      1.00      0.99     58630

avg / total       0.99      0.99      0.99    125973



# 5-class ClASSIFIER

In [151]:
n_classes=5
xtrain,ytrain,xtest,ytest=datasetObj.getData(100,n_classes)

reading training_data_for_handler.txt
reading testing_data_for_handler.txt
1-N Encoding done
1-N Encoding done


In [152]:
from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
gnb.fit(finaltrainingProb,ytrain)
print("Training accuaracy: ",accuracy_score(gnb.predict(finaltrainingProb),ytrain))
print("Testing accuracy: ",accuracy_score(gnb.predict(finaltestingProb),ytest))

Training accuaracy:  0.96618322974
Testing accuracy:  0.724449964514


In [153]:
from sklearn.linear_model import LogisticRegression
lr=LogisticRegression(max_iter=1000)
lr.fit(finaltrainingProb,ytrain)
print("Training accuaracy: ",accuracy_score(lr.predict(finaltrainingProb),ytrain))
print("Testing accuaracy: ",accuracy_score(lr.predict(finaltestingProb),ytest))

Training accuaracy:  0.981392838148
Testing accuaracy:  0.725736337828


In [154]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf.fit(finaltrainingProb,ytrain)
print("Training accuaracy: ",accuracy_score(clf.predict(finaltrainingProb),ytrain))
print("Testing accuaracy: ",accuracy_score(clf.predict(finaltestingProb),ytest))

Training accuaracy:  0.998928341788
Testing accuaracy:  0.746806245564


In [155]:
a=classification_report(y_pred=lr.predict(finaltrainingProb),y_true=ytrain)
print(a)

             precision    recall  f1-score   support

          0       0.98      1.00      0.99     67343
          1       1.00      0.98      0.99     45927
          2       0.93      0.98      0.96     11656
          3       0.81      0.07      0.13       995
          4       0.00      0.00      0.00        52

avg / total       0.98      0.98      0.98    125973



  'precision', 'predicted', average, warn_for)
