# Supervised ML Classifiers

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split

from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [None]:
# Load the 100% labelled, resampled dataset. Every column except the last is
# used as a feature; the last column is the classification target.
df = pd.read_csv('9_5day_resampled.csv')
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# identical from run to run.
Xtrain, Xval, ytrain, yval = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest with default hyper-parameters and report the wall-clock
# training time. The estimator is seeded so that its bootstrap samples and
# feature sub-sampling, and therefore every score reported for it, are
# repeatable across runs; the train/validation split, SVC and MLP in this
# notebook are seeded the same way.
train0 = time.time()
clf = RandomForestClassifier(random_state=0).fit(Xtrain, ytrain)
train1 = time.time()
print('train-time: ' + str(train1 - train0))

In [None]:
# Mean accuracy of the fitted model on the training split.
print(f"Train accuracy : {clf.score(Xtrain, ytrain)}")

# Mean accuracy on the validation split. The timed window covers both the
# scoring call and the print that reports it.
test0 = time.time()
print(f"Test accuracy : {clf.score(Xval, yval)}")
test1 = time.time()
print(f"test-time: {test1 - test0}")

In [None]:
# Support-weighted F1 / precision / recall on the validation split.
# scikit-learn metrics take (y_true, y_pred) in that order. Passing them the
# other way round swaps precision with recall and weights each class by its
# predicted count instead of its true support.
y_pred = clf.predict(Xval)
print('weighted F1 Score : ' + str(f1_score(yval, y_pred, average='weighted')))
print('weighted Precision : ' + str(precision_score(yval, y_pred, average='weighted', zero_division=1)))
print('weighted Recall : ' + str(recall_score(yval, y_pred, average='weighted', zero_division=1)))

In [None]:
# Unweighted (macro) averages over classes on the validation split.
# The true labels must come first: with the arguments reversed the value
# printed as "Precision" is actually macro recall and vice versa.
y_pred = clf.predict(Xval)
print('macro F1 Score : ' + str(f1_score(yval, y_pred, average='macro')))
print('macro Precision : ' + str(precision_score(yval, y_pred, average='macro', zero_division=1)))
print('macro Recall : ' + str(recall_score(yval, y_pred, average='macro', zero_division=1)))

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree with default hyper-parameters and report the wall-clock
# training time. The estimator is seeded so that the random feature
# permutation used while searching for splits is the same on every run,
# making the reported scores repeatable (the split, SVC and MLP in this
# notebook are seeded the same way).
train0 = time.time()
clf = DecisionTreeClassifier(random_state=0).fit(Xtrain, ytrain)
train1 = time.time()
print('train-time: ' + str(train1 - train0))

In [None]:
# Training-split accuracy of the fitted tree.
print(f"Train accuracy : {clf.score(Xtrain, ytrain)}")

# Validation-split accuracy; scoring and printing both fall inside the
# measured interval.
test0 = time.time()
print(f"Test accuracy : {clf.score(Xval, yval)}")
test1 = time.time()
print(f"test-time: {test1 - test0}")

In [None]:
# Support-weighted validation metrics for the decision tree.
# Arguments are (y_true, y_pred): reversing them exchanges precision and
# recall and bases the class weights on predicted rather than true counts.
y_pred = clf.predict(Xval)
print('weighted F1 Score : ' + str(f1_score(yval, y_pred, average='weighted')))
print('weighted Precision : ' + str(precision_score(yval, y_pred, average='weighted', zero_division=1)))
print('weighted Recall : ' + str(recall_score(yval, y_pred, average='weighted', zero_division=1)))

In [None]:
# Macro-averaged validation metrics for the decision tree.
# True labels go first so that "Precision" and "Recall" are not exchanged.
y_pred = clf.predict(Xval)
print('macro F1 Score : ' + str(f1_score(yval, y_pred, average='macro')))
print('macro Precision : ' + str(precision_score(yval, y_pred, average='macro', zero_division=1)))
print('macro Recall : ' + str(recall_score(yval, y_pred, average='macro', zero_division=1)))

### XGBoost

In [None]:
from xgboost import XGBClassifier
from sklearn.multiclass import OneVsRestClassifier

In [None]:
# Train one XGBoost classifier per class (one-vs-rest) with XGBoost's own
# logging silenced, and report how long construction plus fitting took.
train0 = time.time()
clf = OneVsRestClassifier(
    XGBClassifier(verbosity=0)
).fit(Xtrain, ytrain)
train1 = time.time()
print(f"train-time: {train1 - train0}")

In [None]:
# Accuracy on the training split (shown as the cell's output).
clf.score(X=Xtrain, y=ytrain)

In [None]:
# Accuracy on the validation split (shown as the cell's output).
clf.score(X=Xval, y=yval)

In [None]:
# Time a single scoring pass over the validation split. The score itself is
# kept in `z` but not printed here.
test0 = time.time()
z = clf.score(X=Xval, y=yval)
test1 = time.time()
print(f"test-time: {test1 - test0}")

In [None]:
# Support-weighted validation metrics for the one-vs-rest XGBoost model.
# scikit-learn expects (y_true, y_pred); the reversed order reports recall as
# precision (and vice versa) and weights classes by predicted counts.
y_pred = clf.predict(Xval)
print('weighted F1 Score : ' + str(f1_score(yval, y_pred, average='weighted')))
print('weighted Precision : ' + str(precision_score(yval, y_pred, average='weighted', zero_division=1)))
print('weighted Recall : ' + str(recall_score(yval, y_pred, average='weighted', zero_division=1)))

In [None]:
# Macro-averaged validation metrics for the XGBoost model. This cell does not
# predict again: it reuses whatever `y_pred` currently holds, so run it after
# a cell that assigns `y_pred` for this model.
# True labels are passed first so "Precision" and "Recall" are not exchanged.
print('macro F1 Score : ' + str(f1_score(yval, y_pred, average='macro')))
print('macro Precision : ' + str(precision_score(yval, y_pred, average='macro', zero_division=1)))
print('macro Recall : ' + str(recall_score(yval, y_pred, average='macro', zero_division=1)))

In [None]:
# Validation-split predictions of the current classifier, kept in `y_pred`
# for the per-class tables and the confusion matrix.
y_pred = clf.predict(Xval)

In [None]:
print('F1 Scores : ' )
# One F1 value per class, listed in order of first appearance in the
# validation labels. The metric is called as (y_true, y_pred), the order
# scikit-learn documents.
class_labels = yval.unique()
pd.DataFrame(
    f1_score(yval, y_pred, labels=class_labels, average=None),
    columns=['F1-score'],
    index=class_labels,
)

In [None]:
print('Precision Scores : ' )
# One precision value per class, listed in order of first appearance in the
# validation labels. True labels must be the first argument: with the
# arguments reversed this table would actually contain per-class recall.
class_labels = yval.unique()
pd.DataFrame(
    precision_score(yval, y_pred, labels=class_labels, average=None),
    columns=['Precision-score'],
    index=class_labels,
)

In [None]:
print('Recall Scores : ' )
# One recall value per class, listed in order of first appearance in the
# validation labels. True labels must be the first argument: with the
# arguments reversed this table would actually contain per-class precision.
class_labels = yval.unique()
pd.DataFrame(
    recall_score(yval, y_pred, labels=class_labels, average=None),
    columns=['recall-score'],
    index=class_labels,
)

#### Confusion Matrix with 10 chosen well-known classes
Classes : AMAZON, FACEBOOK, GMAIL, GOOGLE, HTTP, OFFICE_365, SKYPE, TWITTER, WHATSAPP, YOUTUBE

In [None]:
from sklearn.metrics import confusion_matrix

# Report only on these well-known applications.
apps = ['AMAZON', 'FACEBOOK', 'GMAIL', 'GOOGLE', 'HTTP', 'OFFICE_365', 'SKYPE', 'TWITTER', 'WHATSAPP', 'YOUTUBE']

# Boolean mask over validation rows whose TRUE label is one of `apps`,
# computed once and reused for labels, truths and predictions.
in_apps = yval.isin(apps)
app_labels = yval[in_apps].unique()

# Rows are true classes and columns are predicted classes, both ordered as in
# `app_labels`. Predictions that fall outside `app_labels` are not counted in
# any column, so a row can sum to less than that class's number of samples.
cm = confusion_matrix(yval[in_apps], y_pred[in_apps], labels=app_labels)

In [None]:
# Turn the count matrix into row fractions: each cell is divided by the total
# of its row, and the frame is labelled on both axes with the selected classes.
shown_classes = yval[yval.isin(apps)].unique()
row_totals = np.sum(cm, axis=1, keepdims=True)
cm_pc = pd.DataFrame(cm / row_totals, columns=shown_classes, index=shown_classes)
# A row whose total is zero divides to NaN; show those cells as 0 instead.
cm_pc = cm_pc.fillna(0)

In [None]:
import seaborn as sns

plt.figure(figsize=(15, 10))

# Raw-count matrix with class names on both axes (built here but not plotted
# in this cell; the heatmap below uses the row-normalised `cm_pc`).
shown_classes = yval[yval.isin(apps)].unique()
df_cm = pd.DataFrame(cm, columns=shown_classes, index=shown_classes)

sns.set(font_scale=1.1)  # font size
# Annotate every cell as a percentage with one decimal, on a 150-step red palette.
reds = sns.color_palette("Reds", 150)
sns.heatmap(cm_pc, annot=True, fmt='.1%', cmap=reds)
plt.show()

### SVM

In [None]:
from sklearn.svm import SVC

# Fit an RBF-kernel support vector classifier (seeded) and report the
# wall-clock time taken by construction plus fitting.
train0 = time.time()
clf = SVC(kernel='rbf', random_state=0).fit(Xtrain, ytrain)
train1 = time.time()
print(f"train-time: {train1 - train0}")

In [None]:
# Accuracy of the fitted SVM on the training split.
print(f"Train accuracy : {clf.score(Xtrain, ytrain)}")

# Accuracy on the validation split, timing the scoring call together with the
# print that reports it.
test0 = time.time()
print(f"Test accuracy : {clf.score(Xval, yval)}")
test1 = time.time()
print(f"test-time: {test1 - test0}")

In [None]:
# Support-weighted validation metrics for the SVM.
# Metrics are called as (y_true, y_pred); the reversed order swaps precision
# with recall and weights classes by predicted instead of true support.
y_pred = clf.predict(Xval)
print('weighted F1 Score : ' + str(f1_score(yval, y_pred, average='weighted')))
print('weighted Precision : ' + str(precision_score(yval, y_pred, average='weighted', zero_division=1)))
print('weighted Recall : ' + str(recall_score(yval, y_pred, average='weighted', zero_division=1)))

In [None]:
# Macro-averaged validation metrics for the SVM.
# True labels are the first argument so precision and recall are not exchanged.
y_pred = clf.predict(Xval)
print('macro F1 Score : ' + str(f1_score(yval, y_pred, average='macro')))
print('macro Precision : ' + str(precision_score(yval, y_pred, average='macro', zero_division=1)))
print('macro Recall : ' + str(recall_score(yval, y_pred, average='macro', zero_division=1)))

### MLP

In [None]:
from sklearn.neural_network import MLPClassifier

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied to both splits. Note that `Xtrain` and
# `Xval` are overwritten, so any cell run after this one sees scaled data.
scaler = StandardScaler().fit(Xtrain)
Xtrain = scaler.transform(Xtrain)
Xval = scaler.transform(Xval)

In [None]:
# Fit a seeded multi-layer perceptron with four hidden layers
# (100, 200, 400 and 50 units), at most 700 iterations and an initial
# learning rate of 0.001; report the wall-clock training time.
train0 = time.time()
clf = MLPClassifier(
    hidden_layer_sizes=(100, 200, 400, 50),
    random_state=0,
    max_iter=700,
    learning_rate_init=0.001,
).fit(Xtrain, ytrain)
train1 = time.time()
print(f"train-time: {train1 - train0}")

In [None]:
# Accuracy of the fitted MLP on the training split.
print(f"Train accuracy : {clf.score(Xtrain, ytrain)}")

# Accuracy on the validation split; the measured interval includes the print.
test0 = time.time()
print(f"Test accuracy : {clf.score(Xval, yval)}")
test1 = time.time()
print(f"test-time: {test1 - test0}")

In [None]:
# Support-weighted validation metrics for the MLP.
# scikit-learn metrics take (y_true, y_pred); reversing them reports recall
# as precision (and vice versa) and weights classes by predicted counts.
y_pred = clf.predict(Xval)
print('weighted F1 Score : ' + str(f1_score(yval, y_pred, average='weighted')))
print('weighted Precision : ' + str(precision_score(yval, y_pred, average='weighted', zero_division=1)))
print('weighted Recall : ' + str(recall_score(yval, y_pred, average='weighted', zero_division=1)))

In [None]:
# Macro-averaged validation metrics for the MLP.
# True labels are the first argument so precision and recall are not exchanged.
y_pred = clf.predict(Xval)
print('macro F1 Score : ' + str(f1_score(yval, y_pred, average='macro')))
print('macro Precision : ' + str(precision_score(yval, y_pred, average='macro', zero_division=1)))
print('macro Recall : ' + str(recall_score(yval, y_pred, average='macro', zero_division=1)))

In [None]:
# Number of rows in the training split (shown as the cell's output).
Xtrain.shape[0]