In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pylab import rcParams 
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.preprocessing import MinMaxScaler,StandardScaler,LabelEncoder
from sklearn.pipeline import make_pipeline,Pipeline
from sklearn.naive_bayes import GaussianNB,ComplementNB,MultinomialNB
from sklearn.ensemble import RandomForestClassifier,VotingClassifier,AdaBoostClassifier,GradientBoostingClassifier,IsolationForest
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import SGDClassifier,LogisticRegression
from sklearn.neighbors import KNeighborsClassifier,LocalOutlierFactor
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report,accuracy_score, f1_score
from sklearn import metrics 
from sklearn.calibration import CalibratedClassifierCV

In [None]:
# Location of the raw dataset. NOTE(review): this is an absolute path on one
# specific Windows machine, so the cell will not run elsewhere without editing it.
csv_path = r"C:\Users\aryan\Desktop\Projects\Traffic-Trojan-Detector\Trojan_Detection.csv"

# Read the whole CSV into the frame every later cell works from.
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

In [None]:
# Show the frame with every object-dtype column dropped.
df.select_dtypes(exclude=['object'])

In [None]:
# Keep only the non-object columns; the modelling cells below read from df2.
df2 = df.select_dtypes(exclude=['object'])


In [None]:
# Split df2 into a feature matrix (every column but the last) and a target
# vector (the last column).
# NOTE(review): assumes the last non-object column is the class label - confirm.
feature_values = df2.iloc[:, :-1].values
target_values = df2.iloc[:, -1].values

# Min-max scale the features. NOTE(review): the scaler is fitted on every row
# of df2, i.e. before any train/test split has been made.
scaler = MinMaxScaler()
X = scaler.fit(feature_values).transform(feature_values)
y = target_values

In [None]:
# Hold out 30% of the rows for testing, stratified on the label. The fixed
# random_state makes the split (and every score computed from it) repeatable
# across kernel restarts.
#
# The split is taken on the unscaled feature columns of df2 rather than on X,
# because X was min-max scaled using every row; fitting the scaler on the
# training rows only keeps test-set minima/maxima out of the preprocessing.
features_unscaled = df2.iloc[:, :-1].values
x_train_unscaled, x_test_unscaled, y_train, y_test = train_test_split(
    features_unscaled, y, test_size=0.3, stratify=y, random_state=42
)

# Learn the per-feature range from the training rows, then apply it to both
# splits. Test values outside the training range may fall outside [0, 1].
split_scaler = MinMaxScaler().fit(x_train_unscaled)
x_train = split_scaler.transform(x_train_unscaled)
x_test = split_scaler.transform(x_test_unscaled)

In [None]:
# Candidate classifiers, keyed by the label that is printed in the reports.
# The first entry is a LogisticRegression, so it is labelled as such.
# Tree-based estimators get a fixed random_state so reruns give the same fit.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'XGB': XGBClassifier(n_estimators=5000),
    'Support Vector Machine': SVC(kernel='sigmoid', gamma='scale'),
    'Decision Tree': DecisionTreeClassifier(max_depth=100, random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'kNN': KNeighborsClassifier(),
    'SGD': SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
                         random_state=42, max_iter=100, tol=None),
}

# Fit every candidate on the training split, reporting progress as we go.
for model_name, estimator in models.items():
    estimator.fit(x_train, y_train)
    print(model_name + " model fitting completed.")

In [None]:
# Evaluate every fitted model on the held-out split: print a classification
# report and draw the confusion matrix as a heatmap.
print("Test Set Prediction:\n")
for model_name, estimator in models.items():
    print('-'*20 + model_name + '-'*20)
    y_pred = estimator.predict(x_test)
    print(classification_report(y_test, y_pred, digits=5))
    cf_matrix = confusion_matrix(y_test, y_pred)
    # fmt='d' prints the cell counts as plain integers; the default format
    # switches to scientific notation for large counts.
    ax = sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')
    # Title each plot so it can be matched to its model when skimming output.
    ax.set_title(model_name)
    ax.set_xlabel('\nPredicted Values')
    ax.set_ylabel('Actual Values ')
    # NOTE(review): assumes a binary target whose two classes map to
    # False/True in this order - confirm against the label column.
    ax.xaxis.set_ticklabels(['False', 'True'])
    ax.yaxis.set_ticklabels(['False', 'True'])
    plt.show()

In [None]:
# Soft-voting ensemble of logistic regression, random forest and XGBoost.
# NOTE(review): an unassigned `LogisticRegression(class_weight='balanced')`
# expression used to open this cell; it was never used by the ensemble and has
# been dropped. Confirm whether balanced class weights were intended.
logistic_regression = LogisticRegression(max_iter=5000)
RF = RandomForestClassifier(random_state=42)  # seeded for repeatable results
XGB = XGBClassifier(n_estimators=1000)
model = VotingClassifier(
    estimators=[('lr', logistic_regression), ('RF', RF), ('XGB', XGB)],
    voting='soft',
)
model.fit(x_train, y_train)

# Predict once and reuse the result for both the report and the heatmap.
x = 'VotingClassifier'
print('-'*20 + x + '-'*20)
y_pred = model.predict(x_test)
print(classification_report(y_test, y_pred, digits=5))

cf_matrix = confusion_matrix(y_test, y_pred)
# fmt='d' prints the cell counts as plain integers instead of the default
# format, which switches to scientific notation for large counts.
ax = sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')
ax.set_title(x)
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')
# NOTE(review): assumes a binary target whose two classes map to False/True
# in this order - confirm against the label column.
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])
plt.show()

In [None]:
# Multinomial naive Bayes behind its own min-max scaling step, fitted on the
# training split. The fitted pipeline is the cell's displayed value.
p = Pipeline(steps=[
    ('Normalizing', MinMaxScaler()),
    ('MultinomialNB', MultinomialNB()),
])
p.fit(x_train, y_train)

In [None]:
# Evaluate the MultinomialNB pipeline on the held-out split: classification
# report followed by a confusion-matrix heatmap.
print('-'*20 + "MultinomialNB" + '-'*20)
y_pred = p.predict(x_test)
print(classification_report(y_test, y_pred, digits=5))

cf_matrix = confusion_matrix(y_test, y_pred)
# fmt='d' prints the cell counts as plain integers instead of the default
# format, which switches to scientific notation for large counts.
ax = sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')
ax.set_title("MultinomialNB")
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')
# NOTE(review): assumes a binary target whose two classes map to False/True
# in this order - confirm against the label column.
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])
plt.show()

In [None]:
# Complement naive Bayes behind its own min-max scaling step, fitted on the
# training split. The fitted pipeline is the cell's displayed value.
Mnp = Pipeline(steps=[
    ('Normalizing', MinMaxScaler()),
    ('ComplementNB', ComplementNB()),
])
Mnp.fit(x_train, y_train)

In [None]:
# Evaluate the ComplementNB pipeline on the held-out split: classification
# report followed by a confusion-matrix heatmap.
print('-'*20 + "Complement NB" + '-'*20)
y_pred = Mnp.predict(x_test)
print(classification_report(y_test, y_pred, digits=5))

cf_matrix = confusion_matrix(y_test, y_pred)
# fmt='d' prints the cell counts as plain integers instead of the default
# format, which switches to scientific notation for large counts.
ax = sns.heatmap(cf_matrix, annot=True, fmt='d', cmap='Blues')
ax.set_title("Complement NB")
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')
# NOTE(review): assumes a binary target whose two classes map to False/True
# in this order - confirm against the label column.
ax.xaxis.set_ticklabels(['False', 'True'])
ax.yaxis.set_ticklabels(['False', 'True'])
plt.show()