In [1]:
import numpy as np
import pandas as pd
import networkx as nx

In [2]:
from joblib import dump, load

In [18]:
xtrain = pd.read_csv("xtrain.csv").values.tolist()
xtest = pd.read_csv("xtest.csv").values.tolist()
ytrain = pd.read_csv("ytrain.csv").values.tolist()
ytest = pd.read_csv("ytest.csv").values.tolist()

In [19]:
ytrain = [i[0] for i in ytrain]
ytest = [i[0] for i in ytest]

In [37]:
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.metrics import accuracy_score

## Logistic Regression

In [21]:
from sklearn.linear_model import LogisticRegression

In [22]:
lr = LogisticRegression()
lr.fit(xtrain, ytrain)

In [65]:
dump(lr,"lr.joblib")

['lr.joblib']

In [24]:
lr_predictions = lr.predict_proba(xtest)

In [25]:
lr_pred = lr.predict(xtest)

In [27]:
lr_score = roc_auc_score(ytest, lr_predictions[:,1])
lr_acc = accuracy_score(ytest, lr_pred)
print("roc_auc_score for logistic regression is:", lr_score)
print("accuracy for logistic regression is:",lr_acc)

roc_auc_score for logistic regression is: 0.629822758478767
accuracy for logistic regression is: 0.5923758865248226


## AdaBoosted Decision Trees

In [28]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [29]:
bdt = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2), n_estimators=300, learning_rate=1.5
)

In [30]:
bdt.fit(xtrain, ytrain)

In [31]:
dump(bdt, 'bdt.joblib') 

['bdt.joblib']

In [32]:
bdt_predictions = bdt.predict_proba(xtest)
bdt_pred = bdt.predict(xtest)

In [33]:
unique, counts = np.unique(bdt_pred, return_counts=True)
print(unique,counts)

[0 1] [8414 8506]


In [35]:
bdt_score = roc_auc_score(ytest, bdt_predictions[:,1])
bdt_acc = accuracy_score(ytest,bdt_pred)
print("roc_auc_score for adaboosted decision tree is:", bdt_score)
print("accuracy score for adaboosted decision tree is:", bdt_acc)

roc_auc_score for adaboosted decision tree is: 0.6616181769801228
accuracy score for adaboosted decision tree is: 0.6215130023640661


In [38]:
confusion_matrix(ytest, bdt_pred)

array([[5261, 3251],
       [3153, 5255]])

## Nueral Network

In [48]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(5,),learning_rate='adaptive', max_iter=2000)
mlp.fit(xtrain, ytrain)

In [51]:
mlp_predictions = mlp.predict_proba(xtest)
mlp_pred = mlp.predict(xtest)

In [52]:
mlp_score = roc_auc_score(ytest, mlp_predictions[:,1])
mlp_acc = accuracy_score(ytest,mlp_pred)
print("roc_auc_score for adaboosted decision tree is:", mlp_score)
print("accuracy score for fully connected neural network is", mlp_acc)

roc_auc_score for adaboosted decision tree is: 0.6922070168582731
accuracy score for fully connected neural network is 0.6387115839243499


In [53]:
confusion_matrix(ytest, mlp_pred)

array([[4977, 3535],
       [2578, 5830]])

In [54]:
dump(mlp, 'mlp.joblib') 

['mlp.joblib']

## Naive Bayes

In [55]:
from sklearn.naive_bayes import GaussianNB

In [56]:
gnb = GaussianNB()
gnb.fit(xtrain,ytrain)

In [57]:
gnb_predictions = gnb.predict_proba(xtest)
gnb_pred = gnb.predict(xtest)

In [58]:
gnb_score = roc_auc_score(ytest, gnb_predictions[:,1])
gnb_acc = accuracy_score(ytest,gnb_pred)
print("roc_auc_score for adaboosted decision tree is:", gnb_score)
print("accuracy score for fully connected neural network is", gnb_acc)

roc_auc_score for adaboosted decision tree is: 0.6332735033386571
accuracy score for fully connected neural network is 0.5978132387706856


## SVM

In [59]:
from sklearn.svm import SVC
svc = SVC()
svc.fit(xtrain, ytrain)

In [61]:
svc_pred = svc.predict(xtest)

In [62]:
#svc_score = roc_auc_score(ytest, svc_predictions[:,1])
svc_acc = accuracy_score(ytest,svc_pred)
#print("roc_auc_score for adaboosted decision tree is:", svc_score)
print("accuracy score for fully connected neural network is", svc_acc)

accuracy score for fully connected neural network is 0.6984633569739953


In [63]:
confusion_matrix(ytest,svc_pred)

array([[5689, 2823],
       [2279, 6129]])

In [64]:
dump(svc, "svc.joblib")

['svc.joblib']