In [1]:
import pandas as pd
import numpy as np


from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, accuracy_score, roc_auc_score, confusion_matrix, precision_recall_fscore_support
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import preprocessing, neighbors


In [2]:
df_new = pd.read_csv("../Data/BMT_balanced_encoded_address.csv",index_col= 0)

In [None]:
df_new.describe()

In [4]:
df_new.drop(columns=["length","weight","count","looped","neighbors","income"], inplace=True)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(df_new.drop(columns=['label','address']), df_new['label'], 
                                                    test_size = 0.25, random_state=42)


In [6]:
classifier = SGDClassifier(loss='log', max_iter=100) 
model = classifier.fit(X_train, y_train)
y_pred = model.predict(X_test)



In [7]:
accuracy = accuracy_score(y_test,y_pred)
conf_mat = confusion_matrix(y_test,y_pred)
print("The accuracy of the model is :", round(accuracy,2)*100,"%")
print("Confusion Matrix:\n", conf_mat)

The accuracy of the model is : 50.0 %
Confusion Matrix:
 [[    0 10339]
 [    0 10368]]


In [8]:
precision, recall, fscore, support = precision_recall_fscore_support(y_test, y_pred, average='binary')
print('Precision = ',round(precision,4),'\nRecall = ', round(recall,4), '\nF-Score = ',round(fscore,4))

Precision =  0.5007 
Recall =  1.0 
F-Score =  0.6673


In [9]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)


In [10]:
classifier = SGDClassifier(loss='log', max_iter=100) 
model = classifier.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

In [11]:
accuracy = accuracy_score(y_test,y_pred)
conf_mat = confusion_matrix(y_test,y_pred)
print("The accuracy of the model is :", round(accuracy,2)*100,"%")
print("Confusion Matrix:\n", conf_mat)

The accuracy of the model is : 57.99999999999999 %
Confusion Matrix:
 [[5090 5249]
 [3420 6948]]


In [12]:
precision, recall, fscore, support = precision_recall_fscore_support(y_test, y_pred, average='binary')
print('Precision = ',round(precision,4),'\nRecall = ', round(recall,4), '\nF-Score = ',round(fscore,4))

Precision =  0.5696 
Recall =  0.6701 
F-Score =  0.6158


In [13]:
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)


In [14]:
classifier = SGDClassifier(loss='log', max_iter=100) 
model = classifier.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

In [15]:
accuracy = accuracy_score(y_test,y_pred)
conf_mat = confusion_matrix(y_test,y_pred)
print("The accuracy of the model is :", round(accuracy,2)*100,"%")
print("Confusion Matrix:\n", conf_mat)

The accuracy of the model is : 56.00000000000001 %
Confusion Matrix:
 [[6364 3975]
 [5225 5143]]


In [16]:
precision, recall, fscore, support = precision_recall_fscore_support(y_test, y_pred, average='binary')
print('Precision = ',round(precision,4),'\nRecall = ', round(recall,4), '\nF-Score = ',round(fscore,4))

Precision =  0.564 
Recall =  0.496 
F-Score =  0.5279


### SVM model

In [17]:
# Scale
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

In [19]:
# work on Balanced DATA (Random sampling)
model = SVC(kernel='rbf', random_state = 2)

In [None]:
model.fit(X_train, y_train)

In [None]:
Y_pred = model.predict(X_test)

In [None]:
Y_pred

In [None]:
cm = confusion_matrix(y_test,Y_pred)
accuracy = float(cm.diagonal().sum())/len(y_test)
print("\nSVM Accuracy: ", accuracy)

In [None]:
precision, recall, fscore, support = precision_recall_fscore_support(y_test, Y_pred, average='binary')
print('Precision = ',round(precision,4),'\nRecall = ', round(recall,4), '\nF-Score = ',round(fscore,4))

In [None]:
model = model.fit(X_train_scaled, y_train)
y_pred_ = model.predict(X_test_scaled)

In [None]:
precision, recall, fscore, support = precision_recall_fscore_support(y_test, y_pred_, average='binary')
print('Precision = ',round(precision,4),'\nRecall = ', round(recall,4), '\nF-Score = ',round(fscore,4))

In [None]:
cm = confusion_matrix(y_test,y_pred_)
accuracy = float(cm.diagonal().sum())/len(y_test)
print("\nSVM Accuracy: ", accuracy)

In [None]:
accuracy = accuracy_score(y_test,y_pred_)
conf_mat = confusion_matrix(y_test,y_pred_)
print("The accuracy of the model is :", round(accuracy,2)*100,"%")
print("Confusion Matrix:\n", conf_mat)

In [None]:
model.score(X_test,y_test)

### Random Forest model

In [17]:
# work on Balanced DATA (Random sampling)
model = RandomForestClassifier(max_depth=3,random_state=2)

In [19]:
model = model.fit(X_train,y_train)

In [20]:
model.score(X_test,y_test)

0.7459796204182161

In [21]:
Y_pred = model.predict(X_test)

In [22]:
precision, recall, fscore, support = precision_recall_fscore_support(y_test, Y_pred, average='binary')
print('Precision = ',round(precision,4),'\nRecall = ', round(recall,4), '\nF-Score = ',round(fscore,4))

Precision =  0.6946 
Recall =  0.8791 
F-Score =  0.7761


### Try the model with transformed data 

In [133]:
X_train, X_test, y_train, y_test = train_test_split(df_transformed.drop(columns=['length', 'weight', 'count', 'looped',
       'neighbors', 'income','label']), df_transformed['label'], 
                                                    test_size = 0.25, random_state=42)

In [135]:
model = RandomForestClassifier(max_depth=5,random_state=42)
model = model.fit(X_train,y_train)
print(model.score(X_test,y_test))
Y_pred = model.predict(X_test)
precision, recall, fscore, support = precision_recall_fscore_support(y_test, Y_pred, average='binary')
print('Precision = ',round(precision,4),'\nRecall = ', round(recall,4), '\nF-Score = ',round(fscore,4))

0.7815231564205342
Precision =  0.7202 
Recall =  0.9219 
F-Score =  0.8086


# All Models

In [None]:
# Create Function to fit on All models and Calculat it's score, F1, Prec, Recall ... etc. 