In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style("whitegrid")
from time import time

In [None]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides library warnings (e.g. deprecation notices)
# raised by the cells below — confirm that is intended.
warnings.filterwarnings('ignore')

In [None]:
import sys, os
# Put the parent of the current working directory first on the import path so
# modules located one level up can be imported.
sys.path.insert(0, os.path.abspath('..'))

In [None]:
# Load the dataset from "data.csv" (resolved against the current working directory).
df=pd.read_csv("data.csv")

In [None]:
# Display the loaded DataFrame.
df

In [None]:
# (number of rows, number of columns)
df.shape

In [None]:
# Summary statistics per column (pandas describes the numeric columns by default).
df.describe()

# Target and Features

In [None]:
#Target
y=df['label']

In [None]:
y

In [None]:
X=df.drop('label', axis=1)

In [None]:
X.shape

In [None]:
# Count missing values per column. Iterating over a DataFrame yields its column
# labels, so the check has to be made on the boolean mask from isnull() itself.
missing_per_column = X.isnull().sum()
columns_with_missing = missing_per_column[missing_per_column > 0]

if columns_with_missing.empty:
    print('no missing values')
else:
    # Show only the affected columns together with their missing-value counts.
    print('missing values per column:')
    print(columns_with_missing)

# Feature selection

In [None]:
!pip install yellowbrick

In [None]:
from yellowbrick.features import Rank2D

# Rank every pair of feature columns with the Pearson ranking algorithm.
visualizer = Rank2D(features=X.columns, algorithm='pearson')

visualizer.fit(X, y)                # Fit the data to the visualizer
visualizer.transform(X)             # Transform the data
visualizer.show()                   # Render the figure; show() replaces the deprecated poof()

In [None]:
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Ridge, Lasso, ElasticNet

In [None]:
# Fit a Lasso model inside SelectFromModel and list the columns it keeps.
model = Lasso(alpha=0.5)
sfm = SelectFromModel(model).fit(X, y)
# get_support() returns a boolean mask aligned with the columns of X.
print(list(X.columns[sfm.get_support()]))

In [None]:
# Re-display the dimensions of the full DataFrame (target column included).
df.shape

In [None]:
# Fit a Ridge model inside SelectFromModel and list the columns it keeps.
model = Ridge(alpha=0.5)
sfm = SelectFromModel(model).fit(X, y)
# get_support() returns a boolean mask aligned with the columns of X.
print(list(X.columns[sfm.get_support()]))

In [None]:
# Fit an ElasticNet model (library defaults) inside SelectFromModel and list
# the columns it keeps.
model = ElasticNet()
sfm = SelectFromModel(model).fit(X, y)
# get_support() returns a boolean mask aligned with the columns of X.
print(list(X.columns[sfm.get_support()]))

In [None]:
from yellowbrick.features import Rank1D
from yellowbrick.model_selection import FeatureImportances

# Rank each feature on its own with the 'shapiro' ranking algorithm.
visualizer = Rank1D(features=X.columns, algorithm='shapiro')

visualizer.fit(X, y)
visualizer.transform(X)
# show() replaces the deprecated poof().
visualizer.show()

In [None]:
!pip install yellowbrick

In [None]:
from yellowbrick.model_selection import FeatureImportances
from sklearn.linear_model import LogisticRegression

# One figure with a single axes for the visualizer to draw on.
fig, ax = plt.subplots()

# Feature importances derived from a logistic regression fitted on all rows.
viz = FeatureImportances(LogisticRegression(), ax=ax)
viz.fit(X, y)
# show() replaces the deprecated poof().
viz.show()


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# One figure with a single axes for the visualizer to draw on.
fig, ax = plt.subplots()

# Feature importances from a gradient-boosting classifier fitted on all rows.
viz = FeatureImportances(GradientBoostingClassifier(), ax=ax)
viz.fit(X, y)
# show() replaces the deprecated poof().
viz.show()

In [None]:
# NOTE(review): this cell repeats the previous gradient-boosting importance
# cell verbatim — confirm whether a different estimator was intended here.
from sklearn.ensemble import GradientBoostingClassifier

# One figure with a single axes for the visualizer to draw on.
fig, ax = plt.subplots()

viz = FeatureImportances(GradientBoostingClassifier(), ax=ax)
viz.fit(X, y)
# show() replaces the deprecated poof().
viz.show()

# Train and Test

In [None]:
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MaxAbsScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [None]:
#scaler=StandardScaler()
#X_train=scaler.fit_transform(X_train)
#X_test=scaler.transform(X_test)

# Check balance in train and test data

In [None]:
# Class proportions in the full dataset.
y.value_counts(normalize=True)

In [None]:
# Class proportions in the training split.
y_train.value_counts(normalize=True)

In [None]:
# Class proportions in the test split.
y_test.value_counts(normalize=True)

# Model 1: Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

In [None]:
nb = GaussianNB()

In [None]:
nb.fit(X_train, y_train)

In [None]:
expected   = y_test
predicted  = nb.predict(X_test)
classificationReport = classification_report(expected, predicted, target_names=['label'])
print(classificationReport)

# Model 2: Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
lr = LogisticRegression(C=0.01)

In [None]:
lr.fit(X_train, y_train)

In [None]:
expected   = y_test
predicted  = lr.predict(X_test)
classificationReport = classification_report(expected, predicted, target_names=['label'])
print(classificationReport)

In [None]:
from yellowbrick.classifier import ClassificationReport

# One figure with a single axes for the visualizer to draw on.
fig, ax = plt.subplots()

# `classes` expects one display name per fitted class; the single placeholder
# 'label' does not describe the classes, so it is omitted and the estimator's
# own class labels are shown.
visualizer = ClassificationReport(lr, ax=ax, support=True)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
# show() replaces the deprecated poof().
visualizer.show()

In [None]:
from yellowbrick.classifier import ROCAUC

# `classes` expects one display name per fitted class; the single placeholder
# 'label' does not describe the classes, so it is omitted and the estimator's
# own class labels are shown.
visualizer = ROCAUC(LogisticRegression())

visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
# show() replaces the deprecated poof().
g = visualizer.show()

In [None]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score

# Tune C over five log-spaced values (1e-4 ... 1e4) with 5-fold
# cross-validation, scored by negative log-loss.
lr = LogisticRegression()
params = {'C': np.logspace(-4, 4, 5)}
# refit is meant to be the boolean True; the string 'True' only worked
# because a non-empty string is truthy.
clf = GridSearchCV(lr, params, scoring='neg_log_loss', refit=True, n_jobs=-1, cv=5)
clf.fit(X_train, y_train)
print("best params: " + str(clf.best_params_))
print("best scores: " + str(clf.best_score_))
# Class probabilities and accuracy of the refitted best model on the held-out split.
estimates = clf.predict_proba(X_test)
acc = accuracy_score(y_test, clf.predict(X_test))
print("Accuracy: {:.4%}".format(acc))

# Model 3: Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rf = RandomForestClassifier(n_estimators=100)

In [None]:
rf.fit(X_train, y_train)

In [None]:
expected   = y_test
predicted  = rf.predict(X_test)
classificationReport = classification_report(expected, predicted, target_names=['label'])
print(classificationReport)

# Model 4: SVM

In [None]:
from sklearn.svm import LinearSVC

In [None]:
svc = LinearSVC()

In [None]:
svc.fit(X_train, y_train)

expected   = y_test
predicted  = svc.predict(X_test)

classificationReport = classification_report(expected, predicted, target_names=['FOL','MTF'])
print(classificationReport)

# Model 5: LDA

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [None]:
lda = LDA(n_components=2)

In [None]:
lda.fit(X_train,y_train)

In [None]:
expected   = y_test
predicted  = lda.predict(X_test)
classificationReport = classification_report(expected, predicted, target_names=['label'])
print(classificationReport)

# Model 6: Gradient Boosting

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
gbc = GradientBoostingClassifier()

In [None]:
gbc.fit(X_train,y_train)

In [None]:
expected = y_test
predicted  = gbc.predict(X_test)
classificationReport = classification_report(expected, predicted, target_names=['label'])
print(classificationReport)

In [None]:
from yellowbrick.classifier import ClassificationReport

# One figure with a single axes for the visualizer to draw on.
fig, ax = plt.subplots()

# `classes` expects one display name per fitted class; the single placeholder
# 'label' does not describe the classes, so it is omitted and the estimator's
# own class labels are shown.
visualizer = ClassificationReport(gbc, ax=ax, support=True)
visualizer.fit(X_train, y_train)
visualizer.score(X_test, y_test)
# show() replaces the deprecated poof().
visualizer.show()

# Model 7: Bagging

In [None]:
from sklearn.ensemble import BaggingClassifier

In [None]:
bc=BaggingClassifier(n_estimators=100, oob_score=10)

In [None]:
bc.fit(X_train,y_train)

In [None]:
expected = y_test
predicted  = bc.predict(X_test)
classificationReport = classification_report(expected, predicted, target_names=['label'])
print(classificationReport)