In [1]:
import sys
import numpy as np
import scipy as sp
import sklearn as sk
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline

In [2]:
# Load the pre-computed feature matrices and label vectors from ../data.
# Forward slashes resolve on Windows as well as on POSIX systems; the previous
# raw "..\data\..." literals were only valid paths on Windows.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which is only safe for trusted files — drop it if these arrays are plain numeric.
x_train_tfidf = np.load("../data/x_train_tfidf.npy", allow_pickle=True)
x_test_tfidf = np.load("../data/x_test_tfidf.npy", allow_pickle=True)
y_train = np.load("../data/y_train.npy", allow_pickle=True)
y_test = np.load("../data/y_test.npy", allow_pickle=True)

## Logistic Regression Model

In [2]:
# Single-step pipeline whose only stage is a default-configured LogisticRegression.
model_1 = Pipeline([("classifier", LogisticRegression())])

## Training of Logistic Regression model

In [6]:
%%time
model_1.fit(x_train_tfidf, y_train)

CPU times: total: 3.62 s
Wall time: 2.73 s


In [8]:
# Class predictions of the fitted pipeline for both splits; the evaluation cell reuses them.
y_pred_train = model_1.predict(X=x_train_tfidf)
y_pred_test = model_1.predict(X=x_test_tfidf)

## Evaluation on test and train dataset

In [9]:
%%time
print("Precision Score on training dateset for Logistic Regression: %s" % precision_score(y_train,y_pred_train,average='micro'))
print("AUC Score on training dateset for Logistic Regression: %s" % roc_auc_score(y_train,model_1.predict_proba(x_train_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_train_1 =f1_score(y_train,y_pred_train,average="weighted")
print("F1 Score ftraining dateset for Logistic Regression: %s" % f1_score_train_1)
print("Precision Score on test for Logistic Regression: %s" % precision_score(y_test,y_pred_test,average='micro'))
print("AUC Score on test for Logistic Regression: %s" % roc_auc_score(y_test,model_1.predict_proba(x_test_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_1 =f1_score(y_test,y_pred_test,average="weighted")
print("F1 Score for Logistic Regression: %s" % f1_score_1)

Precision Score on training dateset for Logistic Regression: 0.8209255533199196
AUC Score on training dateset for Logistic Regression: 0.9031046217459942
F1 Score ftraining dateset for Logistic Regression: 0.8209152673654605
Precision Score on test for Logistic Regression: 0.8154341907883771
AUC Score on test for Logistic Regression: 0.8980153356644363
F1 Score for Logistic Regression: 0.8154319597973213
CPU times: total: 3.91 s
Wall time: 5.52 s


## Decision Tree Classifier

In [10]:
# Unconstrained decision tree (no depth limit) wrapped in a single-step pipeline.
# random_state is pinned so a Restart & Run All rebuilds the same tree; without it
# the reported scores drift from run to run.
model_2 = Pipeline(
    steps = [
        ("classifier", DecisionTreeClassifier(random_state=42))
    ])

## Training Decision tree classifier

In [11]:
%%time
model_2.fit(x_train_tfidf, y_train)

CPU times: total: 39.9 s
Wall time: 1min 19s


## Evaluation on test data and train data of Decision Tree Classifier

In [12]:
%%time
print("Precision Score on training dateset for Decision Tree Classifier: %s" % precision_score(y_train,model_2.predict(x_train_tfidf),average='micro'))
print("AUC Score on training dateset for Decision Tree Classifier: %s" % roc_auc_score(y_train,model_2.predict_proba(x_train_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_train_2 =f1_score(y_train,model_2.predict(x_train_tfidf),average="weighted")
print("F1 Score training dateset for Decision Tree Classifier: %s" % f1_score_train_2)
print("Precision Score on test for Decision Tree Classifier: %s" % precision_score(y_test,model_2.predict(x_test_tfidf),average='micro'))
print("AUC Score on test for Decision Tree Classifier: %s" % roc_auc_score(y_test,model_2.predict_proba(x_test_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_2 =f1_score(y_test,model_2.predict(x_test_tfidf),average="weighted")
print("F1 Score for Decision Tree Classifier: %s" % f1_score_2)

Precision Score on training dateset for Decision Tree Classifier: 0.999464241067708
AUC Score on training dateset for Decision Tree Classifier: 0.9999991597122366
F1 Score training dateset for Decision Tree Classifier: 0.9994642419146246
Precision Score on test for Decision Tree Classifier: 0.6838157675426413
AUC Score on test for Decision Tree Classifier: 0.6840022292422882
F1 Score for Decision Tree Classifier: 0.6838202209863015
CPU times: total: 3.36 s
Wall time: 6.21 s


## Decision Tree Classifier with max depth 11 to reduce overfitting

In [4]:
# Depth-limited decision tree (max_depth=11) wrapped in a single-step pipeline.
# random_state is pinned so a Restart & Run All rebuilds the same tree.
model_3 = Pipeline(
    steps = [
        ("classifier", DecisionTreeClassifier(criterion='gini', max_depth=11, min_samples_split=2,
                                              min_samples_leaf=1, random_state=42))
    ])

In [5]:
%%time
model_3.fit(x_train_tfidf, y_train)

CPU times: total: 13.9 s
Wall time: 22.1 s


In [6]:
%%time
print("Precision Score on training dateset for Decision Tree Classifier: %s" % precision_score(y_train,model_3.predict(x_train_tfidf),average='micro'))
print("AUC Score on training dateset for Decision Tree Classifier: %s" % roc_auc_score(y_train,model_3.predict_proba(x_train_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_train_3 =f1_score(y_train,model_3.predict(x_train_tfidf),average="weighted")
print("F1 Score training dateset for Decision Tree Classifier: %s" % f1_score_train_3)
print("Precision Score on test for Decision Tree Classifier: %s" % precision_score(y_test,model_3.predict(x_test_tfidf),average='micro'))
print("AUC Score on test for Decision Tree Classifier: %s" % roc_auc_score(y_test,model_3.predict_proba(x_test_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_3 =f1_score(y_test,model_3.predict(x_test_tfidf),average="weighted")
print("F1 Score for Decision Tree Classifier: %s" % f1_score_3)

Precision Score on training dateset for Decision Tree Classifier: 0.7167859226364102
AUC Score on training dateset for Decision Tree Classifier: 0.8023143432948144
F1 Score training dateset for Decision Tree Classifier: 0.714316783362288
Precision Score on test for Decision Tree Classifier: 0.6934551919551086
AUC Score on test for Decision Tree Classifier: 0.7573288609161214
F1 Score for Decision Tree Classifier: 0.6907759645745841
CPU times: total: 3.44 s
Wall time: 4.64 s


## Random Forest Classifier

In [7]:
# Default-configured random forest wrapped in a single-step pipeline.
# random_state is pinned so a Restart & Run All rebuilds the same forest.
model_4 = Pipeline(
    steps=[
        ("classifier", RandomForestClassifier(random_state=42))
    ])

## Training Random Forest Classifier

In [8]:
%%time
model_4.fit(x_train_tfidf, y_train)

CPU times: total: 2min 28s
Wall time: 3min 48s


## Evaluation on test and train data of Random Forest Classifier

In [9]:
%%time
print("Precision Score on training dateset for Decision Tree Classifier: %s" % precision_score(y_train,model_4.predict(x_train_tfidf),average='micro'))
print("AUC Score on training dateset for Decision Tree Classifier: %s" % roc_auc_score(y_train,model_4.predict_proba(x_train_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_train_4 =f1_score(y_train,model_4.predict(x_train_tfidf),average="weighted")
print("F1 Score training dateset for Decision Tree Classifier: %s" % f1_score_train_4)
print("Precision Score on test for Decision Tree Classifier: %s" % precision_score(y_test,model_4.predict(x_test_tfidf),average='micro'))
print("AUC Score on test for Decision Tree Classifier: %s" % roc_auc_score(y_test,model_4.predict_proba(x_test_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_4 =f1_score(y_test,model_4.predict(x_test_tfidf),average="weighted")
print("F1 Score for Decision Tree Classifier: %s" % f1_score_4)

Precision Score on training dateset for Decision Tree Classifier: 0.999464241067708
AUC Score on training dateset for Decision Tree Classifier: 0.9999857343858771
F1 Score training dateset for Decision Tree Classifier: 0.9994642416541405
Precision Score on test for Decision Tree Classifier: 0.7927662647924885
AUC Score on test for Decision Tree Classifier: 0.8725622979822492
F1 Score for Decision Tree Classifier: 0.7927584983431157
CPU times: total: 21.4 s
Wall time: 34.1 s


## Ada Boost Classifier

In [10]:
# AdaBoost over depth-4 decision trees (100 rounds, learning rate 0.8) in a
# single-step pipeline. random_state is pinned on the ensemble so a
# Restart & Run All rebuilds the same boosted model.
model_5 = Pipeline(
    steps = [
        ("classifier", AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=4),
                                         n_estimators=100,
                                         learning_rate=0.8,
                                         random_state=42))
    ])

## Training Ada Boost Classifier

In [11]:
%%time
model_5.fit(x_train_tfidf, y_train)



CPU times: total: 11min 20s
Wall time: 17min 38s


## Evaluation on test data and training data of Ada boost Classifier

In [12]:
%%time
print("Precision Score on training dateset for Decision Tree Classifier: %s" % precision_score(y_train,model_5.predict(x_train_tfidf),average='micro'))
print("AUC Score on training dateset for Decision Tree Classifier: %s" % roc_auc_score(y_train,model_5.predict_proba(x_train_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_train_5 =f1_score(y_train,model_5.predict(x_train_tfidf),average="weighted")
print("F1 Score training dateset for Decision Tree Classifier: %s" % f1_score_train_5)
print("Precision Score on test for Decision Tree Classifier: %s" % precision_score(y_test,model_5.predict(x_test_tfidf),average='micro'))
print("AUC Score on test for Decision Tree Classifier: %s" % roc_auc_score(y_test,model_5.predict_proba(x_test_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_5 =f1_score(y_test,model_5.predict(x_test_tfidf),average="weighted")
print("F1 Score for Decision Tree Classifier: %s" % f1_score_5)

Precision Score on training dateset for Decision Tree Classifier: 0.8473087042967866
AUC Score on training dateset for Decision Tree Classifier: 0.9326196480756911
F1 Score training dateset for Decision Tree Classifier: 0.8473065296911569
Precision Score on test for Decision Tree Classifier: 0.7913772987388188
AUC Score on test for Decision Tree Classifier: 0.8679240530794523
F1 Score for Decision Tree Classifier: 0.7913784958773983
CPU times: total: 57.1 s
Wall time: 1min 31s


## Hyperparameter tuning with Grid Search

In [15]:
import numpy as np
import pandas as pd
from sklearn import ensemble
from sklearn import metrics
from sklearn import model_selection

def hyperparamtune(classifier, param_grid, metric, verbose_value, cv):
    """Grid-search ``classifier`` on the notebook's training split and report the winner.

    Builds a ``GridSearchCV``, fits it on the notebook-level ``x_train_tfidf`` /
    ``y_train`` globals, then prints the best cross-validated score and the
    selected value of every tuned hyperparameter.

    Parameters
    ----------
    classifier : estimator
        Passed to ``GridSearchCV`` as ``estimator``.
    param_grid : dict or list of dict
        Passed to ``GridSearchCV`` as ``param_grid``.
    metric : str or callable
        Passed to ``GridSearchCV`` as ``scoring``.
    verbose_value : int
        Passed to ``GridSearchCV`` as ``verbose``.
    cv : int or CV splitter
        Passed to ``GridSearchCV`` as ``cv``.

    Returns
    -------
    tuple
        ``(model, best_parameters)`` where ``model`` is the fitted ``GridSearchCV``
        and ``best_parameters`` is ``model.best_estimator_.get_params()``.
    """
    model = model_selection.GridSearchCV(
        estimator=classifier,
        param_grid=param_grid,
        scoring=metric,
        verbose=verbose_value,
        cv=cv,
    )

    model.fit(x_train_tfidf, y_train)

    # Format the float itself. Wrapping it in braces built a one-element set,
    # so the message used to read "Best Score is {0.81...}".
    print("Best Score is %s" % model.best_score_)
    print("Best hyperparameter set:")
    best_parameters = model.best_estimator_.get_params()
    # best_params_ holds exactly the tuned names of the winning candidate, so this
    # also works when param_grid is a list of dicts (which has no .keys()).
    for param_name in sorted(model.best_params_):
        print(f"\t{param_name}: {best_parameters[param_name]}")

    return model, best_parameters

## Hyperparameter tuning of Logistic Regression

In [16]:
%%time
param_gd = {"penalty":["l2", "l1"],
           "C":[0.01, 0.1, 1.0, 10],
           "tol":[0.0001, 0.001, 0.01],
           "max_iter":[100, 200]}

model_7, best_param = hyperparamtune(LogisticRegression(), param_gd, "accuracy", 10, 5)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
[CV 1/5; 1/48] START C=0.01, max_iter=100, penalty=l2, tol=0.0001...............
[CV 1/5; 1/48] END C=0.01, max_iter=100, penalty=l2, tol=0.0001;, score=0.806 total time=   1.6s
[CV 2/5; 1/48] START C=0.01, max_iter=100, penalty=l2, tol=0.0001...............
[CV 2/5; 1/48] END C=0.01, max_iter=100, penalty=l2, tol=0.0001;, score=0.807 total time=   1.6s
[CV 3/5; 1/48] START C=0.01, max_iter=100, penalty=l2, tol=0.0001...............
[CV 3/5; 1/48] END C=0.01, max_iter=100, penalty=l2, tol=0.0001;, score=0.810 total time=   1.1s
[CV 4/5; 1/48] START C=0.01, max_iter=100, penalty=l2, tol=0.0001...............
[CV 4/5; 1/48] END C=0.01, max_iter=100, penalty=l2, tol=0.0001;, score=0.810 total time=   1.4s
[CV 5/5; 1/48] START C=0.01, max_iter=100, penalty=l2, tol=0.0001...............
[CV 5/5; 1/48] END C=0.01, max_iter=100, penalty=l2, tol=0.0001;, score=0.804 total time=   1.3s
[CV 1/5; 2/48] START C=0.01, max_iter=100, penal

[CV 2/5; 10/48] END C=0.01, max_iter=200, penalty=l1, tol=0.0001;, score=nan total time=   0.0s
[CV 3/5; 10/48] START C=0.01, max_iter=200, penalty=l1, tol=0.0001..............
[CV 3/5; 10/48] END C=0.01, max_iter=200, penalty=l1, tol=0.0001;, score=nan total time=   0.0s
[CV 4/5; 10/48] START C=0.01, max_iter=200, penalty=l1, tol=0.0001..............
[CV 4/5; 10/48] END C=0.01, max_iter=200, penalty=l1, tol=0.0001;, score=nan total time=   0.0s
[CV 5/5; 10/48] START C=0.01, max_iter=200, penalty=l1, tol=0.0001..............
[CV 5/5; 10/48] END C=0.01, max_iter=200, penalty=l1, tol=0.0001;, score=nan total time=   0.0s
[CV 1/5; 11/48] START C=0.01, max_iter=200, penalty=l1, tol=0.001...............
[CV 1/5; 11/48] END C=0.01, max_iter=200, penalty=l1, tol=0.001;, score=nan total time=   0.0s
[CV 2/5; 11/48] START C=0.01, max_iter=200, penalty=l1, tol=0.001...............
[CV 2/5; 11/48] END C=0.01, max_iter=200, penalty=l1, tol=0.001;, score=nan total time=   0.0s
[CV 3/5; 11/48] START

[CV 4/5; 19/48] END C=0.1, max_iter=200, penalty=l2, tol=0.0001;, score=0.816 total time=   1.6s
[CV 5/5; 19/48] START C=0.1, max_iter=200, penalty=l2, tol=0.0001...............
[CV 5/5; 19/48] END C=0.1, max_iter=200, penalty=l2, tol=0.0001;, score=0.814 total time=   1.9s
[CV 1/5; 20/48] START C=0.1, max_iter=200, penalty=l2, tol=0.001................
[CV 1/5; 20/48] END C=0.1, max_iter=200, penalty=l2, tol=0.001;, score=0.814 total time=   1.6s
[CV 2/5; 20/48] START C=0.1, max_iter=200, penalty=l2, tol=0.001................
[CV 2/5; 20/48] END C=0.1, max_iter=200, penalty=l2, tol=0.001;, score=0.816 total time=   1.5s
[CV 3/5; 20/48] START C=0.1, max_iter=200, penalty=l2, tol=0.001................
[CV 3/5; 20/48] END C=0.1, max_iter=200, penalty=l2, tol=0.001;, score=0.821 total time=   1.5s
[CV 4/5; 20/48] START C=0.1, max_iter=200, penalty=l2, tol=0.001................
[CV 4/5; 20/48] END C=0.1, max_iter=200, penalty=l2, tol=0.001;, score=0.816 total time=   1.9s
[CV 5/5; 20/48] S

[CV 1/5; 29/48] START C=1.0, max_iter=100, penalty=l1, tol=0.001................
[CV 1/5; 29/48] END C=1.0, max_iter=100, penalty=l1, tol=0.001;, score=nan total time=   0.1s
[CV 2/5; 29/48] START C=1.0, max_iter=100, penalty=l1, tol=0.001................
[CV 2/5; 29/48] END C=1.0, max_iter=100, penalty=l1, tol=0.001;, score=nan total time=   0.0s
[CV 3/5; 29/48] START C=1.0, max_iter=100, penalty=l1, tol=0.001................
[CV 3/5; 29/48] END C=1.0, max_iter=100, penalty=l1, tol=0.001;, score=nan total time=   0.0s
[CV 4/5; 29/48] START C=1.0, max_iter=100, penalty=l1, tol=0.001................
[CV 4/5; 29/48] END C=1.0, max_iter=100, penalty=l1, tol=0.001;, score=nan total time=   0.0s
[CV 5/5; 29/48] START C=1.0, max_iter=100, penalty=l1, tol=0.001................
[CV 5/5; 29/48] END C=1.0, max_iter=100, penalty=l1, tol=0.001;, score=nan total time=   0.0s
[CV 1/5; 30/48] START C=1.0, max_iter=100, penalty=l1, tol=0.01.................
[CV 1/5; 30/48] END C=1.0, max_iter=100, pen

[CV 3/5; 38/48] END C=10, max_iter=100, penalty=l2, tol=0.001;, score=0.823 total time=   1.9s
[CV 4/5; 38/48] START C=10, max_iter=100, penalty=l2, tol=0.001.................
[CV 4/5; 38/48] END C=10, max_iter=100, penalty=l2, tol=0.001;, score=0.817 total time=   2.3s
[CV 5/5; 38/48] START C=10, max_iter=100, penalty=l2, tol=0.001.................
[CV 5/5; 38/48] END C=10, max_iter=100, penalty=l2, tol=0.001;, score=0.813 total time=   2.1s
[CV 1/5; 39/48] START C=10, max_iter=100, penalty=l2, tol=0.01..................
[CV 1/5; 39/48] END C=10, max_iter=100, penalty=l2, tol=0.01;, score=0.815 total time=   1.9s
[CV 2/5; 39/48] START C=10, max_iter=100, penalty=l2, tol=0.01..................
[CV 2/5; 39/48] END C=10, max_iter=100, penalty=l2, tol=0.01;, score=0.815 total time=   1.9s
[CV 3/5; 39/48] START C=10, max_iter=100, penalty=l2, tol=0.01..................
[CV 3/5; 39/48] END C=10, max_iter=100, penalty=l2, tol=0.01;, score=0.823 total time=   2.3s
[CV 4/5; 39/48] START C=10, 

[CV 5/5; 47/48] END C=10, max_iter=200, penalty=l1, tol=0.001;, score=nan total time=   0.0s
[CV 1/5; 48/48] START C=10, max_iter=200, penalty=l1, tol=0.01..................
[CV 1/5; 48/48] END C=10, max_iter=200, penalty=l1, tol=0.01;, score=nan total time=   0.0s
[CV 2/5; 48/48] START C=10, max_iter=200, penalty=l1, tol=0.01..................
[CV 2/5; 48/48] END C=10, max_iter=200, penalty=l1, tol=0.01;, score=nan total time=   0.0s
[CV 3/5; 48/48] START C=10, max_iter=200, penalty=l1, tol=0.01..................
[CV 3/5; 48/48] END C=10, max_iter=200, penalty=l1, tol=0.01;, score=nan total time=   0.0s
[CV 4/5; 48/48] START C=10, max_iter=200, penalty=l1, tol=0.01..................
[CV 4/5; 48/48] END C=10, max_iter=200, penalty=l1, tol=0.01;, score=nan total time=   0.0s
[CV 5/5; 48/48] START C=10, max_iter=200, penalty=l1, tol=0.01..................
[CV 5/5; 48/48] END C=10, max_iter=200, penalty=l1, tol=0.01;, score=nan total time=   0.1s


120 fits failed out of a total of 240.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\pheno\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\pheno\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\pheno\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 54, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

 0.80748395 0.80748395 0.8

Best Score is {0.8167704180680619}
Best hyperparameter set:
	C: 1.0
	max_iter: 100
	penalty: l2
	tol: 0.0001
CPU times: total: 4min 45s
Wall time: 4min 12s


## Evaluation of Fine tuned Logistic Regression

In [18]:
%%time
print("Precision Score on training dateset for Decision Tree Classifier: %s" % precision_score(y_train,model_7.predict(x_train_tfidf),average='micro'))
print("AUC Score on training dateset for Decision Tree Classifier: %s" % roc_auc_score(y_train,model_7.predict_proba(x_train_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_train_7 =f1_score(y_train,model_7.predict(x_train_tfidf),average="weighted")
print("F1 Score training dateset for Decision Tree Classifier: %s" % f1_score_train_7)
print("Precision Score on test for Decision Tree Classifier: %s" % precision_score(y_test,model_7.predict(x_test_tfidf),average='micro'))
print("AUC Score on test for Decision Tree Classifier: %s" % roc_auc_score(y_test,model_7.predict_proba(x_test_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_7 =f1_score(y_test,model_7.predict(x_test_tfidf),average="weighted")
print("F1 Score for Decision Tree Classifier: %s" % f1_score_7)

Precision Score on training dateset for Decision Tree Classifier: 0.8209255533199196
AUC Score on training dateset for Decision Tree Classifier: 0.9031046217459942
F1 Score training dateset for Decision Tree Classifier: 0.8209152673654605
Precision Score on test for Decision Tree Classifier: 0.8154341907883771
AUC Score on test for Decision Tree Classifier: 0.8980153356644363
F1 Score for Decision Tree Classifier: 0.8154319597973213
CPU times: total: 4.55 s
Wall time: 5.9 s


## Hyperparameter Tuning for Random Forest Classifier

In [None]:
# This will take around 2 hours of time
%%time
#Define grid of hyper parameters
param_gd={"n_estimators":[100,200,300],
         "max_depth":[11,13,17,19,23],
         "criterion":["gini","entropy"],
         "min_samples_split":[3,7,11],
         "min_samples_leaf":[3,5],
         "max_features":["sqrt", "log2"]}

model_8, best_param_8 = hyperparamtune(RandomForestClassifier(),param_gd,"accuracy",10,5)

## Evaluation of fine tuned Random Forest Classifier

In [None]:
%%time
print("Precision Score on training dateset for Finetuned Random Forest Classifier: %s" % precision_score(y_train,model_8.predict(x_train_tfidf),average='micro'))
print("AUC Score on training dateset for Finetuned Random Forest Classifier: %s" % roc_auc_score(y_train,model_8.predict_proba(x_train_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_train_8 =f1_score(y_train,model_8.predict(x_train_tfidf),average="weighted")
print("F1 Score training dateset for Finetuned Random Forest Classifier: %s" % f1_score_train_8)
print("Precision Score on test for Finetuned Random Forest Classifier: %s" % precision_score(y_test,model_8.predict(x_test_tfidf),average='micro'))
print("AUC Score on test for Finetuned Random Forest Classifier: %s" % roc_auc_score(y_test,model_8.predict_proba(x_test_tfidf)[:,1],multi_class='ovo',average='macro'))
f1_score_8 =f1_score(y_test,model_8.predict(x_test_tfidf),average="weighted")
print("F1 Score for Finetuned Random Forest Classifier: %s" % f1_score_8)

In [None]:
# hyperparameter tuning of ada boost will take around 8-9 hours of time
best_score = 0
model = None
if ((f1_score_1>f1_score_2) & (f1_score_1>f1_score_3) & (f1_score_1>f1_score_4) & (f1_score_1>f1_score_5)& (f1_score_1>f1_score_7)&(f1_score_1>f1_score_8)):
    model = model_1
    print("Logsitics Regression is providing best F1 score: %f" % f1_score_1)
elif((f1_score_2>f1_score_1) & (f1_score_2>f1_score_3) & (f1_score_2>f1_score_4) & (f1_score_2>f1_score_5)& (f1_score_2>f1_score_7)&(f1_score_2>f1_score_8)):
    model = model_2
    print(" Over fit Decision Tree is providing best F1 score: %f" % f1_score_2)
elif((f1_score_3>f1_score_1) & (f1_score_3>f1_score_2) & (f1_score_3>f1_score_4)&(f1_score_3>f1_score_5)& (f1_score_3>f1_score_7)&(f1_score_3>f1_score_8)):
    model = model_3
    print("Decision Tree is providing best F1 score: %f" % f1_score_3)
elif((f1_score_4>f1_score_1) & (f1_score_4>f1_score_2) & (f1_score_4>f1_score_3)&(f1_score_4>f1_score_5)& (f1_score_4>f1_score_7)&(f1_score_4>f1_score_8)):
    model = model_4
    print("Random Forest is providing best F1 score: %f" % f1_score_4)
elif((f1_score_5>f1_score_1) & (f1_score_5>f1_score_2) & (f1_score_5>f1_score_4)&(f1_score_5>f1_score_3)& (f1_score_5>f1_score_7)&(f1_score_5>f1_score_8)):
    model = model_5
    print("Adaboost Classifier is providing best F1 score: %f" % f1_score_5)
elif((f1_score_7>f1_score_1) & (f1_score_7>f1_score_2) & (f1_score_7>f1_score_4)&(f1_score_7>f1_score_3)& (f1_score_7>f1_score_5)&(f1_score_7>f1_score_8)):
    model = model_7
    print("Finetuned Logsitics Regression Classifier is providing best F1 score: %f" % f1_score_7)
elif((f1_score_8>f1_score_1) & (f1_score_8>f1_score_2) & (f1_score_8>f1_score_4)&(f1_score_8>f1_score_3)& (f1_score_8>f1_score_7)&(f1_score_8>f1_score_5)):
    model = model_8
    print("Finetuned Random Forest Classifier is providing best F1 score: %f" % f1_score_8)
else:
    print("No Model is selected, Train again")