In [1]:
import pandas as pd
import numpy as np

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score

from sklearn.pipeline import make_pipeline,Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X,y = make_classification(n_samples = 1000,n_classes = 2,n_features = 10,n_redundant=0,random_state=42)
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42,test_size=0.3)

In [2]:
# Step 1       :pipeline

pipeline = {
      'l1':make_pipeline(StandardScaler(),  LogisticRegression(penalty='l1',random_state=42, solver = 'liblinear')),
      'l2':make_pipeline(StandardScaler(),  LogisticRegression(penalty='l2',random_state=42, solver = 'liblinear')),
      'rf': make_pipeline(StandardScaler(), RandomForestClassifier(random_state=1)),
      'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier(random_state=1))
    
}
pipeline

# Step  parameters seperately
l1_hyperparameters = {
        'logisticregression__C':[0.1,1,10]
}

l2_hyperparameters = {
        'logisticregression__C':[0.1,1,10]
}

rf_hyperparameters = {
    'randomforestclassifier__n_estimators' : [100, 200],
    'randomforestclassifier__max_features' : ['auto', 0.3, 0.6],
     'randomforestclassifier__criterion' : ['entropy','gini']
    
}

gb_hyperparameters = {
    'gradientboostingclassifier__n_estimators' : [100, 200],
    'gradientboostingclassifier__learning_rate' : [0.05, 0.1, 0.2], #0.05,0.1,0.2
    'gradientboostingclassifier__max_depth' : [1, 3, 5]
}

#Step 3 Join parameters
hyperparameters = {
'l1' : l1_hyperparameters,
'l2' : l2_hyperparameters ,
'rf' : rf_hyperparameters,
'gb' : gb_hyperparameters
}
hyperparameters 

{'l1': {'logisticregression__C': [0.1, 1, 10]},
 'l2': {'logisticregression__C': [0.1, 1, 10]},
 'rf': {'randomforestclassifier__n_estimators': [100, 200],
  'randomforestclassifier__max_features': ['auto', 0.3, 0.6],
  'randomforestclassifier__criterion': ['entropy', 'gini']},
 'gb': {'gradientboostingclassifier__n_estimators': [100, 200],
  'gradientboostingclassifier__learning_rate': [0.05, 0.1, 0.2],
  'gradientboostingclassifier__max_depth': [1, 3, 5]}}

In [3]:
#Step 4 merge pipeline & parameters
 #store values in models dictionary
models = {}
for key in pipeline.keys() :
    models[key] = GridSearchCV(pipeline[key], hyperparameters[key], cv=5)

In [4]:
#Step 5 Train the model
for key in models.keys():
    models[key].fit(X_train, y_train)
    print(key, ' is trained and tuned.')

l1  is trained and tuned.
l2  is trained and tuned.
rf  is trained and tuned.
gb  is trained and tuned.


In [5]:
from sklearn.metrics import classification_report

In [6]:
from sklearn.metrics import confusion_matrix,accuracy_score
print('\n-----------------Logistic Regression 1------------------------------')
print('Confusion Matrix: \n',confusion_matrix(y_test,models['l1'].predict(X_test)))
print('\nAccuracy_score: \n',accuracy_score(y_test,models['l1'].predict(X_test)))
print('\nclassification_report\n',classification_report(y_test,models['l1'].predict(X_test)))


-----------------Logistic Regression 1------------------------------
Confusion Matrix: 
 [[125  17]
 [ 20 138]]

Accuracy_score: 
 0.8766666666666667

classification_report
               precision    recall  f1-score   support

           0       0.86      0.88      0.87       142
           1       0.89      0.87      0.88       158

    accuracy                           0.88       300
   macro avg       0.88      0.88      0.88       300
weighted avg       0.88      0.88      0.88       300



In [7]:
print('\n-----------------Logistic Regression 2------------------------------')
print('Confusion Matrix: \n',confusion_matrix(y_test,models['l2'].predict(X_test)))
print('\nAccuracy_score: \n',accuracy_score(y_test,models['l2'].predict(X_test)))
print('\nclassification_report\n',classification_report(y_test,models['l2'].predict(X_test)))


-----------------Logistic Regression 2------------------------------
Confusion Matrix: 
 [[126  16]
 [ 17 141]]

Accuracy_score: 
 0.89

classification_report
               precision    recall  f1-score   support

           0       0.88      0.89      0.88       142
           1       0.90      0.89      0.90       158

    accuracy                           0.89       300
   macro avg       0.89      0.89      0.89       300
weighted avg       0.89      0.89      0.89       300



In [8]:
print('\n-----------------Random Forest Classifier------------------------------')
print('Confusion Matrix: \n',confusion_matrix(y_test,models['rf'].predict(X_test)))
print('\nAccuracy_score: \n',accuracy_score(y_test,models['rf'].predict(X_test)))
print('\nclassification_report\n',classification_report(y_test,models['rf'].predict(X_test)))


-----------------Random Forest Classifier------------------------------
Confusion Matrix: 
 [[136   6]
 [ 16 142]]

Accuracy_score: 
 0.9266666666666666

classification_report
               precision    recall  f1-score   support

           0       0.89      0.96      0.93       142
           1       0.96      0.90      0.93       158

    accuracy                           0.93       300
   macro avg       0.93      0.93      0.93       300
weighted avg       0.93      0.93      0.93       300



In [9]:
print('\n-----------------Gradient Boosting Classifier------------------------------')
print('Confusion Matrix: \n',confusion_matrix(y_test,models['gb'].predict(X_test)))
print('\nAccuracy_score: \n',accuracy_score(y_test,models['gb'].predict(X_test)))
print('\nclassification_report\n',classification_report(y_test,models['gb'].predict(X_test)))


-----------------Gradient Boosting Classifier------------------------------
Confusion Matrix: 
 [[133   9]
 [ 15 143]]

Accuracy_score: 
 0.92

classification_report
               precision    recall  f1-score   support

           0       0.90      0.94      0.92       142
           1       0.94      0.91      0.92       158

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300

