# Classification

In [1]:
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score, roc_auc_score, ConfusionMatrixDisplay


In [3]:
models = {
    "Logistic Regression": LogisticRegression(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train)    #Train Model

    # Make Predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Training Set Performance
    model_train_accuracy = accuracy_score(y_train, y_train_pred)        # Calculate Accuracy
    model_train_f1 = f1_score(y_train, y_train_pred, average='weighted')  # Calculate F1 Score
    model_train_precision = precision_score(y_train, y_train_pred)      # Calculate Precision Score
    model_train_recall = recall_score(y_train, y_train_pred)            # Calculate Recall Score
    model_train_rocauc_score = roc_auc_score(y_train, y_train_pred)     # Calculate ROC AUC Score

    # Test Set Performance
    model_test_accuracy = accuracy_score(y_test, y_test_pred)        # Calculate Accuracy
    model_test_f1 = f1_score(y_test, y_test_pred, average='weighted')  # Calculate F1 Score
    model_test_precision = precision_score(y_test, y_test_pred)      # Calculate Precision Score
    model_test_recall = recall_score(y_test, y_test_pred)            # Calculate Recall Score
    model_test_rocauc_score = roc_auc_score(y_test, y_test_pred)     # Calculate ROC AUC Score

    print(list(models.keys())[i])

    print('Model Performance for Training Set')
    print("- Accuracy: {:.4f}".format(model_train_accuracy))
    print("- F1: {:.4f}".format(model_train_f1))
    print("- Precision: {:.4f}".format(model_train_precision))
    print("- Recall: {:.4f}".format(model_train_recall))
    print("- ROC AUC Score: {:.4f}".format(model_train_rocauc_score))



    print('------------------------------------------------')

    print('Model Performance for Test Set')
    print("- Accuracy: {:.4f}".format(model_test_accuracy))
    print("- F1: {:.4f}".format(model_test_f1))
    print("- Precision: {:.4f}".format(model_test_precision))
    print("- Recall: {:.4f}".format(model_test_recall))
    print("- ROC AUC Score: {:.4f}".format(model_test_rocauc_score))



    print('='*40)
    print('\n')

Logistic Regression
Decision Tree
Random Forest


In [4]:
## Hyperparameter Training

# Whichever model gave us good accuracy, we will get hyperparameters of it from sklearn and tweak it further
# Random Forest
rf_params = {'max_depth':[5,8,15,None,10],
             'max_features': [5,7,'auto',8],
             'min_samples_split': [2,8,15,20],
             'n_estimators': [100,200,500,1000]
             }

In [5]:
rf_params

{'max_depth': [5, 8, 15, None, 10],
 'max_features': [5, 7, 'auto', 8],
 'min_samples_split': [2, 8, 15, 20],
 'n_estimators': [100, 200, 500, 1000]}

In [6]:
# Models list for Hyperparameter Tuning

randomcv_models = [
    ('RF', RandomForestClassifier(), rf_params)
]

In [7]:
randomcv_models

[('RF',
  RandomForestClassifier(),
  {'max_depth': [5, 8, 15, None, 10],
   'max_features': [5, 7, 'auto', 8],
   'min_samples_split': [2, 8, 15, 20],
   'n_estimators': [100, 200, 500, 1000]})]

In [None]:
# Training Stage

from sklearn.model_selection import RandomizedSearchCV

model_param = {}
for name, model, params in randomcv_models:
    random = RandomizedSearchCV(estimator=model,
                                param_distributions=params,
                                n_iter=100,
                                cv=3,
                                verbose=2,
                                n_jobs=-1)
    random.fit(X_train, y_train)
    model_param[name] =  random.best_params_

for model_name in model_param:
    print(f"--------------Best Params for {model_name} ---------------")
    print(model_param[model_name])

> ## Repeat Model Training step using the model which performed well. Let say Random forest Worked well, then we will use it again this way ....

In [None]:
models = {
    'Random Forest' : RandomForestClassifier(n_estimators=1000, min_samples_split=2,
                                             max_features=7, max_depth=None)
}

for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train)   # Train model

    # Make Predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Training Set Performance
    model_train_accuracy = accuracy_score(y_train, y_train_pred)
    model_train_f1 = f1_score(y_train, y_train_pred, average='weighted')
    model_train_precision = precision_score(y_train, y_train_pred)
    model_train_recall = recall_score(y_train, y_train_pred)            # Calculate Recall Score
    model_train_rocauc_score = roc_auc_score(y_train, y_train_pred)     # Calculate ROC AUC Score

    # Test Set Performance
    model_test_accuracy = accuracy_score(y_test, y_test_pred)        # Calculate Accuracy
    model_test_f1 = f1_score(y_test, y_test_pred, average='weighted')  # Calculate F1 Score
    model_test_precision = precision_score(y_test, y_test_pred)      # Calculate Precision Score
    model_test_recall = recall_score(y_test, y_test_pred)            # Calculate Recall Score
    model_test_rocauc_score = roc_auc_score(y_test, y_test_pred)     # Calculate ROC AUC Score

    print(list(models.keys())[i])

    print('Model Performance for Training Set')
    print("- Accuracy: {:.4f}".format(model_train_accuracy))
    print("- F1: {:.4f}".format(model_train_f1))
    print("- Precision: {:.4f}".format(model_train_precision))
    print("- Recall: {:.4f}".format(model_train_recall))
    print("- ROC AUC Score: {:.4f}".format(model_train_rocauc_score))



    print('------------------------------------------------')

    print('Model Performance for Test Set')
    print("- Accuracy: {:.4f}".format(model_test_accuracy))
    print("- F1: {:.4f}".format(model_test_f1))
    print("- Precision: {:.4f}".format(model_test_precision))
    print("- Recall: {:.4f}".format(model_test_recall))
    print("- ROC AUC Score: {:.4f}".format(model_test_rocauc_score))



    print('='*40)
    print('\n')

In [None]:
# Plot ROC AUC Curve

from sklearn.metrics import roc_auc_score, roc_curve
plt.figure()

# Add the models to the list that you want to view on the ROC plot
auc_models = [
    {
        'label': 'Random Forest Classifier',
        'model': RandomForestClassifier(n_estimators=1000, min_samples_split=2,
                                        max_features=7, max_depth=None),
        'auc': 0.8325
    }
]

# Create loop through all model
for algo in auc_models:
    model = algo['model']   # select the model
    model.fit(X_train, y_train)   # Train the model

    # Compute False positive rate, and True positive rate
    fpr, tpr, thresholds =  roc_curve(y_test, model.predict_proba(X_test))[:,1]

    # Calculate Area under the curve to display on the plot
    plt.plot(fpr, tpr, label='%s ROC (area = %0.2f)' % (algo['label'], algo['auc']))

# Custom Settings for the plot
plt.plot([0,1], [0,1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('1-Specificity (False Positive Rate)')
plt.ylabel('Sensitivity (True Positive Rate)')
plt.title('Receiver Operating Characteristic')
plt.legend(loc='lower right')
plt.savefig('auc.png')
plt.show()