# Evaluation Metrics

### ROC Curve

In [25]:
def plot_roc_curve(fpr, tpr, roc_auc):
    """Plot a ROC curve and show it.

    Draws ``tpr`` against ``fpr`` as a solid blue line whose legend entry
    carries ``roc_auc`` formatted to two decimals, adds a dashed red
    diagonal from (0, 0) to (1, 1), clips both axes to [0, 1] and calls
    ``plt.show()``.

    Parameters
    ----------
    fpr : values plotted on the x axis (false positive rate).
    tpr : values plotted on the y axis (true positive rate).
    roc_auc : number shown in the legend label.
    """
    auc_label = 'AUC = %0.2f' % roc_auc
    unit_interval = [0, 1]

    # Curve first, then the diagonal, so the drawing order is unchanged
    plt.plot(fpr, tpr, 'b', label=auc_label)
    plt.plot(unit_interval, unit_interval, 'r--')

    # Only the labelled curve ends up in the legend
    plt.legend(loc='lower right')
    plt.title('Receiver Operating Characteristic')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.xlim(unit_interval)
    plt.ylim(unit_interval)
    plt.show()

### PR Curve

In [26]:
def plot_pr_curve(precision, recall, average_precision):
    """Plot a precision-recall curve and show it.

    Draws ``precision`` against ``recall`` as a translucent blue step line
    with the area underneath shaded, puts ``average_precision`` (two
    decimals) in the title and calls ``plt.show()``.

    Parameters
    ----------
    precision : values plotted on the y axis.
    recall : values plotted on the x axis.
    average_precision : number shown in the plot title.
    """
    # The outline and the shaded area share colour and transparency
    shading = dict(color='b', alpha=0.2)
    plt.step(recall, precision, where='post', **shading)
    plt.fill_between(recall, precision, step='post', **shading)

    plt.title('2-class Precision-Recall curve: AP={0:0.2f}'.format(average_precision))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.xlim([0.0, 1.0])
    # Slight head-room above 1.0 so a curve at precision == 1 stays visible
    plt.ylim([0.0, 1.05])
    plt.show()

### Classification Score

In [27]:
def _positive_class_scores(clf, X):
    """Return continuous positive-class scores for ``X`` when ``clf`` offers them.

    Preference order: ``predict_proba`` (second column), then
    ``decision_function``, then hard ``predict`` labels as a last resort.
    """
    if hasattr(clf, "predict_proba"):
        # assumes a binary problem where column 1 is the positive class
        # (scikit-learn orders the columns like ``clf.classes_``) — TODO confirm
        return np.asarray(clf.predict_proba(X))[:, 1]
    if hasattr(clf, "decision_function"):
        return clf.decision_function(X)
    # No continuous output available: fall back to thresholded labels
    return clf.predict(X)


def clf_score(clf, X_train, y_train, X_val, y_val, train=True):
    """Print (and, for validation, plot) classification metrics for ``clf``.

    Parameters
    ----------
    clf : fitted classifier exposing ``predict``; ``predict_proba`` or
        ``decision_function`` is used for the ranking metrics when present.
    X_train, y_train : training features / labels (used when ``train`` is truthy).
    X_val, y_val : validation features / labels (used when ``train`` is falsy).
    train : truthy -> report on the training data, including a 10-fold
        ``cross_val_score`` accuracy mean and standard deviation;
        falsy -> report on the validation data, including PR and ROC plots
        and a confusion-matrix display.

    Returns
    -------
    None. All results are printed or plotted.
    """
    if train:
        # Predict once and reuse the labels for every training metric
        y_train_pred = clf.predict(X_train)

        print("Train Result:\n")
        print("accuracy score: {0:.4f}\n".format(accuracy_score(y_train, y_train_pred)))
        print("Classification Report: \n {}\n".format(classification_report(y_train, y_train_pred)))
        print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_train, y_train_pred)))

        res = cross_val_score(clf, X_train, y_train, cv=10, scoring='accuracy')
        print("Average Accuracy: \t {0:.4f}".format(np.mean(res)))
        print("Accuracy SD: \t\t {0:.4f}".format(np.std(res)))
        return

    # Any falsy ``train`` (False, 0, None) selects the validation report;
    # previously values other than False silently produced no output.
    y_val_pred = clf.predict(X_val)
    # Ranking metrics need continuous scores: with hard 0/1 labels the
    # PR/ROC curves degenerate to a single operating point.
    y_val_score = _positive_class_scores(clf, X_val)

    print("Validation Result:\n")
    print("accuracy score: {0:.4f}\n".format(accuracy_score(y_val, y_val_pred)))

    precision, recall, _ = precision_recall_curve(y_val, y_val_score)
    average_precision = average_precision_score(y_val, y_val_score)
    plot_pr_curve(precision, recall, average_precision)

    fpr, tpr, _ = roc_curve(y_val, y_val_score)
    roc_auc = roc_auc_score(y_val, y_val_score)
    print("roc auc score: {}\n".format(roc_auc))
    plot_roc_curve(fpr, tpr, roc_auc)

    print("Classification Report: \n {}\n".format(classification_report(y_val, y_val_pred)))
    print("Confusion Matrix: \n {}\n".format(confusion_matrix(y_val, y_val_pred)))
    ConfusionMatrixDisplay.from_estimator(clf, X_val, y_val)
    print("End of validation Result\n")

### Classification Metrics

In [28]:
def evaluation_metrics(y_actual, y_pred):
    """Plot PR and ROC curves and print summary metrics for given predictions.

    In order: shows the precision-recall plot, prints the ROC AUC, shows the
    ROC plot, then prints the classification report and confusion matrix.
    The same ``y_pred`` is passed to every metric.

    Parameters
    ----------
    y_actual : ground-truth labels.
    y_pred : predictions compared against ``y_actual``.
    """
    # Precision-recall curve with its average-precision summary
    pr_precision, pr_recall = precision_recall_curve(y_actual, y_pred)[:2]
    avg_precision = average_precision_score(y_actual, y_pred)
    plot_pr_curve(pr_precision, pr_recall, avg_precision)

    # ROC curve with its area under the curve
    false_pos_rate, true_pos_rate = roc_curve(y_actual, y_pred)[:2]
    auc_value = roc_auc_score(y_actual, y_pred)
    print("roc auc score: {}\n".format(auc_value))
    plot_roc_curve(false_pos_rate, true_pos_rate, auc_value)

    # Text summaries
    report_text = classification_report(y_actual, y_pred)
    print("Classification Report: \n {}\n".format(report_text))
    matrix = confusion_matrix(y_actual, y_pred)
    print("Confusion Matrix: \n {}\n".format(matrix))

# AutoML

In [42]:
# One-time setup if TPOT is missing (0.12.2 is the version in the install log below):
# %pip install tpot==0.12.2

Collecting tpot
  Downloading TPOT-0.12.2-py3-none-any.whl.metadata (2.0 kB)
Collecting deap>=1.2 (from tpot)
  Downloading deap-1.4.1-cp310-cp310-win_amd64.whl.metadata (13 kB)
Collecting update-checker>=0.16 (from tpot)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Collecting stopit>=1.1.1 (from tpot)
  Downloading stopit-1.1.2.tar.gz (18 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Downloading TPOT-0.12.2-py3-none-any.whl (87 kB)
   ---------------------------------------- 0.0/87.4 kB ? eta -:--:--
   ---------------------------------------- 87.4/87.4 kB 5.1 MB/s eta 0:00:00
Downloading deap-1.4.1-cp310-cp310-win_amd64.whl (109 kB)
   ---------------------------------------- 0.0/109.3 kB ? eta -:--:--
   ---------------------------------------- 109.3/109.3 kB 6.2 MB/s eta 0:00:00
Downloading update_checker-0.18.0-py3-none-any.whl (7.0 kB)
Building wheels for collected packages: stopit
  Building wh

In [43]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from yellowbrick.target import ClassBalance
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, classification_report
import xgboost as xgb
from tpot import TPOTClassifier

In [44]:
# Read the raw bank-marketing data and prepare it for modelling in one pass:
#   1. add a binary `target` column (1 where `subscribed` is 'yes', else 0)
#   2. discard the original `subscribed` label and the `duration` column
#   3. one-hot encode the remaining categorical columns
df = (
    pd.read_csv('bank_marketing_dataset.csv')
    .assign(target=lambda frame: (frame['subscribed'] == 'yes').astype(int))
    .drop(columns=['subscribed', 'duration'])
    .pipe(pd.get_dummies)
)

In [45]:
# Separate the label column from the predictors
y = df['target']
X = df.drop(columns='target')

# Two-stage split giving 60% train / 20% validation / 20% test:
#   stage 1 holds out 20% of all rows as the test set;
#   stage 2 takes 25% of the remaining 80% (0.25 * 0.8 = 0.2) for validation.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=0.25,
    random_state=42,
)


In [46]:
# Fit the scaler on the training split only, then apply that same fitted
# scaler to the training, validation and test splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Resample the scaled training split with SMOTE to balance the classes;
# the validation and test splits are not resampled
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(
    X_train_scaled,
    y_train,
)

In [47]:
# Configure the TPOT search; nothing is fitted in this cell
tpot = TPOTClassifier(
    generations=5,
    population_size=20,
    random_state=42,
    verbosity=2,
)

In [48]:
# Run the TPOT search on the scaled, SMOTE-balanced training split only;
# the validation and test splits are not passed in here.
# NOTE(review): TPOT's internal CV score is presumably computed on this
# resampled data, so it may look better than scores on the untouched
# validation/test splits — confirm before comparing the numbers.
tpot.fit(X_train_balanced, y_train_balanced)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Optimization Progress:   0%|          | 0/120 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.9420036349153161

Generation 2 - Current best internal CV score: 0.9420036349153161

Generation 3 - Current best internal CV score: 0.9420036349153161

Generation 4 - Current best internal CV score: 0.9420036349153161

Generation 5 - Current best internal CV score: 0.9442159530313912

Best pipeline: ExtraTreesClassifier(VarianceThreshold(input_matrix, threshold=0.001), bootstrap=False, criterion=entropy, max_features=0.5, min_samples_leaf=1, min_samples_split=3, n_estimators=100)


In [51]:
# Keep a handle on the pipeline that TPOT fitted; the bare name on the last
# line makes the notebook render it as the cell's output.
best_pipeline = tpot.fitted_pipeline_
best_pipeline

In [52]:
# List the fitted pipeline's steps by name, numbered from 1
print("Best pipeline steps:")
for step_no, step in enumerate(best_pipeline.steps, start=1):
    # each entry of `.steps` is unpacked as a (name, object) pair; only the name is shown
    print(f"{step_no}. {step[0]}")

Best pipeline steps:
1. variancethreshold
2. extratreesclassifier


In [49]:
# --- Scores on the held-out splits (validation first, then test) ---
# The value returned by `tpot.score` is reported here as accuracy.
val_accuracy = tpot.score(X_val_scaled, y_val)
print(f'Validation Accuracy: {val_accuracy:.4f}')
test_accuracy = tpot.score(X_test_scaled, y_test)
print(f'Test Accuracy: {test_accuracy:.4f}')

# --- Names of the steps in the pipeline TPOT fitted, numbered from 1 ---
print("Best pipeline steps:")
for step_no, step in enumerate(tpot.fitted_pipeline_.steps, start=1):
    print(f"{step_no}. {step[0]}")

# --- Classification report on the test split ---
y_pred = tpot.predict(X_test_scaled)
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

Validation Accuracy: 0.8833
Test Accuracy: 0.8748
Best pipeline steps:
1. variancethreshold
2. extratreesclassifier
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.94      0.93      7303
           1       0.43      0.33      0.37       935

    accuracy                           0.87      8238
   macro avg       0.67      0.64      0.65      8238
weighted avg       0.86      0.87      0.87      8238

