#  <font color = 'blue'> Data Modeling</font>

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import Perceptron


from sklearn.model_selection import train_test_split


import tqdm
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc

In [None]:
# Read dt_top14_imp_features.csv into the bank_mkt data frame
data_file = 'dt_top14_imp_features.csv'
bank_mkt = pd.read_csv(data_file)

In [None]:
# Display the (rows, columns) dimensions of the loaded data frame
bank_mkt.shape

In [None]:
# Preview the top of the data frame (head() returns 5 rows by default)
bank_mkt.head()

In [None]:
# Check the structure of the dataset
# bank_mkt.info()

In [None]:
# Check for missing values in the dataset
# bank_mkt.isna().sum()

In [None]:
# Predictors: the first 14 columns of the data frame, selected by position
# NOTE(review): presumably the 'subscription' target sits after these 14
# columns — confirm the column order of the CSV
n_features = 14
X = bank_mkt.iloc[:, :n_features]
X.shape

In [None]:
# Target: the 'subscription' column of the data frame
y = bank_mkt['subscription']
y.shape

### <font color = 'blue'> Data Splitting</font>

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible
# NOTE(review): the split is not stratified even though the notebook treats the
# target as unbalanced — consider stratify=y if class ratios should match
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Preview the first 5 rows of the training predictors
X_train.head(5)

In [None]:
# Report the dimensions of each train/test partition, in a fixed order
for shape_label, partition in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(shape_label, partition.shape)

### <font color = 'blue'> Handling Unbalanced Data: Random Oversampling</font>

This technique involves randomly duplicating samples from the minority class to balance the number of samples in each class. 

In [None]:
# # Create a random oversampler object
# oversampler = RandomOverSampler(random_state=42)

In [None]:
# Resample the training data
# X_resampled, y_resampled = oversampler.fit_resample(X_train, y_train)

In [None]:
# Print the class distribution before and after oversampling
# print("Before oversampling:", Counter(y_train))
# print("After oversampling:", Counter(y_resampled))

In [None]:
# Plot a histogram to visualize the target variable after random oversampling
# plt.hist(y_resampled);

### <font color = 'blue'> Handling Unbalanced Data: SMOTE</font>

Synthetic Minority Oversampling Technique (SMOTE) is a statistical technique for increasing the number of minority-class cases in a dataset so that the classes become balanced. It works by generating new synthetic instances from the existing minority-class cases supplied as input, rather than duplicating them.

In [None]:
# Create a smote object
# smote = SMOTE(random_state=42)
# Resample the training data
# X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

In [None]:
# Print the class distribution before and after oversampling
# print("Before oversampling:", Counter(y_train))
# print("After oversampling:", Counter(y_resampled))

In [None]:
# Plot a histogram to visualize the target variable after SMOTE oversampling
# plt.hist(y_resampled);

### <font color="blue"> Perceptron</font>

In [None]:
# Instantiate the Perceptron classifier with a fixed seed and a 1e-3 stopping tolerance
perceptron = Perceptron(
    random_state=42,
    tol=0.001,
)

In [None]:
# Train the Perceptron model on the training split as-is: the oversampling
# cells above are commented out, so no resampled data is used here
perceptron.fit(X_train, y_train)

In [None]:
# Predict class labels for the held-out test predictors
y_pred = perceptron.predict(X=X_test)

In [None]:
# Summarize test-set performance: confusion matrix plus per-class report
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cr = classification_report(y_true=y_test, y_pred=y_pred)
# One print call; sep="\n" puts each item on its own line, and the leading
# "\n" in the second heading leaves a blank line between the two sections
print("Confusion Matrix:", cm, "\nClassification Report:", cr, sep="\n")

In [None]:
# Calculate Sensitivity, Specificity, Precision, Gmean, Accuracy, and AUC
# on the test set.
# Unpacking four values assumes a binary problem, i.e. a 2x2 confusion
# matrix flattened row by row as (tn, fp, fn, tp).
tn, fp, fn, tp = cm.ravel()
sensitivity = tp / (tp + fn)  # share of actual positives that were predicted positive
specificity = tn / (tn + fp)  # share of actual negatives that were predicted negative
precision = tp / (tp + fp)  # share of predicted positives that are actual positives
gmean = np.sqrt(sensitivity * specificity)
accuracy = accuracy_score(y_test, y_pred)
# ROC AUC measures how well the model ranks samples, so it needs continuous
# scores. Feeding it the hard labels in y_pred reduces the ROC curve to a
# single operating point; use the signed decision values instead.
y_score = perceptron.decision_function(X_test)
# NOTE: the name `auc` is kept so later cells that read it keep working, but
# it rebinds the `auc` function imported from sklearn.metrics; re-import that
# function under an alias if it is needed after this cell.
auc = roc_auc_score(y_test, y_score)
print("Sensitivity:", sensitivity)
print("Specificity:", specificity)
print("Precision:", precision)
print("Gmean:", gmean)
print("Accuracy:", accuracy)
print("AUC:", auc)