In [28]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier


df = pd.read_csv('bank-full.csv', sep=';')


# Binary map yes/no columns
df['housing']=df['housing'].map({'yes' : 1 , 'no' : 0})
df['loan']=df['loan'].map({'yes' : 1 , 'no' : 0})
df['default']=df['default'].map({'yes' : 1 , 'no' : 0})
df['y']=df['y'].map({'yes' : 1 , 'no' : 0})

# One-hot encoding multiclass categorical columns
encoder = pd.get_dummies(df, columns=['job', 'marital', 'education', 'contact', 'month', 'poutcome'])

# Scaling numerical features
scaler = MinMaxScaler()
values = scaler.fit_transform(df[['age','balance','day','duration','campaign','pdays','previous']])
df[['age','balance','day','duration','campaign','pdays','previous']] = values

# Creates features and labels
X= encoder.drop('y', axis=1)
y= encoder['y']

#Splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Logistic Regression
model = LogisticRegression(class_weight ='balanced', max_iter=1000, solver='liblinear')
model.fit(X_train,y_train)

# Evaluate Logistic Regression
cmatrix = confusion_matrix(y_test, model.predict(X_test))
print(cmatrix)
print(classification_report(y_test, model.predict(X_test)))

# Train and evaluate Random Forest
forest = RandomForestClassifier(max_depth=2, class_weight='balanced')
RForest = forest.fit(X_train, y_train)
print(confusion_matrix(y_test, RForest.predict(X_test)))
print(classification_report(y_test, RForest.predict(X_test)))










[[6718 1234]
 [ 184  907]]
              precision    recall  f1-score   support

           0       0.97      0.84      0.90      7952
           1       0.42      0.83      0.56      1091

    accuracy                           0.84      9043
   macro avg       0.70      0.84      0.73      9043
weighted avg       0.91      0.84      0.86      9043

[[6203 1749]
 [ 291  800]]
              precision    recall  f1-score   support

           0       0.96      0.78      0.86      7952
           1       0.31      0.73      0.44      1091

    accuracy                           0.77      9043
   macro avg       0.63      0.76      0.65      9043
weighted avg       0.88      0.77      0.81      9043

