In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Read the semicolon-separated CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs on other machines.
data = pd.read_csv(r"C:\Users\Hamza\Downloads\bank-full.csv", sep=';')

# Recode the target column from 'yes'/'no' labels to 1/0.
# Any label outside this mapping becomes NaN (Series.map semantics).
target_codes = {'yes': 1, 'no': 0}
data = data.assign(y=data['y'].map(target_codes))

# One-hot encode the categorical predictors; drop_first=True drops the first
# level of each column so the dummy columns are not perfectly collinear.
categorical_columns = [
    'job', 'marital', 'education', 'default', 'housing',
    'loan', 'contact', 'month', 'poutcome',
]
data = pd.get_dummies(data, columns=categorical_columns, drop_first=True)

# Numeric predictors that get standardized (zero mean, unit variance).
# NOTE(review): if this is the UCI Bank Marketing data, `duration` is only
# known after the call has ended — confirm whether it should be a predictor.
numerical_columns = ['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']

# Separate predictors from the binary target before any fitted preprocessing.
# `X` (and `data`) keep the unscaled values; only the split frames are scaled.
X = data.drop('y', axis=1)
y = data['y']

# Split FIRST so that no statistic of the held-out rows can influence
# preprocessing. stratify=y keeps the class ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Take explicit copies so the column assignments below modify independent
# frames rather than slices derived from `X`.
X_train = X_train.copy()
X_test = X_test.copy()

# Fit the scaler on the training rows only, then reuse the training mean/std
# for the test rows. Fitting on the full dataset (as was done before) leaks
# test-set information into the features the models are trained on.
scaler = StandardScaler()
X_train[numerical_columns] = scaler.fit_transform(X_train[numerical_columns])
X_test[numerical_columns] = scaler.transform(X_test[numerical_columns])


# Logistic regression baseline. max_iter is raised to 1000 to give the solver
# more iterations to converge. fit() returns the estimator itself, so
# construction and training are chained.
logistic_model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Hard class predictions for the held-out split.
logistic_preds = logistic_model.predict(X_test)


# Single decision tree. random_state is pinned (same seed as the split and the
# random forest) because tree construction involves randomness — e.g. when
# breaking ties between equally good splits — so an unseeded tree can give
# different metrics on every run.
tree_model = DecisionTreeClassifier(random_state=42)
tree_model.fit(X_train, y_train)

# Hard class predictions for the held-out split.
tree_preds = tree_model.predict(X_test)


# Random forest of 100 trees, seeded for reproducibility. fit() returns the
# estimator itself, so construction and training are chained.
forest_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Hard class predictions for the held-out split.
forest_preds = forest_model.predict(X_test)


# Overall accuracy on the held-out split, one line per classifier.
accuracy_lines = [
    ("Logistic Regression Accuracy:", logistic_preds),
    ("Decision Tree Accuracy:", tree_preds),
    ("Random Forest Accuracy:", forest_preds),
]
for heading, preds in accuracy_lines:
    print(heading, accuracy_score(y_test, preds))


# Per-class precision / recall / F1 for each classifier, printed after all
# accuracy lines so the output order matches the accuracy summary above.
report_sections = [
    ("\nLogistic Regression Report:\n", logistic_preds),
    ("\nDecision Tree Report:\n", tree_preds),
    ("\nRandom Forest Report:\n", forest_preds),
]
for heading, preds in report_sections:
    print(heading, classification_report(y_test, preds))


# Importance score per input column, taken from the fitted random forest.
feature_importances = forest_model.feature_importances_

# Pair each score with its column name, then rank from most to least important.
feature_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': feature_importances}
)
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)


# Show only the ten highest-ranked features.
print("\nTop 10 Most Important Features:\n", feature_importance_df.head(10))


Logistic Regression Accuracy: 0.902241226776762
Decision Tree Accuracy: 0.8707608375110587
Random Forest Accuracy: 0.9058537304629902

Logistic Regression Report:
               precision    recall  f1-score   support

           0       0.92      0.98      0.95     11977
           1       0.65      0.35      0.45      1587

    accuracy                           0.90     13564
   macro avg       0.79      0.66      0.70     13564
weighted avg       0.89      0.90      0.89     13564


Decision Tree Report:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93     11977
           1       0.45      0.46      0.45      1587

    accuracy                           0.87     13564
   macro avg       0.69      0.69      0.69     13564
weighted avg       0.87      0.87      0.87     13564


Random Forest Report:
               precision    recall  f1-score   support

           0       0.92      0.97      0.95     11977
           1       0.67  