# Bank Marketing Term Deposit Prediction
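A machine learning project, implemented in Python, that trains and compares Logistic Regression, Decision Tree, and Naive Bayes classifiers to predict the `deposit` target.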


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score


In [None]:
# Read the dataset CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer="bank 2.csv")
df.head(n=5)

In [None]:
# Treat the literal string 'unknown' as a missing value
df.replace('unknown', pd.NA, inplace=True)

# Fill missing categorical values with each column's most frequent value (mode)
for col in df.select_dtypes(include='object').columns:
    df[col] = df[col].fillna(df[col].mode()[0])

# Encode categorical columns as integer codes.
# Each column gets its OWN fitted encoder, stored in `label_encoders`, so the
# code -> category mapping of every column stays available afterwards
# (e.g. label_encoders[col].classes_ or .inverse_transform). Re-fitting one
# shared encoder would keep only the mapping of the last column processed.
label_encoders = {}
for col in df.select_dtypes(include='object').columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [None]:
# Train-test split.
# The split happens BEFORE scaling so that the scaler's statistics are
# estimated from the training rows only; fitting the scaler on the full frame
# would leak information about the test rows into training.
X = df.drop('deposit', axis=1)
y = df['deposit']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Work on explicit copies so the column assignments below modify only the
# split frames. Note: `df` and `X` keep their original, unscaled values.
X_train = X_train.copy()
X_test = X_test.copy()

# Scale selected numerical features: fit on the training split, then apply the
# same fitted transform to the test split.
scaler = StandardScaler()
scaled_cols = ['age', 'balance', 'duration', 'campaign', 'pdays', 'previous']
X_train[scaled_cols] = scaler.fit_transform(X_train[scaled_cols])
X_test[scaled_cols] = scaler.transform(X_test[scaled_cols])

In [None]:
# Build each classifier and fit it on the training split in a single step;
# `fit` returns the estimator, so each name is bound to a fitted model.
log_model = LogisticRegression().fit(X_train, y_train)
tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
nb_model = GaussianNB().fit(X_train, y_train)

In [None]:
# Evaluation function
def evaluate_model(name, model, X_test, y_test):
    """Score a fitted binary classifier on held-out data.

    Args:
        name: Display name stored under the "Model" key of the result.
        model: Fitted estimator exposing ``predict`` and, optionally,
            ``predict_proba``.
        X_test: Features passed to ``model.predict`` / ``model.predict_proba``.
        y_test: True labels corresponding to ``X_test``.

    Returns:
        A dict with the keys "Model", "Accuracy", "Precision", "Recall",
        "F1-Score" and "AUC". Precision, recall and F1 refer to the positive
        class. "AUC" is the string "N/A" when the model has no
        ``predict_proba`` method.

    Raises:
        KeyError: If the positive class appears in neither ``y_test`` nor the
            predictions, so the classification report has no entry for it.
    """
    y_pred = model.predict(X_test)

    if hasattr(model, "predict_proba"):
        # Column 1 holds the probability of the second class in model.classes_.
        y_prob = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, y_prob)
    else:
        auc = "N/A"

    # The report dict is keyed by the string form of each label. Use the same
    # class that the probability column above refers to, instead of assuming
    # the positive label is literally 1; fall back to "1" when the model does
    # not expose `classes_`.
    classes = getattr(model, "classes_", None)
    if classes is not None and len(classes) > 1:
        positive_key = str(classes[1])
    else:
        positive_key = "1"

    report = classification_report(y_test, y_pred, output_dict=True)
    positive_metrics = report[positive_key]
    return {
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": positive_metrics["precision"],
        "Recall": positive_metrics["recall"],
        "F1-Score": positive_metrics["f1-score"],
        "AUC": auc,
    }

# Score every fitted model on the test split, pairing each with its display name
results = [
    evaluate_model(label, clf, X_test, y_test)
    for label, clf in (
        ("Logistic Regression", log_model),
        ("Decision Tree", tree_model),
        ("Naive Bayes", nb_model),
    )
]

# Tabulate the metrics, one row per model
pd.DataFrame(data=results)