In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import OneHotEncoder

# --- Data ---------------------------------------------------------------
# Read bank.csv from the current working directory.
df = pd.read_csv("bank.csv")

# "deposit" is the label column; every other column is a feature.
y = df["deposit"]

# Dummy-encode the non-numeric feature columns in a single pass.
# drop_first=True omits the first level of each encoded column.
X = pd.get_dummies(df.drop(columns="deposit"), drop_first=True)

# --- Hold-out split -----------------------------------------------------
# 20% of the rows are reserved for evaluation; stratify=y keeps the label
# proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# --- Model --------------------------------------------------------------
# Hyperparameters gathered in one place so they are easy to adjust.
rf_params = {
    "n_estimators": 600,
    "max_depth": None,
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "random_state": 42,
    "n_jobs": -1,
}
# fit() returns the estimator itself, so construction and training chain.
model = RandomForestClassifier(**rf_params).fit(X_train, y_train)

# --- Evaluation on the held-out rows ------------------------------------
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)

print("Accuracy:", test_accuracy)
print(classification_report(y_test, y_pred))

Accuracy: 0.8593819973130318
              precision    recall  f1-score   support

          no       0.89      0.83      0.86      1175
         yes       0.83      0.89      0.86      1058

    accuracy                           0.86      2233
   macro avg       0.86      0.86      0.86      2233
weighted avg       0.86      0.86      0.86      2233



GEMINI...

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# 1. Load the dataset
df = pd.read_csv('bank.csv')

# 2. Preprocess the data using one-hot encoding
# Converts every non-numeric column into indicator columns. Because the whole
# frame is encoded, the 'deposit' label is encoded as well; with
# drop_first=True it survives as the single boolean column 'deposit_yes'.
df_encoded = pd.get_dummies(df, drop_first=True)

# 3. Define features (X) and target (y)
# y is boolean: True where the original 'deposit' value was 'yes'.
X = df_encoded.drop('deposit_yes', axis=1)
y = df_encoded['deposit_yes']

# 4. Split the data into training (80%) and testing (20%) sets
# NOTE(review): this split is not stratified, so the class ratio may differ
# slightly between the partitions. Passing stratify=y would enforce it, but
# would also change the rows in each partition and therefore every number
# reported below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Initialize and train the random forest model
# n_estimators=300 builds an ensemble of 300 trees. More trees reduce the
# variance of the ensemble but do not by themselves guarantee higher accuracy.
rf = RandomForestClassifier(n_estimators=300, random_state=42)
rf.fit(X_train, y_train)

# 6. Evaluate accuracy
# Predict once per partition and reuse the result, rather than re-running
# inference on X_test for the classification report.
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)

train_acc = accuracy_score(y_train, y_train_pred) * 100
test_acc = accuracy_score(y_test, y_test_pred) * 100
# CAUTION: the "overall" figure is scored on all rows, 80% of which the model
# was trained on, so it is optimistic. Use the testing accuracy as the
# estimate of performance on unseen data.
overall_acc = accuracy_score(y, rf.predict(X)) * 100

print(f"Training Accuracy: {train_acc:.2f}%")
print(f"Testing Accuracy: {test_acc:.2f}%")
print(f"Overall Dataset Accuracy: {overall_acc:.2f}%")

# Generate classification report on test data
print("\nClassification Report (Test Data):")
print(classification_report(y_test, y_test_pred))

Training Accuracy: 100.00%
Testing Accuracy: 84.01%
Overall Dataset Accuracy: 96.80%

Classification Report (Test Data):
              precision    recall  f1-score   support

       False       0.87      0.81      0.84      1166
        True       0.81      0.87      0.84      1067

    accuracy                           0.84      2233
   macro avg       0.84      0.84      0.84      2233
weighted avg       0.84      0.84      0.84      2233

