In [14]:
# Importing required libraries for data handling, modeling, and evaluation
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [15]:
# Load the bank customer table from the CSV file in the working directory
df = pd.read_csv("Bank.csv")

In [4]:
# Count the missing values in each column before any transformation
print("Missing values before handling:")
print(df.isna().sum())

Missing values before handling:
customer_id         0
credit_score        0
country             0
gender              0
age                 0
tenure              0
balance             0
products_number     0
credit_card         0
active_member       0
estimated_salary    0
churn               0
dtype: int64


In [5]:
# Replace the text columns with integer category codes.
# pandas numbers the categories in sorted label order, so any hand-written
# mapping applied to new data at prediction time has to follow that same order.
for text_column in ('country', 'gender'):
    df[text_column] = df[text_column].astype('category').cat.codes

In [16]:
# Features: every column except customer_id and the churn label
X = df.drop(columns=['customer_id', 'churn'])
# Target: the churn column
y = df['churn']


In [7]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Create the Random Forest classifier with a fixed seed
model = RandomForestClassifier(
    random_state=42,
)
# Fit it on the training split
model.fit(X_train, y_train)


In [9]:
# Score the held-out test split with the fitted model
y_pred = model.predict(X_test)

# Report per-class precision, recall and F1 for those predictions
print("\nModel evaluation:")
print(classification_report(y_true=y_test, y_pred=y_pred))


Model evaluation:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.47      0.57       393

    accuracy                           0.86      2000
   macro avg       0.82      0.71      0.75      2000
weighted avg       0.85      0.86      0.85      2000



In [10]:
# Function to collect one customer's details from the keyboard, encoded like the training data

def get_customer_input():
    """Prompt for one customer's features and return them as a dict.

    Each field is read with ``input``. Numeric fields are parsed with
    ``int``/``float``; country and gender are typed as text (case and
    surrounding whitespace are ignored) and converted to integer codes.

    Returns:
        dict: keys ``credit_score``, ``country``, ``gender``, ``age``,
        ``tenure``, ``balance``, ``products_number``, ``credit_card``,
        ``active_member`` and ``estimated_salary``, in that order, with
        ``country`` and ``gender`` as integer codes.

    Raises:
        ValueError: if a numeric answer cannot be parsed by ``int``/``float``.
        KeyError: if the country or gender is not one of the listed options.
    """
    # The codes must match what ``astype('category').cat.codes`` produced for
    # the training frame: pandas numbers categories in sorted label order, so
    # Germany comes before Spain.
    # NOTE(review): assumes the CSV labels are exactly France/Germany/Spain and
    # Female/Male - confirm against the raw file.
    country_codes = {'france': 0, 'germany': 1, 'spain': 2}
    gender_codes = {'female': 0, 'male': 1}

    print("\nEnter customer details:")
    credit_score = int(input("Credit score: "))

    country_text = input("Country (France/Spain/Germany): ").strip().lower()
    if country_text not in country_codes:
        # Fail right away instead of after the remaining prompts.
        raise KeyError(f"Unknown country {country_text!r}; expected France, Spain or Germany")

    gender_text = input("Gender (Male/Female): ").strip().lower()
    if gender_text not in gender_codes:
        raise KeyError(f"Unknown gender {gender_text!r}; expected Male or Female")

    age = int(input("Age: "))
    tenure = int(input("Tenure (years with bank): "))
    balance = float(input("Balance: "))
    products_number = int(input("Number of products: "))
    credit_card = int(input("Has credit card (1=Yes, 0=No): "))
    active_member = int(input("Is active member (1=Yes, 0=No): "))
    estimated_salary = float(input("Estimated salary: "))

    return {
        'credit_score': credit_score,
        'country': country_codes[country_text],
        'gender': gender_codes[gender_text],
        'age': age,
        'tenure': tenure,
        'balance': balance,
        'products_number': products_number,
        'credit_card': credit_card,
        'active_member': active_member,
        'estimated_salary': estimated_salary
    }

In [12]:
# Prompt for one customer's details, then wrap the answers in a one-row DataFrame
customer_data = get_customer_input()
df_new = pd.DataFrame(data=[customer_data])


Enter customer details:


Credit score:  10
Country (France/Spain/Germany):  France
Gender (Male/Female):  Male
Age:  23
Tenure (years with bank):  5
Balance:  1000000
Number of products:  5
Has credit card (1=Yes, 0=No):  1
Is active member (1=Yes, 0=No):  0
Estimated salary:  156600


In [17]:
# Put the columns in the same order as the training features, then predict
df_new = df_new.loc[:, X_train.columns]
prediction = model.predict(df_new)

# Show the predicted churn label for the entered customer
print("\nPrediction:")
print("Will churn? (1=Yes, 0=No):", prediction[0])


Prediction:
Will churn? (1=Yes, 0=No): 1
