In [142]:
import pandas as pd

In [143]:
df=pd.read_csv("Churn_Modelling.csv")

In [144]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [145]:
# Separate features (X) and target variable (y)
X = df.drop(['RowNumber', 'CustomerId', 'Surname', 'Exited'], axis=1)
y = df['Exited']

In [146]:
print(X)
print(y)

      CreditScore Geography  Gender  Age  Tenure    Balance  NumOfProducts  \
0             619    France  Female   42       2       0.00              1   
1             608     Spain  Female   41       1   83807.86              1   
2             502    France  Female   42       8  159660.80              3   
3             699    France  Female   39       1       0.00              2   
4             850     Spain  Female   43       2  125510.82              1   
...           ...       ...     ...  ...     ...        ...            ...   
9995          771    France    Male   39       5       0.00              2   
9996          516    France    Male   35      10   57369.61              1   
9997          709    France  Female   36       7       0.00              1   
9998          772   Germany    Male   42       3   75075.31              2   
9999          792    France  Female   28       4  130142.79              1   

      HasCrCard  IsActiveMember  EstimatedSalary  
0           

In [147]:
# Convert categorical variables to dummy/indicator variables
X = pd.get_dummies(X, columns=['Geography', 'Gender'], drop_first=True)

In [148]:
X

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Geography_Germany,Geography_Spain,Gender_Male
0,619,42,2,0.00,1,1,1,101348.88,False,False,False
1,608,41,1,83807.86,1,0,1,112542.58,False,True,False
2,502,42,8,159660.80,3,1,0,113931.57,False,False,False
3,699,39,1,0.00,2,0,0,93826.63,False,False,False
4,850,43,2,125510.82,1,1,1,79084.10,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,False,False,True
9996,516,35,10,57369.61,1,1,1,101699.77,False,False,True
9997,709,36,7,0.00,1,0,1,42085.58,False,False,False
9998,772,42,3,75075.31,2,1,0,92888.52,True,False,True


In [149]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [150]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [151]:
# Standardize the features (optional but can be beneficial for some algorithms)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [152]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [153]:
# Create a Logistic Regression model
model = LogisticRegression(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

In [154]:
# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

In [155]:
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

Accuracy: 0.81
Confusion Matrix:
 [[1543   64]
 [ 314   79]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.96      0.89      1607
           1       0.55      0.20      0.29       393

    accuracy                           0.81      2000
   macro avg       0.69      0.58      0.59      2000
weighted avg       0.78      0.81      0.77      2000



In [165]:
# Function to predict churn for new inputs
import numpy as np
def predict_churn(model, scaler):
    print("Enter customer information to predict churn:")
    
    # Collect user input
    credit_score = float(input("Credit Score: "))
    age = int(input("Age: "))
    tenure = int(input("Tenure (number of years with the bank): "))
    balance = float(input("Account Balance: "))
    num_of_products = int(input("Number of Bank Products: "))
    has_credit_card = int(input("Has Credit Card (1 for Yes, 0 for No): "))
    is_active_member = int(input("Is Active Member (1 for Yes, 0 for No): "))
    estimated_salary = float(input("Estimated Salary: "))
    
    geography = input("Geography (France, Spain, Germany): ").capitalize()
    gender = input("Gender (Male or Female): ").capitalize()

    # Create a DataFrame with the user input
    user_data = pd.DataFrame({
        'CreditScore': [credit_score],
        'Age': [age],
        'Tenure': [tenure],
        'Balance': [balance],
        'NumOfProducts': [num_of_products],
        'HasCrCard': [has_credit_card],
        'IsActiveMember': [is_active_member],
        'EstimatedSalary': [estimated_salary],
        'Geography_France': [1 if geography == 'France' else 0],
        'Geography_Spain': [1 if geography == 'Spain' else 0],
        'Gender_Male': [1 if gender == 'Male' else 0]
    },columns=feature_names)

    # Standardize the user input using the same scaler used for training
    user_data_scaled = scaler.transform(user_data)
    
    if np.isnan(user_data_scaled).any():
        print("Error: User input contains missing values. Please provide valid input.")
        return None

    # Make a prediction using the model
    prediction = model.predict(user_data_scaled)

    # Return the prediction result
    return prediction[0]

feature_names = X.columns.tolist()
# Call the function with the trained model and scaler
result = predict_churn(model, scaler)

# Display the prediction result
if result == 1:
    print("The model predicts that the customer is likely to churn.")
else:
    print("The model predicts that the customer is not likely to churn.")


Enter customer information to predict churn:
Credit Score: 612
Age: 45
Tenure (number of years with the bank): 3
Account Balance: 45600
Number of Bank Products: 2
Has Credit Card (1 for Yes, 0 for No): 1
Is Active Member (1 for Yes, 0 for No): 1
Estimated Salary: 14500
Geography (France, Spain, Germany): spain
Gender (Male or Female): male
Error: User input contains missing values. Please provide valid input.
The model predicts that the customer is not likely to churn.
