# Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Load and preprocess the data

In [None]:
file_path = 'Churn_Modelling.csv'  # Replace with the actual path
data = pd.read_csv(file_path)

# Drop the columns that are not used as model features
data = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# Encode categorical variables.
# Each column gets its own encoder: a LabelEncoder only remembers the classes
# from its most recent fit, so sharing one instance across both columns would
# discard the Geography mapping as soon as Gender is fitted.
geography_encoder = LabelEncoder()
data['Geography'] = geography_encoder.fit_transform(data['Geography'])

gender_encoder = LabelEncoder()
data['Gender'] = gender_encoder.fit_transform(data['Gender'])

# Backward-compatible alias: this cell used to finish with `label_encoder`
# fitted on Gender, so keep that name bound to the Gender encoder.
label_encoder = gender_encoder

# Encode the target variable 'Exited' using Label Encoding
label_encoder_y = LabelEncoder()
y = label_encoder_y.fit_transform(data['Exited'])

# Features (X) are every remaining column except the target
X = data.drop('Exited', axis=1)


# Split the data into training and testing sets

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the feature values.
# The scaler learns its statistics from the training split only and is then
# applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


# Train Random Forest Classifier

In [None]:
# Seed the forest so repeated runs build the same model.
forest_params = {'random_state': 42}
rf_classifier = RandomForestClassifier(**forest_params)
rf_classifier.fit(X_train, y_train)


# Make Predictions and Evaluate the Model

In [None]:
# Predict labels for the held-out test split.
y_pred = rf_classifier.predict(X_test)

# Compare the predictions against the true test labels.
classification_rep = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Print the results
print(
    f"Accuracy: {accuracy}",
    "Classification Report:",
    classification_rep,
    sep="\n",
)


Accuracy: 0.8645
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.47      0.58       393

    accuracy                           0.86      2000
   macro avg       0.81      0.71      0.75      2000
weighted avg       0.85      0.86      0.85      2000



# New Data for Testing (customer expected not to churn)

In [None]:
# A single hypothetical customer, described with the same feature columns
# (and in the same order) as the training features.
low_churn_customer = {
    'CreditScore': 750,
    'Geography': 'Germany',
    'Gender': 'Male',
    'Age': 40,
    'Tenure': 5,
    'Balance': 120000,
    'NumOfProducts': 2,
    'HasCrCard': 1,
    'IsActiveMember': 1,
    'EstimatedSalary': 100000,
}
new_data_low_churn = pd.DataFrame([low_churn_customer])


# Preprocess the New Data

In [None]:
# The shared `label_encoder` was last fitted on Gender, so it no longer knows
# the Geography classes and raises a "previously unseen labels" ValueError on
# 'Germany'. Rebuild one encoder per column from the raw training file instead:
# LabelEncoder numbers classes in sorted order, so refitting on the same column
# reproduces the codes the model was trained on.
raw_categories = pd.read_csv(file_path, usecols=['Geography', 'Gender'])
geography_encoder = LabelEncoder().fit(raw_categories['Geography'])
gender_encoder = LabelEncoder().fit(raw_categories['Gender'])

new_data_low_churn['Geography'] = geography_encoder.transform(new_data_low_churn['Geography'])
new_data_low_churn['Gender'] = gender_encoder.transform(new_data_low_churn['Gender'])

# Apply the scaler that was fitted on the training split. This rebinds
# `new_data_low_churn` from a DataFrame to the scaled array used for prediction.
new_data_low_churn = scaler.transform(new_data_low_churn)


# Make Predictions

In [None]:
# Predict for the single preprocessed customer (reshaped to one row).
prediction_low_churn = rf_classifier.predict(new_data_low_churn.reshape(1, -1))

# Class 0 is reported as "not likely to churn"; anything else as "likely to churn".
predicted_label = prediction_low_churn[0]
churn_message = (
    "The model predicts that the customer is not likely to churn."
    if predicted_label == 0
    else "The model predicts that the customer is likely to churn."
)
print(churn_message)


The model predicts that the customer is not likely to churn.
