In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('bmh')
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Ask for an interactive Colab upload only when the dataset is not already on
# disk. This keeps "Restart & Run All" from blocking on the upload prompt and
# lets the notebook run outside Colab once the CSV sits in the working directory.
import os

if os.path.exists('Churn_Modelling.csv'):
    uploaded = {}  # nothing uploaded in this session; keeps the name defined
else:
    from google.colab import files
    uploaded = files.upload()

In [None]:
# Load the dataset from the CSV file in the working directory.
data = pd.read_csv('Churn_Modelling.csv')

In [None]:
# Render the DataFrame as the cell output.
data

In [None]:
# (rows, columns) of the loaded table.
data.shape

In [None]:
# First five rows.
data.head()

In [None]:
# Column names, non-null counts and dtypes.
data.info()

In [None]:
# Number of missing values per column.
data.isnull().sum()

In [None]:
# Summary statistics of the numeric columns.
data.describe()

In [None]:
# Exploratory Data Analysis #

# Select only numeric columns for correlation calculation
data = data.select_dtypes(include=np.number)

# Calculate and display the correlation matrix
correlation_matrix = data.corr()
display(correlation_matrix)

In [None]:
plt.figure(figsize=(15,10))
corr = data.corr()
sns.heatmap(corr, annot=True, cmap = 'Accent')
plt.title('Correlation Matrix')
plt.show()

In [None]:
corr

In [None]:
features = corr['Exited'].sort_values(ascending=False)[1:]

In [None]:
features.index

In [None]:
features.values

In [None]:
# Bar chart of each feature's correlation with the 'Exited' column.
fig, ax = plt.subplots(figsize=(20, 10))
sns.barplot(x=features.index, y=features.values, palette='viridis', ax=ax)
ax.set_xlabel('Features')
ax.set_ylabel('Correlation with Exited')
ax.set_title('Correlation with Exited')
plt.show()

In [None]:
# Data Visualization #

data.head()

In [None]:
print(data['Exited'].value_counts())

sns.countplot(x = 'Exited', data = data, palette = 'viridis')
plt.show()

In [None]:
data = pd.read_csv('Churn_Modelling.csv')

In [None]:
print(data['Gender'].value_counts())

sns.countplot(x = 'Gender', data = data, palette = 'viridis')
plt.show()

In [None]:
print(data['Tenure'].value_counts())

sns.countplot(x = 'Tenure', data = data, palette = 'viridis')
plt.show()

In [None]:
print(data['NumOfProducts'].value_counts())

sns.countplot(x = 'NumOfProducts', data = data, palette = 'viridis')
plt.show()

In [None]:
print(data['HasCrCard'].value_counts())

sns.countplot(x = 'HasCrCard', data = data, palette = 'viridis')
plt.show()

In [None]:
# Finding Relations #

sns.countplot(x = 'Exited', hue = 'Gender', data = data, palette = 'viridis')
plt.show()

In [None]:
sns.countplot(x = 'Exited', hue = 'Geography', data = data, palette = 'viridis')
plt.show()

In [None]:
sns.countplot(x = 'Exited', hue = 'NumOfProducts', data = data, palette = 'viridis')
plt.show()

In [None]:
# FacetGrid creates its own figure, so the former plt.figure(figsize=(20,10))
# call only produced an extra empty figure and its size was never applied.
# The intended 20x10 size is requested through height/aspect instead.
age_grid = sns.FacetGrid(data, hue = 'Exited', palette = 'viridis', height = 10, aspect = 2)
# One Age density curve per 'Exited' value, with a legend for the hue levels.
age_grid.map(sns.kdeplot, 'Age').add_legend()
plt.show()

In [None]:
# Data Preprocessing #

data.head()

In [None]:
data = pd.read_csv('Churn_Modelling.csv')
data.drop(columns = ['RowNumber', 'CustomerId', 'Surname'], inplace = True)

In [None]:
data.head()

In [None]:
data['Geography'] = data['Geography'].astype('category').cat.codes

In [None]:
data['Gender'] = np.where(data['Gender'] == 'Male', 1, 0)

In [None]:
data.head()

In [None]:
X = data.drop(columns = ['Exited']).values
y = data['Exited'].values

In [None]:
X.shape

In [None]:
y.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing with a fixed seed. `stratify = y` keeps
# the share of each 'Exited' class the same in both partitions; with a purely
# random split the class proportions can drift between train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.33, random_state = 42, stratify = y
)

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, f1_score

In [None]:
from xgboost import XGBClassifier

In [None]:
# Baseline: XGBClassifier constructed with no arguments (library defaults),
# fitted on the training partition.
basic_model = XGBClassifier()
basic_model.fit(X_train, y_train)

In [None]:
def evaluate_model(model):
    """Print train/test scores and test-set classification diagnostics.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` arrays. Nothing is returned; every result is printed.

    Parameters
    ----------
    model
        Fitted estimator exposing ``score(X, y)`` and ``predict(X)``.
    """
    # Score on both partitions so a train/test gap is visible side by side.
    splits = (
        ('Training Accuracy : ', X_train, y_train),
        ('Testing Accuracy : ', X_test, y_test),
    )
    for label, inputs, targets in splits:
        print(label, model.score(inputs, targets))

    # The remaining diagnostics use predictions on the test partition only.
    predictions = model.predict(X_test)
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print('F1 Score : ', f1_score(y_test, predictions))

In [None]:
# Report the baseline model's scores and test-set diagnostics.
evaluate_model(basic_model)

In [None]:
# Hyperparameter Tuning #

from sklearn.model_selection import GridSearchCV

In [None]:
xgb_params = {
    'n_estimators' : [100, 200,],
    'learning_rate' : [0.01, 0.1],
    'max_depth' : [4, 5]
}

In [None]:
grid = GridSearchCV(XGBClassifier(), param_grid = xgb_params, cv = 10, scoring = 'accuracy')
grid.fit(X_train, y_train)

In [None]:
print("Best Score", grid.best_score_)
print("Best Params", grid.best_params_)

In [None]:
model_1 = XGBClassifier(max_depth = 4)
model_1.fit(X_train, y_train)

In [None]:
evaluate_model(model_1)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
model_2 = GradientBoostingClassifier(n_estimators = 500)
model_2.fit(X_train, y_train)

In [None]:
evaluate_model(model_2)