In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the churn CSV into a pandas DataFrame.
# NOTE(review): this is an absolute path that looks like a Colab Google Drive
# mount — the notebook will only run where that exact folder layout exists.
df = pd.read_csv("/content/drive/MyDrive/Data Science/Project-45 Bank Customer Churn Prediction Using H2O Auto ML/Churn_Modelling.csv")

In [None]:
# (rows, columns) of the loaded frame
df.shape

In [None]:
# per-column dtype and non-null count
df.info()

In [None]:
# checking null values (number of missing entries per column)
df.isna().sum()

In [None]:
# preview the first rows
df.head()

In [None]:
# droping some columns

df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1, inplace=True)

In [None]:
df.head()

In [None]:
df.nunique()

In [None]:
size = df['Exited'].value_counts()

In [None]:
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(size, autopct='%1.1f%%', labels=['Stay', 'Leave'], shadow=True, startangle=45 )
plt.show()

In [None]:
fig, axx = plt.subplots(2, 2, figsize=(10, 8))
sns.countplot(data=df, x='Geography', hue='Exited', ax= axx[0][0])
sns.countplot(data=df, x='Gender', hue='Exited', ax= axx[0][1])
sns.countplot(data=df, x='HasCrCard', hue='Exited', ax= axx[1][0])
sns.countplot(data=df, x='IsActiveMember', hue='Exited', ax= axx[1][1])
plt.show()

In [None]:
df.head()

In [None]:
# Box plot of each numeric feature against the 'Exited' flag on a 3x2 grid,
# filled row by row in the order listed below.
numeric_cols = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']
fig, axx = plt.subplots(3, 2, figsize=(12, 8))
for col_name, panel in zip(numeric_cols, axx.ravel()):
  sns.boxplot(data=df, x='Exited', y=col_name, hue='Exited', ax=panel)
fig.tight_layout()
plt.show()

## Feature Engineering

In [None]:
df['BalanceSalaryRatio'] = df['Balance'] / df['EstimatedSalary']

In [None]:
sns.boxplot(data=df, x='Exited', y='BalanceSalaryRatio', hue='Exited')
plt.ylim(-1, 4)
plt.show()

In [None]:
df['TenureByAge'] = df['Tenure'] / df['Age']

In [None]:
sns.boxplot(data=df, x='Exited', y='TenureByAge', hue='Exited')
plt.ylim(-1, 1)
plt.show()

In [None]:
# Remove the two ratio features again before encoding.
# Rebinding with errors='ignore' (instead of an in-place drop) keeps the cell
# re-runnable: a second execution no longer raises KeyError.
df = df.drop(columns=['BalanceSalaryRatio', 'TenureByAge'], errors='ignore')

In [None]:
# getting object columns
obj_col = df.select_dtypes('object').columns

# show the category frequencies of every object-typed column
for i in obj_col:
  print(df[i].value_counts())
  print("----"*20)

# label_encoding (Gender)
# Only encode while the column still holds the text labels. Series.map()
# returns NaN for every value missing from the mapping, so re-running this
# cell on the already encoded 0/1 column would silently wipe it out.
if not pd.api.types.is_numeric_dtype(df['Gender']):
  df['Gender'] = df['Gender'].map({'Male': 0, "Female": 1})

In [None]:
# one hot Encoding
df_ = pd.get_dummies(data=df, columns=['Geography'])

In [None]:
df.head()

In [None]:
scaler_col = ['CreditScore', 'Balance', 'Age', 'EstimatedSalary', 'Tenure', 'NumOfProducts']

In [None]:
# Scaling the data
from sklearn.preprocessing import MinMaxScaler

In [None]:
scaler = MinMaxScaler()

In [None]:
df_[scaler_col] = scaler.fit_transform(df_[scaler_col])

In [None]:
df_.head()

In [None]:
# splitting data into dependent and independent variable

x = df_.drop('Exited', axis=1)
y = df_['Exited'].values

In [None]:
# splitting data into training and testing datasets

from sklearn.model_selection import train_test_split

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=44)

In [None]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

## Building ANN model

In [None]:
from tensorflow import keras

In [None]:
model = keras.Sequential()

In [None]:
model.add(keras.layers.Dense(units=32, activation='relu', input_shape=(12,)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(units=16, activation='relu'))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.Dense(units=8, activation='relu'))

model.add(keras.layers.Dense(units=1, activation='sigmoid'))

In [None]:
# Compile the model

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
history = model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=2, validation_data=(x_test, y_test))

In [None]:
y_pred = model.predict(x_test)

In [None]:
pred = np.where(y_pred > 0.5, 1, 0)

In [None]:
pred

In [None]:
from sklearn import metrics

In [None]:
print(metrics.accuracy_score(y_test, pred))

In [None]:
sns.heatmap(metrics.confusion_matrix(y_test, pred), annot=True, fmt='d')
plt.show()

## H2O Auto ML

In [None]:
# ! pip install requests
# !pip install tabulate
# !pip install future

In [None]:
# ! pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o

In [None]:
import h2o
from h2o.automl import H2OAutoML
# Initialise H2O with a 16G memory cap.
# NOTE(review): confirm the host actually has this much RAM available.
h2o.init(max_mem_size='16G')

In [None]:
# Load the same CSV again, this time through H2O (separate from the pandas `df`).
# NOTE(review): same absolute Drive path as the pandas load — keep the two in sync.
df_2 = h2o.import_file('/content/drive/MyDrive/Data Science/Project-45 Bank Customer Churn Prediction Using H2O Auto ML/Churn_Modelling.csv')

In [None]:
# preview the first rows of the H2O frame
df_2.head()

In [None]:
# H2O AutoML chooses between regression and classification from the type of
# the response column. 'Exited' is read from the CSV as a number, so without
# this conversion AutoML would train regression models on the churn flag.
df_2['Exited'] = df_2['Exited'].asfactor()

# 80/20 split; seeded so the train/test partition is the same on every run.
train_df, test_df = df_2.split_frame(ratios=[0.8], seed=10)

In [None]:
y = 'Exited'
x = df_2.columns

In [None]:
x.remove(y)
x.remove('RowNumber')
x.remove('Surname')
x.remove('CustomerId')

In [None]:
# define model

aml = H2OAutoML(max_runtime_secs=1000, max_models=10, seed=10, verbosity='info', nfolds=2)

In [None]:
aml.train(x=x, y=y, training_frame=train_df)

In [None]:
lb = aml.leaderboard

In [None]:
lb

In [None]:
# getting all models

model_ids = list(aml.leaderboard['model_id'].as_data_frame().iloc[:, 0])

In [None]:
model_ids

In [None]:
aml.leader.model_performance(test_df)

In [None]:
# AutoML model ids embed a run-specific timestamp, so an id copied from an
# earlier session does not exist after a fresh run. Look the all-models stacked
# ensemble up in the current leaderboard instead, falling back to the leader
# when no such ensemble was built.
leaderboard_ids = aml.leaderboard['model_id'].as_data_frame().iloc[:, 0].tolist()
ensemble_ids = [mid for mid in leaderboard_ids if mid.startswith('StackedEnsemble_AllModels')]
h2o.get_model(ensemble_ids[0] if ensemble_ids else aml.leader.model_id)

In [None]:
# Display the leading model of the AutoML run
aml.leader

In [None]:
# Predictions of the leading model on the held-out H2O frame.
# NOTE(review): this rebinds `pred`, which earlier held the ANN's thresholded
# predictions — re-running the ANN metric cells after this one will break.
pred = aml.leader.predict(test_df)

In [None]:
pred