In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.pipeline import Pipeline
from scikeras.wrappers import KerasClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense 
from tensorflow.keras.callbacks import EarlyStopping
import pickle

In [2]:
## load the raw churn dataset
df = pd.read_csv('Churn_Modelling.csv')

# Only the last expression of a cell is displayed, so the former mid-cell
# `df.head()` produced nothing; show the column index, which is what the
# recorded output of this cell contains.
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [3]:
## drop the columns
# RowNumber, CustomerId and Surname are removed from the frame before encoding.
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

In [4]:
## encode the categorical variable
# Fit the encoder on the Gender column, then replace the column with its
# integer codes. The fitted encoder is kept so it can be pickled later.
label_encoder = LabelEncoder()
label_encoder.fit(df['Gender'])
df['Gender'] = label_encoder.transform(df['Gender'])

In [5]:
## One-hot encoder
# Dense output (sparse_output=False) so the result can go straight into a DataFrame.
onehotencoder = OneHotEncoder(sparse_output=False)

# Learn the Geography categories and produce one indicator column per category.
encoded = onehotencoder.fit_transform(df[['Geography']])
feature_names = onehotencoder.get_feature_names_out(['Geography'])

# Reuse df's index so the indicator columns line up row-for-row on concat.
encoded_df = pd.DataFrame(data=encoded, index=df.index, columns=feature_names)

# Swap the original Geography column for its indicator columns.
df = pd.concat([df.drop(columns=['Geography']), encoded_df], axis=1)

df.head()

Unnamed: 0,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain
0,619,0,42,2,0.0,1,1,1,101348.88,1,1.0,0.0,0.0
1,608,0,41,1,83807.86,1,0,1,112542.58,0,0.0,0.0,1.0
2,502,0,42,8,159660.8,3,1,0,113931.57,1,1.0,0.0,0.0
3,699,0,39,1,0.0,2,0,0,93826.63,0,1.0,0.0,0.0
4,850,0,43,2,125510.82,1,1,1,79084.1,0,0.0,0.0,1.0


In [9]:
## separate features and target
# X and y were not defined by any cell in this notebook, so a fresh
# Restart & Run All failed here with a NameError. 'Exited' is the 0/1 column
# used as the label for the binary classifier; every other remaining column
# is a feature.
X = df.drop('Exited', axis=1)
y = df['Exited']

## train test split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

## scaling
# Fit the scaler on the training split only and reuse those statistics for
# the test split, so no information from the test rows leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
## define a function to create the model and try different parameters

def create_model(neurons=32,layers=1):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    neurons : int, default 32
        Number of units in each hidden Dense layer.
    layers : int, default 1
        Number of hidden Dense layers. The first hidden layer is always
        added, so any value below 1 yields a single hidden layer.

    Returns
    -------
    Sequential
        Model with ``layers`` ReLU hidden layers and a single sigmoid output
        unit, compiled with the adam optimizer, binary cross-entropy loss and
        the accuracy metric.

    Notes
    -----
    The input width is read from the notebook-level ``X_train`` at call time,
    so the split/scaling cell must have been run before this is called.
    """
    n_features = X_train.shape[1]

    model = Sequential()
    # input_shape must be a shape tuple; the previous code passed the bare int
    # X_train.shape[1] (its trailing comma only separated arguments), which
    # Keras cannot interpret as a shape.
    model.add(Dense(neurons, activation='relu', input_shape=(n_features,)))

    # Remaining hidden layers (none when layers == 1).
    for _ in range(layers-1):
        model.add(Dense(neurons, activation='relu'))

    model.add(Dense(1,activation='sigmoid'))
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

    return model

In [None]:
## create a keras classifier

# SciKeras takes the build function through `model=`; `build_fn=` is the
# deprecated legacy name. `neurons` and `layers` are declared here with the
# same defaults as create_model so that they are registered as parameters of
# the wrapper: GridSearchCV calls set_params() with the param_grid keys, and
# SciKeras rejects keys that were not given to the constructor.
model = KerasClassifier(
    model=create_model,
    neurons=32,
    layers=1,
    epochs=50,
    batch_size=10,
    verbose=0,
)

## define grid search parameters

param_grid = {
    'neurons': [16,32,64,128],
    'layers': [1,2],
    'epochs': [50,100]
}

## perform the grid search
# 4 * 2 * 2 = 16 parameter combinations, each fitted on 3 folds, plus the
# final refit on the full training data.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1,cv=3)
grid_res = grid.fit(X_train, y_train)

# print the best score and parameters
# (the first value is best_score_, so it is labelled as a score rather than as params)
print(f"Best score: {grid_res.best_score_} using params: {grid_res.best_params_}")

In [None]:
## save the pickle files
# Persist the fitted preprocessing objects (label encoder, one-hot encoder,
# scaler), one pickle file per object.
artifacts = {
    'encoder.pkl': label_encoder,
    'onehotencoder.pkl': onehotencoder,
    'scaler.pkl': scaler,
}

for filename, fitted_obj in artifacts.items():
    with open(filename, 'wb') as file:
        pickle.dump(fitted_obj, file)