In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load the data
data = pd.read_csv("Churn_Modelling.csv")

# Encode 'Gender' column
l = LabelEncoder()
data['Gender'] = l.fit_transform(data['Gender'])

# ONE HOT ENCODING
o = OneHotEncoder(handle_unknown='ignore')
geo = o.fit_transform(data[['Geography']]).toarray()
geo_df = pd.DataFrame(geo, columns=o.get_feature_names_out(['Geography']))

data = pd.concat([data.drop(['Geography'], axis=1), geo_df], axis=1)

# Separate features and target
x = data.drop(['Exited', 'RowNumber', 'CustomerId', 'Surname'], axis=1)
y = data['Exited']

# Split the data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# CREATING A MODEL FROM SCRATCH
def create_model(neurons=32, layers=1):
    model = Sequential()
    model.add(Dense(neurons, activation='relu', input_shape=(x_train.shape[1],)))
    
    for _ in range(layers-1):
        model.add(Dense(neurons, activation='relu'))
        
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Define the parameter grid
param_grid = {
    'neurons': [16, 32, 64],
    'layers': [1, 2, 3],
    'epochs': [50, 75],
    'batch_size': [32, 64]
}

# Perform manual grid search
best_score = 0
best_params = {}

for neurons in param_grid['neurons']:
    for layers in param_grid['layers']:
        for epochs in param_grid['epochs']:
            for batch_size in param_grid['batch_size']:
                model = create_model(neurons=neurons, layers=layers)
                history = model.fit(
                    x_train_scaled, y_train,
                    epochs=epochs,
                    batch_size=batch_size,
                    validation_split=0.2,
                    verbose=0
                )
                val_accuracy = max(history.history['val_accuracy'])
                
                if val_accuracy > best_score:
                    best_score = val_accuracy
                    best_params = {
                        'neurons': neurons,
                        'layers': layers,
                        'epochs': epochs,
                        'batch_size': batch_size
                    }

# Print best performance
print("Best: %f using %s" % (best_score, best_params))

# Train the final model with best parameters
final_model = create_model(neurons=best_params['neurons'], layers=best_params['layers'])
final_model.fit(
    x_train_scaled, y_train,
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
    verbose=0
)

# Evaluate on test set
test_loss, test_accuracy = final_model.evaluate(x_test_scaled, y_test)
print(f"Test accuracy: {test_accuracy}")




Best: 0.865625 using {'neurons': 16, 'layers': 2, 'epochs': 75, 'batch_size': 64}
Test accuracy: 0.8560000061988831
