In [3]:
# ------------------------------ Importing the Required Libraries ------------------------------
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# ------------------------------ Importing KaggleHub for Dataset Download ------------------------------
import kagglehub  # Import KaggleHub to download datasets from Kaggle

# Download the dataset 'Churn_Modelling.csv' from Kaggle using KaggleHub
path = kagglehub.dataset_download("aakash50897/churn-modellingcsv")

# Print the path where the dataset has been saved after download
print("Path to dataset files:", path)

# ------------------------------ Fixing the File Path ------------------------------
# The dataset contains a CSV file. We need to specify the correct CSV file inside the downloaded folder.
csv_file_path = path + '/Churn_Modelling.csv'  # Add the filename to the directory path

# Load the dataset into pandas DataFrame
dataset = pd.read_csv(csv_file_path)

# ------------------------------ Preparing the Data for Model ------------------------------
X = dataset.iloc[:, 3:13].values  # Selecting feature columns
y = dataset.iloc[:, 13].values    # Selecting the target variable column 'Exited'

# ------------------------------ Part-1: Data Preprocessing ------------------------------
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Encoding categorical data for 'Geography' and 'Gender' columns
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])  # Encoding 'Geography' column

labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])  # Encoding 'Gender' column

# OneHotEncoding for 'Geography' column using ColumnTransformer
ct = ColumnTransformer(
    transformers=[('one_hot', OneHotEncoder(), [1])],  # Apply OneHotEncoder to the 'Geography' column (index 1)
    remainder='passthrough'  # Keep the other columns as is
)

X = ct.fit_transform(X)

# Avoid the first column to prevent multicollinearity (Dummy variable trap)
X = X[:, 1:]

# ------------------------------ Splitting the Data into Training and Test Sets ------------------------------
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# ------------------------------ Feature Scaling ------------------------------
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# ------------------------------ Building the ANN ------------------------------
import keras
from keras.models import Sequential
from keras.layers import Dense

# Initializing the ANN
classifier = Sequential()

# Adding the input layer and the first hidden layer
classifier.add(Dense(activation='relu', input_dim=11, kernel_initializer='uniform', units=6))

# Adding the second hidden layer
classifier.add(Dense(activation='relu', kernel_initializer='uniform', units=6))

# Adding the output layer
classifier.add(Dense(activation='sigmoid', kernel_initializer='uniform', units=1))

# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fitting the ANN to the training set
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(cm)


Path to dataset files: /kaggle/input/churn-modellingcsv


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7779 - loss: 0.5735
Epoch 2/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7952 - loss: 0.4257
Epoch 3/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.7997 - loss: 0.4133
Epoch 4/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8237 - loss: 0.4115
Epoch 5/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8237 - loss: 0.4268
Epoch 6/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8263 - loss: 0.4256
Epoch 7/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8314 - loss: 0.4044
Epoch 8/100
[1m800/800[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8296 - loss: 0.4158
Epoch 9/100
[1m800/800[0m [32