In [1]:
# Part 1 - Data Preprocessing

# Importing the libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load the raw data.
# The CSV path is relative to the notebook's working directory; adjust it if
# the file lives elsewhere.
dataset = pd.read_csv('Churn_Modelling.csv')

# Target: column 13 ('Exited').
y = dataset.iloc[:, 13]
# Features: columns 3 through 12, still containing the raw categorical columns.
X = dataset.iloc[:, 3:13]

# Encoding categorical data
# One-hot encode columns 1 ('Geography') and 2 ('Gender'); every other column
# is passed through unchanged and placed after the encoded columns.
#
# - drop='first' removes one dummy column per encoded feature, so neither
#   Geography nor Gender keeps a redundant, perfectly collinear column (the
#   "dummy variable trap"). Slicing off the first output column instead would
#   only remove one Geography dummy and leave both Gender dummies in place.
# - sparse_threshold=0 forces a dense array regardless of how sparse the
#   encoded data is; the StandardScaler applied later centers the data and
#   cannot do that on a sparse matrix.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(drop='first'), [1, 2])],
    remainder='passthrough',
    sparse_threshold=0,
)
X = ct.fit_transform(X)

# Hold out 20% of the rows as the test set; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Feature Scaling
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test set does not influence it.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

# ---------------------------------------------------------------------------------------

# Part 2 - Building and Training the Logistic Regression Model

# Importing the Logistic Regression class
from sklearn.linear_model import LogisticRegression

# Build the logistic regression classifier (seeded for reproducibility) and
# fit it on the scaled training data in one step; fit() returns the estimator
# itself, so `classifier` is the trained model.
classifier = LogisticRegression(random_state=0).fit(X_train, y_train)

# ---------------------------------------------------------------------------------------

# Part 3 - Predicting and Evaluating the Model

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the held-out test set with the trained classifier
y_pred = classifier.predict(X_test)

# Compute both metrics up front, then report them together.
# In the confusion matrix, rows are the true labels and columns the predictions.
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Heading, matrix, and a blank separator line, each on its own line
print("Confusion Matrix:", cm, "", sep="\n")
print(f"Model Accuracy: {accuracy:.2f}")

Confusion Matrix:
[[1526   69]
 [ 309   96]]

Model Accuracy: 0.81
