Support Vector Machine Model

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

Load the dataset

In [None]:
# Read the bank churn dataset from a CSV file (path is relative to the
# notebook's working directory) into a DataFrame.
csv_path = 'Bank Customer Churn Prediction Classification Dataset.csv'
df = pd.read_csv(csv_path)

# Preview the first ten rows to sanity-check the columns and values
df.head(n=10)

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure (From how many years he/she is having bank acc in ABC Bank),Account balance,products_number (Number of Product from bank),credit_card (Is this customer have credit card ?),active_member (Is he/she is active Member of bank ?),estimated_salary,churn (Churn Status)
0,15634602,619,France,Female,42,2,0.0,1,1,1,,1
1,15647311,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0
5,15574012,645,Spain,Male,44,8,113755.78,2,1,0,149756.71,1
6,15592531,822,France,Male,50,7,0.0,2,1,1,10062.8,0
7,15656148,376,Germany,Female,29,4,115046.74,4,1,0,119346.88,1
8,15792365,501,,Male,44,4,142051.07,2,0,1,74940.5,0
9,15592389,684,France,Male,27,2,134603.88,1,1,1,71725.73,0


Data Preprocessing

In [None]:
# Preprocessing: drop unused columns, one-hot encode the categorical columns,
# impute missing numeric feature values, then build the feature matrix X,
# the label vector y and a standardised copy of X (X_scaled).
target_column = 'churn (Churn Status)'

# Drop irrelevant columns
df = df.drop(columns=['customer_id'])

# A row without a label cannot be used for supervised training or evaluation.
# Remove such rows explicitly instead of letting the numeric imputation below
# replace a missing 0/1 label with the (fractional) column mean.
df = df.dropna(subset=[target_column])

# One-hot encode categorical variables (drop_first drops the first level of
# each variable, which then acts as the implicit baseline).
# NOTE(review): the rendered output of this cell shows a `country_?` column,
# which suggests '?' placeholders in `country` are being encoded as a real
# category - confirm against the raw data and consider treating them as missing.
categorical_columns = ['country', 'gender']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Fill missing values in numerical feature columns with the column mean.
# The target is excluded on purpose: it is a class label, not a quantity.
numerical_columns = df.select_dtypes(include=['float64', 'int64']).columns
feature_numeric_columns = numerical_columns.drop(target_column, errors='ignore')
df[feature_numeric_columns] = df[feature_numeric_columns].fillna(
    df[feature_numeric_columns].mean()
)

# Split the data into features (X) and target variable (y)
X = df.drop(columns=[target_column])
y = df[target_column]

# Scale numerical features.
# NOTE: the scaler is fitted on every row of X, so anything that later splits
# X_scaled into train/test parts carries test-set statistics in its features.
# Fit a scaler on the training rows only when an unbiased estimate matters.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Display the first few rows of the DataFrame
df.head(10)

Unnamed: 0,credit_score,age,tenure (From how many years he/she is having bank acc in ABC Bank),Account balance,products_number (Number of Product from bank),credit_card (Is this customer have credit card ?),active_member (Is he/she is active Member of bank ?),estimated_salary,churn (Churn Status),country_?,country_France,country_Germany,country_Spain,gender_Male
0,619,42,2,0.0,1,1,1,100063.579469,1,0,1,0,0,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,0,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,0,1,0,0,0
3,699,39,1,0.0,2,0,0,93826.63,0,0,1,0,0,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,0,0,1,0
5,645,44,8,113755.78,2,1,0,149756.71,1,0,0,0,1,1
6,822,50,7,0.0,2,1,1,10062.8,0,0,1,0,0,1
7,376,29,4,115046.74,4,1,0,119346.88,1,0,0,1,0,0
8,501,44,4,142051.07,2,0,1,74940.5,0,0,0,0,0,1
9,684,27,2,134603.88,1,1,1,71725.73,0,0,1,0,0,1


Training the model

In [None]:
# Split the data into training and testing sets (80% training, 20% testing).
# The split is performed on the unscaled feature matrix X so that the scaler
# can be fitted on the training rows only; fitting it on all rows before the
# split would leak test-set mean/variance into the training features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training portion, then apply the same transformation
# to the held-out portion. `scaler` is rebound to this training-only scaler so
# there is one fitted scaler that matches the model.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Model Implementation - SVM model with a Radial Basis Function kernel
svm_model = SVC(kernel='rbf', random_state=42)

# Train SVM Model
svm_model.fit(X_train, y_train)

# Make Predictions on the held-out test set
svm_pred = svm_model.predict(X_test)

Evaluation

In [None]:
# Score the baseline SVM predictions against the held-out labels:
# overall accuracy, the confusion matrix, and the per-class report.
accuracy = accuracy_score(y_test, svm_pred)
conf_matrix = confusion_matrix(y_test, svm_pred)
classification_rpt = classification_report(y_test, svm_pred)

# Emit all three metrics in a single call, one per line
print(
    f"Accuracy: {accuracy}",
    f"Confusion Matrix:\n{conf_matrix}",
    f"Classification Report:\n{classification_rpt}",
    sep="\n",
)

Accuracy: 0.8656343656343657
Confusion Matrix:
[[1587   42]
 [ 227  146]]
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.97      0.92      1629
           1       0.78      0.39      0.52       373

    accuracy                           0.87      2002
   macro avg       0.83      0.68      0.72      2002
weighted avg       0.86      0.87      0.85      2002



Hyperparameter Tuning

In [None]:
# Define hyperparameters to tune.
# The grid is a list of per-kernel sub-grids because `gamma` only affects the
# RBF kernel: crossing it with the linear kernel would fit every linear
# candidate twice with identical results (12 candidates instead of 9).
# Consequence: if a linear model wins, `best_params_` has no 'gamma' key.
c_values = [0.1, 1, 10]  # Regularization parameter
param_grid = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf'], 'C': c_values, 'gamma': ['scale', 'auto']},  # Kernel coefficient for RBF kernel
]

# Initialize GridSearchCV (5-fold cross-validation, selecting on accuracy)
grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=5, scoring='accuracy')

# Perform Grid Search to find the best hyperparameters
grid_search.fit(X_train, y_train)

Evaluation using the best model

In [None]:
# Pull the winning parameter combination and its mean cross-validated score
# out of the fitted grid search.
best_params, best_score = grid_search.best_params_, grid_search.best_score_

# Report both values
print("Best Hyperparameters:", best_params)
print("Best Score:", best_score)

Best Hyperparameters: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Best Score: 0.8541979231730169


In [None]:
# Take the estimator refitted with the best parameters found by the grid
# search and predict the held-out test set with it.
best_model = grid_search.best_estimator_
best_pred = best_model.predict(X_test)

# Same three metrics as for the baseline model, computed for the tuned model
accuracy_best = accuracy_score(y_test, best_pred)
conf_matrix_best = confusion_matrix(y_test, best_pred)
classification_rpt_best = classification_report(y_test, best_pred)

# Emit the evaluation metrics in a single call, one per line
print(
    f"Accuracy with Best Model: {accuracy_best}",
    f"Confusion Matrix with Best Model:\n{conf_matrix_best}",
    f"Classification Report with Best Model:\n{classification_rpt_best}",
    sep="\n",
)

Accuracy with Best Model: 0.8656343656343657
Confusion Matrix with Best Model:
[[1587   42]
 [ 227  146]]
Classification Report with Best Model:
              precision    recall  f1-score   support

           0       0.87      0.97      0.92      1629
           1       0.78      0.39      0.52       373

    accuracy                           0.87      2002
   macro avg       0.83      0.68      0.72      2002
weighted avg       0.86      0.87      0.85      2002

