In [20]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.preprocessing import ( StandardScaler,
                                    OneHotEncoder, OrdinalEncoder
                                    )
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import(   
                                Pipeline,
                                make_pipeline
                            )
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from imblearn.pipeline import Pipeline as ImbPipeline

In [21]:
# Read both CSV exports and stack them row-wise into one frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index instead of
# carrying over the two files' overlapping row labels.
# NOTE(review): the provided "training" and "testing" files are pooled here and
# re-split later with train_test_split -- confirm this is intended.
churn_raw = pd.concat(
    [
        pd.read_csv('customer_churn_dataset-training-master.csv'),
        pd.read_csv('customer_churn_dataset-testing-master.csv'),
    ],
    axis=0,
    ignore_index=True,
)

# Drop rows containing any missing value FIRST, then renumber, so the working
# frame ends up with a contiguous index (resetting before dropna leaves gaps).
# The raw frame is kept under its own name so this cell can be re-run safely.
df = churn_raw.dropna().reset_index(drop=True)

In [22]:
# Integer codes assigned to each subscription tier.
subscription_map = {
    'Basic': 3,
    'Premium': 1,
    'Standard': 2
}

# Integer codes assigned to each contract length.
contract_map = {
    'Annual': 2,
    'Monthly': 3,
    'Quarterly': 1
}

# Encode into temporary Series instead of overwriting the source columns.
# Overwriting made this cell non-idempotent: on a second run the already
# numeric columns were mapped through the string-keyed dicts and silently
# became all-NaN.
subscription_code = df['Subscription Type'].map(subscription_map)
contract_code = df['Contract Length'].map(contract_map)

# Series.map yields NaN for any label missing from the dict; fail loudly here
# rather than letting NaNs leak into the engineered feature.
unmapped_rows = subscription_code.isna() | contract_code.isna()
if unmapped_rows.any():
    raise ValueError(
        f"{int(unmapped_rows.sum())} row(s) have a 'Subscription Type' or "
        "'Contract Length' value that is missing from the encoding maps"
    )

# Combined feature: subscription code plus the squared, zero-based contract code.
# NOTE(review): different combinations can produce the same value
# (e.g. Premium+Annual and Standard+Quarterly both give 2) -- confirm that
# this is acceptable for the model.
df['Subscription_Contract'] = subscription_code + (contract_code - 1) ** 2

In [23]:
# Columns removed before modelling: the customer identifier, plus the two plan
# attributes that were already combined into 'Subscription_Contract'.
Irr_cols = [
    'CustomerID',
    'Subscription Type',
    'Contract Length',
]
df = df.drop(columns=Irr_cols)

In [24]:
# One seed shared by every stochastic step in this pipeline so that repeated
# runs produce the same resampled training set and the same fitted model.
RANDOM_STATE = 42

# Columns routed to each preprocessing branch.
numerical_features = [
    'Age',
    'Tenure',
    'Usage Frequency',
    'Support Calls',
    'Payment Delay',
    'Total Spend',
    'Last Interaction',
    'Subscription_Contract',
]
categorical_features_ohe = ['Gender']

# Numeric branch: standardise each column.
num_pipeline = Pipeline(steps=[
    ('scale', StandardScaler())
])

# Categorical branch: dense one-hot encoding; categories not seen during fit
# are encoded as all zeros instead of raising.
ohe_pipeline = Pipeline(steps=[
    ('one-hot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Apply each branch to its columns; any column not listed above is dropped.
column_transformer = ColumnTransformer(
    transformers=[
        ('numeric_pipeline', num_pipeline, numerical_features),
        ('one_hot_pipeline', ohe_pipeline, categorical_features_ohe)
    ],
    remainder='drop',
    n_jobs=-1
)

# imblearn's Pipeline is required here because SMOTE is a sampler, which a
# plain sklearn Pipeline cannot hold; samplers are applied during fit only.
# SMOTE is now seeded: previously it drew new synthetic samples on every run,
# so the reported metrics were not reproducible despite the seeded classifier.
pipeline = ImbPipeline(steps=[
    ('preprocessor', column_transformer),  # Preprocess the data
    ('smote', SMOTE(random_state=RANDOM_STATE)),  # Oversample the minority class
    ('classifier', RandomForestClassifier(random_state=RANDOM_STATE))  # Classifier
])

In [25]:
# Separate the target column from the predictor columns.
y = df['Churn']
X = df.drop(columns=['Churn'])

In [26]:
# Hold out 20% of the rows for evaluation; stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [27]:
# Fit preprocessing, oversampling and the classifier on the training split only.
pipeline.fit(X=X_train, y=y_train)

In [28]:
# Score of the final classifier on the held-out split (mean accuracy for a
# scikit-learn classifier).
pipeline.score(X=X_test, y=y_test)

0.9364620652797846

In [29]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [30]:
# Predicted labels for the held-out split, reused by the metric cells below.
y_pred = pipeline.predict(X=X_test)

In [31]:
# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.9364620652797846

In [32]:
# Precision for the positive class (library defaults: average='binary', pos_label=1).
# NOTE(review): presumably label 1 means "churned" -- confirm against the data.
precision_score(y_true=y_test, y_pred=y_pred)

0.8982795388586913

In [33]:
# Recall for the positive class (library defaults: average='binary', pos_label=1).
# NOTE(review): presumably label 1 means "churned" -- confirm against the data.
recall_score(y_true=y_test, y_pred=y_pred)

0.998645252143532