In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from feature_engineering import FeatureEngineering
from sklearn.pipeline import Pipeline
import pickle

In [None]:
df=pd.read_csv("telecom_customer_churn.csv")

In [3]:
df.drop(columns=['Customer ID'],inplace=True)
df.drop(columns=['Churn Category','Churn Reason'],inplace=True)
df.drop(columns=['City'],inplace=True)

In [4]:
numerical_cols=['Avg Monthly Long Distance Charges','Avg Monthly GB Download']
categorical_cols=['Offer','Multiple Lines','Internet Type','Online Security','Online Backup','Device Protection Plan','Premium Tech Support','Streaming TV','Streaming Movies','Streaming Music','Unlimited Data']
for col in numerical_cols:
    df[col].fillna(df[col].mean(),inplace=True)
for col in categorical_cols:
    df[col].fillna(df[col].mode()[0],inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(),inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0],inplace=True)


In [5]:
df=df[df['Customer Status']!='Joined']

In [6]:
le=LabelEncoder()
le.fit(df['Customer Status'])
df['Customer Status']=le.transform(df['Customer Status'])

In [7]:
X=df.drop(columns=['Customer Status'])
y=df['Customer Status']

In [8]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.1,random_state=42)

Creating Pipeline

In [9]:
nominal_cols=['Gender','Married','Offer','Phone Service','Multiple Lines','Internet Service','Internet Type','Online Security','Online Backup','Device Protection Plan','Premium Tech Support','Streaming TV','Streaming Movies','Streaming Music','Unlimited Data','Paperless Billing','Payment Method']
ordinal_cols=['Contract']

trf1=ColumnTransformer(transformers=[('tnf1',OrdinalEncoder(categories=[['Month-to-Month','One Year','Two Year']],dtype=int),ordinal_cols),('tnf2',OneHotEncoder(drop='first',sparse_output=False,dtype=int),nominal_cols)],remainder='passthrough')
trf1.set_output(transform="pandas")

trf2=FeatureEngineering()

trf3=RandomForestClassifier(class_weight='balanced',max_depth=30,max_features='log2', min_samples_split=5,n_estimators=200)

pipe=Pipeline([
    ('trf1',trf1),
    ('trf2',trf2),
    ('trf3',trf3)
])
    
pipe.fit(X_train,y_train)

The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [10]:
with open("churn_model.pkl","wb") as f:
    pickle.dump(pipe,f)