In [52]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [53]:
df=pd.read_csv('WA_Fn-UseC_-Telco-Customer-Churn.csv')

In [54]:
df.head()

Unnamed: 0,Customer Gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85


In [55]:
df.columns

Index(['Customer Gender', 'SeniorCitizen', 'Partner', 'Dependents', 'tenure',
       'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
       'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV',
       'StreamingMovies', 'Contract', 'PaperlessBilling', 'PaymentMethod',
       'MonthlyCharges', 'TotalCharges'],
      dtype='object')

In [56]:
categorical_columns = {
            'Customer Gender': ['Female', 'Male'],
            'Partner': ['No', 'Yes'],
            'Dependents': ['No', 'Yes'],
            'PhoneService': ['No', 'Yes'],
            'MultipleLines': ['No', 'No phone service', 'Yes'],
            'InternetService': ['DSL', 'Fiber optic', 'No'],
            'OnlineSecurity': ['No', 'No internet service', 'Yes'],
            'OnlineBackup': ['No', 'No internet service', 'Yes'],
            'DeviceProtection': ['No', 'No internet service', 'Yes'],
            'TechSupport': ['No', 'No internet service', 'Yes'],
            'StreamingTV': ['No', 'No internet service', 'Yes'],
            'StreamingMovies': ['No', 'No internet service', 'Yes'],
            'Contract': ['Month-to-month', 'One year', 'Two year'],
            'PaperlessBilling': ['No', 'Yes'],
            'PaymentMethod': ['Bank transfer (automatic)', 'Credit card (automatic)', 'Electronic check', 'Mailed check'],
            'tenure_group': ['tenure_group_1 - 12', 'tenure_group_13 - 24', 'tenure_group_25 - 36', 'tenure_group_37 - 48', 'tenure_group_49 - 60', 'tenure_group_61 - 72']
        }

In [57]:
def tenure_group(tenure):
    if 1 <= tenure <= 12:
        return 'tenure_group_1 - 12'
    elif 13 <= tenure <= 24:
        return 'tenure_group_13 - 24'
    elif 25 <= tenure <= 36:
        return 'tenure_group_25 - 36'
    elif 37 <= tenure <= 48:
        return 'tenure_group_37 - 48'
    elif 49 <= tenure <= 60:
        return 'tenure_group_49 - 60'
    elif 61 <= tenure <= 72:
        return 'tenure_group_61 - 72'
    else:
        return 'tenure_group_61 - 72'

# Applying the function to create a new column
df['tenure_group'] = df['tenure'].apply(tenure_group)

In [58]:
df.head()

Unnamed: 0,Customer Gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,tenure_group
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,tenure_group_1 - 12


In [61]:
df_dummies = pd.get_dummies(df, columns=categorical_columns.keys(), drop_first=False)

In [62]:
df_dummies.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Customer Gender_Female,Partner_Yes,Dependents_No,PhoneService_No,MultipleLines_No phone service,InternetService_DSL,OnlineSecurity_No,OnlineBackup_Yes,DeviceProtection_No,TechSupport_No,StreamingTV_No,StreamingMovies_No,Contract_Month-to-month,PaperlessBilling_Yes,PaymentMethod_Electronic check,tenure_group_tenure_group_1 - 12
0,0,1,29.85,29.85,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True,True


In [63]:
all_possible_columns = []

In [64]:
for col, categories in categorical_columns.items():
    all_possible_columns.extend([f"{col}_{category}" for category in categories])

In [65]:
final_df = df_dummies.reindex(columns=all_possible_columns, fill_value=0)

In [66]:
non_categorical_columns = [
    'SeniorCitizen', 'tenure', 'MonthlyCharges', 'TotalCharges'
]
final_df = pd.concat([df[non_categorical_columns], final_df], axis=1)

In [67]:
final_df.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Customer Gender_Female,Customer Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_tenure_group_1 - 12,tenure_group_tenure_group_13 - 24,tenure_group_tenure_group_25 - 36,tenure_group_tenure_group_37 - 48,tenure_group_tenure_group_49 - 60,tenure_group_tenure_group_61 - 72
0,0,1,29.85,29.85,True,0,0,True,True,0,...,0,0,True,0,True,0,0,0,0,0


In [68]:
final_df = final_df.replace({True: 1, False: 0})

In [69]:
final_df.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Customer Gender_Female,Customer Gender_Male,Partner_No,Partner_Yes,Dependents_No,Dependents_Yes,...,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,tenure_group_tenure_group_1 - 12,tenure_group_tenure_group_13 - 24,tenure_group_tenure_group_25 - 36,tenure_group_tenure_group_37 - 48,tenure_group_tenure_group_49 - 60,tenure_group_tenure_group_61 - 72
0,0,1,29.85,29.85,1,0,0,1,1,0,...,0,0,1,0,1,0,0,0,0,0


In [70]:
scaler=MinMaxScaler()
final_df=scaler.fit_transform(final_df)

In [72]:
final_df.head()

AttributeError: 'numpy.ndarray' object has no attribute 'head'