# Import necessary libraries

In [16]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Mount Google Drive to access files

In [17]:
# Attach Google Drive to the Colab filesystem at /content/drive so the CSV
# files under it can be read and written by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
# Load the cleaned Telco churn export from the mounted Drive into `df`.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# leftover row index from an earlier to_csv call - confirm whether downstream
# notebooks rely on it before dropping it.
raw_csv_path = '/content/drive/MyDrive/Telco-Customer-Churn - Cleaned.csv'
with open(raw_csv_path) as csv_handle:
    df = pd.read_csv(csv_handle, sep=',')

In [19]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


# Feature Engineering

### Convert binary categorical features to numerical values

In [20]:
# Columns whose raw values are two-valued labels (Male/Female or Yes/No).
binary_features = ['gender', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'Churn']

# Explicit label -> code table. 'Male' and 'Yes' become 1, 'Female' and 'No'
# become 0. Anything else is treated as a data error instead of being silently
# encoded as 0 (which would corrupt the features and the Churn target).
binary_label_codes = {'Male': 1, 'Yes': 1, 'Female': 0, 'No': 0}

for feature in binary_features:
    # Re-running this cell on an already-encoded column would otherwise map
    # every 0/1 to 0, so numeric columns are left untouched.
    if pd.api.types.is_numeric_dtype(df[feature]):
        continue

    # Series.map yields NaN for labels missing from the table; use that to
    # detect unexpected values before overwriting the column.
    encoded = df[feature].map(binary_label_codes)
    unexpected_labels = df.loc[encoded.isna(), feature].unique()
    if len(unexpected_labels) > 0:
        raise ValueError(
            f"Column {feature!r} contains values that are not Male/Female/Yes/No: "
            f"{list(unexpected_labels)}"
        )

    df[feature] = encoded.astype('int64')


In [21]:
# Preview the first five rows to check that the binary columns now hold 0/1.
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,0,0,1,0,1,0,No phone service,DSL,No,...,No,No,No,No,Month-to-month,1,Electronic check,29.85,29.85,0
1,5575-GNVDE,1,0,0,0,34,1,No,DSL,Yes,...,Yes,No,No,No,One year,0,Mailed check,56.95,1889.5,0
2,3668-QPYBK,1,0,0,0,2,1,No,DSL,Yes,...,No,No,No,No,Month-to-month,1,Mailed check,53.85,108.15,1
3,7795-CFOCW,1,0,0,0,45,0,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,0,Bank transfer (automatic),42.3,1840.75,0
4,9237-HQITU,0,0,0,0,2,1,No,Fiber optic,No,...,No,No,No,No,Month-to-month,1,Electronic check,70.7,151.65,1


### One-Hot Encoding for categorical variables

In [22]:
# Multi-valued categorical columns; each one is replaced by one indicator
# column per distinct value (named "<column>_<value>").
categorical_features = [
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'Contract',
    'PaymentMethod',
]
df = pd.get_dummies(data=df, columns=categorical_features)

In [24]:
# Preview the first five rows to inspect the new one-hot indicator columns.
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,...,StreamingMovies_No,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,7590-VHVEG,0,0,1,0,1,0,1,29.85,29.85,...,True,False,False,True,False,False,False,False,True,False
1,5575-GNVDE,1,0,0,0,34,1,0,56.95,1889.5,...,True,False,False,False,True,False,False,False,False,True
2,3668-QPYBK,1,0,0,0,2,1,1,53.85,108.15,...,True,False,False,True,False,False,False,False,False,True
3,7795-CFOCW,1,0,0,0,45,0,0,42.3,1840.75,...,True,False,False,False,True,False,True,False,False,False
4,9237-HQITU,0,0,0,0,2,1,1,70.7,151.65,...,True,False,False,True,False,False,False,False,True,False


### Create interaction feature (MonthlyCharges × tenure)

In [25]:
# Interaction feature: monthly charge multiplied by tenure, row by row.
# This cell runs before the scaling cell, so it uses the unscaled values.
df['MonthlyCharges_Tenure'] = df['MonthlyCharges'].mul(df['tenure'])

In [27]:
# Preview the first five rows, including the new MonthlyCharges_Tenure column.
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,...,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,MonthlyCharges_Tenure
0,7590-VHVEG,0,0,1,0,1,0,1,29.85,29.85,...,False,False,True,False,False,False,False,True,False,29.85
1,5575-GNVDE,1,0,0,0,34,1,0,56.95,1889.5,...,False,False,False,True,False,False,False,False,True,1936.3
2,3668-QPYBK,1,0,0,0,2,1,1,53.85,108.15,...,False,False,True,False,False,False,False,False,True,107.7
3,7795-CFOCW,1,0,0,0,45,0,0,42.3,1840.75,...,False,False,False,True,False,True,False,False,False,1903.5
4,9237-HQITU,0,0,0,0,2,1,1,70.7,151.65,...,False,False,True,False,False,False,False,True,False,141.4


### Min-max scaling of tenure, MonthlyCharges, and TotalCharges to [0, 1]

In [28]:
# Rescale the three numeric columns with a default MinMaxScaler and write the
# result back in place; MonthlyCharges_Tenure is not among the scaled columns.
# NOTE(review): the scaler is fit on the whole frame - if a train/test split
# happens downstream, consider fitting on the training portion only.
scaled_columns = ['tenure', 'MonthlyCharges', 'TotalCharges']
scaler = MinMaxScaler()
df[scaled_columns] = scaler.fit_transform(df[scaled_columns])


In [29]:
# Preview the first five rows to check the rescaled numeric columns.
df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,...,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check,MonthlyCharges_Tenure
0,7590-VHVEG,0,0,1,0,0.013889,0,1,0.115423,0.001275,...,False,False,True,False,False,False,False,True,False,29.85
1,5575-GNVDE,1,0,0,0,0.472222,1,0,0.385075,0.215867,...,False,False,False,True,False,False,False,False,True,1936.3
2,3668-QPYBK,1,0,0,0,0.027778,1,1,0.354229,0.01031,...,False,False,True,False,False,False,False,False,True,107.7
3,7795-CFOCW,1,0,0,0,0.625,0,0,0.239303,0.210241,...,False,False,False,True,False,True,False,False,False,1903.5
4,9237-HQITU,0,0,0,0,0.027778,1,1,0.521891,0.01533,...,False,False,True,False,False,False,False,True,False,141.4


# Save the processed dataset to a new CSV file

In [30]:
# Destination on the mounted Drive for the engineered dataset.
processed_file_path = '/content/drive/My Drive/processed_telco_customer_churn.csv'

# index=False keeps the DataFrame's row index out of the written file.
df.to_csv(path_or_buf=processed_file_path, index=False)


In [31]:
# Report where the processed CSV was written.
save_confirmation = f"Processed dataset saved to: {processed_file_path}"
print(save_confirmation)

Processed dataset saved to: /content/drive/My Drive/processed_telco_customer_churn.csv
