In [None]:
import pandas as pd
import numpy as np

In [3]:
# Load the processed Telco churn table that the rest of this notebook works on.
# NOTE(review): this is an absolute, machine-specific Windows path and the file
# has no extension; presumably it is the CSV written by an earlier
# preprocessing step -- confirm before running on another machine.
PROCESSED_DATA_PATH = r'D:\Study\AI Advanced\Projects\Telco Customer Churn\Data\df_Processed'
df = pd.read_csv(PROCESSED_DATA_PATH)

In [4]:
# Transposed preview of the first rows, so each column is shown as one line.
df.head().transpose()

Unnamed: 0,0,1,2,3,4
gender,Female,Male,Male,Male,Female
SeniorCitizen,0,0,0,0,0
Partner,Yes,No,No,No,No
Dependents,No,No,No,No,No
tenure,1,34,2,45,2
PhoneService,No,Yes,Yes,No,Yes
MultipleLines,No phone service,No,No,No phone service,No
InternetService,DSL,DSL,DSL,DSL,Fiber optic
OnlineSecurity,No,Yes,Yes,Yes,No
OnlineBackup,Yes,No,Yes,No,No


Create behavioral features based on customer tenure and price sensitivity to help the model capture churn-risk patterns.

In [5]:
# 1 when tenure is at most 6, else 0.
# NOTE(review): tenure is presumably measured in months -- confirm against the
# dataset's data dictionary.
df['IsNewCustomer'] = df['tenure'].le(6).astype(int)

In [6]:
# Binary price flag: 1 when a customer's MonthlyCharges is strictly above the
# mean MonthlyCharges of the loaded frame, else 0.
# NOTE(review): the mean is computed over every row of df; if a train/test
# split happens downstream, confirm this threshold does not leak test data.
charge_threshold = df['MonthlyCharges'].mean()
df['IsHighCharge'] = df['MonthlyCharges'].gt(charge_threshold).astype(int)

In [7]:
# Spot-check the two new flags next to the columns they were derived from.
df.loc[:, ['tenure', 'MonthlyCharges', 'IsNewCustomer', 'IsHighCharge']].head()

Unnamed: 0,tenure,MonthlyCharges,IsNewCustomer,IsHighCharge
0,1,29.85,1,0
1,34,56.95,0,0
2,2,53.85,1,0
3,45,42.3,0,0
4,2,70.7,1,1


In [9]:
def phone_line_type(row):
    """Classify one customer's phone subscription into a single category.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must expose 'PhoneService'; 'MultipleLines' is only read when
        'PhoneService' is not 'No'.

    Returns
    -------
    str
        'NoPhone' when PhoneService equals 'No'; otherwise 'MultipleLines'
        when MultipleLines equals 'Yes'; otherwise 'SingleLine'.
    """
    has_no_phone = row['PhoneService'] == 'No'
    if has_no_phone:
        return 'NoPhone'
    return 'MultipleLines' if row['MultipleLines'] == 'Yes' else 'SingleLine'

# Evaluate phone_line_type once per row and store the label as a new column.
df['PhoneLineType'] = df.apply(phone_line_type, axis='columns')

In [10]:
# Remove the two source columns now that PhoneLineType encodes both of them.
# The result is rebound to `df` rather than mutated with inplace=True:
# pandas guidance discourages `inplace`, it brings no performance benefit, and
# an explicit assignment keeps the data lineage visible in the cell.
df = df.drop(columns=['PhoneService', 'MultipleLines'])

`PhoneService` and `MultipleLines` were combined into a single feature, `PhoneLineType` (`NoPhone`, `SingleLine`, or `MultipleLines`), to reduce redundancy and better represent phone-line usage.

In [11]:
# Service columns that are rolled up into a single per-customer count.
internet_services = [
    'OnlineSecurity', 'OnlineBackup', 'DeviceProtection',
    'TechSupport', 'StreamingTV', 'StreamingMovies',
]

# For every row, count how many of the listed columns hold the value 'Yes'.
df['NumInternetServices'] = df[internet_services].eq('Yes').sum(axis='columns')

In [12]:
# Remove the individual service columns now that NumInternetServices holds
# their per-customer count.
# The result is rebound to `df` rather than mutated with inplace=True:
# pandas guidance discourages `inplace`, and an explicit assignment keeps the
# data lineage visible in the cell.
df = df.drop(columns=internet_services)

In [13]:
def family_status(row):
    """Collapse the Dependents and Partner answers into one household label.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must expose 'Dependents'; 'Partner' is only read when 'Dependents'
        is not 'Yes'.

    Returns
    -------
    str
        'WithDependents' when Dependents equals 'Yes' (whatever Partner is);
        otherwise 'PartnerOnly' when Partner equals 'Yes'; otherwise
        'NoFamily'.
    """
    # Dependents take precedence over the partner answer.
    if row['Dependents'] == 'Yes':
        return 'WithDependents'
    return 'PartnerOnly' if row['Partner'] == 'Yes' else 'NoFamily'

# Evaluate family_status once per row and store the label as a new column.
df['FamilyStatus'] = df.apply(family_status, axis='columns')

In [14]:
# Remove the two source columns now that FamilyStatus encodes both of them.
# The result is rebound to `df` rather than mutated with inplace=True:
# pandas guidance discourages `inplace`, and an explicit assignment keeps the
# data lineage visible in the cell.
df = df.drop(columns=['Partner', 'Dependents'])

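Reduced feature redundancy by aggregating related columns into higher-level behavioral features: the six internet add-on service columns became `NumInternetServices`, and `Partner` / `Dependents` became `FamilyStatus`.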

In [15]:
# Preview the frame after the aggregated features replaced their source columns.
df.head(n=5)

Unnamed: 0,gender,SeniorCitizen,tenure,InternetService,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,IsNewCustomer,IsHighCharge,PhoneLineType,NumInternetServices,FamilyStatus
0,Female,0,1,DSL,Month-to-month,Yes,Electronic check,29.85,29.85,No,1,0,NoPhone,1,PartnerOnly
1,Male,0,34,DSL,One year,No,Mailed check,56.95,1889.5,No,0,0,SingleLine,2,NoFamily
2,Male,0,2,DSL,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,1,0,SingleLine,2,NoFamily
3,Male,0,45,DSL,One year,No,Bank transfer (automatic),42.3,1840.75,No,0,0,NoPhone,3,NoFamily
4,Female,0,2,Fiber optic,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,1,1,SingleLine,0,NoFamily


In [16]:
# Sanity check: (rows, columns) of the frame after the feature and drop steps.
df.shape

(7043, 15)

In [17]:
# Interaction flag: 1 only when both IsNewCustomer and IsHighCharge are 1.
df['NewCustomer_HighCharge'] = (
    df['IsNewCustomer'].eq(1) & df['IsHighCharge'].eq(1)
).astype(int)

In [18]:
# Spot-check the interaction flag against its two inputs.
df.loc[:, ['IsNewCustomer', 'IsHighCharge', 'NewCustomer_HighCharge']].head()

Unnamed: 0,IsNewCustomer,IsHighCharge,NewCustomer_HighCharge
0,1,0,0
1,0,0,0
2,1,0,0
3,0,0,0
4,1,1,1


In [19]:
# 1 when the Contract value is exactly 'Month-to-month', else 0.
df['IsMonthToMonth'] = df['Contract'].eq('Month-to-month').astype(int)

In [20]:
# Interaction flag: 1 only when both IsMonthToMonth and IsHighCharge are 1.
df['MonthToMonth_HighCharge'] = (
    df['IsMonthToMonth'].eq(1) & df['IsHighCharge'].eq(1)
).astype(int)

In [21]:
# Spot-check the contract/charge interaction flag against its inputs.
df.loc[:, ['Contract', 'IsHighCharge', 'MonthToMonth_HighCharge']].head()

Unnamed: 0,Contract,IsHighCharge,MonthToMonth_HighCharge
0,Month-to-month,0,0
1,One year,0,0
2,Month-to-month,0,0
3,One year,0,0
4,Month-to-month,1,1


In [22]:
# Write the engineered frame as CSV, without the index, to the relative path
# 'df_featured' (no file extension), resolved against the working directory.
df.to_csv(path_or_buf='df_featured', index=False)