# Feature Engineering – Telecom Customer Churn

## Purpose
This notebook prepares the cleaned telecom customer dataset for machine learning by encoding categorical variables, scaling numerical features, and creating a final feature matrix for churn prediction.


In [9]:
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [10]:
### Load the cleaned dataset and keep only the columns used for modelling
# NOTE(review): this is an absolute, machine-specific path; consider a
# configurable data directory so the notebook runs on other machines.
selected_cols = ["tenure", "MonthlyCharges", "Contract", "InternetService", "PaymentMethod", "Churn"]
cleaned_path = r'C:\Users\AKSHAT\Telecom_Churn_Project DA\Data\Cleaned_dataset.csv'

# Read the full file, then narrow it to the selected columns in the listed order.
df = pd.read_csv(cleaned_path)[selected_cols]


In [11]:
### Split the frame into features (x) and the target (y)
# x holds every column except 'Churn'; y is the 'Churn' column on its own.
x = df.drop(columns='Churn')
y = df['Churn']

In [12]:
### Encode the target variable as 1 (churned) / 0 (did not churn)
# The encoded target is derived from df['Churn'] instead of re-mapping y in
# place, so this cell gives the same result every time it is run. Mapping an
# already-encoded y would turn every value into NaN, because 0 and 1 are not
# keys of the mapping.
# Any label other than 'Yes' / 'No' maps to NaN.
y = df['Churn'].map({'Yes': 1, 'No': 0})

In [13]:
# Group the feature columns by dtype: 64-bit numeric columns are scaled later,
# object-dtype columns are one-hot encoded.
numerical_cols = x.select_dtypes(include=['int64', 'float64']).columns
categorial_cols = x.select_dtypes(include=['object']).columns

In [14]:
### One-hot encode the categorical features
# drop_first=True drops the first level of each encoded feature, so a feature
# with k levels yields k-1 indicator columns. Columns not listed in
# `columns` are left unchanged.
x_encoded = pd.get_dummies(x, columns=categorial_cols, drop_first=True)

In [15]:
### Scaling
# Standardise only the numerical columns; the one-hot columns stay as they are.
#
# The scaler is fitted on the untouched values in `x`, not on `x_encoded`,
# because this cell overwrites the numerical columns of `x_encoded`. Fitting on
# `x_encoded` would make a re-run of the cell refit on already-standardised
# data and overwrite scaler.pkl with a near-identity scaler. `x` and
# `x_encoded` share the same rows in the same order, so the first-run result
# is unchanged.
#
# NOTE(review): the scaler is fitted on the full dataset. If a train/test split
# happens downstream, confirm that fitting before the split is intended.
scaler = StandardScaler()
x_encoded[numerical_cols] = scaler.fit_transform(x[numerical_cols])

# Persist the fitted scaler to the App folder.
joblib.dump(scaler, r"C:\Users\AKSHAT\Telecom_Churn_Project DA/App/scaler.pkl")

['C:\\Users\\AKSHAT\\Telecom_Churn_Project DA/App/scaler.pkl']

In [16]:
### Scaling is done after encoding and only on the numerical columns: encoding creates binary columns, and scaling binary columns is unnecessary.

In [17]:
### Combine features and target
# Attach the encoded target as a 'Churn' column; values are aligned on the row index.
x_encoded = x_encoded.assign(Churn=y)

In [18]:
### Save the feature-engineered dataset
# index=False keeps the row index out of the CSV.
features_path = r'C:\Users\AKSHAT\Telecom_Churn_Project DA\Data\churn_features.csv'
x_encoded.to_csv(features_path, index=False)

## Feature Engineering Summary
- Separated target variable to prevent leakage
- Encoded categorical variables using one-hot encoding
- Scaled numerical features for model stability
- Prepared final ML-ready dataset

In [19]:
# Display the column labels of the final frame (features plus the 'Churn' target).
x_encoded.columns


Index(['tenure', 'MonthlyCharges', 'Contract_One year', 'Contract_Two year',
       'InternetService_Fiber optic', 'InternetService_No',
       'PaymentMethod_Credit card (automatic)',
       'PaymentMethod_Electronic check', 'PaymentMethod_Mailed check',
       'Churn'],
      dtype='object')

In [20]:
# Same column labels as a plain Python list.
x_encoded.columns.tolist()


['tenure',
 'MonthlyCharges',
 'Contract_One year',
 'Contract_Two year',
 'InternetService_Fiber optic',
 'InternetService_No',
 'PaymentMethod_Credit card (automatic)',
 'PaymentMethod_Electronic check',
 'PaymentMethod_Mailed check',
 'Churn']

In [22]:
# NOTE(review): this cell repeats the combine-and-save steps already performed
# in the earlier cells; consider removing it once confirmed redundant.
x_encoded = x_encoded.assign(Churn=y)
x_encoded.to_csv(
    r'C:\Users\AKSHAT\Telecom_Churn_Project DA\Data\churn_features.csv',
    index=False,
)
