# Feature Engineering

Creating new features: tenure groups, total services, contract risk tier, and average monthly spend.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw churn export into the working frame.
df = pd.read_csv('../data/Telco-Customer-Churn.csv')

# TotalCharges is parsed to numbers; anything that cannot be parsed becomes
# NaN (errors='coerce') instead of raising.
total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges

# Remove the rows whose TotalCharges could not be parsed
# (the EDA found only 11 of them).
rows_missing_charges = df.index[df['TotalCharges'].isna()]
df.drop(index=rows_missing_charges, inplace=True)

# Preview the cleaned frame.
df.head()

## 1. Tenure Cohorts
Group customers by how long they've stayed.

In [None]:
# Cohort labels, one per interval between consecutive bin edges below.
labels = ['0-12', '12-24', '24-48', '48-60', '60+']
# The last edge is open-ended so the '60+' cohort really has no upper limit
# (a fixed cap would turn any larger tenure into NaN).
bins = [0, 12, 24, 48, 60, np.inf]
# pd.cut builds right-closed intervals: (0, 12], (12, 24], ...
# include_lowest=True closes the first interval on the left as well, so a
# tenure of exactly 0 lands in '0-12' instead of silently becoming NaN.
# NOTE(review): the widely distributed Telco churn CSV spells this column
# 'tenure' (lowercase) - confirm the header in this copy of the data.
df['TenureGroup'] = pd.cut(df['Tenure'], bins=bins, labels=labels, include_lowest=True)

# Churn counts per tenure cohort.
sns.countplot(x='TenureGroup', hue='Churn', data=df)

## 2. Total Services
How many services does a customer have?

In [None]:
# Columns that each describe one subscribable service.
services = ['PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
            'OnlineBackup', 'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies']

# Boolean table: True where the customer has the service.
# Most columns flag a subscription with the literal 'Yes'.
has_service = df[services] == 'Yes'

# InternetService is different: in the public IBM Telco churn data it holds the
# connection type (e.g. 'DSL', 'Fiber optic') or 'No', never 'Yes', so the
# comparison above would never count it. Treat any non-missing value other
# than 'No' as "has internet"; a plain 'Yes' still counts under this rule.
# NOTE(review): confirm with df['InternetService'].unique().
has_service['InternetService'] = (
    df['InternetService'].notna() & (df['InternetService'] != 'No')
)

# Number of services per customer (row-wise count of True values).
df['TotalServices'] = has_service.sum(axis=1)

# Distribution of service counts for churned vs. retained customers.
sns.boxplot(x='Churn', y='TotalServices', data=df)

## 3. Contract Risk
Map each contract type to a risk tier: month-to-month is high risk, one-year is medium, and two-year is low.

In [None]:
# Contract type -> risk tier. Contract values not listed here map to NaN.
risk_map = {
    'Month-to-month': 'High',
    'One year': 'Medium',
    'Two year': 'Low',
}
df['ContractRisk'] = df['Contract'].map(risk_map)

# Share of each Churn value within every risk tier (each row sums to 1),
# drawn as stacked bars.
churn_share_by_risk = pd.crosstab(df['ContractRisk'], df['Churn'], normalize='index')
churn_share_by_risk.plot.bar(stacked=True)

## 4. Average Monthly Spend
Total Charges / Tenure. Does it match MonthlyCharges?

In [None]:
# Use 1 in place of a zero tenure so the division below never divides by zero.
months_for_division = df['Tenure'].replace(0, 1)
df['AvgMonthlySpend'] = df['TotalCharges'].div(months_for_division)

# Correlation between the billed MonthlyCharges and the derived average.
spend_columns = ['MonthlyCharges', 'AvgMonthlySpend']
df[spend_columns].corr()

## Save for modeling

In [None]:
# Write the engineered frame to disk for the modeling step; the row index is
# not written (index=False).
features_path = '../data/processed_with_features.csv'
df.to_csv(features_path, index=False)