# 🧱 02 - Feature Engineering
This notebook handles data cleaning, transformation, and preparation for model training.

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Read the raw Telco churn CSV into a DataFrame and preview the first rows
file_path = '../data/Telco-Customer-Churn.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)

## 🧼 Handle Missing and Invalid Data

In [None]:
# Convert TotalCharges to numeric; entries that cannot be parsed become NaN
# (errors='coerce') and are then replaced by the column median.
#
# The result is assigned back explicitly instead of calling
# `df['TotalCharges'].fillna(..., inplace=True)`: that form operates on a
# column selection, which emits a FutureWarning on pandas 2.x and does not
# update `df` at all once Copy-on-Write is enabled.
total_charges = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges.fillna(total_charges.median())

## 🔁 Encode Binary Categorical Variables

In [None]:
# Yes/No columns (including the target, Churn) are encoded as 1/0.
binary_cols = ['Partner', 'Dependents', 'PhoneService', 'PaperlessBilling', 'Churn']

# The mapping also accepts the already-encoded values 1 and 0 so that
# re-running this cell leaves the columns unchanged; with only the 'Yes'/'No'
# keys a second run would silently turn every value into NaN.
# Any value that is not a key of the mapping still becomes NaN.
yes_no_to_int = {'Yes': 1, 'No': 0, 1: 1, 0: 0}
for col in binary_cols:
    df[col] = df[col].map(yes_no_to_int)

## 🧠 One-Hot Encode Multi-Class Categorical Variables

In [None]:
# Drop customerID and one-hot encode the remaining categorical columns.
#
# The drop is non-mutating and uses errors='ignore' so the cell can be
# re-run: the in-place drop raised KeyError on a second execution because
# the column was already gone.
# drop_first=True omits the first level of every encoded column.
df = pd.get_dummies(
    df.drop(columns='customerID', errors='ignore'),
    drop_first=True,
)

## 📏 Feature Scaling

In [None]:
# Standardize the continuous columns with StandardScaler.
# NOTE(review): the scaler is fit on the whole frame; no train/test split is
# visible in this notebook. If a split happens downstream, consider fitting
# on the training portion only to avoid leaking test statistics.
numeric_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
scaler = StandardScaler().fit(df[numeric_cols])
df[numeric_cols] = scaler.transform(df[numeric_cols])

## 🎯 Split into Features and Target

In [None]:
# Target vector: the encoded Churn column
y = df['Churn']
# Feature matrix: every remaining column
X = df.drop(columns=['Churn'])

# Persist both parts as CSV (row index omitted) for later notebooks (optional)
X.to_csv('../data/features.csv', index=False)
y.to_csv('../data/labels.csv', index=False)

✅ Now the data is ready for model training!