# Telco Customer Churn: Feature Engineering

This notebook focuses on creating new features and transforming existing ones to improve model performance.

## 1. Setup and Data Loading

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
import yaml

# Make the parent of the current working directory importable so the `src`
# package used below can be resolved.
# NOTE(review): this relies on the kernel's working directory being the folder
# that holds this notebook — confirm if the notebook is launched from elsewhere.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Guarded so that re-running this cell does not stack duplicate entries on sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.data_loader import TelcoDataLoader
from src.feature_engineer import FeatureEngineer
from src.preprocessor import DataPreprocessor

# Load config.
# The encoding is given explicitly: without it, open() decodes with the
# platform's locale encoding, which can mis-read non-ASCII characters in the file.
with open('../config/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Load the raw dataset through the project loader
loader = TelcoDataLoader()
df = loader.load_raw_data()

# Basic cleaning:
#   1. coerce TotalCharges to numeric; entries that cannot be parsed become NaN
#   2. drop the rows where TotalCharges ended up as NaN
# Built as a new frame (assign + dropna) rather than mutating the loaded frame
# in place, so the object returned by the loader is left untouched.
df = (
    df
    .assign(TotalCharges=pd.to_numeric(df['TotalCharges'], errors='coerce'))
    .dropna(subset=['TotalCharges'])
)

print(f"Dataset shape: {df.shape}")
df.head()

Successfully loaded data from c:\Users\IPK\Telco churn project 1\telco-churn-production\data\raw\telco_churn.csv
Dataset shape: (7032, 21)


Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


## 2. Feature Engineering

In [None]:
# Initialize feature engineer
feature_engineer = FeatureEngineer()

# Create new features. A copy is handed over so `df` is preserved regardless of
# whether transform() modifies its argument (its implementation is not shown here).
df_engineered = feature_engineer.transform(df.copy())

# Columns present after the transform that were not in the input frame.
# Kept as a set so any later set operations on `new_cols` keep working.
new_cols = set(df_engineered.columns) - set(df.columns)

# Printed in sorted order: the iteration order of a set of strings can differ
# between kernel sessions, which would make this cell's output non-reproducible.
print("New features created:")
print(sorted(new_cols))

df_engineered.head()

## 3. Save Engineered Features

In [11]:
# Write the engineered frame to the processed-data folder as CSV.
# Requires `df_engineered` from the feature-engineering cell to exist in the kernel.
output_path = '../data/processed/telco_churn_engineered.csv'

# Create the destination folder first; exist_ok=True makes this a no-op when it is already there.
output_dir = os.path.dirname(output_path)
os.makedirs(output_dir, exist_ok=True)

# index=False keeps the row index out of the written file.
df_engineered.to_csv(output_path, index=False)

# Confirmation: where the file went and the shape that was written.
print(
    f"✅ Engineered features saved to: {output_path}",
    f"Final dataset shape: {df_engineered.shape}",
    sep="\n",
)

NameError: name 'df_engineered' is not defined