# Feature Engineering

In [1]:
import pandas as pd

# Read the cleaned dataset from disk into the working DataFrame
data = pd.read_csv(filepath_or_buffer="cleaned_data.csv")

In [2]:
# --- 5.1 Response Delay ---
# Hours elapsed from the purchase date to the first response.
# The feature is only built when both source columns exist.
if {"First Response Time", "Date of Purchase"}.issubset(data.columns):
    # Parse both columns to datetimes so they can be subtracted
    for timestamp_col in ("Date of Purchase", "First Response Time"):
        data[timestamp_col] = pd.to_datetime(data[timestamp_col])

    elapsed = data["First Response Time"] - data["Date of Purchase"]
    # Timedelta -> seconds -> hours
    data["Response_Delay_Hours"] = elapsed.dt.total_seconds() / 3600

# --- 5.2 Resolution Delay ---
# Hours elapsed between the first response and the resolution timestamp.
# The feature is only built when both source columns exist.
if "Time to Resolution" in data.columns and "First Response Time" in data.columns:
    # Parse both operands here instead of relying on section 5.1 having already
    # converted "First Response Time": 5.1 is skipped when "Date of Purchase"
    # is absent, which would leave raw strings and make the subtraction raise.
    # Re-parsing a column that is already datetime leaves its values unchanged.
    data["First Response Time"] = pd.to_datetime(data["First Response Time"])
    data["Time to Resolution"] = pd.to_datetime(data["Time to Resolution"])
    # Timedelta -> seconds -> hours
    data["Resolution_Delay_Hours"] = (
        data["Time to Resolution"] - data["First Response Time"]
    ).dt.total_seconds() / 3600

# --- 5.3 Customer Age Group ---
# Bucket "Customer Age" into labelled bands. Intervals are right-closed,
# so for example "21-30" covers ages in (20, 30].
if "Customer Age" in data.columns:
    # The top edge is open-ended so the "70+" label covers every age above 70;
    # a fixed cap of 100 would leave anyone older than that as NaN.
    bins = [0, 20, 30, 40, 50, 60, 70, float("inf")]
    labels = ["0-20","21-30","31-40","41-50","51-60","61-70","70+"]
    # include_lowest=True keeps an age of exactly 0 inside "0-20" instead of NaN.
    # Negative or missing ages still fall outside every bin and stay NaN.
    data["Age_Group"] = pd.cut(
        data["Customer Age"], bins=bins, labels=labels, include_lowest=True
    )

# --- 5.4 Drop High-Text Columns (for now) ---
# Remove the listed columns; any name not present in the frame is ignored.
drop_cols = [
    "Ticket Description",
    "Ticket Subject",
    "Customer Name",
    "Customer Email",
]
data = data.drop(columns=drop_cols, errors="ignore")

# --- 5.5 Save Feature-Engineered Data ---
# Write the frame to CSV without the index, then list the engineered columns
# (any column whose name contains "Delay", plus "Age_Group") that are present.
data.to_csv(path_or_buf="feature_engineered_data.csv", index=False)

print("✅ Feature Engineering Complete. New dataset saved as 'feature_engineered_data.csv'")
print("\nNew Columns Added:")
print([name for name in data.columns if name == "Age_Group" or "Delay" in name])

✅ Feature Engineering Complete. New dataset saved as 'feature_engineered_data.csv'

New Columns Added:
['Response_Delay_Hours', 'Resolution_Delay_Hours', 'Age_Group']
