# Data Preprocessing for Modeling (Encoding & Scaling)

In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Read the feature-engineered dataset into a DataFrame
data = pd.read_csv(filepath_or_buffer="Dataset/feature_engineered_data.csv")

# --- 6.1 Encode Categorical Variables ---

# Columns to label-encode: every distinct value is replaced by an integer code.
# NOTE(review): LabelEncoder numbers the classes in sorted order of their
# string values, so the codes carry no business ordering (e.g. priority
# levels) — confirm this is acceptable for the downstream model.
label_cols = ["Ticket Status", "Ticket Priority", "Ticket Type", "Ticket Channel", "Age_Group"]

# Keep one fitted encoder per column (keyed by column name). Refitting a single
# shared encoder would overwrite its classes on every iteration, leaving only
# the last column's mapping available for inverse_transform or for encoding
# new data consistently.
label_encoders = {}

# `le` is kept for backward compatibility: after the loop it refers to the
# encoder fitted on the last processed column (or stays unfitted when none of
# the columns in label_cols exists in the data).
le = LabelEncoder()
for col in label_cols:
    if col not in data.columns:
        continue
    le = LabelEncoder()
    # astype(str) gives the encoder uniformly typed input; as a side effect,
    # missing values become the literal category "nan" and get their own code.
    data[col] = le.fit_transform(data[col].astype(str))
    label_encoders[col] = le

# Nominal columns (no natural order, e.g. Gender & Product Purchased) are
# expanded into indicator columns instead of integer codes.
one_hot_cols = ["Customer Gender", "Product Purchased"]

# Only request columns that are actually present in the data; drop_first=True
# omits the first level of each encoded column.
present_one_hot_cols = [name for name in one_hot_cols if name in data.columns]
data = pd.get_dummies(data, columns=present_one_hot_cols, drop_first=True)

# --- 6.2 Scale Numerical Features ---
# Numeric columns to standardize (zero mean, unit variance per column).
num_cols = ["Customer Age", "Response_Delay_Hours", "Resolution_Delay_Hours"]

# Keep one fitted scaler per column (keyed by column name). Refitting a single
# shared scaler would discard the mean/scale of every column but the last,
# making it impossible to inverse-transform or to scale new data identically.
# NOTE(review): the scalers are fitted on the full dataset; if a train/test
# split happens later, consider fitting on the training portion only — confirm
# against the modeling step.
scalers = {}

# `scaler` is kept for backward compatibility: after the loop it refers to the
# scaler fitted on the last processed column (or stays unfitted when none of
# the columns in num_cols exists in the data).
scaler = StandardScaler()
for col in num_cols:
    if col not in data.columns:
        continue
    scaler = StandardScaler()
    # StandardScaler expects 2-D input, hence the double brackets.
    data[col] = scaler.fit_transform(data[[col]])
    scalers[col] = scaler

# --- 6.3 Save Final Preprocessed Dataset ---
# Write the processed frame to the working directory without the row index.
data.to_csv("preprocessed_data.csv", index=False)

# Report completion and list the resulting columns, one item per line.
print(
    "✅ Data Preprocessing Complete. New dataset saved as 'preprocessed_data.csv'",
    "Final Columns Ready for Modeling:",
    data.columns,
    sep="\n",
)


✅ Data Preprocessing Complete. New dataset saved as 'preprocessed_data.csv'
Final Columns Ready for Modeling:
Index(['Customer Age', 'Date of Purchase', 'Ticket Type', 'Ticket Status',
       'Resolution', 'Ticket Priority', 'Ticket Channel',
       'First Response Time', 'Time to Resolution',
       'Customer Satisfaction Rating', 'Purchase_Year', 'Purchase_Month',
       'Purchase_Day', 'Response_Delay_Hours', 'Resolution_Delay_Hours',
       'Age_Group', 'Customer Gender_1', 'Customer Gender_2',
       'Product Purchased_1', 'Product Purchased_2', 'Product Purchased_3',
       'Product Purchased_4', 'Product Purchased_5', 'Product Purchased_6',
       'Product Purchased_7', 'Product Purchased_8', 'Product Purchased_9',
       'Product Purchased_10', 'Product Purchased_11', 'Product Purchased_12',
       'Product Purchased_13', 'Product Purchased_14', 'Product Purchased_15',
       'Product Purchased_16', 'Product Purchased_17', 'Product Purchased_18',
       'Product Purchased_19', 