In [4]:
import pandas as pd

# Locations of the cleaned input and of the processed output.
INPUT_PATH = "C:/Users/Admin/Downloads/clean_data_after_eda.csv"
OUTPUT_PATH = "C:/Users/Admin/Downloads/processed_data.csv"

# Columns parsed as dates, and columns removed once tenure has been derived.
date_cols = ['date_activ', 'date_end']
drop_cols = ['id', 'date_modif_prod', 'date_renewal']


def engineer_features(df):
    """Return a copy of *df* with derived features added and gaps imputed.

    Steps, in order:
      1. Parse ``date_cols`` (``%Y-%m-%d``) and derive ``customer_tenure_days``.
      2. Drop ``drop_cols``.
      3. Add consumption, price and profitability features.
      4. Turn non-finite ratio values into missing values.
      5. Fill numeric gaps with each column's median and the two
         categorical columns with ``'Unknown'``.

    Args:
        df: Frame holding every column referenced below; it is not modified.

    Returns:
        A new DataFrame with the engineered columns.

    Raises:
        KeyError: If a referenced column is absent from *df*.
    """
    df = df.copy()

    # Convert date columns to datetime and derive tenure in whole days.
    df[date_cols] = df[date_cols].apply(pd.to_datetime, format='%Y-%m-%d')
    df['customer_tenure_days'] = (df['date_end'] - df['date_activ']).dt.days

    # Drop unnecessary columns.
    df = df.drop(columns=drop_cols)

    # Consumption trends.
    # NOTE(review): the "+ 1" in the ratio denominators presumably guards
    # against a zero denominator - confirm that the offset is intended.
    df['consumption_ratio'] = df['cons_last_month'] / (df['cons_12m'] + 1)
    df['forecast_deviation'] = df['forecast_cons_12m'] - df['cons_12m']

    # Price sensitivity.
    df['price_peak_vs_offpeak'] = (
        df['forecast_price_energy_peak']
        / (df['forecast_price_energy_off_peak'] + 1)
    )
    df['avg_yearly_price_var'] = df[
        ['var_year_price_off_peak', 'var_year_price_peak', 'var_year_price_mid_peak']
    ].mean(axis=1)
    df['avg_6m_price_var'] = df[
        ['var_6m_price_off_peak', 'var_6m_price_peak', 'var_6m_price_mid_peak']
    ].mean(axis=1)

    # Profitability metrics.
    df['profitability_ratio'] = (
        df['margin_net_pow_ele'] / (df['margin_gross_pow_ele'] + 1)
    )
    df['power_demand_per_product'] = df['pow_max'] * df['nb_prod_act']

    # A denominator column equal to -1 makes "column + 1" zero, so the
    # division yields +/-inf. fillna() ignores inf, so convert it to NaN
    # first and let the median imputation below handle it.
    ratio_cols = [
        'consumption_ratio',
        'price_peak_vs_offpeak',
        'profitability_ratio',
    ]
    df[ratio_cols] = df[ratio_cols].replace(
        [float('inf'), float('-inf')], float('nan')
    )

    # Handling missing values: per-column median for numeric columns,
    # an explicit 'Unknown' label for the two categorical columns.
    df = df.fillna(df.median(numeric_only=True))
    df['channel_sales'] = df['channel_sales'].fillna('Unknown')
    df['origin_up'] = df['origin_up'].fillna('Unknown')

    return df


# The guard is true when run as a notebook cell or a script, so ``df`` is
# still defined at top level there; importing the file performs no I/O.
if __name__ == "__main__":
    # Load the dataset and build the features.
    df = engineer_features(pd.read_csv(INPUT_PATH))

    # Save the processed dataset.
    df.to_csv(OUTPUT_PATH, index=False)

    print("Feature engineering completed and saved to processed_data.csv")

Feature engineering completed and saved to processed_data.csv
