# In [2]:
import pandas as pd
import numpy as np

def engineer_features_pro(input_file, output_file='final_features_pro.csv'):
    """Build lag, seasonal and rolling features and save them to CSV.

    Reads ``input_file`` as a CSV indexed by its ``dt`` column (parsed as
    dates), derives features from the ``Global_active_power`` column, drops
    the raw sensor columns, removes every row containing a missing value and
    writes the result to ``output_file``.

    Features added:
        lag_1: ``Global_active_power`` of the previous row.
        lag_7: ``Global_active_power`` seven rows earlier.
        temp_proxy: ``sin(2 * pi * month / 12)`` of the index month.
        rolling_mean_7: mean of the seven rows *preceding* the current row.

    Shifts and windows are positional, so the features only mean
    "yesterday" / "last week" when rows are consecutive days in
    chronological order.
    NOTE(review): the input is assumed to be sorted daily data - confirm
    against the step that produces it.

    Args:
        input_file: Path of the CSV to read; must contain ``dt`` and
            ``Global_active_power`` columns.
        output_file: Path the engineered feature set is written to.

    Returns:
        The engineered DataFrame (the same data that was written to disk).
    """
    print(f"--- Step 2: Professional Feature Engineering from {input_file} ---")

    # 1. Load the data
    df = pd.read_csv(input_file, index_col='dt', parse_dates=True)
    power = df['Global_active_power']

    # 2. Lags (history): previous row and seven rows back (weekly routine).
    df['lag_1'] = power.shift(1)
    df['lag_7'] = power.shift(7)

    # 3. Seasonal proxy: with no external weather data, a yearly sine wave
    # over the month stands in for the temperature cycle.
    # NOTE(review): this sine peaks in March and bottoms out in September;
    # a phase shift may match the real temperature cycle better - confirm.
    df['temp_proxy'] = np.sin(2 * np.pi * df.index.month / 12)

    # 4. Rolling window over the *previous* seven rows. Shifting first keeps
    # the current row's Global_active_power out of its own feature; without
    # the shift the window would include the value being predicted.
    df['rolling_mean_7'] = power.shift(1).rolling(window=7).mean()

    # 5. Drop leakage and sensor columns (columns absent from the input are
    # silently skipped).
    cols_to_drop = [
        'Global_intensity', 'Voltage', 'Global_reactive_power',
        'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3'
    ]
    df = df.drop(columns=cols_to_drop, errors='ignore')

    # 6. Final clean up: the first seven rows lack lag_7 / rolling history.
    df = df.dropna()

    # Save the professional feature set
    df.to_csv(output_file)

    print(f"Step 2 (Pro) Complete! Features created: {list(df.columns)}")
    return df

if __name__ == "__main__":
    # Script entry point: run the feature-engineering step on the daily data.
    engineer_features_pro(input_file='cleaned_daily_data.csv')

# Output:
# --- Step 2: Professional Feature Engineering from cleaned_daily_data.csv ---
# Step 2 (Pro) Complete! Features created: ['Global_active_power', 'year', 'month', 'day_of_week', 'lag_1', 'lag_7', 'temp_proxy', 'rolling_mean_7']
