In [12]:
import pandas as pd
import numpy as np

In [13]:
hourly_df = pd.read_csv(
    "../data/processed/hourly_device_energy.csv",
    parse_dates=['timestamp']
)

In [14]:
hourly_df.head()
hourly_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87828 entries, 0 to 87827
Data columns (total 3 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   Appliance Type            87828 non-null  object        
 1   timestamp                 87828 non-null  datetime64[ns]
 2   Energy Consumption (kWh)  87828 non-null  float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 2.0+ MB


In [15]:
hourly_df = hourly_df.set_index('timestamp')

In [16]:
hourly_df['hour'] = hourly_df.index.hour
hourly_df['day'] = hourly_df.index.day
hourly_df['weekday'] = hourly_df.index.weekday
hourly_df['month'] = hourly_df.index.month

In [17]:
hourly_df['lag_1'] = hourly_df['Energy Consumption (kWh)'].shift(1)
hourly_df['lag_24'] = hourly_df['Energy Consumption (kWh)'].shift(24)

In [18]:
hourly_df['rolling_mean_24'] = (
    hourly_df['Energy Consumption (kWh)']
    .rolling(window=24)
    .mean()
)

In [19]:
hourly_df = hourly_df.dropna()

In [20]:
appliances = hourly_df['Appliance Type'].unique()

print(f"Total appliances detected: {len(appliances)}")
print("Appliances:", appliances)

Total appliances detected: 10
Appliances: ['Air Conditioning' 'Computer' 'Dishwasher' 'Fridge' 'Heater' 'Lights'
 'Microwave' 'Oven' 'TV' 'Washing Machine']


In [21]:
for appliance in appliances:
    
    print(f"\nProcessing appliance: {appliance}")
    
    appliance_df = hourly_df[
        hourly_df['Appliance Type'] == appliance
    ].copy()
    
    
    X = appliance_df.drop(
        ['Energy Consumption (kWh)', 'Appliance Type'],
        axis=1
    )
    y = appliance_df['Energy Consumption (kWh)']
    
    split_index = int(len(appliance_df) * 0.8)
    
    X_train = X.iloc[:split_index]
    X_test  = X.iloc[split_index:]
    y_train = y.iloc[:split_index]
    y_test  = y.iloc[split_index:]
    
    
    X_train.to_csv(
        f"../data/processed/X_train_{appliance}.csv",
        index=False
    )
    X_test.to_csv(
        f"../data/processed/X_test_{appliance}.csv",
        index=False
    )
    y_train.to_csv(
        f"../data/processed/y_train_{appliance}.csv",
        index=False
    )
    y_test.to_csv(
        f"../data/processed/y_test_{appliance}.csv",
        index=False
    )

print("\nFeature engineering completed for all appliances.")


Processing appliance: Air Conditioning

Processing appliance: Computer

Processing appliance: Dishwasher

Processing appliance: Fridge

Processing appliance: Heater

Processing appliance: Lights

Processing appliance: Microwave

Processing appliance: Oven

Processing appliance: TV

Processing appliance: Washing Machine

Feature engineering completed for all appliances.


In [23]:
hourly_df.head(35)

Unnamed: 0_level_0,Appliance Type,Energy Consumption (kWh),hour,day,weekday,month,lag_1,lag_24,rolling_mean_24
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-02 00:00:00,Air Conditioning,3.51,0,2,0,1,4.76,4.42,4.267083
2023-01-02 01:00:00,Air Conditioning,4.41,1,2,0,1,3.51,7.61,4.13375
2023-01-02 02:00:00,Air Conditioning,2.11,2,2,0,1,4.41,14.8,3.605
2023-01-02 03:00:00,Air Conditioning,3.47,3,2,0,1,2.11,4.19,3.575
2023-01-02 04:00:00,Air Conditioning,9.11,4,2,0,1,3.47,10.68,3.509583
2023-01-02 05:00:00,Air Conditioning,0.0,5,2,0,1,9.11,6.94,3.220417
2023-01-02 06:00:00,Air Conditioning,4.19,6,2,0,1,0.0,0.0,3.395
2023-01-02 07:00:00,Air Conditioning,6.29,7,2,0,1,4.19,0.0,3.657083
2023-01-02 08:00:00,Air Conditioning,0.0,8,2,0,1,6.29,0.0,3.657083
2023-01-02 09:00:00,Air Conditioning,4.61,9,2,0,1,0.0,2.17,3.75875
