In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# -------------------------------
# 1. Load Dataset
# -------------------------------
df = pd.read_csv("Raw_data_copy.csv")

# -------------------------------
# 2. Convert Timestamp & Extract Features
# -------------------------------
# Parse the raw Timestamp column into datetimes so the .dt accessor is usable.
df = df.assign(Timestamp=pd.to_datetime(df['Timestamp']))

# Calendar parts of every reading; DayOfWeek runs from 0 (Monday) to 6 (Sunday).
df = df.assign(
    Hour=df['Timestamp'].dt.hour,
    DayOfWeek=df['Timestamp'].dt.dayofweek,
    Month=df['Timestamp'].dt.month,
)

# -------------------------------
# 3. Label Encoding
# -------------------------------
# One encoder per categorical column; each stays bound to its own name so the
# fitted mapping remains available after this step.
le_id, le_type, le_occ = LabelEncoder(), LabelEncoder(), LabelEncoder()

# Replace each categorical column, in place, with its integer codes.
# NOTE(review): LabelEncoder numbers classes in sorted order, so the codes may
# not follow any natural ordering of Occupancy_Level - confirm that is
# acceptable for the downstream model.
for _encoder, _column in (
    (le_id, 'Building_ID'),
    (le_type, 'Building_Type'),
    (le_occ, 'Occupancy_Level'),
):
    df[_column] = _encoder.fit_transform(df[_column])

# -------------------------------
# 4. Add Lag Features (Improves Model)
# -------------------------------
# A lag only makes sense relative to the previous reading of the SAME
# building, so order the rows chronologically within each building and compute
# every feature per Building_ID. A frame-wide shift()/rolling() would give the
# first rows of one building the last readings of whichever building happens
# to precede it in the file.
df = df.sort_values(['Building_ID', 'Timestamp'])
_usage_by_building = df.groupby('Building_ID')['Energy_Usage (kWh)']

df['lag_1'] = _usage_by_building.shift(1)
df['lag_2'] = _usage_by_building.shift(2)

# Rolling average (smoothing)
# NOTE(review): the window covers the current row and the two rows before it,
# so this feature contains the current Energy_Usage value. If Energy_Usage is
# the prediction target, build the window from lag_1 instead - confirm intent.
df['rolling_mean_3'] = _usage_by_building.transform(
    lambda usage: usage.rolling(window=3).mean()
)

# -------------------------------
# 5. Remove rows with NA from lag features
# -------------------------------
# The first two readings of every building have no complete lag history and
# are removed here. dropna() without `subset` also removes rows that have a
# missing value in any other column.
df.dropna(inplace=True)

# -------------------------------
# 6. Final Preprocessed Dataset
# -------------------------------
# Show the first five processed rows, then the (rows, columns) shape.
print(df.head(n=5))
print("\nFinal Shape:", df.shape)

# Optional: save processed dataset (row index labels are not written out)
df.to_csv(path_or_buf="preprocessed_energy_data.csv", index=False)


            Timestamp  Building_ID  Energy_Usage (kWh)  Temperature (°C)  \
2 2025-01-01 02:00:00            0              187.21             -1.33   
3 2025-01-01 03:00:00            0              262.23              0.24   
4 2025-01-01 04:00:00            0              472.97              5.44   
5 2025-01-01 05:00:00            0              198.15             -7.64   
6 2025-01-01 06:00:00            0              369.35             34.27   

   Humidity (%)  Building_Type  Occupancy_Level  Hour  DayOfWeek  Month  \
2         37.74              2                0     2          2      1   
3         39.97              2                0     3          2      1   
4         89.29              2                2     4          2      1   
5         57.01              2                2     5          2      1   
6         33.01              2                1     6          2      1   

    lag_1   lag_2  rolling_mean_3  
2  230.76  121.30      179.756667  
3  187.21  230.76  