In [8]:
# Add project root to sys.path for src imports
import sys
import os
PROJECT_ROOT = os.path.abspath("..")
sys.path.append(PROJECT_ROOT)

# Core libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import joblib

# Import custom functions
from src.data_loader import load_raw_data, train_test_split_series

Load Raw Data

In [9]:
# Load raw CSV 
df = load_raw_data(os.path.join(PROJECT_ROOT, "data/raw/electricity.csv"))
df.head()

Unnamed: 0_level_0,MT_001,MT_002,MT_003,MT_004,MT_005,MT_006,MT_007,MT_008,MT_009,MT_010,...,MT_361,MT_362,MT_363,MT_364,MT_365,MT_366,MT_367,MT_368,MT_369,MT_370
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2011-01-01 00:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 00:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 00:45:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2011-01-01 01:15:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Select Client Series

In [10]:
client_series = df["MT_001"].values.reshape(-1, 1)  # reshape for scaler
print("Series shape:", client_series.shape)

Series shape: (140256, 1)


Train-Test Split

In [11]:
# Split into train and test sets (time series safe)
train_size = 0.8  # 80% train
train_len = int(len(client_series) * train_size)

train = client_series[:train_len]
test = client_series[train_len:]

print("Train shape:", train.shape)
print("Test shape:", test.shape)

Train shape: (112204, 1)
Test shape: (28052, 1)


Scale Data

In [12]:
# Initialize scaler
scaler = MinMaxScaler()

# Fit on train, transform both train and test
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.transform(test)

print("Train scaled min/max:", train_scaled.min(), train_scaled.max())

Train scaled min/max: 0.0 1.0


Create Sequences

In [13]:
# Function to create sequences for multi-step forecasting
def create_sequences(data, lookback=48, horizon=24):
    X, y = [], []
    for i in range(len(data) - lookback - horizon):
        X.append(data[i:i+lookback])
        y.append(data[i+lookback:i+lookback+horizon])
    return np.array(X), np.array(y)

lookback = 48   # past 48 hours
horizon  = 24   # predict next 24 hours

X_train, y_train = create_sequences(train_scaled, lookback, horizon)
X_test, y_test   = create_sequences(test_scaled, lookback, horizon)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


X_train shape: (112132, 48, 1)
y_train shape: (112132, 24, 1)
X_test shape: (27980, 48, 1)
y_test shape: (27980, 24, 1)


SAVE Processed Data

In [14]:
# Create processed folder if it doesn't exist
processed_folder = os.path.join(PROJECT_ROOT, "data/processed")
os.makedirs(processed_folder, exist_ok=True)

# Save numpy arrays
np.save(os.path.join(processed_folder, "X_train.npy"), X_train)
np.save(os.path.join(processed_folder, "y_train.npy"), y_train)
np.save(os.path.join(processed_folder, "X_test.npy"), X_test)
np.save(os.path.join(processed_folder, "y_test.npy"), y_test)

# Save scaler
joblib.dump(scaler, os.path.join(processed_folder, "scaler.pkl"))

print("Processed data saved successfully in:", processed_folder)

Processed data saved successfully in: D:\AI Development skills\DeepLearning_Project\data/processed
