In [1]:
import pandas as pd
import os
import numpy as np
import sys

## Load data

In [2]:


# Treat the parent of the current working directory as the project root and
# expose its "src" folder so the local `utils` package can be imported.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
src_path = os.path.join(project_root, "src")

# Append at most once so that re-running the cell does not grow sys.path.
already_on_path = src_path in sys.path
if not already_on_path:
    sys.path.append(src_path)
    
from utils.generate_dataset import generate_windows_dataset
from utils.features import decomp_and_features


data_dir_path = os.path.join("..", "data", "gridloss", "train.csv")

# The first CSV column becomes the index and is parsed into datetimes.
raw_frame = pd.read_csv(data_dir_path, index_col=0)
raw_frame.index = pd.to_datetime(raw_frame.index)

# Backfill missing data: each gap takes the next valid observation below it.
# Values missing after a column's last valid observation are left untouched.
df = raw_frame.bfill()

# Columns of `df` that together form the multivariate series for grid 1.
grid1_columns = ["grid1-load", "grid1-loss", "grid1-temp"]
num_ts = len(grid1_columns)

# Sliding-window geometry, expressed in rows of `df`.
window_size = 168  # 1 week
step_size = 24

# Number of window start positions: floor((rows - window_size) / step_size) + 1.
dataset_size = 1 + (df.shape[0] - window_size) // step_size

# When True the windows are regenerated even if cached CSV files exist.
force_calc_windows: bool = True

# One cached CSV per univariate series; the file name encodes the geometry.
windows_dir = os.path.join("..", "data", "gridloss", "windows")
windows_data_paths = [
    os.path.join(windows_dir, f"ts{i}_w{window_size}_s{step_size}_n{dataset_size}.csv")
    for i in range(num_ts)
]
load_windows: bool = all(map(os.path.exists, windows_data_paths))

# The cache is used only when every file exists and regeneration is not forced.
use_cached_windows = load_windows and not force_calc_windows

if use_cached_windows:
    # TODO: Add datetime index to the windows that are saved/loaded
    print("Loading windows dataset")
    # One array per series file, stacked on axis 0 and then rotated so the
    # series axis comes last; each slice along the new first axis becomes one
    # DataFrame whose columns are the grid1 series.
    data_transposed = [pd.read_csv(path).to_numpy() for path in windows_data_paths]
    stacked_windows = np.asarray(data_transposed).transpose(1, 2, 0)
    data = [
        pd.DataFrame(window_rows, columns=grid1_columns)
        for window_rows in stacked_windows.tolist()
    ]
else:
    print("Generating windows dataset")
    data = generate_windows_dataset(df, window_size, step_size, grid1_columns)

Generating windows dataset


100%|██████████| 724/724 [00:00<00:00, 33812.23it/s]


## Features and Decompositions

In [3]:
from utils.features import decomp_and_features

# Periodicity handed to the decomposition as `series_periodicity`
# (24 rows; presumably one day of hourly data, given window_size 168 = 1 week
# -- confirm).
sp = 24  # STL parameter

# One cached feature CSV per univariate series; the name encodes the geometry.
features_paths = [
    os.path.join("..", "data", "gridloss", "features", f"features_ts{i}_w{window_size}_s{step_size}_n{dataset_size}.csv")
    for i in range(num_ts)
]
load_features = all(os.path.exists(path) for path in features_paths)

# NOTE: Decompositions are not saved or loaded, yet the transformation cells
# further down index `decomps`. They are therefore computed on every run;
# previously a cache hit left `decomps` undefined and those cells failed with
# NameError.
decomps, features = decomp_and_features(data, series_periodicity=sp, dataset_size=dataset_size)

if load_features:
    # Prefer the cached features: one array per series file, stacked on axis 0,
    # then swapped so the window axis comes first.
    features_transposed = [pd.read_csv(path).to_numpy() for path in features_paths]
    features = np.asarray(features_transposed).transpose(1, 0, 2)
    print(features.shape)
else:
    print(np.asarray(decomps).shape, features.shape)

100%|██████████| 724/724 [00:21<00:00, 33.16it/s]

(724, 3) (724, 3, 4)





## Time Series

In [14]:
# Positions in the list of windows (`data`): the window that gets transformed
# and the window whose features are used as the target.
start_ts_index, target_ts_index = 636, 105

In [15]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [16]:
# 3x2 grid: one row per series; left column = start window, right = target.
fig = make_subplots(rows=3, cols=2, subplot_titles=("Original", "Target"))

# (window index, subplot column, trace-name prefix), in the order the traces
# are added so legend order and colours stay as before.
panels = [(start_ts_index, 1, ""), (target_ts_index, 2, "t_")]
# (DataFrame column, trace label); list position + 1 is the subplot row.
plotted_series = [("grid1-load", "grid_load"), ("grid1-loss", "grid_loss"), ("grid1-temp", "grid_temp")]

for window_index, subplot_col, name_prefix in panels:
    window_frame = data[window_index]
    for subplot_row, (column, label) in enumerate(plotted_series, start=1):
        # go.Scatter with mode="lines" replaces the deprecated go.Line class.
        fig.add_trace(
            go.Scatter(x=window_frame.index, y=window_frame[column], mode="lines", name=f"{name_prefix}{label}"),
            row=subplot_row,
            col=subplot_col,
        )

fig.update_layout(height=600, width=800, title_text=f"{start_ts_index=} vs. {target_ts_index=}")
fig.show()

## Initial Univariate TS Transformation

In [37]:
from utils.transformations import manipulate_trend_component, manipulate_seasonal_component

Start off by transforming the grid load time series

In [38]:
# Feature rows (one per univariate series) of the start and target windows.
start_features, target_features = features[start_ts_index], features[target_ts_index]

# Per-series decompositions of the start window.
start_decomps = decomps[start_ts_index]

# Position, within the multivariate time series, of the univariate series
# that is transformed first.
initial_transformed_ts_index = 0

# Multiplicative constants:
#   f -> trend determination
#   g -> trend slope
#   h -> trend linearity
#   k -> seasonal determination
f, g, h, k = 2.5, -1.5, 1.7, 0.8

In [39]:
# Decomposition (trend / seasonal / resid) of the series transformed first.
init_transformed_decomps = start_decomps[initial_transformed_ts_index]

# `m` is not assigned by any earlier cell of the notebook, so it is defined
# here to keep the cell runnable on a fresh kernel. The value 0 mirrors the
# `m=0` passed in the multivariate transformation loop.
# NOTE(review): confirm 0 is the intended value for the univariate case.
m = 0

# Manipulate the trend with factors f, g, h (and m) and the seasonal
# component with factor k.
init_new_trend = manipulate_trend_component(init_transformed_decomps.trend, f, g, h, m)
init_new_seasonal = manipulate_seasonal_component(init_transformed_decomps.seasonal, k)

# Recompose the series; the residual component is carried over unchanged.
init_new_ts = init_new_trend + init_new_seasonal + init_transformed_decomps.resid

In [40]:
# Top: original grid load of the start window; bottom: its transformed version.
fig = make_subplots(rows=2, cols=1, subplot_titles=("Original", "Transformed"))

start_window = data[start_ts_index]
# (y values, trace name); list position + 1 is the subplot row. Both traces
# share the start window's index as x axis.
stacked_traces = [(start_window["grid1-load"], "grid_load"), (init_new_ts, "transformed")]

for subplot_row, (values, trace_name) in enumerate(stacked_traces, start=1):
    # go.Scatter with mode="lines" replaces the deprecated go.Line class.
    fig.add_trace(
        go.Scatter(x=start_window.index, y=values, mode="lines", name=trace_name),
        row=subplot_row,
        col=1,
    )

fig.update_layout(height=600, width=800, title_text=f"{start_ts_index=} transformed")
fig.show()

## Multivariate TS Transformation

In [41]:
# Re-select the inputs for the multivariate transformation: feature rows of
# the start and target windows, and the start window's decompositions.
start_features, target_features = (features[idx] for idx in (start_ts_index, target_ts_index))
start_decomps = decomps[start_ts_index]

# Position, within the multivariate time series, of the univariate series
# that was transformed first.
initial_transformed_ts_index = 0

In [42]:
from utils.genetic_algorithm import GeneticAlgorithm

# Hyperparameters forwarded to GeneticAlgorithm in the transformation loop.
# NOTE(review): the names match PyGAD's API ("sss" would be steady-state
# selection there) -- confirm against utils.genetic_algorithm.

# Search budget and population layout.
num_generations = 50
sol_per_pop = 10
num_parents_mating = 1
num_genes = 4  # f, g, h, k

# Bounds passed as init_range_low / init_range_high.
init_range_low, init_range_high = -5, 5

# Operator choices.
parent_selection_type = "sss"
crossover_type = "single_point"
mutation_type = "random"
mutation_percent_genes = 25

In [43]:
from utils.features import (
    trend_strength,
    trend_slope,
    trend_linearity,
    seasonal_strength,
)

### Predict new target features with correlation-based model

### Use predicted features to transform MTS

In [44]:
def _feature_vector(trend, seasonal, resid):
    """Return [trend strength, trend slope, trend linearity, seasonal strength].

    The strength measures are evaluated against `resid`; slope and linearity
    are computed from `trend` alone.
    """
    return [
        trend_strength(trend, resid),
        trend_slope(trend),
        trend_linearity(trend),
        seasonal_strength(seasonal, resid),
    ]


num_ts = len(start_features)

# Features of the series that was already transformed by hand.
init_new_ts_features = _feature_vector(
    init_new_trend, init_new_seasonal, init_transformed_decomps.resid
)

# Settings shared by every GeneticAlgorithm run below.
ga_settings = dict(
    num_generations=num_generations,
    num_parents_mating=num_parents_mating,
    sol_per_pop=sol_per_pop,
    num_genes=num_genes,
    init_range_low=init_range_low,
    init_range_high=init_range_high,
    parent_selection_type=parent_selection_type,
    crossover_type=crossover_type,
    mutation_type=mutation_type,
    mutation_percent_genes=mutation_percent_genes,
)

new_mts = []
new_mts_features = []

for i in range(num_ts):
    if i == initial_transformed_ts_index:
        # Reuse the manual transformation instead of optimising this series.
        new_mts.append(init_new_ts)
        new_mts_features.append(init_new_ts_features)
    else:
        print(f"Optimizing for ts{i}")
        # NOTE: This is based on TARGET FEATURES
        # TODO: Should be based on PREDICTED target features
        # based on the initial transformation
        univariate_decomps = start_decomps[i]
        univariate_target_features = target_features[i]

        ga_instance = GeneticAlgorithm(
            original_time_series_decomp=univariate_decomps,
            target_features=univariate_target_features,
            **ga_settings,
        )
        ga_instance.run_genetic_algorithm()

        # Only the first element of the best solution (the factors) is used.
        factors, _, _ = ga_instance.get_best_solution()

        # factors[0..2] manipulate the trend, factors[3] the seasonal component.
        new_trend = manipulate_trend_component(
            univariate_decomps.trend, factors[0], factors[1], factors[2], m=0
        )
        new_seasonal = manipulate_seasonal_component(univariate_decomps.seasonal, factors[3])

        # Recompose with the untouched residual.
        new_ts = new_trend + new_seasonal + univariate_decomps.resid
        new_mts.append(new_ts)

        new_mts_features.append(
            _feature_vector(new_trend, new_seasonal, univariate_decomps.resid)
        )
    

Optimizing for ts1
Optimizing for ts2


In [45]:
# Shapes of the target features and of the transformed series' features,
# displayed side by side for comparison.
(target_features.shape, np.shape(new_mts_features))

((3, 4), (3, 4))

In [51]:
# Labels for the feature axis, in the order the feature vectors are built.
feature_names = ["trend_strength", "trend_slope", "trend_linearity", "seasonal_strength"]

series_count = target_features.shape[0]
feature_count = target_features.shape[1]

# Per feature, list target vs. transformed value (and their difference) for
# every univariate series.
for feature_idx in range(feature_count):
    print(f"Feature: {feature_names[feature_idx]}")
    for series_idx in range(series_count):
        target_value = float(target_features[series_idx, feature_idx])
        transformed_value = float(new_mts_features[series_idx][feature_idx])
        print(f"TS{series_idx}: Target {target_value} vs. Transformed {transformed_value} (Diff: {target_value - transformed_value})")

Feature: trend_strength
TS0: Target 0.8210722779993428 vs. Transformed 0.9869272165008968 (Diff: -0.16585493850155397)
TS1: Target 0.9212336673753605 vs. Transformed 0.9206753142723256 (Diff: 0.0005583531030348698)
TS2: Target 0.9605888301426402 vs. Transformed 0.9604372596432429 (Diff: 0.00015157049939729106)
Feature: trend_slope
TS0: Target -0.0009652615354878126 vs. Transformed -0.012292467322013272 (Diff: 0.01132720578652546)
TS1: Target -0.002999780835066143 vs. Transformed -0.004952713868723667 (Diff: 0.0019529330336575238)
TS2: Target 0.00022061644983968162 vs. Transformed 0.0002973724348854493 (Diff: -7.675598504576766e-05)
Feature: trend_linearity
TS0: Target 0.829719716134499 vs. Transformed 0.838828817925354 (Diff: -0.009109101790855001)
TS1: Target 0.9122721662550759 vs. Transformed 0.9118579734867355 (Diff: 0.00041419276834042673)
TS2: Target 0.8299256575899474 vs. Transformed 0.8299875967642871 (Diff: -6.193917433972018e-05)
Feature: seasonal_strength
TS0: Target 0.922473

In [46]:
# 3x3 grid: one row per series; columns = original, transformed, target.
fig = make_subplots(rows=3, cols=3, subplot_titles=("Original", "Transformed", "Target"))

start_window = data[start_ts_index]
target_window = data[target_ts_index]
# (DataFrame column, trace label); list position + 1 is the subplot row.
plotted_series = [("grid1-load", "grid_load"), ("grid1-loss", "grid_loss"), ("grid1-temp", "grid_temp")]

# Traces are added column by column (original, transformed, target) so legend
# order and colours stay as before. go.Scatter with mode="lines" replaces the
# deprecated go.Line class.
for subplot_row, (column, label) in enumerate(plotted_series, start=1):
    fig.add_trace(
        go.Scatter(x=start_window.index, y=start_window[column], mode="lines", name=label),
        row=subplot_row,
        col=1,
    )

# The transformed series are plotted against the start window's index.
for subplot_row in (1, 2, 3):
    fig.add_trace(
        go.Scatter(x=start_window.index, y=new_mts[subplot_row - 1], mode="lines", name="transformed"),
        row=subplot_row,
        col=2,
    )

for subplot_row, (column, label) in enumerate(plotted_series, start=1):
    fig.add_trace(
        go.Scatter(x=target_window.index, y=target_window[column], mode="lines", name=f"t_{label}"),
        row=subplot_row,
        col=3,
    )

fig.update_layout(height=600, width=800, title_text=f"{start_ts_index=} vs. {target_ts_index=}")
fig.show()