In [85]:
import pandas as pd
import os
import numpy as np
import sys

In [86]:
# Put the parent of the current working directory on sys.path (at most once),
# so that the `src.*` imports used by later cells can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../"))
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

## Load data

In [87]:
from src.utils.generate_dataset import generate_windows_dataset

# Location of the raw grid-loss training table.
data_dir_path = os.path.join("..", "data", "gridloss", "train.csv")

# The first CSV column becomes the index and is parsed into datetimes.
df = pd.read_csv(data_dir_path, index_col=0)
df.index = pd.to_datetime(df.index)

# Backfill missing data
df = df.bfill()

# Windowing configuration
grid1_columns = ["grid1-load", "grid1-loss", "grid1-temp"]
window_size = 168  # 1 week
step_size = 24
num_ts = len(grid1_columns)
dataset_size = (df.shape[0] - window_size) // step_size + 1
force_calc_windows: bool = True  # when True, cached window files are ignored

# One cached CSV per univariate series; the file name encodes the windowing setup.
windows_data_paths = [
    os.path.join(
        "..", "data", "gridloss", "windows",
        f"ts{series_idx}_w{window_size}_s{step_size}_n{dataset_size}.csv",
    )
    for series_idx in range(num_ts)
]
load_windows: bool = all(os.path.exists(csv_path) for csv_path in windows_data_paths)

if load_windows and not force_calc_windows:
    # TODO: Add datetime index to the windows that are saved/loaded
    print("Loading windows dataset")
    data_transposed = [pd.read_csv(csv_path).to_numpy() for csv_path in windows_data_paths]
    # Move the per-series axis last before rebuilding one DataFrame per window.
    data = np.transpose(np.asarray(data_transposed), (1, 2, 0)).tolist()
    data = [pd.DataFrame(window, columns=grid1_columns) for window in data]
else:
    print("Generating windows dataset")
    data = generate_windows_dataset(df, window_size, step_size, grid1_columns)

Generating windows dataset


100%|██████████| 724/724 [00:00<00:00, 2928.39it/s]


## Features and Decompositions

In [88]:
from src.utils.features import decomp_and_features

sp = 24 # STL parameter; passed to decomp_and_features as series_periodicity

# One cached feature CSV per univariate series; the file name encodes the windowing setup.
features_paths = [os.path.join("..", "data", "gridloss", "features", f"features_ts{i}_w{window_size}_s{step_size}_n{dataset_size}.csv") for i in range(num_ts)]
load_features = all(os.path.exists(path) for path in features_paths)

# NOTE: Decompositions are never saved or loaded, but later cells index into
# `decomps`, so it must be defined on both branches.
if not load_features:
    # Calculate decompositions and features
    decomps, features = decomp_and_features(data, series_periodicity=sp, dataset_size=dataset_size)
    print(np.asarray(decomps).shape, features.shape)
else:
    # Load features from file
    features_transposed = [pd.read_csv(path).to_numpy() for path in features_paths]
    features = np.asarray(features_transposed).transpose(1, 0, 2)
    # Recompute the decompositions (not cached); the features returned alongside
    # them are discarded in favour of the ones loaded from file.
    decomps, _recomputed_features = decomp_and_features(data, series_periodicity=sp, dataset_size=dataset_size)
    print(features.shape)

100%|██████████| 724/724 [00:14<00:00, 50.78it/s]

(724, 3) (724, 3, 4)





## Time Series

In [89]:
# Positions (within `data` / `features`) of the window that gets transformed
# and of the window used as the target.
start_ts_index, target_ts_index = 636, 105

In [90]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [91]:
# Start window (left column) next to the target window (right column),
# one subplot row per grid1 series.
fig = make_subplots(rows=3, cols=2, subplot_titles=("Original", "Target"))

# (column in the window DataFrame, legend label)
series_to_plot = (("grid1-load", "grid_load"), ("grid1-loss", "grid_loss"), ("grid1-temp", "grid_temp"))
# (window position in `data`, legend prefix); the order fixes the subplot column
windows_to_plot = ((start_ts_index, ""), (target_ts_index, "t_"))

for col, (window_index, name_prefix) in enumerate(windows_to_plot, start=1):
    window = data[window_index]
    for row, (column, label) in enumerate(series_to_plot, start=1):
        # go.Scatter(mode="lines") replaces the deprecated go.Line.
        fig.add_trace(
            go.Scatter(x=window.index, y=window[column], mode="lines", name=f"{name_prefix}{label}"),
            row=row,
            col=col,
        )

fig.update_layout(height=600, width=800, title_text=f"{start_ts_index=} vs. {target_ts_index=}")
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




## Initial Univariate TS Transformation

In [92]:
from src.utils.transformations import manipulate_trend_component, manipulate_seasonal_component

Start off by transforming the grid load time series

In [93]:
# Feature matrices of the start window and of the target window.
start_features, target_features = features[start_ts_index], features[target_ts_index]

# Decompositions belonging to the start window (indexed per series below).
start_decomps = decomps[start_ts_index]

# Position, within the multivariate time series, of the univariate series
# that is transformed first
initial_transformed_ts_index = 0

# Multiplicative constants:
#   f - trend determination
#   g - trend slope
#   h - trend linearity
#   k - seasonal determination
f, g, h, k = 2.5, -1.5, 1.7, 0.8

In [94]:
# Decomposition of the series selected for the initial transformation.
init_transformed_decomps = start_decomps[initial_transformed_ts_index]

# Manipulate the trend with the factors f, g, h and the seasonal component with k.
init_new_trend = manipulate_trend_component(
    init_transformed_decomps.trend, f, g, h, m=0
)
init_new_seasonal = manipulate_seasonal_component(
    init_transformed_decomps.seasonal, k
)

# Recompose: manipulated trend + manipulated seasonal + unchanged residual.
init_new_ts = (
    init_new_trend
    + init_new_seasonal
    + init_transformed_decomps.resid
)

In [95]:
# Original grid1-load window (top) above its manually transformed version (bottom).
fig = make_subplots(rows=2, cols=1, subplot_titles=("Original", "Transformed"))
start_window = data[start_ts_index]
# go.Scatter(mode="lines") replaces the deprecated go.Line.
fig.add_trace(go.Scatter(x=start_window.index, y=start_window["grid1-load"], mode="lines", name="grid_load"), row=1, col=1)
fig.add_trace(go.Scatter(x=start_window.index, y=init_new_ts, mode="lines", name="transformed"), row=2, col=1)
fig.update_layout(height=600, width=800, title_text=f"{start_ts_index=} transformed")
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




## Multivariate TS Transformation

In [96]:
# Feature matrices of the start window and of the target window.
start_features, target_features = features[start_ts_index], features[target_ts_index]

# Decompositions belonging to the start window (indexed per series below).
start_decomps = decomps[start_ts_index]

# Position, within the multivariate time series, of the univariate series
# that is transformed first
initial_transformed_ts_index = 0

In [97]:
from src.utils.genetic_algorithm import GeneticAlgorithm

# Genetic-algorithm settings, handed to GeneticAlgorithm as keyword arguments
# of the same names.
num_generations, num_parents_mating, sol_per_pop = 50, 1, 10
num_genes = 4  # f, g, h, k
init_range_low, init_range_high = -5, 5
parent_selection_type, crossover_type, mutation_type = "sss", "single_point", "random"
mutation_percent_genes = 25

In [98]:
from src.utils.features import (
    trend_strength,
    trend_slope,
    trend_linearity,
    seasonal_strength,
)

### Predict new target features with correlation based model

Prepare training data to fit the correlation model

In [99]:
# Flattened (windows, series * features) copy of the feature tensor.
# NOTE(review): not referenced by any other visible cell.
mts_features_reshape = features.reshape((features.shape[0], features.shape[1]*features.shape[2]))

ts_indices_to_names = {0: 'grid-load', 1: 'grid-loss', 2: 'grid-temp'}

# Names of the entries along the last axis of `features`, in axis order.
# Deliberately NOT called `features`: rebinding that name would replace the
# ndarray and break this cell (and earlier ones) on re-run.
feature_columns = ['trend-strength', 'trend-slope', 'trend-linearity', 'seasonal-strength']

# Long format: one record per (window, series) pair.
training_data = [
    {
        'index': idx,
        'ts_name': ts_indices_to_names[ts_idx],
        **{name: features[idx, ts_idx, pos] for pos, name in enumerate(feature_columns)},
    }
    for idx in range(features.shape[0])
    for ts_idx in range(features.shape[1])
]

# Kept separate from `df` so the raw grid-loss table is not overwritten.
training_df = pd.DataFrame(training_data)

# Wide format: one row per window, one column per (feature, series) pair.
feature_df = training_df.pivot_table(
    index='index',
    columns='ts_name',
    values=feature_columns
)

# Flatten the (feature, series) column MultiIndex into "<series>_<feature>".
feature_df.columns = [f"{ts}_{feature}" for feature, ts in feature_df.columns]

# Series names in order of first appearance
ts_names = training_df['ts_name'].unique()

# Group the columns by series, with the features in `feature_columns` order
ordered_columns = [f"{ts}_{feature}" for ts in ts_names for feature in feature_columns]
feature_df = feature_df[ordered_columns]

feature_df.head()

Unnamed: 0_level_0,grid-load_trend-strength,grid-load_trend-slope,grid-load_trend-linearity,grid-load_seasonal-strength,grid-loss_trend-strength,grid-loss_trend-slope,grid-loss_trend-linearity,grid-loss_seasonal-strength,grid-temp_trend-strength,grid-temp_trend-slope,grid-temp_trend-linearity,grid-temp_seasonal-strength
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,0.862437,-0.000562,0.185495,0.944772,0.849588,-0.001031,0.19425,0.933,0.945474,7.3e-05,0.1405,0.375897
1,0.860362,-9.1e-05,0.010127,0.968701,0.8541,-0.000102,0.004129,0.964025,0.89013,-3e-05,0.065147,0.399718
2,0.86394,-0.000706,0.348781,0.956525,0.856937,-0.001179,0.329179,0.950966,0.822561,1.2e-05,0.013555,0.295423
3,0.913748,-0.001013,0.719954,0.974909,0.91538,-0.001741,0.717535,0.973655,0.905197,-5.1e-05,0.097971,0.183332
4,0.907825,3.2e-05,0.00043,0.95288,0.910843,0.000123,0.002037,0.949127,0.944452,-0.000144,0.409159,0.257248


No train/test split is needed here: the test inputs will be transformed time series, which by construction do not exist in the training data.

In [100]:
from src.data_transformations.generation_of_supervised_pairs import generate_supervised_dataset_from_original_and_target_dist

# Build the supervised pairs dataset; feature_df is passed as both arguments.
train_supervised_dataset = generate_supervised_dataset_from_original_and_target_dist(feature_df, feature_df)

In [101]:
# Preview the first rows of the supervised pairs dataset.
train_supervised_dataset.head()

Unnamed: 0,original_index,original_grid-load_trend-strength,original_grid-load_trend-slope,original_grid-load_trend-linearity,original_grid-load_seasonal-strength,original_grid-loss_trend-strength,original_grid-loss_trend-slope,original_grid-loss_trend-linearity,original_grid-loss_seasonal-strength,original_grid-temp_trend-strength,...,delta_grid-load_trend-linearity,delta_grid-load_seasonal-strength,delta_grid-loss_trend-strength,delta_grid-loss_trend-slope,delta_grid-loss_trend-linearity,delta_grid-loss_seasonal-strength,delta_grid-temp_trend-strength,delta_grid-temp_trend-slope,delta_grid-temp_trend-linearity,delta_grid-temp_seasonal-strength
1,0,0.862437,-0.000562,0.185495,0.944772,0.849588,-0.001031,0.19425,0.933,0.945474,...,0.0,0.0,0.004512,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,0.862437,-0.000562,0.185495,0.944772,0.849588,-0.001031,0.19425,0.933,0.945474,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.080474
3,0,0.862437,-0.000562,0.185495,0.944772,0.849588,-0.001031,0.19425,0.933,0.945474,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0.862437,-0.000562,0.185495,0.944772,0.849588,-0.001031,0.19425,0.933,0.945474,...,0.0,0.0,0.0,0.0,0.0,0.016127,0.0,0.0,0.0,0.0
5,0,0.862437,-0.000562,0.185495,0.944772,0.849588,-0.001031,0.19425,0.933,0.945474,...,0.0,0.010382,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [102]:
# Display the features of the start window (features[start_ts_index]).
start_features

array([[ 9.16380241e-01,  1.42513101e-03,  4.44567027e-01,
         9.45534119e-01],
       [ 9.20431621e-01,  1.99605761e-03,  4.57404102e-01,
         9.44017050e-01],
       [ 9.06345265e-01, -1.58418242e-04,  5.98676700e-01,
         7.29136803e-01]])

In [103]:
# Display the features of the target window (features[target_ts_index]).
target_features

array([[ 8.21072278e-01, -9.65261535e-04,  8.29719716e-01,
         9.22473739e-01],
       [ 9.21233667e-01, -2.99978084e-03,  9.12272166e-01,
         9.16071217e-01],
       [ 9.60588830e-01,  2.20616450e-04,  8.29925658e-01,
         8.11126420e-01]])

In [104]:
# Features of the manually transformed series, in the order
# [trend strength, trend slope, trend linearity, seasonal strength].
# The residual of the original decomposition is reused for both strength measures.
init_new_ts_features = np.asarray([
    trend_strength(init_new_trend, init_transformed_decomps.resid),
    trend_slope(init_new_trend),
    trend_linearity(init_new_trend),
    seasonal_strength(init_new_seasonal, init_transformed_decomps.resid),
])
init_new_ts_features

array([ 0.98692722, -0.01229247,  0.83882882,  0.91708173])

In [105]:
pred_columns = list(train_supervised_dataset.columns)

# Feature change caused by the initial transformation of the selected series.
delta_features = init_new_ts_features - np.array(start_features[initial_transformed_ts_index])

# Only the initially transformed series has a known delta; all other series
# keep a delta of zero.
delta_per_series = np.zeros_like(start_features, dtype=float)
delta_per_series[initial_transformed_ts_index] = delta_features

# Row layout: original index, original features, target index, target
# features, deltas. Each feature block is flattened series by series.
pred_row_list = [
    start_ts_index,
    *np.ravel(start_features),
    target_ts_index,
    *np.ravel(target_features),
    *np.ravel(delta_per_series),
]

# Fail early with a readable message if the row does not line up with the
# training columns.
if len(pred_row_list) != len(pred_columns):
    raise ValueError(
        f"Prediction row has {len(pred_row_list)} values but the supervised dataset has {len(pred_columns)} columns"
    )

pred_row = np.array(pred_row_list)

pred_row_df = pd.DataFrame([pred_row], columns=pred_columns)

pred_row_df

Unnamed: 0,original_index,original_grid-load_trend-strength,original_grid-load_trend-slope,original_grid-load_trend-linearity,original_grid-load_seasonal-strength,original_grid-loss_trend-strength,original_grid-loss_trend-slope,original_grid-loss_trend-linearity,original_grid-loss_seasonal-strength,original_grid-temp_trend-strength,...,delta_grid-load_trend-linearity,delta_grid-load_seasonal-strength,delta_grid-loss_trend-strength,delta_grid-loss_trend-slope,delta_grid-loss_trend-linearity,delta_grid-loss_seasonal-strength,delta_grid-temp_trend-strength,delta_grid-temp_trend-slope,delta_grid-temp_trend-linearity,delta_grid-temp_seasonal-strength
0,636.0,0.91638,0.001425,0.444567,0.945534,0.920432,0.001996,0.457404,0.944017,0.906345,...,0.394262,-0.028452,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [106]:
from src.models.naive_correlation import CorrelationModel

# Create the correlation model and call its train method with the wide per-window feature table.
correlation_model = CorrelationModel()
correlation_model.train(feature_df)

In [107]:
# Run inference on the single hand-built prediction row and display the result.
predicted_features = correlation_model.infer(pred_row_df)
predicted_features

100%|██████████| 1/1 [00:00<00:00, 331.91it/s]


Unnamed: 0,grid-load_trend-strength,grid-load_trend-slope,grid-load_trend-linearity,grid-load_seasonal-strength,grid-loss_trend-strength,grid-loss_trend-slope,grid-loss_trend-linearity,grid-loss_seasonal-strength,grid-temp_trend-strength,grid-temp_trend-slope,grid-temp_trend-linearity,grid-temp_seasonal-strength,prediction_index
0,0.986927,-0.000768,0.474479,0.965943,0.984264,-0.003246,0.48473,0.957781,0.925574,-0.004293,0.606593,0.713592,0.0


In [108]:
# Show the first four predicted columns next to the features measured on the manually transformed series.
predicted_features[predicted_features.columns[:4]].to_numpy(), init_new_ts_features

(array([[ 9.86927217e-01, -7.67574673e-04,  4.74479030e-01,
          9.65942708e-01]]),
 array([ 0.98692722, -0.01229247,  0.83882882,  0.91708173]))

In [109]:
# Turn the single-row prediction frame into an array shaped like start_features
# (one row per series, one column per feature).
# The isinstance guard keeps this cell re-runnable: without it a second run
# would call `.to_numpy()` on the ndarray produced by the first run.
if isinstance(predicted_features, pd.DataFrame):
    predicted_features = (
        predicted_features
        .drop(columns="prediction_index")  # bookkeeping column, not a feature
        .to_numpy()[0]
        .reshape(start_features.shape)
    )
predicted_features

array([[ 9.86927217e-01, -7.67574673e-04,  4.74479030e-01,
         9.65942708e-01],
       [ 9.84263886e-01, -3.24556554e-03,  4.84729704e-01,
         9.57781121e-01],
       [ 9.25574033e-01, -4.29259177e-03,  6.06592955e-01,
         7.13591879e-01]])

### Use predicted features to transform MTS

In [117]:
num_ts = len(start_features)

# Transformed series and their measured features, one entry per univariate series.
new_mts, new_mts_features = [], []

for i in range(num_ts):
    if i == initial_transformed_ts_index:
        # This series was already transformed by hand; reuse that result.
        new_mts.append(init_new_ts)
        new_mts_features.append(init_new_ts_features)
    else:
        print(f"Optimizing for ts{i}")
        univariate_decomps = start_decomps[i]
        univariate_target_features = predicted_features[i]

        # Search for the four factors using the predicted features as target.
        ga_instance = GeneticAlgorithm(
            original_time_series_decomp=univariate_decomps,
            target_features=univariate_target_features,
            num_generations=num_generations,
            num_parents_mating=num_parents_mating,
            sol_per_pop=sol_per_pop,
            num_genes=num_genes,
            init_range_low=init_range_low,
            init_range_high=init_range_high,
            parent_selection_type=parent_selection_type,
            crossover_type=crossover_type,
            mutation_type=mutation_type,
            mutation_percent_genes=mutation_percent_genes,
        )
        ga_instance.run_genetic_algorithm()
        factors, _, _ = ga_instance.get_best_solution()

        # First three factors drive the trend, the fourth the seasonal component.
        new_trend = manipulate_trend_component(
            univariate_decomps.trend, factors[0], factors[1], factors[2], m=0
        )
        new_seasonal = manipulate_seasonal_component(univariate_decomps.seasonal, factors[3])

        # Recompose with the unchanged residual.
        new_ts = new_trend + new_seasonal + univariate_decomps.resid
        new_mts.append(new_ts)

        new_mts_features.append(
            [
                trend_strength(new_trend, univariate_decomps.resid),
                trend_slope(new_trend),
                trend_linearity(new_trend),
                seasonal_strength(new_seasonal, univariate_decomps.resid),
            ]
        )

Optimizing for ts1
Optimizing for ts2


In [118]:
# Show the shapes of the predicted and the achieved feature matrices side by side.
predicted_features.shape, np.asarray(new_mts_features).shape

((3, 4), (3, 4))

In [119]:
# Per feature, print the predicted target next to the value measured on each
# transformed series, together with their difference.
feature_names = ["trend_strength", "trend_slope", "trend_linearity", "seasonal_strength"]
num_series, num_features = predicted_features.shape
for feature_pos in range(num_features):
    print(f"Feature: {feature_names[feature_pos]}")
    for series_pos in range(num_series):
        predicted_value = float(predicted_features[series_pos, feature_pos])
        transformed_value = float(new_mts_features[series_pos][feature_pos])
        print(f"TS{series_pos}: Predicted Target {predicted_value} vs. Transformed {transformed_value} (Diff: {predicted_value - transformed_value})")

Feature: trend_strength
TS0: Predicted Target 0.9869272165008968 vs. Transformed 0.9869272165008968 (Diff: 0.0)
TS1: Predicted Target 0.98426388572625 vs. Transformed 0.9830670244521051 (Diff: 0.001196861274144867)
TS2: Predicted Target 0.9255740326942165 vs. Transformed 0.9269869829255342 (Diff: -0.0014129502313177111)
Feature: trend_slope
TS0: Predicted Target -0.0007675746733367787 vs. Transformed -0.012292467322013272 (Diff: 0.011524892648676493)
TS1: Predicted Target -0.003245565541206078 vs. Transformed 0.0038472342771167373 (Diff: -0.007092799818322815)
TS2: Predicted Target -0.0042925917722107425 vs. Transformed 0.0001795106694990996 (Diff: -0.004472102441709842)
Feature: trend_linearity
TS0: Predicted Target 0.4744790295412724 vs. Transformed 0.838828817925354 (Diff: -0.3643497883840816)
TS1: Predicted Target 0.4847297043070387 vs. Transformed 0.4858716139637672 (Diff: -0.0011419096567285014)
TS2: Predicted Target 0.606592955196601 vs. Transformed 0.606408273452222 (Diff: 0.00

In [121]:
# Start window (left column) next to the transformed multivariate series
# (right column), one subplot row per grid1 series.
fig = make_subplots(rows=3, cols=2, subplot_titles=("Original", "Transformed"))

start_window = data[start_ts_index]
# (column in the window DataFrame, legend label); order matches new_mts
series_to_plot = (("grid1-load", "grid_load"), ("grid1-loss", "grid_loss"), ("grid1-temp", "grid_temp"))

# go.Scatter(mode="lines") replaces the deprecated go.Line.
for row, (column, label) in enumerate(series_to_plot, start=1):
    fig.add_trace(go.Scatter(x=start_window.index, y=start_window[column], mode="lines", name=label), row=row, col=1)
for row, (column, label) in enumerate(series_to_plot, start=1):
    fig.add_trace(go.Scatter(x=start_window.index, y=new_mts[row - 1], mode="lines", name=f"t_{label}"), row=row, col=2)

# NOTE: showing the target window as well would need a third subplot column (cols=3).
fig.update_layout(height=600, width=800, title_text=f"{start_ts_index=} vs. {target_ts_index=}")
fig.show()