In [None]:
# Customer whose rows are selected from the production data (compared against the 'Customer' column)
customer_id = 1

# ALL CODE

## 0. Importing and key parameters

In [None]:
import pandas as pd
import torch
import os
from matplotlib import pyplot as plt

In [None]:
# Hyperparameters needed for a run:

# Data fetching (passed to DataFetcher as locations / start_date / end_date)
locations_used = 1
start_date = 2005
end_date = 2013

# Forecasting parameters
day_only = False  # when True, night-time hours are removed from the source and target data
features = ['P']  # base features; 'P' is the target variable and must always be included
final_month = 12  # months after this one are dropped from the target training months
split = 0.8  # passed as train_test_split when the target data is tensorised

# LSTM parameters
hidden_size = 400
num_layers = 3
dropout = 0.5
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Training parameters
epochs = 200
batch_size = 32
learning_rate = 0.0001

## 1. Target location

In [None]:
# Load the Australian production data and keep only the rows of the selected customer
aus_production = pd.read_parquet('../data/australia/aus_production.parquet', engine='pyarrow')
is_selected_customer = aus_production['Customer'] == customer_id
data_aus = aus_production[is_selected_customer]
data_aus

In [None]:
# Hyperparams from the data: taken from the first row of the selected customer's records
peak_power = data_aus['Generator Capacity'].iloc[0]
latitude = data_aus['latitude'].iloc[0]
longitude = data_aus['longitude'].iloc[0]

# Hyperparams not included in the data
tilt = 0
azimuth = 0
optimalangles = True

# Display the location settings for a quick visual check
latitude, longitude, peak_power, tilt, azimuth

In [None]:
# Unique name for the data, model and metrics: 'australia_' followed by the customer id
data_name = f'australia_{customer_id}'
data_name

In [None]:
# Create the folders to save the data and models.
# exist_ok=True turns the call into a no-op when the folder already exists and removes
# the gap between a separate existence check and the creation itself.
data_folder = '../data/AUS/'
model_folder = '../models/AUS/' + data_name
os.makedirs(data_folder, exist_ok=True)
os.makedirs(model_folder, exist_ok=True)

In [None]:
# Keep only the 'Values' column, sum it per hour and rename it to 'P'
data_aus = (
    data_aus[['Values']]
    .resample('H')
    .sum()
    .rename(columns={"Values": "P"})
)

target_data = data_aus
target_data

## 2. Source location

In [None]:
# Import the datafetcher class
from src.data.datafetcher import DataFetcher

# Fetch data from PVGIS.
# The optimal-angles switch is read from the `optimalangles` setting instead of a
# hard-coded 1, so changing that setting actually changes the request. int() keeps the
# value that reaches DataFetcher at 1 for the current setting (True).
data_PVGIS = DataFetcher(
    latitude,
    longitude,
    peak_power,
    tilt,
    azimuth,
    locations=locations_used,
    start_date=start_date,
    end_date=end_date,
    optimal_angles=int(optimalangles),
)

# Save the data in the data folder
#path = data.save_data(file_name = data_name + '/' + data_name)
#path

In [None]:
# Single-element list holding the first fetched dataset
data = [data_PVGIS.dataset[0]]

In [None]:
# Import the featurisation class
from src.data.featurisation import Featurisation

# Decide on the features to use in making the model (Note that 'P' should always be included since it's the target variable)
#dataset = Featurisation(data.dataset).base_features(features)
dataset = Featurisation(data).base_features(features)

# Use cyclic features as well
dataset = Featurisation(dataset).cyclic_features(yearly=True)
# `features` changes from the configured list to the column index of the featurised data.
# NOTE(review): this overwrites the configuration value, so re-running the cell passes the
# updated columns to base_features instead of the original list.
features = dataset[0].columns # update the features
dataset[0].head()

In [None]:
from src.util import daytime

In [None]:
# Input window (lags) and forecast horizon always have the same length:
# the number of hours kept per day in day-only mode, otherwise 24
if day_only is True:
    dataset, removed_hours, kept_hours = daytime.remove_nighttime(dataset)
    lags = forecast_period = len(kept_hours)
else:
    lags = forecast_period = 24

In [None]:
# Value range per feature. The first entry is fixed to [0, 0.86 * peak_power];
# every remaining feature uses its observed minimum and maximum in dataset[0]
domain_min = [0.0]
domain_max = [peak_power*0.86]
for feature_name in features[1:]:
    feature_values = dataset[0][feature_name]
    domain_min.append(min(feature_values))
    domain_max.append(max(feature_values))

In [None]:
# Interpret the source timestamps as UTC, convert them to Australia/Sydney local time and
# drop the timezone information again so the index is timezone-naive.
# NOTE(review): this cell overwrites dataset[0]; running it a second time would apply the
# shift and the trimming below again.
dataset[0] = dataset[0].tz_localize('UTC').tz_convert('Australia/Sydney').tz_localize(None)
# Drop the first 13 rows.
# NOTE(review): presumably to re-align the start of the series after the shift — confirm.
dataset[0] = dataset[0][13:]

## 3. Target featurisation

In [None]:
# Remove nighttime: drop the target rows whose hour was removed from the source data
if day_only is True:
    is_removed_hour = data_aus.index.hour.isin(removed_hours)
    data_aus = data_aus[~is_removed_hour]

In [None]:
# Window used for the target: from the first target timestamp to the last source timestamp
start = data_aus.index[0]
end = dataset[0].index[-1]

In [None]:
# We use the features of dataset[0] because this is the base location, and is identical to the target location
# Every column except the first entry of `features` is taken, restricted to start..end.
# NOTE(review): the first entry is expected to be the target 'P' — confirm the column order.
features_nl = dataset[0][features[1:]].loc[start:end]
features_nl.head()

In [None]:
# Join production and features on their timestamps, then repair the hourly index:
# keep the first of any duplicated timestamps, re-insert missing hours and fill them linearly
target_window = data_aus.loc[start:end]
target_data = (
    target_window
    .merge(features_nl, left_index=True, right_index=True)
    .loc[lambda frame: ~frame.index.duplicated(keep='first')]
    .resample('H')
    .asfreq()  # Add the missing values from summer time
    .interpolate(method='linear')
)

target_data

In [None]:
# Keep only source rows from years before the first year of the target data, then repair
# the hourly index the same way as for the target data
first_target_year = target_data.index.year[0]
source_frame = dataset[0]
source_frame = source_frame[source_frame.index.year < first_target_year]
source_frame = source_frame.loc[~source_frame.index.duplicated(keep='first')]
source_frame = source_frame.resample('H').asfreq()  # Add the missing values from summer time
dataset[0] = source_frame.interpolate(method='linear')
dataset[0]

## 4. Create tensors of the data

### 4.1 Source data

In [None]:
# Import the tensorisation class to transform the data into tensors for use in pytorch models
from src.tensors.tensorisation import Tensorisation
import torch

# Get the list of features
features = list(dataset[0].columns)

# Tensorise every source dataset and collect the pieces. They are concatenated once at the
# end instead of re-concatenating a growing tensor on every iteration.
X_train_parts = []
X_test_parts = []
y_train_parts = []
y_test_parts = []

for i in range(len(dataset)):
    tensors = Tensorisation(dataset[i], 'P', features, lags, forecast_period)
    X_train, X_test, y_train, y_test = tensors.tensor_creation()
    X_train_parts.append(X_train)
    X_test_parts.append(X_test)
    y_train_parts.append(y_train)
    y_test_parts.append(y_test)

# With no datasets at all, fall back to empty float32 tensors (the previous starting value)
X_train_source = torch.concat(X_train_parts) if X_train_parts else torch.empty(0, dtype=torch.float32)
X_test_source = torch.concat(X_test_parts) if X_test_parts else torch.empty(0, dtype=torch.float32)
y_train_source = torch.concat(y_train_parts) if y_train_parts else torch.empty(0, dtype=torch.float32)
y_test_source = torch.concat(y_test_parts) if y_test_parts else torch.empty(0, dtype=torch.float32)

X_train_source.shape, X_test_source.shape, y_train_source.shape, y_test_source.shape

### 4.2 Target data

In [None]:
# Months that occur in the year before the last year of the target data
previous_year = target_data.index.year[-1] - 1
previous_year_index = target_data[target_data.index.year == previous_year].index
training_months = list(previous_year_index.month.unique())

In [None]:
# Set the end of the month: drop every training month that lies after `final_month`

training_months_copy = training_months.copy()
training_months[:] = [month for month in training_months_copy if not month > final_month]

In [None]:
# First timestamp of every training month in the year before the last target year,
# listed in reverse month order so the windows grow from the last month backwards
previous_year = target_data.index.year[-1] - 1
in_previous_year = target_data.index.year == previous_year
train_starts = [
    target_data[in_previous_year & (target_data.index.month == month)].index[0]
    for month in training_months
][::-1]

In [None]:
# Last timestamp of `final_month` in the year before the last target year
previous_year = target_data.index.year[-1] - 1
in_final_month = (target_data.index.year == previous_year) & (target_data.index.month == final_month)
model_data_end = target_data[in_final_month].index[-1]
model_data_end

In [None]:
X_train_target_list = []
X_test_target_list = []
X_eval_target_list = []
y_train_target_list = []
y_test_target_list = []
y_eval_target_list = []

# The evaluation tensors are built from the complete target data and do not depend on the
# training window, so they are created once and reused for every window.
# NOTE(review): assumes Tensorisation does not modify `target_data` in place — confirm.
eval_tensors = Tensorisation(target_data, 'P', features, lags, forecast_period,domain_min=domain_min, domain_max=domain_max)
# Number of target rows from 2012-07-01 onwards, passed to tensor_creation_with_evaluation
evaluation_length = len(target_data[(target_data.index >= '2012-07-01')])
_, _, _, _, X_eval_target, y_eval_target = eval_tensors.tensor_creation_with_evaluation(evaluation_length)

for i in range(len(training_months)):
    # Training window i runs from train_starts[i] up to model_data_end
    tensors = Tensorisation(target_data[train_starts[i]:model_data_end], 'P', features, lags, forecast_period,train_test_split = split, domain_min=domain_min, domain_max=domain_max)
    X_train_target, X_test_target, y_train_target, y_test_target = tensors.tensor_creation()
    X_train_target_list.append(X_train_target)
    X_test_target_list.append(X_test_target)
    X_eval_target_list.append(X_eval_target)
    y_train_target_list.append(y_train_target)
    y_test_target_list.append(y_test_target)
    y_eval_target_list.append(y_eval_target)

    print(X_train_target.shape, X_test_target.shape, X_eval_target.shape, y_train_target.shape, y_test_target.shape, y_eval_target.shape)

## 5. Source model

In [None]:
# Import the lstm class to create an untrained LSTM
from src.models.lstm import LSTM

# Set the parameters for the lstm: one input per feature column
input_size = len(features)

# Build the source model and move it to the selected device
my_lstm = LSTM(input_size,hidden_size,num_layers, forecast_period, dropout).to(device)
my_lstm

In [None]:
# Import the training class to train the model
import src.models.training as train
 
# Initialize the trainer with the source train/test tensors and the configured
# number of epochs, batch size and learning rate
training = train.Training(my_lstm, X_train_source, y_train_source, X_test_source, y_test_source, epochs,batch_size=batch_size, learning_rate=learning_rate)

# Train the model and return the trained parameters and the best iteration
state_dict_list, best_epoch = training.fit()

In [None]:
# Load the state dictionary of the best performing model
my_lstm.load_state_dict(state_dict_list[best_epoch])

# Save the model state dictionary for later use 
# NOTE(review): the transfer section reads '../models/AUS/<data_name>/model_<data_name>_transfer_0'
# with torch.load, so save_model is presumably resolving this path under '../models/' — confirm.
train.save_model(my_lstm, 'AUS/' + data_name + '/model_' + data_name + '_transfer_0')

In [None]:
# Forecast with the model.
# eval() switches off dropout so the forecasts are deterministic, and no_grad() avoids
# building an autograd graph that inference never uses.
my_lstm.eval()
with torch.no_grad():
    forecasts = my_lstm(X_test_source.to(device))

In [None]:
# Import the evaluation script
from src.evaluation.evaluation import Evaluation

# Evaluate the model performance: both the test targets and the forecasts are flattened
# to 1-D numpy arrays (the forecasts are moved to the CPU first)
source_eval = Evaluation(y_test_source.detach().flatten().numpy(), forecasts.cpu().detach().flatten().numpy())

# Show the evaluation metrics
source_eval.metrics()

## 6. Target model

In [None]:
# Set the parameters for the lstm
input_size = len(features)

# Create empty models for each of the periods, plus one extra model at index 0
lstms = [
    LSTM(input_size,hidden_size,num_layers, forecast_period, dropout).to(device)
    for _ in range(len(training_months)+1)
]

# Store the weights of the model at index 0
torch.save(lstms[0].state_dict(), '../models/AUS/' + data_name + '/model_' + data_name + '_target_0')

In [None]:
# Best epoch per target model; the leading 0 stands for the model at index 0, which is never trained
target_best_epochs = [0]

for i in range(len(training_months)):
    # Initialize the trainer. batch_size is passed explicitly so the target models are
    # trained with the configured batch size, like the source and transfer models.
    training = train.Training(lstms[i+1], X_train_target_list[i], y_train_target_list[i], X_test_target_list[i], y_test_target_list[i], epochs, batch_size=batch_size, learning_rate=learning_rate)

    # Train the model and return the trained parameters and the best iteration
    state_dict_list, best_epoch = training.fit()

    # Load the state dictionary of the best performing model
    lstms[i+1].load_state_dict(state_dict_list[best_epoch])
    target_best_epochs.append(best_epoch)

    # Save the model state dictionary for later use
    #train.save_model(lstms[i+1], 'AUS/' + data_name + '/model_' + data_name + '_target_' + str(i+1))

In [None]:
target_RMSEs = []

# Evaluate a clean model.
# A freshly constructed module is in training mode, so eval() is needed to switch off
# dropout; no_grad() avoids building an unused autograd graph over the evaluation set.
lstms[0].eval()
with torch.no_grad():
    forecasts = lstms[0](X_eval_target_list[0].to(device))
source_eval = Evaluation(y_eval_target_list[0].detach().flatten().numpy(), forecasts.cpu().detach().flatten().numpy())

target_RMSEs.append(source_eval.metrics()['RMSE'].values[0])

for i in range(len(training_months)):
    # Forecast with the model trained on window i (inference mode, no gradients)
    lstms[i+1].eval()
    with torch.no_grad():
        forecasts = lstms[i+1](X_eval_target_list[i].to(device))
    # Evaluate the model performance
    source_eval = Evaluation(y_eval_target_list[i].detach().flatten().numpy(), forecasts.cpu().detach().flatten().numpy())

    # Store the RMSE of this model
    target_RMSEs.append(source_eval.metrics()['RMSE'].values[0])

In [None]:
# RMSE of the target models; position 0 is the model that received no training
plt.plot(target_RMSEs)

## 7. Transfer model

In [None]:
# Layers to freeze: the first four parameter names reported by the model's `lstm` module

freezing = [name for name, _ in my_lstm.lstm.named_parameters()][:4]
freezing

In [None]:
transfer_models = []
# Best epoch per transfer model; the leading 0 stands for the source model without fine-tuning
transfer_best_epochs = [0]

# The saved source weights are the same for every transfer model, so they are read from
# disk once. map_location=device loads them onto the device the models live on.
# load_state_dict copies the values into each model, so sharing the dictionary is safe.
source_state_dict = torch.load('../models/AUS/' + data_name + '/model_' + data_name + '_transfer_0', map_location=device)

for i in range(len(training_months)):
    transfer_model  = LSTM(input_size,hidden_size,num_layers, forecast_period, dropout).to(device)
    transfer_model.load_state_dict(source_state_dict)

    # Freeze every LSTM parameter whose name contains one of the names in `freezing`.
    # NOTE(review): this is a substring match, so it can also catch longer names that
    # merely contain a frozen name — confirm that is intended.
    for name, param in transfer_model.lstm.named_parameters():
        if any(freezing_name in name for freezing_name in freezing):
            param.requires_grad = False

    # Initialize the trainer; fine-tuning uses a learning rate 100 times smaller than the source training
    training = train.Training(transfer_model, X_train_target_list[i], y_train_target_list[i], X_test_target_list[i], y_test_target_list[i], epochs, batch_size = batch_size, 
                              learning_rate =learning_rate/100)

    # Train the model and return the trained parameters and the best iteration
    state_dict_list, best_epoch = training.fit()

    # Load the state dictionary of the best performing model
    transfer_model.load_state_dict(state_dict_list[best_epoch])
    transfer_best_epochs.append(best_epoch)

    # Save the model state dictionary for later use
    #train.save_model(transfer_model, 'AUS/' + data_name + '/model_' + data_name + '_transfer_' + str(i+1))
    transfer_models.append(transfer_model)

In [None]:
transfer_RMSEs = []

# Evaluate a clean model: the saved source weights without any fine-tuning

transfer_model = LSTM(input_size,hidden_size,num_layers, forecast_period, dropout).to(device)
transfer_model.load_state_dict(torch.load('../models/AUS/' + data_name + '/model_' + data_name + '_transfer_0', map_location=device))

# A freshly constructed module is in training mode, so eval() is needed to switch off
# dropout; no_grad() avoids building an unused autograd graph over the evaluation set.
transfer_model.eval()
with torch.no_grad():
    forecasts = transfer_model(X_eval_target_list[0].to(device))
source_eval = Evaluation(y_eval_target_list[0].detach().flatten().numpy(), forecasts.cpu().detach().flatten().numpy())

transfer_RMSEs.append(source_eval.metrics()['RMSE'].values[0])

for i in range(len(training_months)):
    # Forecast with the model fine-tuned on window i (inference mode, no gradients)
    transfer_models[i].eval()
    with torch.no_grad():
        forecasts = transfer_models[i](X_eval_target_list[i].to(device))
    # Evaluate the model performance
    source_eval = Evaluation(y_eval_target_list[i].detach().flatten().numpy(), forecasts.cpu().detach().flatten().numpy())

    # Store the RMSE of this model
    transfer_RMSEs.append(source_eval.metrics()['RMSE'].values[0])

In [None]:
# Compare the RMSE of the target-only models with the RMSE of the transfer models
plt.plot(target_RMSEs,label='target')
plt.plot(transfer_RMSEs,label='transfer')
plt.legend()

## 8. Baseline

In [None]:
baseline_RMSEs = []

# The baseline forecast is the `[:, :, 0]` slice of the evaluation inputs.
# NOTE(review): presumably the lagged 'P' values of the input window — confirm.
# Index 0 is evaluated one extra time up front so the list has the same length as the
# target and transfer RMSE lists, which start with a "clean model" entry.
for i in [0, *range(len(training_months))]:
    # Forecast with the baseline
    forecasts = X_eval_target_list[i][:,:,0]
    # Evaluate the baseline performance
    source_eval = Evaluation(y_eval_target_list[i].detach().flatten().numpy(), forecasts.cpu().detach().flatten().numpy())
    # Store the RMSE
    baseline_RMSEs.append(source_eval.metrics()['RMSE'].values[0])

# Final visualisation and export

In [None]:
# Compare the RMSE of the target-only models, the transfer models and the baseline.
# Axis labels are set so the figure can be read on its own; x position i corresponds to
# column '<i>m' of the summary table.
plt.plot(target_RMSEs,label='target')
plt.plot(transfer_RMSEs,label='transfer')
plt.plot(baseline_RMSEs, label='baseline')
plt.xlabel('Months of target data used for training')
plt.ylabel('RMSE')
plt.legend()

In [None]:
# RMSE of the transfer models on their own; position 0 is the source model without fine-tuning
plt.plot(transfer_RMSEs,label='transfer')

In [None]:
# Labels '0m', '1m', ... : one per entry of the RMSE lists
column_names = [str(i) + 'm' for i in range(len(training_months)+1)]

In [None]:
# Summary table: the metrics are stacked as rows and then transposed, so every metric
# becomes a column and every '<i>m' label becomes a row
metric_rows = [baseline_RMSEs, target_RMSEs, transfer_RMSEs, target_best_epochs, transfer_best_epochs]
metric_names = ['Baseline RMSE', 'Target RMSE', 'Transfer RMSE', 'Target epoch', 'Transfer epoch']
all_metrics = pd.DataFrame(metric_rows, columns=column_names, index=metric_names).T

# Cast the epoch columns back to integers
for epoch_column in ('Target epoch', 'Transfer epoch'):
    all_metrics[epoch_column] = all_metrics[epoch_column].astype(int)
all_metrics

# Comparison

In [None]:
# Write the summary table to the AUS data folder, named after the current data set
summary_path = '../data/AUS/' + 'summary_table_' + data_name + '.csv'
all_metrics.to_csv(summary_path)

In [None]:
# Show which data set the exported results belong to
data_name