In [1]:
import os

import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import random

import logging
import time
import tensorflow as tf

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import load_model
import pickle

In [2]:
from google.colab import drive
drive.mount('/content/drive')
#file_path = '/content/drive/MyDrive/boilers_drive/normalized_df.csv'
train_csv_path = '/content/drive/MyDrive/boilers_drive/train_df.csv'
val_csv_path = '/content/drive/MyDrive/boilers_drive/val_df.csv'
test_csv_path = '/content/drive/MyDrive/boilers_drive/test_df.csv'

Mounted at /content/drive


In [3]:
# Specify the file path of your CSV file

# Read the CSV file
train_data = pd.read_csv(train_csv_path)
val_data = pd.read_csv(val_csv_path)
test_data = pd.read_csv(test_csv_path)

In [4]:
#print(train_data)
#print(val_data)
#print(test_data)

In [5]:
# load pre-prepared random order of houses

random_order_houses = pd.read_csv('/content/drive/MyDrive/boilers_drive/random_order_houses.csv')

In [6]:
# Different houses in data
houses = random_order_houses['house_id'].unique().tolist()
print("Different houses in data:")
print(houses)
print("Number of different houses:")
print(len(houses))

Different houses in data:
['home9', 'home114', 'home5', 'home89', 'home17', 'home63', 'home2', 'home101', 'home14', 'home95', 'home111', 'home67', 'home77', 'home43', 'home86', 'home90', 'home47', 'home110', 'home93', 'home53', 'home34', 'home51', 'home106', 'home46', 'home79', 'home55', 'home65', 'home13']
Number of different houses:
28


**Δημιουργία του encoder decoder transformer**

In [7]:
# positional encoding layer
def positional_encoding(length, depth):
  depth = depth/2
  positions = np.arange(length)[:, np.newaxis]     # (seq, 1)
  depths = np.arange(depth)[np.newaxis, :]/depth   # (1, depth)
  angle_rates = 1 / (10000**depths)         # (1, depth)
  angle_rads = positions * angle_rates      # (pos, depth)

  pos_encoding = np.concatenate([np.sin(angle_rads), np.cos(angle_rads)], axis=-1)
  return tf.cast(pos_encoding, dtype=tf.float32)


# positional embedding layer
class PositionalEmbedding(tf.keras.layers.Layer):
  def __init__(self, vocab_size, d_model):
    super().__init__()
    self.d_model = d_model
    self.embedding = tf.keras.layers.Dense(d_model)  # Project input to d_model dimension
    #self.embedding = tf.keras.layers.Embedding(vocab_size, d_model, mask_zero=True)
    self.pos_encoding = positional_encoding(length=2048, depth=d_model)

  def compute_mask(self, *args, **kwargs):
    return self.embedding.compute_mask(*args, **kwargs)

  def call(self, x):
    length = tf.shape(x)[1]
    x = self.embedding(x)
    # This factor sets the relative scale of the embedding and positonal_encoding.
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
    x = x + self.pos_encoding[tf.newaxis, :length, :]
    return x



# Attention layers
# These are all identical except for how the attention is configured

# base attention layer
class BaseAttention(tf.keras.layers.Layer):
  def __init__(self, **kwargs):
    super().__init__()
    self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
    self.layernorm = tf.keras.layers.LayerNormalization()
    self.add = tf.keras.layers.Add()

# cross attention layer
# (at the center of the Transformer is the cross-attention layer, it connects the encoder and decoder)
class CrossAttention(BaseAttention):
  def call(self, x, context):
    attn_output, attn_scores = self.mha(query=x, key=context, value=context, return_attention_scores=True)
    # Cache the attention scores for plotting later.
    self.last_attn_scores = attn_scores
    x = self.add([x, attn_output])
    x = self.layernorm(x)
    return x

# global self attention layer
# (it is responsible for processing the context sequence, and propagating information along its length)
class GlobalSelfAttention(BaseAttention):
  def call(self, x):
    attn_output = self.mha(query=x, value=x, key=x)
    x = self.add([x, attn_output])
    x = self.layernorm(x)
    return x

# causal self attention layer
# (it does a similar job as the global self attention layer, for the output sequence)
class CausalSelfAttention(BaseAttention):
  def call(self, x):
    attn_output = self.mha(query=x, value=x, key=x, use_causal_mask = True)
    x = self.add([x, attn_output])
    x = self.layernorm(x)
    return x



# feed forward network
# (the transformer also includes this point-wise feed-forward network in both the encoder and decoder)
class FeedForward(tf.keras.layers.Layer):
  def __init__(self, d_model, dff, dropout_rate=0.1):
    super().__init__()
    self.seq = tf.keras.Sequential([
      tf.keras.layers.Dense(dff, activation='relu'),
      tf.keras.layers.Dense(d_model),
      tf.keras.layers.Dropout(dropout_rate)
    ])
    self.add = tf.keras.layers.Add()
    self.layer_norm = tf.keras.layers.LayerNormalization()

  def call(self, x):
    x = self.add([x, self.seq(x)])
    x = self.layer_norm(x)
    return x



# encoder layer
# (the encoder contains a stack of N encoder layers. Where each EncoderLayer contains
#   a GlobalSelfAttention and FeedForward layer)
class EncoderLayer(tf.keras.layers.Layer):
  def __init__(self,*, d_model, num_heads, dff, dropout_rate=0.1):
    super().__init__()

    self.self_attention = GlobalSelfAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)
    self.ffn = FeedForward(d_model, dff)

  def call(self, x):
    x = self.self_attention(x)
    x = self.ffn(x)
    return x



# The Encoder
class Encoder(tf.keras.layers.Layer):
  def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size, dropout_rate=0.1):
    super().__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size, d_model=d_model)
    #self.pos_embedding = PositionalEmbedding(d_model=d_model)
    self.enc_layers = [
        EncoderLayer(d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
        for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(dropout_rate)

  def call(self, x):
    # If x is a tuple, take only the first element
    if isinstance(x, tuple):
        x = x[0]
    # Ensure `x` is shaped as (batch_size, sequence_length)
    if x.shape[-1] == 1:
        x = tf.squeeze(x, axis=-1)  # Remove the last dimension if its size is 1
    # `x` is token-IDs shape: (batch, seq_len)
    x = self.pos_embedding(x)  # Shape `(batch_size, seq_len, d_model)`.
    # Add dropout.
    x = self.dropout(x)
    for i in range(self.num_layers):
      x = self.enc_layers[i](x)
    return x  # Shape `(batch_size, seq_len, d_model)`.



# decoder layer
# (the decoder's stack is slightly more complex, with each DecoderLayer containing
#   a CausalSelfAttention, a CrossAttention, and a FeedForward layer)
class DecoderLayer(tf.keras.layers.Layer):
  def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
    super(DecoderLayer, self).__init__()

    self.causal_self_attention = CausalSelfAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)
    self.cross_attention = CrossAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)
    self.ffn = FeedForward(d_model, dff)

  def call(self, x, context):
    x = self.causal_self_attention(x=x)
    x = self.cross_attention(x=x, context=context)
    # Cache the last attention scores for plotting later
    self.last_attn_scores = self.cross_attention.last_attn_scores
    x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
    return x



# The Decoder
class Decoder(tf.keras.layers.Layer):
  def __init__(self, *, num_layers, d_model, num_heads, dff, vocab_size, dropout_rate=0.1):
    super(Decoder, self).__init__()

    self.d_model = d_model
    self.num_layers = num_layers
    self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size, d_model=d_model)
    #self.pos_embedding = PositionalEmbedding(d_model=d_model)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    self.dec_layers = [
        DecoderLayer(d_model=d_model, num_heads=num_heads, dff=dff, dropout_rate=dropout_rate)
        for _ in range(num_layers)]
    self.last_attn_scores = None

  def call(self, x, context):
    # If x is a tuple, take only the first element
    if isinstance(x, tuple):
        x = x[0]
    # Ensure `x` is shaped as (batch_size, sequence_length)
    if x.shape[-1] == 1:
        x = tf.squeeze(x, axis=-1)  # Remove the last dimension if its size is 1
    # `x` is token-IDs shape (batch, target_seq_len)
    x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)
    x = self.dropout(x)
    for i in range(self.num_layers):
      x  = self.dec_layers[i](x, context)
    self.last_attn_scores = self.dec_layers[-1].last_attn_scores
    # The shape of x is (batch_size, target_seq_len, d_model).
    return x



# The Transformer
@tf.keras.utils.register_keras_serializable()
class Transformer(tf.keras.Model):
  def __init__(self, *, num_layers, d_model, num_heads, dff,
               input_vocab_size, target_vocab_size, dropout_rate=0.1, **kwargs):
    super().__init__(**kwargs)

    self.input_proj = tf.keras.layers.Dense(d_model)  # Project input to the model dimension
    self.encoder = Encoder(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff,
                           vocab_size=input_vocab_size, dropout_rate=dropout_rate)
    self.decoder = Decoder(num_layers=num_layers, d_model=d_model, num_heads=num_heads, dff=dff,
                           vocab_size=target_vocab_size, dropout_rate=dropout_rate)
    #self.final_layer = tf.keras.layers.Dense(target_vocab_size)
    self.final_layer = tf.keras.layers.Dense(1)

  #def call(self, inputs, decoder_input=None):  # Expect separate decoder input
  #  x = inputs
  #  context = self.encoder(x)
  #  # If no decoder input is provided, assume we're in inference mode
  #  if decoder_input is None:
  #      # Initialize decoder input with zeros (batch_size, seq_len, d_model)
  #      decoder_input = tf.zeros_like(inputs)
  #  x = self.decoder(decoder_input, context)
  #  logits = self.final_layer(x)
  #  try:
  #      del logits._keras_mask
  #  except AttributeError:
  #      pass
  #  return logits

  def call(self, inputs, decoder_input=None, training=False):
    # Unpack the tuple correctly if necessary
    if isinstance(inputs, tuple):
        encoder_input, decoder_input = inputs

    # Proceed with the encoder and decoder as expected
    # Encode the input sequence
    context = self.encoder(inputs)

    # Ensure that `decoder_input` is properly set even if None
    if decoder_input is None:
        batch_size = tf.shape(inputs)[0]
        decoder_input = tf.zeros((batch_size, 1, self.d_model))  # Initialized with zeros

    if training:
        # In training mode, we use the provided decoder input
        decoder_output = self.decoder(decoder_input, context)
        logits = self.final_layer(decoder_output)
    else:
        # For auto-regressive generation over 1440 time steps
        decoder_output = []

        for t in range(1440):  # Loop over each time step for prediction
            # Run through the decoder to get predictions for this step
            pred = self.decoder(decoder_input, context)
            output = self.final_layer(pred)  # Shape: (batch_size, 1, output_dim)
            decoder_output.append(output)
            # Update `decoder_input` for the next time step
            decoder_input = output

        # Concatenate outputs along the time dimension
        decoder_output = tf.concat(decoder_output, axis=1)  # Shape: (batch_size, 1440, output_dim)
        logits = decoder_output

    # Clear any masking that might interfere with training
    try:
        del logits._keras_mask
    except AttributeError:
        pass

    return logits


**Preparing the data**

In [8]:
# the categories for prediction
final_category = 'blr_mod_lvl'
prediction_categories = ['blr_mod_lvl', 'absorption', 'insulation', 't_r_set', 't_out']
normalized_categories = ['normalized_blr_mod_lvl', 'normalized_absorption', 'normalized_insulation', 'normalized_t_r_set', 'normalized_t_out']

In [9]:
# creating sub-lists, each with the data of one day
# the function that gets category data of a house (ordered by date) and separates by day
def separate_into_days(data_list, minutes_per_day=1440):
    # number of days
    num_days = len(data_list) // minutes_per_day
    # the data into a list of sub-lists, each containing one day's data
    separated_data = [
        data_list[i * minutes_per_day:(i + 1) * minutes_per_day]
        for i in range(num_days)
    ]
    return separated_data


# function to "combine" values of categories and separate into sub-lists based on days
def combine_categories(dataset, categories_list):
    # Combine specified categories into lists
    combined_elements = dataset[categories_list].apply(lambda row: row.tolist(), axis=1)
    return combined_elements.tolist()

In [10]:
def prepare_data_2(house_data, input_categories, output_category, minutes_per_day=1440):
    combined_input_data = combine_categories(house_data, input_categories)
    separated_input_data = separate_into_days(combined_input_data, minutes_per_day)
    #output = house_data[output_category].values
    output = combine_categories(house_data, [output_category])
    separated_output = separate_into_days(output, minutes_per_day)

    input_data = separated_input_data[:-1]  # All except the last day for encoder input
    target_data = separated_output[1:]      # All except the first day for target output
    # For decoder input, shift target by one step
    decoder_input_data = separated_output[:-1]  # The previous day's output as decoder input

    return input_data, decoder_input_data, target_data

In [11]:
# function for getting input and target data
def input_target_split (data, input_categories, selected_houses):
    filtered_data = data[['house_id', 'time', 'normalized_blr_mod_lvl', 'normalized_absorption', 'normalized_insulation', 'normalized_t_r_set', 'normalized_t_out']].copy()
    input_chosen_categories = []
    for cat in input_categories:
        input_chosen_categories.append('normalized_'+cat)

    # We have 1440 minutes per day
    minutes_per_day = 1440

    # Initialize lists to store input and target pairs
    input_data_list, target_data_list, decoder_input_list = [], [], []

    for house_id in selected_houses:
        house_data = filtered_data[filtered_data['house_id'] == house_id]
        house_data = house_data.sort_values(by='time')
        input_data, decoder_input_data, target_data = prepare_data_2(house_data, input_chosen_categories, 'normalized_blr_mod_lvl')
        input_data_list.append(input_data)
        decoder_input_list.append(decoder_input_data)
        target_data_list.append(target_data)

    # Combine all houses' data
    input_data = np.concatenate(input_data_list, axis=0)
    decoder_input_data = np.concatenate(decoder_input_list, axis=0)
    target_data = np.concatenate(target_data_list, axis=0)

    return input_data, decoder_input_data, target_data


In [12]:
def get_split_data(train_data, val_data, input_categories, selected_houses):
    input_train, decoder_input_train, target_train = input_target_split(train_data, input_categories, selected_houses)
    input_val, decoder_input_val, target_val = input_target_split(val_data, input_categories, selected_houses)
    #input_test, decoder_input_test, target_test = input_target_split(test_data, input_categories, selected_houses)
    return input_train, decoder_input_train, target_train, input_val, decoder_input_val, target_val

In [13]:
batch_size=8

In [14]:
def get_datasets(input_train, decoder_input_train, target_train, input_val, decoder_input_val, target_val, batch_size=batch_size):
    # Reduce the batch size, in case it helps !
    # Convert to TensorFlow datasets
    train_dataset = tf.data.Dataset.from_tensor_slices(((input_train, decoder_input_train), target_train))
    #train_dataset = train_dataset.cache().shuffle(1000).batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    train_dataset = train_dataset.cache().batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

    val_dataset = tf.data.Dataset.from_tensor_slices(((input_val, decoder_input_val), target_val))
    val_dataset = val_dataset.cache().batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

    # !!! we do this for test_data separatelly later

    # The validation_split argument is designed to work with NumPy arrays or TensorFlow tensors,
    # where it can easily split the data based on a fraction.
    # It doesn't directly work with TensorFlow datasets because they handle data differently.
    return train_dataset, val_dataset

In [15]:
def print_shapes(input_train, decoder_input_train, target_train, input_val, decoder_input_val, target_val):
    print("For train data: input, decoder_input, target  shapes are:")
    print(input_train.shape)
    print(decoder_input_train.shape)
    print(target_train.shape)
    print("For val data: input, decoder_input, target  shapes are:")
    print(input_val.shape)
    print(decoder_input_val.shape)
    print(target_val.shape)
    num_of_categories = input_train.shape[-1]
    print("Number of categories for prediction: "+str(num_of_categories))
    return num_of_categories

In [16]:
# to calculate the R2 during training and predicting
def r2_score(y_true, y_pred):
    # Flatten the arrays to compute R2 on all predicted values at once
    y_true_flat = tf.reshape(y_true, shape=(-1,))
    y_pred_flat = tf.reshape(y_pred, shape=(-1,))

    # Ensure both tensors are of the same type (float32 in this case)
    y_true_flat = tf.cast(y_true_flat, tf.float32)
    y_pred_flat = tf.cast(y_pred_flat, tf.float32)

    # Calculate R2
    ss_res = tf.reduce_sum(tf.square(y_true_flat - y_pred_flat))
    ss_tot = tf.reduce_sum(tf.square(y_true_flat - tf.reduce_mean(y_true_flat)))

    return 1 - ss_res / (ss_tot + tf.keras.backend.epsilon()) # add small epsilon constant to avoid diivision by 0

In [17]:
def make_transformer_model():
    minutes_per_day = 1440
    # Define the Transformer model
    #model = Transformer(num_layers=4, d_model=128, num_heads=8, dff=512, input_vocab_size=minutes_per_day,
    #                    target_vocab_size=minutes_per_day, dropout_rate=0.1) # minutes_per_day = 1440
    model = Transformer(num_layers=4, d_model=64, num_heads=4, dff=256, input_vocab_size=minutes_per_day,
                        target_vocab_size=minutes_per_day, dropout_rate=0.1) # minutes_per_day = 1440
    # Compile the model
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss=tf.keras.losses.MeanSquaredError(),
                  metrics=[tf.keras.metrics.MeanAbsoluteError(), r2_score])
    return model

In [18]:
def train_transformer_model(model, train_dataset, val_dataset):
    # Define the EarlyStopping callback
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',  # Metric to monitor
        patience=5,          # Number of epochs with no improvement after which training will be stopped
        verbose=1,           # Verbosity mode
        restore_best_weights=True  # Whether to restore model weights from the epoch with the best value of the monitored quantity
    )
    # Train the model with the EarlyStopping callback
    history = model.fit(
        train_dataset,       # Training dataset
        epochs=20,          # Number of epochs
        validation_data=val_dataset,  # Validation dataset
        callbacks=[early_stopping]  # List of callbacks to apply during training
    )
    # change epochs, patience , etc !!! (e.g. epochs=10, patience=3  or  epochs=20, patience=5)
    return model, history

In [19]:
def training_plots(history):
    # Plotting the training and validation loss and MAE
    plt.figure(figsize=(14, 5))

    # Plot training & validation loss values
    plt.subplot(2, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss (MSE)')
    plt.legend(loc='upper right')

    # Plot training & validation MAE values
    plt.subplot(2, 2, 2)
    plt.plot(history.history['mean_absolute_error'], label='Training MAE')
    plt.plot(history.history['val_mean_absolute_error'], label='Validation MAE')
    plt.title('Model Mean Absolute Error')
    plt.xlabel('Epoch')
    plt.ylabel('MAE')
    plt.legend(loc='upper right')

    # Plot training & validation R2 values
    plt.subplot(2, 2, 3)
    plt.plot(history.history['r2_score'], label='Training R2')
    plt.plot(history.history['val_r2_score'], label='Validation R2')
    plt.title('Model R2 Score')
    plt.xlabel('Epoch')
    plt.ylabel('R2 Score')
    plt.legend(loc='upper right')

    plt.tight_layout()
    plt.show()

In [20]:
def save_transformer_model(model, history, input_categories, num_of_houses, save_path='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/models/'):
    # save the model
    # !!! I need to save the scalers and the input_test, target_test, test_dataset as well !!!
    # !!! Or I need to use the same for all models (save once and use for all) !!!
    categories_used = '_'.join(input_categories)
    houses_used = str(num_of_houses)

    model_name = 'transformer_'+categories_used+"_houses_"+houses_used

    # save trained model
    model.save(save_path + model_name + '.keras')

    # save history of training
    with open(save_path + model_name + '_history.pkl', 'wb') as f:
      pickle.dump(history.history, f)

In [21]:
import pickle

# Load the scalers_dict
with open('/content/drive/MyDrive/boilers_drive/scalers.pkl', 'rb') as file:
    scalers_dict = pickle.load(file)

In [22]:
def get_test_values(test_values, house, category='blr_mod_lvl'):
    flattened_test_values = test_values.reshape(-1)
    if category=='blr_mod_lvl':
      de_scaled_test_values = (flattened_test_values.reshape(-1, 1)) * 100
      # de-scaling is multiplying by 100 for blr_mod_lvl values
      final_test_values = de_scaled_test_values.reshape(-1)
      #final_test_values = list(map(int, de_scaled_test_values))
    else:
      scaler = scalers_dict[house][category]
      test_values_reshaped = test_values.reshape(-1, 1)
      de_scaled_values = scaler.inverse_transform(test_values_reshaped)
      final_test_values = de_scaled_values.flatten()  # flatten de-scaled values
    return final_test_values

# get predictions in list form per house
#def get_predictions(model, test_dataset, house, category='blr_mod_lvl'):
#    test_predictions = model.predict(test_dataset)
#    #print("predictions shape: "+str(test_predictions.shape))
#    prediction = get_test_values(test_predictions, house, category)
#    #print("after processing, prediction shape: "+str(prediction.shape))
#    return prediction

def get_predictions(model, input_test, decoder_input_test, house, category='blr_mod_lvl'):
    # Predict using encoder and decoder inputs
    test_predictions = model.predict((input_test, decoder_input_test), training=False)
    # De-scale and flatten the predictions
    prediction = get_test_values(test_predictions, house, category)
    return prediction

In [23]:
# function that gives us the error metrics for given pair: target, prediction
def get_error_metrics(target, prediction):
    # error values
    error = [t - p for t, p in zip(target, prediction)]
    AE = [abs(e) for e in error] # Absolute Error
    SE = [e ** 2 for e in error] # Squared Error

    # error metrics
    # Mean Absolute Error, gives magnitude of errors without caring for direction
    MAE = np.mean(AE)
    # Mean Squared Error, gives higher weight for larger errors
    MSE = np.mean(SE)
    # Root Mean Squared Error,  it is in the same units as the target variable
    RMSE = np.sqrt(MSE)
    # R-Squared, statistical measure that represents the proportion of the variance for the target variable that's explained by the model
    # provides an indication of the goodness of fit
    mean_target = np.mean(target)
    diff = [t - mean_target for t in target]
    denominator = [d ** 2 for d in diff]
    R2 = 1 - (sum(SE) / sum(denominator))

    return [MAE, MSE, RMSE, R2]

In [24]:
def save_predictions_to_csv(target_values, prediction_values, model_name, house, save_path='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_predictions/'):
    folder_name = model_name + '_predictions'
    folder_path = os.path.join(save_path, folder_name)

    # Check if the folder exists, and create it if it doesn't
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        #print(f"Folder created: {folder_path}")
    #else:
        #print(f"Folder already exists: {folder_path}")

    # the dataframe with target and prediction values
    df = pd.DataFrame({
        'Target': target_values,
        'Prediction': prediction_values
    })

    file_name = house+'predictions.csv'
    file_path = os.path.join(folder_path, file_name)

    # Save as a CSV file
    df.to_csv(file_path, index=False)

In [25]:
# get test_dataset (only)
def get_test_dataset(input_test, decoder_input_test, target_test, batch_size=batch_size):
    # Reduce the batch size, in case it helps !
    test_dataset = tf.data.Dataset.from_tensor_slices(((input_test, decoder_input_test), target_test))
    test_dataset = test_dataset.cache().batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
    return test_dataset

# get test input and target for every house (separately) for given input_categories
#def get_test_data_per_house(input_categories, batch_size=batch_size, category='blr_mod_lvl'):
#    test_values_per_house = {}
#    test_dataset_per_house = {}
#    for house in houses:
#        input_test, decoder_input_test, target_test = input_target_split(test_data[test_data["house_id"]==house], input_categories, [house])
#        #print("for house with id: "+house)
#        #print("input_test shape: "+str(input_test.shape))
#        #print("target_test shape: "+str(target_test.shape))
#        test_values = get_test_values(target_test, house, category)
#        test_values_per_house[house] = test_values
#        test_dataset_per_house[house] = get_test_dataset(input_test, decoder_input_test, target_test, batch_size=batch_size)
#    return test_values_per_house, test_dataset_per_house


def get_test_data_per_house(input_categories, batch_size=batch_size, category='blr_mod_lvl'):
    test_values_per_house = {}
    test_input_per_house = {}
    test_decoder_input_per_house = {}
    for house in houses:
        # Retrieve encoder input, decoder input, and target
        input_test, decoder_input_test, target_test = input_target_split(test_data[test_data["house_id"] == house], input_categories, [house])

        test_values = get_test_values(target_test, house, category)
        test_values_per_house[house] = test_values
        test_input_per_house[house] = input_test
        test_decoder_input_per_house[house] = decoder_input_test
    return test_values_per_house, test_input_per_house, test_decoder_input_per_house


# function which calculates the average metrics for some houses
def calculate_average_metrics(metrics_per_house, selected_houses):
    MAE_sum, MSE_sum, RMSE_sum, R2_sum = 0, 0, 0, 0
    n = len(selected_houses)
    for house in selected_houses:
        [MAE, MSE, RMSE, R2] = metrics_per_house[house]
        MAE_sum += MAE
        MSE_sum += MSE
        RMSE_sum += RMSE
        R2_sum += R2

    MAE_avg = MAE_sum / n
    MSE_avg = MSE_sum / n
    RMSE_avg = RMSE_sum / n
    R2_avg = R2_sum / n

    return [MAE_avg, MSE_avg, RMSE_avg, R2_avg]

# get prediction metrics per house for a model
# (assume correct input_categories for model and datasets per house)
# it also gives the avg metrics for relevant houses (the houses with which the model was trained) and all houses
#def get_prediction_metrics_per_house(model, model_name, num_of_houses, test_values_per_house, test_dataset_per_house, category='blr_mod_lvl', save_path='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_predictions/'):
#    prediction_per_house = {}
#    error_metrics_per_house = {}
#    for house in houses:
#        prediction_per_house[house] = get_predictions(model, test_dataset_per_house[house], house, category)
#        save_predictions_to_csv(test_values_per_house[house], prediction_per_house[house], model_name, house, save_path)
#        [MAE, MSE, RMSE, R2] = get_error_metrics(test_values_per_house[house], prediction_per_house[house])
#        error_metrics_per_house[house] = [MAE, MSE, RMSE, R2]
#    for special in ["relevant_houses", "all_houses"]:
#        if (special=="relevant_houses"):
#            selected_houses = houses[:num_of_houses]
#        else: # in this case we use all the houses
#            selected_houses = houses
#        error_metrics_per_house[special] = calculate_average_metrics(error_metrics_per_house, selected_houses)
#    return error_metrics_per_house

def get_prediction_metrics_per_house(model, model_name, num_of_houses, test_values_per_house, test_input_per_house, test_decoder_input_per_house, category='blr_mod_lvl', save_path='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_predictions/'):
    prediction_per_house = {}
    error_metrics_per_house = {}
    for house in houses:
        prediction_per_house[house] = get_predictions(model, test_input_per_house[house], test_decoder_input_per_house[house], house, category)
        save_predictions_to_csv(test_values_per_house[house], prediction_per_house[house], model_name, house, save_path)
        [MAE, MSE, RMSE, R2] = get_error_metrics(test_values_per_house[house], prediction_per_house[house])
        error_metrics_per_house[house] = [MAE, MSE, RMSE, R2]
    for special in ["relevant_houses", "all_houses"]:
        if (special=="relevant_houses"):
            selected_houses = houses[:num_of_houses]
        else:
            selected_houses = houses
        error_metrics_per_house[special] = calculate_average_metrics(error_metrics_per_house, selected_houses)
    return error_metrics_per_house


# get prediction metrics per house for a model in dataframe form
def get_metrics_dataframe(model_name, error_metrics_per_house, categories_str, num_of_houses, num_of_categories):
    # along with the individual houses we also have average metrics for the model
    # we add first the "relevant" and "all" before the individual houses in the list
    used_houses = ["relevant_houses", "all_houses"]
    for house in houses:
        used_houses.append(house)

    data = {}
    data['categories'] = [categories_str]
    data['num_of_categories'] = [num_of_categories]
    data['num_of_houses'] = [num_of_houses]
    # Constructing the column names and filling them with metrics
    for house in used_houses:
        metrics = error_metrics_per_house[house]
        data['MAE_'+house] = [metrics[0]]
        data['MSE_'+house] = [metrics[1]]
        data['RMSE_'+house] = [metrics[2]]
        data['R2_'+house] = [metrics[3]]

    # Create the dataframe
    new_house_df = pd.DataFrame(data, index=[model_name])
    return new_house_df

In [26]:
# loading the transformer error metrics (until now) from csv and adding the new house (as a new row)
def update_transformer_error_metrics(new_house_df, model_name, path_to_transformer_error_metrics='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_error_metrics.csv'):
    # load the existing dataframe from csv file
    try:
        transformer_error_metrics = pd.read_csv(path_to_transformer_error_metrics, index_col=0)
    except FileNotFoundError:
        # if the file does not exist, creates a new dataframe
        print("File not found. Creating a new one.")
        transformer_error_metrics = new_house_df # there is no file, so the 'new' is made the 'transformer_error_metrics' file by default
        # save the updated dataframe back to csv file
        transformer_error_metrics.to_csv(path_to_transformer_error_metrics)
        return transformer_error_metrics

    # align columns between existing dataframe and the new dataframe and then concatenate
    new_house_df = new_house_df.reindex(columns=transformer_error_metrics.columns, fill_value=pd.NA) # missing columns will be filled with value: 'NaN'
    # check if the model is already in the existing dataframe
    if model_name in transformer_error_metrics.index:
        # update the existing row with new values
        transformer_error_metrics.loc[model_name] = new_house_df.iloc[0]
    else:
        # append the new row if the model does not exist
        transformer_error_metrics = pd.concat([transformer_error_metrics, new_house_df], axis=0)

    # save the updated dataframe back to csv file
    transformer_error_metrics = transformer_error_metrics.sort_values(by=['num_of_categories', 'categories', 'num_of_houses'])
    transformer_error_metrics.to_csv(path_to_transformer_error_metrics)

    return transformer_error_metrics

In [27]:
#path_to_transformer_error_metrics = '/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_error_metrics.csv'
#transformer_error_metrics = pd.read_csv(path_to_transformer_error_metrics, index_col=0)
#transformer_error_metrics = transformer_error_metrics.sort_values(by=['num_of_categories', 'categories', 'num_of_houses'])
#print(transformer_error_metrics.to_string(index=True))

In [28]:
# do it all completely
def transformer_model_complete(train_data, val_data, input_categories, num_of_houses, batch_size):
    print("splitting data")
    selected_houses = houses[:num_of_houses]
    input_train, decoder_input_train, target_train, input_val, decoder_input_val, target_val = get_split_data(train_data, val_data, input_categories, selected_houses)
    print("getting datasets")
    train_dataset, val_dataset = get_datasets(input_train, decoder_input_train, target_train, input_val, decoder_input_val, target_val, batch_size=batch_size)
    print("printing shapes and getting num_of_categories")
    num_of_categories = print_shapes(input_train, decoder_input_train, target_train, input_val, decoder_input_val, target_val)
    print("making transformer model")
    model = make_transformer_model()
    print("training transformer model")
    model, history = train_transformer_model(model, train_dataset, val_dataset)
    print("the training plots")
    training_plots(history)
    #print("saving the model and the training history")
    #save_transformer_model(model, history, input_categories, num_of_houses)
    return model, history, input_categories, num_of_houses

def doing_testing(model, input_categories, num_of_houses, test_values_per_house, test_input_per_house, test_decoder_input_per_house, category='blr_mod_lvl', save_path='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_predictions/', path_to_transformer_error_metrics='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_error_metrics.csv'):
    num_of_categories = len(input_categories)
    categories_used = '_'.join(input_categories)
    houses_used = str(num_of_houses)
    model_name = 'transformer_'+categories_used+"_houses_"+houses_used
    print("getting error metrics")
    error_metrics_per_house = get_prediction_metrics_per_house(model, model_name, num_of_houses, test_values_per_house, test_input_per_house, test_decoder_input_per_house, category, save_path)
    new_house_df = get_metrics_dataframe(model_name, error_metrics_per_house, categories_used, num_of_houses, num_of_categories)
    print("updating the transformer error metrics")
    transformer_error_metrics = update_transformer_error_metrics(new_house_df, model_name, path_to_transformer_error_metrics) # path for load/store is already given
    print("the new transformer error metrics file:")
    print(transformer_error_metrics.to_string(index=True))
    print("--------------------")
    print("finished")


In [29]:
print(prediction_categories)

input_categories = [prediction_categories[0]]

num_of_houses = 1
#num_of_houses = 7
#num_of_houses = len(houses)

possible_num_of_houses = [1, 4, len(houses)] # 1 house, 4 houses (which is about 1/8 of all 28 houses), all houses (28 houses)

print(input_categories)
print(num_of_houses)
print(possible_num_of_houses)

['blr_mod_lvl', 'absorption', 'insulation', 't_r_set', 't_out']
['blr_mod_lvl']
1
[1, 4, 28]


In [30]:
print(prediction_categories[:1])
print(possible_num_of_houses[:1])

['blr_mod_lvl']
[1]


In [31]:
# !!! all models have been trained and saved (problems with loading saved models) !!!
# !!! geting error_metrics !!!
batch_size=8
#batch_size=16 # it is a bit slow, for greater batch_sizes there is error

# ! Metrics are shown for the last batch

for input_category in prediction_categories[:1]:
    input_categories = [input_category]
    #test_values_per_house, test_dataset_per_house = get_test_data_per_house(input_categories, batch_size=batch_size, category='blr_mod_lvl')
    test_values_per_house, test_input_per_house, test_decoder_input_per_house = get_test_data_per_house(input_categories, batch_size=batch_size, category='blr_mod_lvl')
    for num_of_houses in possible_num_of_houses[:1]:
        print("---NEW Transformer MODEL---")
        print("input_categories: "+str(input_categories))
        print("num_of_houses: "+str(num_of_houses))
        print("--------------------")
        model, history, input_categories, num_of_houses = transformer_model_complete(train_data, val_data, input_categories, num_of_houses, batch_size=batch_size)
        doing_testing(model, input_categories, num_of_houses, test_values_per_house, test_input_per_house, test_decoder_input_per_house, category='blr_mod_lvl', save_path='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_predictions/', path_to_transformer_error_metrics='/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_error_metrics.csv')
        print("--------------------")

---NEW Transformer MODEL---
input_categories: ['blr_mod_lvl']
num_of_houses: 1
--------------------
splitting data
getting datasets
printing shapes and getting num_of_categories
For train data: input, decoder_input, target  shapes are:
(134, 1440, 1)
(134, 1440, 1)
(134, 1440, 1)
For val data: input, decoder_input, target  shapes are:
(33, 1440, 1)
(33, 1440, 1)
(33, 1440, 1)
Number of categories for prediction: 1
making transformer model
training transformer model
Epoch 1/20




InvalidArgumentError: Graph execution error:

Detected at node gradient_tape/transformer_1/decoder_1/positional_embedding_1_1/add/BroadcastGradientArgs defined at (most recent call last):
<stack traces unavailable>
Incompatible shapes: [8,64] vs. [1,1440,64]

Stack trace for op definition: 
File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelapp.py", line 619, in start
File "/usr/local/lib/python3.10/dist-packages/tornado/platform/asyncio.py", line 195, in start
File "/usr/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
File "/usr/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
File "/usr/lib/python3.10/asyncio/events.py", line 80, in _run
File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 685, in <lambda>
File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 825, in inner
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 377, in dispatch_queue
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 250, in wrapper
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 748, in __init__
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 786, in run
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 361, in process_one
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 261, in dispatch_shell
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
File "/usr/local/lib/python3.10/dist-packages/ipykernel/kernelbase.py", line 539, in execute_request
File "/usr/local/lib/python3.10/dist-packages/tornado/gen.py", line 234, in wrapper
File "/usr/local/lib/python3.10/dist-packages/ipykernel/ipkernel.py", line 302, in do_execute
File "/usr/local/lib/python3.10/dist-packages/ipykernel/zmqshell.py", line 539, in run_cell
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 2975, in run_cell
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
File "/usr/local/lib/python3.10/dist-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3257, in run_cell_async
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
File "<ipython-input-31-6e2bbd9fd975>", line 17, in <cell line: 8>
File "<ipython-input-28-1e7803f8e847>", line 13, in transformer_model_complete
File "<ipython-input-18-c59fd515f6ae>", line 10, in train_transformer_model
File "/usr/local/lib/python3.10/dist-packages/keras/src/utils/traceback_utils.py", line 117, in error_handler
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 318, in fit
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 121, in one_step_on_iterator
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 108, in one_step_on_data
File "/usr/local/lib/python3.10/dist-packages/keras/src/backend/tensorflow/trainer.py", line 70, in train_step

	 [[{{node gradient_tape/transformer_1/decoder_1/positional_embedding_1_1/add/BroadcastGradientArgs}}]]
	tf2xla conversion failed while converting __inference_one_step_on_data_41446[]. Run with TF_DUMP_GRAPH_PREFIX=/path/to/dump/dir and --vmodule=xla_compiler=2 to obtain a dump of the compiled functions.
	 [[StatefulPartitionedCall]] [Op:__inference_one_step_on_iterator_42561]

In [None]:
path_to_transformer_error_metrics = '/content/drive/Othercomputers/My_Laptop/code/updated_transformers/transformer_error_metrics.csv'
transformer_error_metrics = pd.read_csv(path_to_transformer_error_metrics, index_col=0)
transformer_error_metrics = transformer_error_metrics.sort_values(by=['num_of_houses', 'categories', 'num_of_categories'])
print(transformer_error_metrics.to_string(index=True))

**------------------------------------------------------------------**

---
**-------------------Μερικές Δοκιμές---------------------------**

---
**-------------------------------------------------------------------**