# MODEL 2: LSTM solution

In this exercise, we are asked to build a `Seq2Vec` model using at least one nn.LSTM() layer. Using the values of a set of features at timepoints $t-k$ through $t-1$, we are asked to predict the following targets at timepoint $t$: 

- `p(mbar)`, atmospheric pressure
- `T (degC)`, air temperature
- `rh (%)`, relative humidity
- `wv(m/s)`, wind velocity

For simplicity, we will first initialize this model at  $k=4$. Then, once we get that model working, we will set  `k`  as a hyperparameter and iterate through `{4, 6, 8}`, along with two learning rate values `{0.01, 0.001}` and batch size `{32, 64}` .

In [1]:
# First we will import the necessary dependencies
%matplotlib inline
import torch
import torch.nn as nn
import torch.optim as optim
import torchmetrics
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.utils.data import SubsetRandomSampler
import pytorch_lightning as L
import torch.utils.data
import datetime

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

In [2]:
# We can check the current working directory
import os
print(os.getcwd())

# We determine whether CUDA is available 
if torch.cuda.is_available():
    device = torch.device("cuda")
    print (f"Using GPU: {torch.cuda.get_device_name(device)}")
else: 
    device = torch.device("cpu")
    print (f"Using CPU")

# We empty the cuda cache for good measure and make sure both the cache and the RAM are at zero
torch.cuda.empty_cache()
print(f"CUDA cache used: {torch.cuda.memory_reserved() / 1e6:.1f} MB")
print(f"CUDA RAM used: {torch.cuda.memory_allocated() / 1e6:.1f} MB")

C:\Users\usuario\RecogniChess
Using GPU: NVIDIA GeForce GTX 960M
CUDA cache used: 0.0 MB
CUDA RAM used: 0.0 MB


Before we do anything else, we will import the data and take a look at it:

In [3]:
# We import the training data
df = pd.read_csv(r"C:\Users\usuario\RecogniChess\HW4\weather_train.csv", header=0, parse_dates=[0])
print(df.shape)

(56072, 15)


In [4]:
df.dropna(inplace=True)
print(df.shape)

(56072, 15)


In [5]:
print(df.shape)

(56072, 15)


In [6]:
# Let's take a look
df.tail()

Unnamed: 0,Date Time,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
56067,2015-05-24 14:00:00,991.94,17.16,290.99,9.83,62.0,19.6,12.15,7.45,7.66,12.25,1184.73,3.0,4.6,32.83
56068,2015-05-24 15:00:00,991.52,17.77,291.64,9.29,57.52,20.37,11.72,8.66,7.38,11.82,1181.94,0.88,2.28,99.4
56069,2015-05-24 16:00:00,990.96,19.4,293.32,9.07,51.17,22.56,11.55,11.02,7.28,11.65,1174.77,0.62,1.36,143.0
56070,2015-05-24 17:00:00,990.56,19.03,292.98,9.04,52.25,22.05,11.52,10.53,7.27,11.63,1175.79,0.79,1.36,197.1
56071,2015-05-24 18:00:00,990.23,18.46,292.44,8.97,53.89,21.28,11.47,9.81,7.23,11.58,1177.72,1.11,2.62,49.24


Data looks good, but we need to apply a couple of changes: 
- First, we need to separate out the first column, which doesn't hold feature values but rather keeps time
- Second, we seriously need to minmax scale the other columns because they're on vastly different scales right now.

In [7]:
# First, we will turn the first column into a Pandas datetime object
df['Date Time'] = pd.to_datetime(df['Date Time'])

In [8]:
# Filter out entries whose time is not at noon, 6 PM, midnight, or 6 AM
df = df[(df['Date Time'].dt.hour == 12) | (df['Date Time'].dt.hour == 18) | (df['Date Time'].dt.hour == 0) | (df['Date Time'].dt.hour == 6)]

# Add a new column indicating whether the hour is 12/18 or 0/6
df['Time of Day'] = df['Date Time'].apply(lambda x: 1 if x.hour in [12, 18] else 0)

# Map months to time of year
def map_month_to_time_of_year(month):
    if month in range(1, 4):
        return 0
    elif month in range(4, 7):
        return 1
    elif month in range(7, 10):
        return 2
    else:
        return 3
    
# Add a new column indicating the time of year
df['Time of Year'] = df['Date Time'].apply(lambda x: map_month_to_time_of_year(x.month))

# Reset the index of the resulting dataframe
df = df.reset_index(drop=True)

In [9]:
df.head()

Unnamed: 0,Date Time,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg),Time of Day,Time of Year
0,2009-01-01 06:00:00,997.71,-9.67,263.66,-10.62,92.7,2.93,2.71,0.21,1.69,2.72,1317.71,0.05,0.5,146.0,0,0
1,2009-01-01 12:00:00,1000.3,-6.87,266.27,-8.28,89.6,3.64,3.27,0.38,2.03,3.26,1306.98,1.84,2.63,184.4,1,0
2,2009-01-01 18:00:00,1000.16,-5.25,267.9,-6.75,89.1,4.13,3.68,0.45,2.29,3.68,1298.68,0.55,1.0,183.7,1,0
3,2009-02-01 00:00:00,999.59,-4.54,268.65,-5.46,93.2,4.36,4.06,0.3,2.53,4.06,1294.33,0.41,0.88,155.0,0,0
4,2009-02-01 06:00:00,998.0,-4.43,268.88,-4.94,96.2,4.39,4.23,0.17,2.64,4.23,1291.66,0.81,2.13,40.88,0,0


We see that we only have samples collected every 6 hours. This will help our model look further into the past and learn weather patterns that operate on a timeframe larger than 8 hours.

In [10]:
# Now we MinMax scale every column except the first column
scaler = MinMaxScaler(feature_range=(-1, 1)) # This is the scaler we will use to scale every column to values from 0 to 1
df_scaled = pd.DataFrame(scaler.fit_transform(df.iloc[:, 1:]), columns=df.columns[1:]) # Take all columns except the first
df_scaled.insert(0, 'Date Time', df['Date Time'], True) # Insert the first column into the new dataframe

In [11]:
# Let's take a look again
df_scaled.tail(7)

Unnamed: 0,Date Time,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg),Time of Day,Time of Year
9338,2015-05-23 06:00:00,0.421768,0.072778,0.064858,0.247344,0.406065,-0.63505,-0.402344,-0.863801,-0.412844,-0.40792,-0.162511,-0.819259,-0.853617,-0.174215,-1.0,-0.333333
9339,2015-05-23 12:00:00,0.378232,0.418125,0.413319,0.31021,-0.399773,-0.271183,-0.332031,-0.389436,-0.341896,-0.337178,-0.467809,-0.534815,-0.445106,-0.90264,1.0,-0.333333
9340,2015-05-23 18:00:00,0.343755,0.373338,0.371935,0.418166,-0.080156,-0.330443,-0.195312,-0.56551,-0.204893,-0.200308,-0.446826,-0.86963,-0.836596,0.181439,1.0,-0.333333
9341,2015-05-24 00:00:00,0.400438,0.223933,0.217527,0.458053,0.496665,-0.500632,-0.139062,-0.846194,-0.15107,-0.145713,-0.30897,-0.44,-0.515745,-0.873743,-1.0,-0.333333
9342,2015-05-24 06:00:00,0.408327,0.163751,0.157016,0.390418,0.511765,-0.558085,-0.232031,-0.866908,-0.244037,-0.238754,-0.251897,-0.774815,-0.768511,-0.751542,-1.0,-0.333333
9343,2015-05-24 12:00:00,0.400438,0.320854,0.314206,0.458053,0.165723,-0.395122,-0.139062,-0.694977,-0.15107,-0.145713,-0.389753,-0.751111,-0.748085,-0.259794,1.0,-0.333333
9344,2015-05-24 18:00:00,0.321841,0.420224,0.42097,0.435508,-0.160438,-0.268293,-0.171094,-0.491973,-0.181651,-0.175702,-0.492149,-0.835556,-0.777021,-0.726368,1.0,-0.333333


Every variable is now on a scale from -1 to 1, which will make it much easier for our model to learn. Let us now subset the variables that we want. We will keep the model simple and just take the absolute minimum number of variables possible. That is, we will take only the targets and the two custom variables,  and use their previous timepoints to predict themselves. First, let's subset them:

In [12]:
# In comments are all the options tried, the reader may ignore these

# df = df_scaled[['Date Time', 'p (mbar)','T (degC)', 'rh (%)', 'wv (m/s)', 'Time of Day', 'Time of Year','Tpot (K)', 'Tdew (degC)', 'VPact (mbar)', 'VPdef (mbar)','max. wv (m/s)']] # Create a new dataframe that subsets the scaled dataframe
# df = df_scaled[['Date Time', 'p (mbar)','T (degC)', 'rh (%)', 'wv (m/s)', 'Time of Day', 'Time of Year','Tdew (degC)', 'VPact (mbar)', 'VPdef (mbar)','max. wv (m/s)']] # Create a new dataframe that subsets the scaled dataframe
# df_scaled = df_scaled[['Date Time', 'p (mbar)','T (degC)', 'rh (%)', 'wv (m/s)', 'Time of Day', 'Time of Year', 'VPact (mbar)', 'VPdef (mbar)','max. wv (m/s)']]
# df_scaled = df_scaled[['Date Time', 'p (mbar)','T (degC)', 'rh (%)', 'wv (m/s)', 'Time of Day', 'Time of Year', 'VPdef (mbar)','max. wv (m/s)']]
# df_scaled = df_scaled[['Date Time', 'p (mbar)','T (degC)', 'rh (%)', 'wv (m/s)', 'Time of Day', 'Time of Year', 'max. wv (m/s)']]

df_scaled = df_scaled[['Date Time', 'p (mbar)','T (degC)', 'rh (%)', 'wv (m/s)', 'Time of Day', 'Time of Year']]

df_scaled.tail() # We take a look

Unnamed: 0,Date Time,p (mbar),T (degC),rh (%),wv (m/s),Time of Day,Time of Year
9340,2015-05-23 18:00:00,0.343755,0.373338,-0.080156,-0.86963,1.0,-0.333333
9341,2015-05-24 00:00:00,0.400438,0.223933,0.496665,-0.44,-1.0,-0.333333
9342,2015-05-24 06:00:00,0.408327,0.163751,0.511765,-0.774815,-1.0,-0.333333
9343,2015-05-24 12:00:00,0.400438,0.320854,0.165723,-0.751111,1.0,-0.333333
9344,2015-05-24 18:00:00,0.321841,0.420224,-0.160438,-0.835556,1.0,-0.333333


From inspection the plots of the four targets, we see that there are some anomalies present in the data occurring when the year 2015 begins. Thus, we will trim off the samples corresponding to the year 2015, as we do not want to contaminate our validation set. 

In [13]:
df_scaled = df_scaled[df_scaled['Date Time'].dt.year != 2015] # Remove all rows whose year is 2015
df_scaled.reset_index(drop=True, inplace=True) # Reset the index of the resulting dataframe

In [14]:
df_scaled.tail() # We take a look

Unnamed: 0,Date Time,p (mbar),T (degC),rh (%),wv (m/s),Time of Day,Time of Year
8764,2014-12-30 18:00:00,0.709861,-0.176697,0.85655,-0.568889,1.0,1.0
8765,2014-12-31 00:00:00,0.741709,-0.172148,0.748333,-0.685926,-1.0,1.0
8766,2014-12-31 06:00:00,0.737911,-0.189293,0.781049,-0.96,-1.0,1.0
8767,2014-12-31 12:00:00,0.756026,-0.122113,0.710583,-0.875556,1.0,1.0
8768,2014-12-31 18:00:00,0.774142,-0.140308,0.949667,-0.885926,1.0,1.0


The trimming worked, so we are now ready to create columns containing the prior timepoints:

In [15]:
from copy import deepcopy as dc

def prepare_dataframe_for_lstm(df, n_steps):
    # We first make a deep copy of the original dataframe to avoid any unintentional changes to the original DataFrame
    df = dc(df_scaled)
    
    # Now we set the DateTime column as the index
    df.set_index('Date Time', inplace=True)
    
    # We add columns for past values of pressure, temperature, relative humidity, and wind speed
    for i in range(1, n_steps+1):
        df[f'p (mbar)(t-{i})'] = df['p (mbar)'].shift(i)

    for i in range(1, n_steps+1):
        df[f'T (degC)(t-{i})'] = df['T (degC)'].shift(i)

    for i in range(1, n_steps+1):
        df[f'rh (%)(t-{i})'] = df['rh (%)'].shift(i)

    for i in range(1, n_steps+1):
        df[f'wv (m/s)(t-{i})'] = df['wv (m/s)'].shift(i)

    for i in range(1, n_steps+1):
        df[f'Time of Day(t-{i})'] = df['Time of Day'].shift(i)
        
    for i in range(1, n_steps+1):
        df[f'Time of Year(t-{i})'] = df['Time of Year'].shift(i)

#     for i in range(1, n_steps+1):
#         df[f'Tpot (K)(t-{i})'] = df['Tpot (K)'].shift(i)

#     for i in range(1, n_steps+1):
#         df[f'Tdew (degC)(t-{i})'] = df['Tdew (degC)'].shift(i)

#     for i in range(1, n_steps+1):
#         df[f'VPact (mbar)(t-{i})'] = df['VPact (mbar)'].shift(i)

#     for i in range(1, n_steps+1):
#         df[f'VPdef (mbar)(t-{i})'] = df['VPdef (mbar)'].shift(i)

#     for i in range(1, n_steps+1):
#         df[f'max. wv (m/s)(t-{i})'] = df['max. wv (m/s)'].shift(i)

    # It is also important to drop rows with any missing values
    df.dropna(inplace=True)
    
    # Return the shifted dataframe
    return df

# Define the lookback window size, which is equivalent to 'k' steps in our case
lookback = 4

# Prepare the dataframe for the LSTM model
shifted_df = prepare_dataframe_for_lstm(df, lookback)

# Return the shifted dataframe
shifted_df


Unnamed: 0_level_0,p (mbar),T (degC),rh (%),wv (m/s),Time of Day,Time of Year,p (mbar)(t-1),p (mbar)(t-2),p (mbar)(t-3),p (mbar)(t-4),...,wv (m/s)(t-3),wv (m/s)(t-4),Time of Day(t-1),Time of Day(t-2),Time of Day(t-3),Time of Day(t-4),Time of Year(t-1),Time of Year(t-2),Time of Year(t-3),Time of Year(t-4)
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-02-01 06:00:00,0.548868,-0.380686,0.904366,-0.880000,-1.0,-1.0,0.595325,0.611980,0.616070,0.540394,...,-0.727407,-0.992593,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2009-02-01 12:00:00,0.575457,-0.334850,0.750849,-0.771852,1.0,-1.0,0.548868,0.595325,0.611980,0.616070,...,-0.918519,-0.727407,-1.0,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0
2009-02-01 18:00:00,0.628926,-0.333100,0.806216,-0.560000,1.0,-1.0,0.575457,0.548868,0.595325,0.611980,...,-0.939259,-0.918519,1.0,-1.0,-1.0,1.0,-1.0,-1.0,-1.0,-1.0
2009-03-01 00:00:00,0.675091,-0.390483,0.748333,-0.789630,-1.0,-1.0,0.628926,0.575457,0.548868,0.595325,...,-0.880000,-0.939259,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2009-03-01 06:00:00,0.644996,-0.512596,0.836416,-0.930370,-1.0,-1.0,0.675091,0.628926,0.575457,0.548868,...,-0.771852,-0.880000,-1.0,1.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014-12-30 18:00:00,0.709861,-0.176697,0.856550,-0.568889,1.0,1.0,0.710738,0.733820,0.820015,0.755150,...,-0.890370,-0.619259,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0
2014-12-31 00:00:00,0.741709,-0.172148,0.748333,-0.685926,-1.0,1.0,0.709861,0.710738,0.733820,0.820015,...,-0.727407,-0.890370,1.0,1.0,-1.0,-1.0,1.0,1.0,1.0,1.0
2014-12-31 06:00:00,0.737911,-0.189293,0.781049,-0.960000,-1.0,1.0,0.741709,0.709861,0.710738,0.733820,...,-0.817778,-0.727407,-1.0,1.0,1.0,-1.0,1.0,1.0,1.0,1.0
2014-12-31 12:00:00,0.756026,-0.122113,0.710583,-0.875556,1.0,1.0,0.737911,0.741709,0.709861,0.710738,...,-0.568889,-0.817778,-1.0,-1.0,1.0,1.0,1.0,1.0,1.0,1.0


That worked, but the problem is that columns are ordered from $t-1$ to $t-k$, which is the opposite direction we want. We want to feed the furthest timepoints first and end with the most recent timepoint. Moreover, as it stands, columns are ordered by feature, not by timepoint, so we need to fix these two things: 

In [16]:
# Define a list of timepoints to group by
timepoints = [f't-{i}' for i in range(lookback, 0, -1)]

# In comments below are all the options tried. The reader may ignore these

# # Group the shifted columns by timepoint
# all_shifted_cols = []
# for timepoint in timepoints:
#     columns = [f'{col}({timepoint})' for col in ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)','Time of Day', 'Time of Year','Tpot (K)', 'Tdew (degC)', 'VPact (mbar)', 'VPdef (mbar)', 'max. wv (m/s)']]
#     all_shifted_cols.extend(columns)

# # Group the shifted columns by timepoint
# all_shifted_cols = []
# for timepoint in timepoints:
#     columns = [f'{col}({timepoint})' for col in ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)','Time of Day', 'Time of Year','Tdew (degC)', 'VPact (mbar)', 'VPdef (mbar)', 'max. wv (m/s)']]
#     all_shifted_cols.extend(columns)
    
# # Group the shifted columns by timepoint
# all_shifted_cols = []
# for timepoint in timepoints:
#     columns = [f'{col}({timepoint})' for col in ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)','Time of Day', 'Time of Year', 'VPact (mbar)', 'VPdef (mbar)', 'max. wv (m/s)']]
#     all_shifted_cols.extend(columns)
    
# # Group the shifted columns by timepoint
# all_shifted_cols = []
# for timepoint in timepoints:
#     columns = [f'{col}({timepoint})' for col in ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)','Time of Day', 'Time of Year', 'VPdef (mbar)', 'max. wv (m/s)']]
#     all_shifted_cols.extend(columns)  
    
# # Group the shifted columns by timepoint
# all_shifted_cols = []
# for timepoint in timepoints:
#     columns = [f'{col}({timepoint})' for col in ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)','Time of Day', 'Time of Year', 'max. wv (m/s)']]
#     all_shifted_cols.extend(columns)



# Group the shifted columns by timepoint
all_shifted_cols = []
for timepoint in timepoints:
    columns = [f'{col}({timepoint})' for col in ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)','Time of Day', 'Time of Year']]
    all_shifted_cols.extend(columns)

# # Concatenate the target columns and the grouped columns
# target_cols = ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)']
# feature_cols = ['Time of Day', 'Time of Year','Tpot (K)', 'Tdew (degC)', 'VPact (mbar)', 'VPdef (mbar)', 'max. wv (m/s)']
# all_cols = target_cols + feature_cols + all_shifted_cols

# # Concatenate the target columns and the grouped columns
# target_cols = ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)']
# feature_cols = ['Time of Day', 'Time of Year','Tdew (degC)', 'VPact (mbar)', 'VPdef (mbar)', 'max. wv (m/s)']
# all_cols = target_cols + feature_cols + all_shifted_cols

# # Concatenate the target columns and the grouped columns
# target_cols = ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)']
# feature_cols = ['Time of Day', 'Time of Year', 'VPact (mbar)', 'VPdef (mbar)', 'max. wv (m/s)']
# all_cols = target_cols + feature_cols + all_shifted_cols

# # Concatenate the target columns and the grouped columns
# target_cols = ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)']
# feature_cols = ['Time of Day', 'Time of Year', 'VPdef (mbar)', 'max. wv (m/s)']
# all_cols = target_cols + feature_cols + all_shifted_cols

# # Concatenate the target columns and the grouped columns
# target_cols = ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)']
# feature_cols = ['Time of Day', 'Time of Year', 'max. wv (m/s)']
# all_cols = target_cols + feature_cols + all_shifted_cols


# Concatenate the target columns and the grouped columns
target_cols = ['p (mbar)', 'T (degC)', 'rh (%)', 'wv (m/s)']
feature_cols = ['Time of Day', 'Time of Year']
all_cols = target_cols + feature_cols + all_shifted_cols

In [17]:
# Let us take a look at the final variable 'all_cols'
all_cols

['p (mbar)',
 'T (degC)',
 'rh (%)',
 'wv (m/s)',
 'Time of Day',
 'Time of Year',
 'p (mbar)(t-4)',
 'T (degC)(t-4)',
 'rh (%)(t-4)',
 'wv (m/s)(t-4)',
 'Time of Day(t-4)',
 'Time of Year(t-4)',
 'p (mbar)(t-3)',
 'T (degC)(t-3)',
 'rh (%)(t-3)',
 'wv (m/s)(t-3)',
 'Time of Day(t-3)',
 'Time of Year(t-3)',
 'p (mbar)(t-2)',
 'T (degC)(t-2)',
 'rh (%)(t-2)',
 'wv (m/s)(t-2)',
 'Time of Day(t-2)',
 'Time of Year(t-2)',
 'p (mbar)(t-1)',
 'T (degC)(t-1)',
 'rh (%)(t-1)',
 'wv (m/s)(t-1)',
 'Time of Day(t-1)',
 'Time of Year(t-1)']

Feature columns are now grouped by tiempoint and the direction of time has been flipped. Good progress, but now we must delete the custom features at timepoint $t$, as we are not allowed to use current timepoints.

In [18]:
# First we udpate the df
shifted_df = shifted_df[all_cols]

In [19]:
#shifted_df = shifted_df.drop(shifted_df.columns[4:11], axis=1)
# shifted_df = shifted_df.drop(shifted_df.columns[4:10], axis=1)
#shifted_df = shifted_df.drop(shifted_df.columns[4:9], axis=1)
# shifted_df = shifted_df.drop(shifted_df.columns[4:8], axis=1)
# shifted_df = shifted_df.drop(shifted_df.columns[4:7], axis=1)

shifted_df = shifted_df.drop(shifted_df.columns[4:6], axis=1) # We delete the custom features at timpoint t

In [20]:
shifted_df

Unnamed: 0_level_0,p (mbar),T (degC),rh (%),wv (m/s),p (mbar)(t-4),T (degC)(t-4),rh (%)(t-4),wv (m/s)(t-4),Time of Day(t-4),Time of Year(t-4),...,rh (%)(t-2),wv (m/s)(t-2),Time of Day(t-2),Time of Year(t-2),p (mbar)(t-1),T (degC)(t-1),rh (%)(t-1),wv (m/s)(t-1),Time of Day(t-1),Time of Year(t-1)
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-02-01 06:00:00,0.548868,-0.380686,0.904366,-0.880000,0.540394,-0.564031,0.816283,-0.992593,-1.0,-1.0,...,0.725683,-0.918519,1.0,-1.0,0.595325,-0.384535,0.828866,-0.939259,-1.0,-1.0
2009-02-01 12:00:00,0.575457,-0.334850,0.750849,-0.771852,0.616070,-0.466060,0.738266,-0.727407,1.0,-1.0,...,0.828866,-0.939259,-1.0,-1.0,0.548868,-0.380686,0.904366,-0.880000,-1.0,-1.0
2009-02-01 18:00:00,0.628926,-0.333100,0.806216,-0.560000,0.611980,-0.409377,0.725683,-0.918519,1.0,-1.0,...,0.904366,-0.880000,-1.0,-1.0,0.575457,-0.334850,0.750849,-0.771852,1.0,-1.0
2009-03-01 00:00:00,0.675091,-0.390483,0.748333,-0.789630,0.595325,-0.384535,0.828866,-0.939259,-1.0,-1.0,...,0.750849,-0.771852,1.0,-1.0,0.628926,-0.333100,0.806216,-0.560000,1.0,-1.0
2009-03-01 06:00:00,0.644996,-0.512596,0.836416,-0.930370,0.548868,-0.380686,0.904366,-0.880000,-1.0,-1.0,...,0.806216,-0.560000,1.0,-1.0,0.675091,-0.390483,0.748333,-0.789630,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2014-12-30 18:00:00,0.709861,-0.176697,0.856550,-0.568889,0.755150,-0.339048,0.675349,-0.619259,1.0,1.0,...,0.881716,-0.727407,-1.0,1.0,0.710738,-0.305808,0.891783,-0.817778,1.0,1.0
2014-12-31 00:00:00,0.741709,-0.172148,0.748333,-0.685926,0.820015,-0.403079,0.735749,-0.890370,-1.0,1.0,...,0.891783,-0.817778,1.0,1.0,0.709861,-0.176697,0.856550,-0.568889,1.0,1.0
2014-12-31 06:00:00,0.737911,-0.189293,0.781049,-0.960000,0.733820,-0.363891,0.881716,-0.727407,-1.0,1.0,...,0.856550,-0.568889,1.0,1.0,0.741709,-0.172148,0.748333,-0.685926,-1.0,1.0
2014-12-31 12:00:00,0.756026,-0.122113,0.710583,-0.875556,0.710738,-0.305808,0.891783,-0.817778,1.0,1.0,...,0.748333,-0.685926,-1.0,1.0,0.737911,-0.189293,0.781049,-0.960000,-1.0,1.0


That looks good. The first four columns contain our targets at timepoint $t$. Then, the other columns contain the $t-k$ ... $t-1$ values for each target variable. We're not done yet, though. Now we can turn this data into a Numpy array:

In [21]:
# We convert to numpy and print
shifted_df_as_np = shifted_df.to_numpy()
print(shifted_df_as_np)

[[ 0.54886779 -0.38068579  0.90436643 ... -0.93925926 -1.
  -1.        ]
 [ 0.57545654 -0.33484955  0.75084938 ... -0.88       -1.
  -1.        ]
 [ 0.62892622 -0.33310007  0.80621618 ... -0.77185185  1.
  -1.        ]
 ...
 [ 0.73791088 -0.18929321  0.78104945 ... -0.68592593 -1.
   1.        ]
 [ 0.7560263  -0.12211337  0.71058261 ... -0.96       -1.
   1.        ]
 [ 0.77414171 -0.14030791  0.94966654 ... -0.87555556  1.
   1.        ]]


In [22]:
# Let's take a look at the shape
shifted_df_as_np.shape

(8765, 28)

We have quite a few rows and 28 columns. That is, 4 targets in the first 4 columns plus $4*k$ columns which equals 24 in this case because we set $k$ to be equal to 6. Now we need to divide this into our `X` and `y` variables. For `X` we will take all columns but the first 4 and for `y` we will take just the first 4:

In [23]:
# We define X and y as mentioned
X = shifted_df_as_np[:, 4:]
y = shifted_df_as_np[:, :4]
# Print the shapes to inspect them
X.shape, y.shape

((8765, 24), (8765, 4))

In [25]:
# Now we will split into train and val objects
split_index = int(len(X) * 0.9) # We define and index
print(split_index)

7888


In [26]:
# Here we will use the index to split our Xs and ys:
X_train = X[:split_index]
X_test = X[split_index:]

y_train = y[:split_index]
y_test = y[split_index:]

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) # Let us inspect the shapes of the splits

(7888, 24) (877, 24) (7888, 4) (877, 4)


The dimension match perfectly for Xs and their respective ys. Now we can reshape the data in the following way:

In [27]:
# We essentially want number_of_samples, lookback_size(k), and number of features for the Xs
X_train = X_train.reshape(-1, 4, 6)
X_test = X_test.reshape(-1, 4, 6)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(7888, 4, 6) (877, 4, 6)
(7888, 4) (877, 4)


In [28]:
# Pytorch only deals with tensors, so we need to convert these last np arrays into torch floats:
X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float()
X_test = torch.tensor(X_test).float()
y_test = torch.tensor(y_test).float()

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape) # Inspect the shape once again

torch.Size([7888, 4, 6]) torch.Size([877, 4, 6]) torch.Size([7888, 4]) torch.Size([877, 4])


Everything looks good now. The next steps are easy since we've already done a lot of pre-processing legwork. Essentially, we just need to pass our Xs and ys into a Dataset class and then wrap the Dataset class into a DataLoader object that will batch our data nicely:

In [29]:
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset): # Nothing fancy here, just making sure we can access length and individual items
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]
    
train_dataset = TimeSeriesDataset(X_train, y_train)
test_dataset = TimeSeriesDataset(X_test, y_test)

In [30]:
from torch.utils.data import DataLoader

batch_size =  32 # We will try batch_size values of 64 and 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True) 
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [31]:
# One last sanity check. Let us make sure we can retrieve batches and that they have the right size: 
for _, batch in enumerate(train_loader):
    x_batch, y_batch = batch[0].to(device), batch[1].to(device)
    print(x_batch.shape, y_batch.shape)
    break

torch.Size([32, 4, 6]) torch.Size([32, 4])


Great, the first dimension of both X and y now correspond to batch length, not number of n_rows * n_columns. This means that the batching has worked well. Let's verify the same for the validation dataloader (here defined as test_loader): 

In [32]:
# One last sanity check. Let us make sure we can retrieve batches and that they have the right size: 
for _, batch in enumerate(test_loader):
    x_batch, y_batch = batch[0].to(device), batch[1].to(device)
    print(x_batch.shape, y_batch.shape)
    break

torch.Size([32, 4, 6]) torch.Size([32, 4])


All good. At long last, we can now define our model class. This next part is a bit finicky in standard PyTorch, compared to Lightning or even TensorFlow, as we need to initialize the hidden state `h0` cell state `c0`. That said, since our model is not too complex, we can live with it:

In [33]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_stacked_layers):
        super().__init__()
        
        # Store the input parameters as instance variables
        self.hidden_size = hidden_size
        self.num_stacked_layers = num_stacked_layers

        # Initialize an LSTM layer with the given input size, hidden size, and number of stacked layers. Set batch_first to True.
        self.lstm = nn.LSTM(input_size, hidden_size, num_stacked_layers, 
                            batch_first=True)
               
        # Initialize a linear layer to map the hidden state output from the LSTM to a scalar output. The output of this linear layer will be the 4 predicted values.
        self.fc1 = nn.Linear(hidden_size, 4)
    
    def forward(self, x):
        h_n = torch.zeros(self.num_stacked_layers, x.size(0), self.hidden_size)
        c_n = torch.zeros(self.num_stacked_layers, x.size(0), self.hidden_size)
        # Pass the input through the LSTM layer
        output, (h_n, c_n) = self.lstm(x)

        # Pass the LSTM output through the linear layer
        output = (self.fc1(output[:, -1, :]))
        
        return output

# Instantiate a new LSTM model with input size 4, hidden size 4, and 3 stacked LSTM layers (really these should be called LSTM cells, but I don't get to name things).
model = LSTM(6, 4, 3)

# Move the model to the device specified by the 'device' variable
model.to(device)

# Print the model
print(model)


LSTM(
  (lstm): LSTM(6, 4, num_layers=3, batch_first=True)
  (fc1): Linear(in_features=4, out_features=4, bias=True)
)


Next we will define a standard training loop as a function called `train_and_validate` which will take number of epochs as input:

In [34]:
def train_and_validate(num_epochs):
    
    ##############
    # TRAINING LOOP #
    #############
      
    # We initialize lists to store training and validation losses and R2s for each epoch   
    model.train(True)
    
    for epoch in range(num_epochs):
        print(f'Epoch: {epoch + 1}')
        
        # Train the model for one epoch
        running_loss = 0.0  
        running_total = 0.0 
        running_correct = 0.0 
        
        for batch_index, batch in enumerate(train_loader):
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)

            output = model(x_batch)
            loss = loss_function(output, y_batch)
            running_loss += loss.item() 
            running_total += y_batch.size(0) 

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            y_pred = output.detach().cpu().numpy()
            y_true = y_batch.detach().cpu().numpy()
            r2 = r2_score(y_true, y_pred)
            running_correct += r2*y_batch.size(0)

            if batch_index % 1000 == 99:  
                avg_loss_across_batches = running_loss / 100
                avg_r2_across_batches = running_correct / running_total
                print('Batch {0}, MAE Loss: {1:.3f}, R2: {2:.3f}'.format(batch_index+1,
                                                                    avg_loss_across_batches,
                                                                    avg_r2_across_batches))
                running_loss = 0.0 
                running_correct = 0.0 
                running_total = 0.0 

        epoch_loss = running_loss / len(train_loader) 
        epoch_r2 = running_correct / running_total
        
        train_losses.append(epoch_loss)
        train_r2s.append(epoch_r2)
        
        print('Epoch Train MAE Loss: {0:.3f}, Epoch Train R2: {1:.3f}'.format(epoch_loss, epoch_r2))  
        print()
        
        ###############
        # VALIDATION LOOP #
        ###############
        
        # Validate the model for one epoch
        model.train(False)

        running_loss = 0.0 
        running_total = 0.0 
        running_correct = 0.0 
        
        for batch_index, batch in enumerate(test_loader):
            x_batch, y_batch = batch[0].to(device), batch[1].to(device)

            with torch.no_grad():
                output = model(x_batch)
                loss = loss_function(output, y_batch)
                running_loss += loss.item()
                running_total += y_batch.size(0)

                y_pred = output.detach().cpu().numpy()
                y_true = y_batch.detach().cpu().numpy()
                r2 = r2_score(y_true, y_pred)
                running_correct += r2*y_batch.size(0)

        avg_loss_across_batches = running_loss / len(test_loader)
        avg_r2_across_batches = running_correct / running_total
        
        val_losses.append(avg_loss_across_batches)
        val_r2s.append(avg_r2_across_batches)
               
        print('Val MAE Loss: {0:.3f}'.format(avg_loss_across_batches))
        print('Val R2 Score: {0:.3f}'.format(avg_r2_across_batches))
        print('***************************************************')
        print()
        
        model.train(True)

In [None]:
### We're ready to train ! ###
learning_rate = 0.01
loss_function = nn.L1Loss() 
num_epochs = 1000
weight_decay = 1e-5
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

train_losses = []
val_losses = []
train_r2s = []
val_r2s = []

train_and_validate(num_epochs = num_epochs)

Epoch: 1
Batch 100, MAE Loss: 0.229, R2: -0.052
Epoch Train MAE Loss: 0.099, Epoch Train R2: 0.425

Val MAE Loss: 0.153
Val R2 Score: 0.415
***************************************************

Epoch: 2
Batch 100, MAE Loss: 0.147, R2: 0.505
Epoch Train MAE Loss: 0.080, Epoch Train R2: 0.552

Val MAE Loss: 0.132
Val R2 Score: 0.524
***************************************************

Epoch: 3
Batch 100, MAE Loss: 0.123, R2: 0.602
Epoch Train MAE Loss: 0.071, Epoch Train R2: 0.621

Val MAE Loss: 0.107
Val R2 Score: 0.631
***************************************************

Epoch: 4
Batch 100, MAE Loss: 0.115, R2: 0.635
Epoch Train MAE Loss: 0.068, Epoch Train R2: 0.647

Val MAE Loss: 0.103
Val R2 Score: 0.606
***************************************************

Epoch: 5
Batch 100, MAE Loss: 0.115, R2: 0.640
Epoch Train MAE Loss: 0.067, Epoch Train R2: 0.652

Val MAE Loss: 0.103
Val R2 Score: 0.653
***************************************************

Epoch: 6
Batch 100, MAE Loss: 0.112, R2

Batch 100, MAE Loss: 0.103, R2: 0.668
Epoch Train MAE Loss: 0.061, Epoch Train R2: 0.675

Val MAE Loss: 0.100
Val R2 Score: 0.655
***************************************************

Epoch: 45
Batch 100, MAE Loss: 0.104, R2: 0.676
Epoch Train MAE Loss: 0.062, Epoch Train R2: 0.677

Val MAE Loss: 0.097
Val R2 Score: 0.666
***************************************************

Epoch: 46
Batch 100, MAE Loss: 0.104, R2: 0.678
Epoch Train MAE Loss: 0.061, Epoch Train R2: 0.675

Val MAE Loss: 0.098
Val R2 Score: 0.669
***************************************************

Epoch: 47
Batch 100, MAE Loss: 0.102, R2: 0.673
Epoch Train MAE Loss: 0.062, Epoch Train R2: 0.672

Val MAE Loss: 0.101
Val R2 Score: 0.655
***************************************************

Epoch: 48
Batch 100, MAE Loss: 0.102, R2: 0.680
Epoch Train MAE Loss: 0.062, Epoch Train R2: 0.677

Val MAE Loss: 0.098
Val R2 Score: 0.666
***************************************************

Epoch: 49
Batch 100, MAE Loss: 0.104, R2: 0.6

Batch 100, MAE Loss: 0.103, R2: 0.684
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.685

Val MAE Loss: 0.094
Val R2 Score: 0.677
***************************************************

Epoch: 88
Batch 100, MAE Loss: 0.102, R2: 0.683
Epoch Train MAE Loss: 0.061, Epoch Train R2: 0.681

Val MAE Loss: 0.094
Val R2 Score: 0.681
***************************************************

Epoch: 89
Batch 100, MAE Loss: 0.100, R2: 0.685
Epoch Train MAE Loss: 0.061, Epoch Train R2: 0.685

Val MAE Loss: 0.100
Val R2 Score: 0.659
***************************************************

Epoch: 90
Batch 100, MAE Loss: 0.102, R2: 0.680
Epoch Train MAE Loss: 0.061, Epoch Train R2: 0.678

Val MAE Loss: 0.096
Val R2 Score: 0.683
***************************************************

Epoch: 91
Batch 100, MAE Loss: 0.103, R2: 0.679
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.684

Val MAE Loss: 0.095
Val R2 Score: 0.677
***************************************************

Epoch: 92
Batch 100, MAE Loss: 0.102, R2: 0.6

Batch 100, MAE Loss: 0.100, R2: 0.691
Epoch Train MAE Loss: 0.061, Epoch Train R2: 0.687

Val MAE Loss: 0.095
Val R2 Score: 0.691
***************************************************

Epoch: 131
Batch 100, MAE Loss: 0.100, R2: 0.694
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.686

Val MAE Loss: 0.095
Val R2 Score: 0.684
***************************************************

Epoch: 132
Batch 100, MAE Loss: 0.101, R2: 0.686
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.687

Val MAE Loss: 0.093
Val R2 Score: 0.690
***************************************************

Epoch: 133
Batch 100, MAE Loss: 0.100, R2: 0.700
Epoch Train MAE Loss: 0.061, Epoch Train R2: 0.681

Val MAE Loss: 0.098
Val R2 Score: 0.670
***************************************************

Epoch: 134
Batch 100, MAE Loss: 0.101, R2: 0.683
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.692

Val MAE Loss: 0.096
Val R2 Score: 0.680
***************************************************

Epoch: 135
Batch 100, MAE Loss: 0.101, R2

Batch 100, MAE Loss: 0.103, R2: 0.682
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.696

Val MAE Loss: 0.094
Val R2 Score: 0.687
***************************************************

Epoch: 174
Batch 100, MAE Loss: 0.102, R2: 0.692
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.688

Val MAE Loss: 0.095
Val R2 Score: 0.696
***************************************************

Epoch: 175
Batch 100, MAE Loss: 0.101, R2: 0.686
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.695

Val MAE Loss: 0.099
Val R2 Score: 0.672
***************************************************

Epoch: 176
Batch 100, MAE Loss: 0.101, R2: 0.688
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.690

Val MAE Loss: 0.095
Val R2 Score: 0.684
***************************************************

Epoch: 177
Batch 100, MAE Loss: 0.100, R2: 0.695
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.688

Val MAE Loss: 0.095
Val R2 Score: 0.685
***************************************************

Epoch: 178
Batch 100, MAE Loss: 0.100, R2

Batch 100, MAE Loss: 0.099, R2: 0.699
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.696

Val MAE Loss: 0.095
Val R2 Score: 0.691
***************************************************

Epoch: 217
Batch 100, MAE Loss: 0.100, R2: 0.697
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.686

Val MAE Loss: 0.092
Val R2 Score: 0.693
***************************************************

Epoch: 218
Batch 100, MAE Loss: 0.100, R2: 0.687
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.692

Val MAE Loss: 0.094
Val R2 Score: 0.686
***************************************************

Epoch: 219
Batch 100, MAE Loss: 0.101, R2: 0.700
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.688

Val MAE Loss: 0.095
Val R2 Score: 0.690
***************************************************

Epoch: 220
Batch 100, MAE Loss: 0.100, R2: 0.700
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.690

Val MAE Loss: 0.100
Val R2 Score: 0.678
***************************************************

Epoch: 221
Batch 100, MAE Loss: 0.101, R2

Batch 100, MAE Loss: 0.100, R2: 0.699
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.708

Val MAE Loss: 0.095
Val R2 Score: 0.689
***************************************************

Epoch: 260
Batch 100, MAE Loss: 0.100, R2: 0.707
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.692

Val MAE Loss: 0.095
Val R2 Score: 0.702
***************************************************

Epoch: 261
Batch 100, MAE Loss: 0.100, R2: 0.709
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.703

Val MAE Loss: 0.091
Val R2 Score: 0.706
***************************************************

Epoch: 262
Batch 100, MAE Loss: 0.099, R2: 0.706
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.702

Val MAE Loss: 0.096
Val R2 Score: 0.692
***************************************************

Epoch: 263
Batch 100, MAE Loss: 0.099, R2: 0.706
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.698

Val MAE Loss: 0.092
Val R2 Score: 0.708
***************************************************

Epoch: 264
Batch 100, MAE Loss: 0.099, R2

Batch 100, MAE Loss: 0.098, R2: 0.710
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.707

Val MAE Loss: 0.090
Val R2 Score: 0.714
***************************************************

Epoch: 303
Batch 100, MAE Loss: 0.099, R2: 0.713
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.701

Val MAE Loss: 0.092
Val R2 Score: 0.719
***************************************************

Epoch: 304
Batch 100, MAE Loss: 0.097, R2: 0.711
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.703

Val MAE Loss: 0.093
Val R2 Score: 0.695
***************************************************

Epoch: 305
Batch 100, MAE Loss: 0.099, R2: 0.704
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.710

Val MAE Loss: 0.093
Val R2 Score: 0.696
***************************************************

Epoch: 306
Batch 100, MAE Loss: 0.098, R2: 0.710
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.706

Val MAE Loss: 0.089
Val R2 Score: 0.712
***************************************************

Epoch: 307
Batch 100, MAE Loss: 0.098, R2

Batch 100, MAE Loss: 0.097, R2: 0.708
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.710

Val MAE Loss: 0.094
Val R2 Score: 0.706
***************************************************

Epoch: 346
Batch 100, MAE Loss: 0.099, R2: 0.712
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.705

Val MAE Loss: 0.095
Val R2 Score: 0.705
***************************************************

Epoch: 347
Batch 100, MAE Loss: 0.100, R2: 0.706
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.711

Val MAE Loss: 0.093
Val R2 Score: 0.709
***************************************************

Epoch: 348
Batch 100, MAE Loss: 0.099, R2: 0.709
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.713

Val MAE Loss: 0.093
Val R2 Score: 0.709
***************************************************

Epoch: 349
Batch 100, MAE Loss: 0.098, R2: 0.715
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.704

Val MAE Loss: 0.092
Val R2 Score: 0.712
***************************************************

Epoch: 350
Batch 100, MAE Loss: 0.099, R2

Batch 100, MAE Loss: 0.097, R2: 0.708
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.713

Val MAE Loss: 0.092
Val R2 Score: 0.716
***************************************************

Epoch: 389
Batch 100, MAE Loss: 0.098, R2: 0.707
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.706

Val MAE Loss: 0.091
Val R2 Score: 0.715
***************************************************

Epoch: 390
Batch 100, MAE Loss: 0.097, R2: 0.715
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.706

Val MAE Loss: 0.091
Val R2 Score: 0.720
***************************************************

Epoch: 391
Batch 100, MAE Loss: 0.097, R2: 0.710
Epoch Train MAE Loss: 0.060, Epoch Train R2: 0.704

Val MAE Loss: 0.090
Val R2 Score: 0.726
***************************************************

Epoch: 392
Batch 100, MAE Loss: 0.099, R2: 0.706
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.712

Val MAE Loss: 0.093
Val R2 Score: 0.710
***************************************************

Epoch: 393
Batch 100, MAE Loss: 0.099, R2

Batch 100, MAE Loss: 0.097, R2: 0.717
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.706

Val MAE Loss: 0.090
Val R2 Score: 0.725
***************************************************

Epoch: 432
Batch 100, MAE Loss: 0.099, R2: 0.705
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.712

Val MAE Loss: 0.090
Val R2 Score: 0.708
***************************************************

Epoch: 433
Batch 100, MAE Loss: 0.097, R2: 0.716
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.705

Val MAE Loss: 0.091
Val R2 Score: 0.716
***************************************************

Epoch: 434
Batch 100, MAE Loss: 0.098, R2: 0.714
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.713

Val MAE Loss: 0.091
Val R2 Score: 0.710
***************************************************

Epoch: 435
Batch 100, MAE Loss: 0.099, R2: 0.717
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.710

Val MAE Loss: 0.091
Val R2 Score: 0.716
***************************************************

Epoch: 436
Batch 100, MAE Loss: 0.099, R2

Batch 100, MAE Loss: 0.098, R2: 0.710
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.713

Val MAE Loss: 0.093
Val R2 Score: 0.711
***************************************************

Epoch: 475
Batch 100, MAE Loss: 0.099, R2: 0.711
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.712

Val MAE Loss: 0.094
Val R2 Score: 0.707
***************************************************

Epoch: 476
Batch 100, MAE Loss: 0.098, R2: 0.711
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.711

Val MAE Loss: 0.091
Val R2 Score: 0.700
***************************************************

Epoch: 477
Batch 100, MAE Loss: 0.098, R2: 0.716
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.709

Val MAE Loss: 0.092
Val R2 Score: 0.709
***************************************************

Epoch: 478
Batch 100, MAE Loss: 0.098, R2: 0.715
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.706

Val MAE Loss: 0.092
Val R2 Score: 0.714
***************************************************

Epoch: 479
Batch 100, MAE Loss: 0.099, R2

Batch 100, MAE Loss: 0.096, R2: 0.708
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.709

Val MAE Loss: 0.090
Val R2 Score: 0.717
***************************************************

Epoch: 518
Batch 100, MAE Loss: 0.097, R2: 0.715
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.708

Val MAE Loss: 0.089
Val R2 Score: 0.716
***************************************************

Epoch: 519
Batch 100, MAE Loss: 0.098, R2: 0.711
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.715

Val MAE Loss: 0.094
Val R2 Score: 0.699
***************************************************

Epoch: 520
Batch 100, MAE Loss: 0.099, R2: 0.707
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.707

Val MAE Loss: 0.093
Val R2 Score: 0.704
***************************************************

Epoch: 521
Batch 100, MAE Loss: 0.099, R2: 0.720
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.706

Val MAE Loss: 0.091
Val R2 Score: 0.717
***************************************************

Epoch: 522
Batch 100, MAE Loss: 0.098, R2

Batch 100, MAE Loss: 0.099, R2: 0.703
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.717

Val MAE Loss: 0.090
Val R2 Score: 0.724
***************************************************

Epoch: 561
Batch 100, MAE Loss: 0.099, R2: 0.710
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.706

Val MAE Loss: 0.090
Val R2 Score: 0.706
***************************************************

Epoch: 562
Batch 100, MAE Loss: 0.097, R2: 0.716
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.711

Val MAE Loss: 0.093
Val R2 Score: 0.711
***************************************************

Epoch: 563
Batch 100, MAE Loss: 0.099, R2: 0.706
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.719

Val MAE Loss: 0.091
Val R2 Score: 0.721
***************************************************

Epoch: 564
Batch 100, MAE Loss: 0.099, R2: 0.710
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.711

Val MAE Loss: 0.090
Val R2 Score: 0.720
***************************************************

Epoch: 565
Batch 100, MAE Loss: 0.099, R2

Batch 100, MAE Loss: 0.099, R2: 0.711
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.711

Val MAE Loss: 0.094
Val R2 Score: 0.697
***************************************************

Epoch: 604
Batch 100, MAE Loss: 0.099, R2: 0.713
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.712

Val MAE Loss: 0.092
Val R2 Score: 0.725
***************************************************

Epoch: 605
Batch 100, MAE Loss: 0.097, R2: 0.714
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.711

Val MAE Loss: 0.091
Val R2 Score: 0.708
***************************************************

Epoch: 606
Batch 100, MAE Loss: 0.100, R2: 0.702
Epoch Train MAE Loss: 0.058, Epoch Train R2: 0.723

Val MAE Loss: 0.092
Val R2 Score: 0.700
***************************************************

Epoch: 607
Batch 100, MAE Loss: 0.098, R2: 0.701
Epoch Train MAE Loss: 0.059, Epoch Train R2: 0.710

Val MAE Loss: 0.093
Val R2 Score: 0.705
***************************************************

Epoch: 608
Batch 100, MAE Loss: 0.099, R2

In [None]:
# We can now plot and save the results

# plot the training and validation loss
plt.plot(train_losses, label='Training Loss')
plt.plot(val_losses, label='Validation Loss')
plt.legend()
plt.title('Loss value at every epoch')
plt.xlabel('Epoch (1-1000)')
plt.ylabel('Average MAE')

# save the plot to a local directory
dir_path = r'C:\Users\usuario\RecogniChess\HW4\Snapshots\LSTM\k=6\TEST3'
if not os.path.exists(dir_path):
    os.makedirs(dir_path)
plt.savefig(os.path.join(dir_path, 'loss.png'))

# plot the training and validation R2 scores
plt.figure()
plt.plot(train_r2s, label='Training R2')
plt.plot(val_r2s, label='Validation R2')
plt.legend()
plt.title('R2 Score at every epoch')
plt.xlabel('Epoch (1-1000)')
plt.ylabel('Average Coefficient of determination (R2) value')

# save the plot to a local directory
plt.savefig(os.path.join(dir_path, 'r2.png'))


In [None]:
# Find highest training and validation R2 values
highest_train_r2 = max(train_r2s)
highest_val_r2 = max(val_r2s)

# Find lowest training and validation losses
lowest_train_loss = min(train_losses)
lowest_val_loss = min(val_losses)

In [None]:
# Set directory path and filename for output file
output_dir = "C:/Users/usuario/RecogniChess/HW4/Snapshots/LSTM/k=6/TEST3"
output_filename = "results.txt"

# Create output directory if it doesn't already exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Open output file for writing
with open(os.path.join(output_dir, output_filename), 'w') as f:
    
    # Write highest train R2 to file
    f.write("The highest train R2 was: " + str(highest_train_r2) + "\n")
    
    # Write highest val R2 to file
    f.write("The highest val R2 was: " + str(highest_val_r2) + "\n")
    
    # Write lowest train loss to file
    f.write("The lowest train loss was: " + str(lowest_train_loss) + "\n")
    
    # Write lowest val loss to file
    f.write("The lowest val loss was: " + str(lowest_val_loss) + "\n")
    
# Print confirmation message
print("Results saved to " + os.path.join(output_dir, output_filename))
