In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [24]:
# Load the weather data
data = pd.read_csv('./data/stats_GC.csv', index_col = 0, parse_dates = True) # Replace 'weather_data.csv' with your dataset filename
data = data[['hs','hmax','tp','tm01','tm02','dpm','dm','dspr','swe']]
# Prepare the data
target_column = 'hs'  # Replace 'parameter_to_predict' with your target parameter column name

# Normalize the data
scaler = MinMaxScaler()
data[target_column] = scaler.fit_transform(data[target_column].values.reshape(-1, 1))

# Split the data into training and testing sets
train_size = int(len(data) * 0.8)  # 80% for training, 20% for testing
train_data = data[:train_size]
test_data = data[train_size:]

# Convert the data into sequences
def create_sequences(data, sequence_length, target_length):
    sequences = []
    for i in range(len(data) - sequence_length - target_length + 1):
        sequence = data[i:i+sequence_length]
        target = data[i+sequence_length:i+sequence_length+target_length]
        sequences.append((sequence, target))
    return sequences

sequence_length = 7 * 24  # Length of input sequence
target_length = 24  # Length of target sequence (24 hours)
train_sequences = create_sequences(train_data, sequence_length, target_length)
test_sequences = create_sequences(test_data, sequence_length, target_length)

# Convert sequences to PyTorch tensors
def to_tensor(data):
    x = torch.tensor([sequence[0].values for sequence in data]).float()
    y = torch.tensor([sequence[1][target_column].values for sequence in data]).float()
    return x, y

train_x, train_y = to_tensor(train_sequences)
test_x, test_y = to_tensor(test_sequences)

# Define the device for training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model
input_size = train_x.size(-1)
hidden_size = 64
num_layers = 2
output_size = target_length
model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
batch_size = 16

model.train()
for epoch in range(num_epochs):
    for i in range(0, len(train_x), batch_size):
        batch_x = train_x[i:i+batch_size].to(device)

In [14]:
train_sequences

[                           hs      hmax        tp      tm01      tm02  \
 time                                                                    
 2010-01-01 00:00:00  0.230421  2.215352  7.695021  6.008552  5.585557   
 2010-01-01 01:00:00  0.229504  2.210356  7.682237  5.964159  5.515502   
 2010-01-01 02:00:00  0.229738  2.215275  7.675863  5.887595  5.415151   
 2010-01-01 03:00:00  0.231402  2.231501  7.678613  5.799396  5.309187   
 2010-01-01 04:00:00  0.233902  2.252947  7.689030  5.742883  5.251201   
 2010-01-01 05:00:00  0.236486  2.273790  7.697777  5.714181  5.224169   
 2010-01-01 06:00:00  0.238925  2.292762  7.700089  5.708382  5.222059   
 2010-01-01 07:00:00  0.240106  2.300670  7.693833  5.741251  5.261688   
 2010-01-01 08:00:00  0.241917  2.313962  7.690864  5.762911  5.285067   
 2010-01-01 09:00:00  0.242513  2.316499  7.683423  5.815367  5.347378   
 2010-01-01 10:00:00  0.242684  2.314615  7.688308  5.903964  5.449686   
 
                            dpm     

In [None]:
[sequence['hs'].values for sequence in train_sequences]

KeyError: -1