In [None]:
!pip install optuna-dashboard
!pip install optuna
!pip install pyngrok


In [None]:
import optuna
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import os
import pandas as pd
import torch.optim.lr_scheduler as lr_scheduler

In [None]:
# Select the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. Later cells place tensors and the model on this device.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

In [None]:
# Read the spreadsheet into a DataFrame; the .xlsx file is parsed with openpyxl.
df = pd.read_excel(
    "/kaggle/input/weather-cond1/4_year_df.xlsx",
    engine="openpyxl",
)

In [None]:
# Min-max scale every column of the frame.
# NOTE(review): the scaler is fitted on the full frame, i.e. before the
# train/validation split made later in the notebook, so validation rows
# contribute to the scaling statistics — confirm this is intended.
scaler = MinMaxScaler()
scaled_df = scaler.fit(df).transform(df)

scaled_df

In [None]:
def create_sequences(data, input_length, output_length):
    """Cut a 2-D array into overlapping (input window, target window) pairs.

    A window of ``input_length + output_length`` rows slides over ``data`` one
    row at a time. For each position, the first ``input_length`` rows (all
    columns) become the input and column 0 of the following ``output_length``
    rows becomes the target.

    Args:
        data: 2-D array indexable as ``data[rows, col]``.
        input_length: number of rows in each input window.
        output_length: number of rows in each target window.

    Returns:
        Tuple ``(inputs, targets)`` of NumPy arrays. Both are empty when
        ``data`` is shorter than one full window.
    """
    window = input_length + output_length
    n_windows = len(data) - window + 1

    inputs = [data[start:start + input_length] for start in range(n_windows)]
    # Targets only use the first column of the rows that follow the input window.
    targets = [data[start + input_length:start + window, 0] for start in range(n_windows)]

    return np.array(inputs), np.array(targets)

In [None]:
class WeatherDataset(Dataset):
    """Dataset that pairs each input sample with the target at the same index.

    Args:
        x_data: indexable collection of input samples.
        y_data: indexable collection of targets, one per input sample.

    Raises:
        ValueError: if ``x_data`` and ``y_data`` differ in length.
    """

    def __init__(self, x_data, y_data):
        # Reject misaligned inputs up front: the dataset length comes from
        # x_data alone, so a mismatch would otherwise surface only as an
        # IndexError during iteration or as silently unused targets.
        if len(x_data) != len(y_data):
            raise ValueError(
                f"x_data and y_data must have the same length, "
                f"got {len(x_data)} and {len(y_data)}"
            )
        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.x_data[idx], self.y_data[idx]

In [None]:
def train_val_split(sequences_x, sequences_y, val_percentage):
    """Split paired sequences into training and validation parts by position.

    The first ``val_percentage`` percent of the samples (count truncated to an
    integer) form the validation set; all remaining samples are the training
    set. Nothing is shuffled.

    Args:
        sequences_x: sliceable collection of inputs.
        sequences_y: sliceable collection of targets, aligned with ``sequences_x``.
        val_percentage: share of samples used for validation, in percent.

    Returns:
        Tuple ``(train_x, val_x, train_y, val_y)``.
    """
    cut = int(val_percentage * len(sequences_x) / 100)

    train_x = sequences_x[cut:]
    val_x = sequences_x[:cut]
    train_y = sequences_y[cut:]
    val_y = sequences_y[:cut]

    return train_x, val_x, train_y, val_y

In [None]:
# Window sizes: each sample feeds 700 consecutive rows to the model and
# targets the 300 rows that follow them.
input_length, output_length = 700, 300

sequences_x, sequences_y = create_sequences(
    data=scaled_df,
    input_length=input_length,
    output_length=output_length,
)

In [None]:
# Hold out 25 percent of the windows for validation; the rest is training data.
x_train, x_val, y_train, y_val = train_val_split(
    sequences_x, sequences_y, val_percentage=25
)

In [None]:
# Convert both splits to float32 tensors created on `device`, and pair each
# input array with its target array.
train_dataset = WeatherDataset(
    torch.tensor(x_train, dtype=torch.float32, device=device),
    torch.tensor(y_train, dtype=torch.float32, device=device),
)
val_dataset = WeatherDataset(
    torch.tensor(x_val, dtype=torch.float32, device=device),
    torch.tensor(y_val, dtype=torch.float32, device=device),
)

In [None]:
class LSTM_weather(nn.Module):
    """LSTM regressor that emits one non-negative value per trailing time step.

    The input runs through a (possibly stacked) batch-first LSTM. The LSTM
    outputs of the last ``output_length`` time steps are each mapped to a
    single value by a shared linear layer followed by ReLU.

    Args:
        input_dim: number of features per time step.
        hidden_size: LSTM hidden-state width.
        num_layers: number of stacked LSTM layers.
        seq_length: input sequence length (stored only; not used in forward).
        output_length: number of trailing time steps to turn into predictions.
        dropout: dropout probability passed to ``nn.LSTM``.
    """

    def __init__(self, input_dim, hidden_size, num_layers, seq_length, output_length, dropout):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.seq_length = seq_length
        self.output_length = output_length

        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return predictions of shape ``(batch, output_length, 1)`` for ``x``.

        ``x`` is indexed batch-first: ``(batch, time, input_dim)``.
        """
        # Fresh zero hidden/cell states for every call, placed on x's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = torch.zeros(state_shape, device=x.device)
        c_0 = torch.zeros(state_shape, device=x.device)

        hidden_seq, _ = self.lstm(x, (h_0, c_0))

        # Only the last `output_length` time steps are turned into predictions.
        tail = hidden_seq[:, -self.output_length:, :]

        return self.relu(self.fc(tail))

In [None]:
def training_and_val(model, optimizer, criterion, train_loader, val_loader, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, then return its mean validation loss.

    Validation runs once, after the final epoch. The returned value is the
    average of the per-batch losses over ``val_loader``. The model is left in
    eval mode.

    Args:
        model: module whose output, after ``squeeze(-1)``, is compared with the targets.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: loss callable taking ``(prediction, target)``.
        train_loader: iterable of ``(x_batch, y_batch)`` training pairs.
        val_loader: sized iterable of ``(x_batch, y_batch)`` validation pairs.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        float: mean validation loss per batch.

    Raises:
        ValueError: if ``val_loader`` contains no batches.
    """
    # Fail before any training: an empty validation loader (e.g. drop_last=True
    # with a batch size larger than the validation set) would otherwise end in
    # a division by zero after all epochs have already run.
    num_val_batches = len(val_loader)
    if num_val_batches == 0:
        raise ValueError("val_loader yields no batches; cannot compute a validation loss")

    for _ in range(num_epochs):
        model.train()
        for x_batch, y_batch in train_loader:
            optimizer.zero_grad()

            # No `.to(device)` on the output: it is produced by the model
            # itself, and omitting the call removes the dependency on a
            # notebook-level `device` global (the validation loop never used it).
            output = model(x_batch)
            loss = criterion(output.squeeze(-1), y_batch)
            loss.backward()

            # Clip the global gradient norm to 1.0 before the parameter update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

    model.eval()
    val_loss1 = 0.0
    with torch.no_grad():
        for x_val_batch, y_val_batch in val_loader:
            output_value_val = model(x_val_batch)
            loss_val = criterion(output_value_val.squeeze(-1), y_val_batch)
            val_loss1 += loss_val.item()

    # Average over the validation batches.
    return val_loss1 / num_val_batches


In [None]:
def objective(trial):
    """Optuna objective: train one LSTM configuration and return its validation loss.

    Samples the hidden size, number of layers, dropout (only for stacked
    LSTMs), learning rate, optimizer and batch size from ``trial``, trains the
    model for 10 epochs on ``train_dataset`` and returns the mean validation
    loss on ``val_dataset``.

    Args:
        trial: Optuna trial used to sample the hyper-parameters.

    Returns:
        float: validation loss to be minimized.
    """
    # Read the model dimensions from the prepared tensors instead of repeating
    # literals, so they cannot drift from the data built earlier in the
    # notebook: x_data is (windows, time steps, features), y_data is
    # (windows, target steps).
    _, seq_length, input_dim = train_dataset.x_data.shape
    output_length = train_dataset.y_data.shape[-1]

    hidden_size = trial.suggest_int('hidden_size', 300, 400)
    num_layers = trial.suggest_int('num_layers', 1, 2)

    # Dropout is only sampled for stacked LSTMs; a single layer uses none.
    if num_layers > 1:
        dropout = trial.suggest_float('dropout', 0.1, 0.4)
    else:
        dropout = 0.0

    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform
    # and samples from the same log-uniform range.
    learning_rate = trial.suggest_float('learning_rate', 5e-4, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

    model = LSTM_weather(input_dim, hidden_size, num_layers, seq_length, output_length, dropout).to(device)
    criterion = nn.MSELoss()

    # Look the optimizer class up by name in torch.optim ('Adam' or 'SGD').
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=learning_rate)

    num_epochs = 10
    val_loss = training_and_val(model, optimizer, criterion, train_loader, val_loader, num_epochs)

    return val_loss

In [None]:
# Store the trials in a local SQLite file and search for the lowest
# validation loss over 50 trials.
study = optuna.create_study(
    direction='minimize',
    storage="sqlite:///first_try.db",
)

study.optimize(objective, n_trials=50)
