In [14]:
import torch
from tqdm import tqdm
import sys
import logging
logging.basicConfig(level=logging.INFO)

original_sys_path = sys.path.copy()

sys.path.insert(0, "/Users/florian/Documents/github/study/IoT/IoT/main/")
import dataprep as dp 
import foo

sys.path = original_sys_path

In [15]:
FILEPATH = "/Users/florian/Documents/github/study/IoT/IoT/data/aggregated_data/agg_hourly.parquet"
window_size = 50
train_test_split_ratio = 0.8
batch_size = 64

features = ["tmp", "CO2", "hum", "VOC"]
target = "tmp"
scaling = True

In [16]:
data = dp.create_DataLoader(filepath= FILEPATH, window_size= window_size, train_ratio= train_test_split_ratio, batch_size= batch_size, features= features, target= target, scaling= scaling)

In [17]:
import optuna
from copy import deepcopy
import os

BASE_PATH = os.getenv('BASE_PATH', "/Users/florian/Documents/github/study/IoT/IoT/")
sys.path.insert(0, os.path.join(BASE_PATH, "main"))

# Set parameters
FILEPATH = os.path.join(BASE_PATH, "data/aggregated_data/agg_hourly.parquet")
FILEPATH_STUDY = os.path.join(BASE_PATH, "models/train/LSTM_trials.csv")

NUM_EPOCHS = 10
LEARNING_RATE = 0.0001
FILEPATH_BEST_MODEL = "/Users/florian/Documents/github/study/IoT/IoT/models/best_LSTM.pth"

models_and_losses = {}

def objective(trial):
    hidden_size = trial.suggest_categorical('hidden_size', [50, 100, 150])
    num_layers = trial.suggest_categorical('num_layers', [1, 2, 3])
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh'])
    if num_layers == 1:
        dropout = 0
    else:    
        dropout = trial.suggest_categorical('dropout', [0, 0.5, 1])

    model = foo.LSTM(input_size= data["train"].x.shape[2], hidden_size= hidden_size, num_layers= num_layers, output_size= 1, dropout= dropout, activation= activation)
    optimizer = torch.optim.Adam(model.parameters(), lr= LEARNING_RATE)
    model.train()

    for epoch in range(NUM_EPOCHS):
        loop = tqdm(enumerate(data["train"].loader), total=len(data["train"].loader), leave=True)
        for batch_idx, (features, target) in loop:
            optimizer.zero_grad(set_to_none= True)
            output = model(features)
            loss = torch.nn.MSELoss()(output, target)
            loss.backward()
            optimizer.step()

            loop.set_description(f"Epoch [{epoch+1}/{NUM_EPOCHS}]")
            loop.set_postfix(loss=loss.item(), lr= LEARNING_RATE)

        trial.report(loss.item(), epoch)

        if trial.should_prune():
            logging.info("Trial was pruned at epoch {}.".format(epoch))
            raise optuna.exceptions.TrialPruned()

    models_and_losses[trial.number] = {"model": deepcopy(model), "loss": loss.item()}

    return loss.item()

study = optuna.create_study(direction= 'minimize', study_name= 'LSTM')
study.optimize(objective, n_trials= 100)

best_params = study.best_params
best_loss = study.best_value
min_loss = min_loss = min(models_and_losses.items(), key=lambda x: x[1]['loss'])

logging.info(f"Beste Hyperparameter: {best_params}")
logging.info(f"Niedrigster Verlust: {best_loss}")
logging.info(f"Modell mit geringstem Loss: {min_loss}")
torch.save(min_loss[1]['model'].state_dict(), FILEPATH_BEST_MODEL)

[I 2024-06-20 19:11:02,021] A new study created in memory with name: LSTM
Epoch [1/10]:  79%|███████▉  | 350/443 [00:11<00:03, 30.29it/s, loss=1.54, lr=0.0001] 
[W 2024-06-20 19:11:13,632] Trial 0 failed with parameters: {'hidden_size': 50, 'num_layers': 2, 'activation': 'relu', 'dropout': 0} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/optuna/study/_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/fm/5lbdpfj928d6sqhy_19cp6ww0000gn/T/ipykernel_36452/1713140457.py", line 37, in objective
    loss.backward()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/_tensor.py", line 522, in backward
    torch.autograd.backward(
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/torch/autograd/__init__.py", line

KeyboardInterrupt: 

In [None]:
def objective(trial):
    hidden_size = trial.suggest_categorical('hidden_size', [50, 100, 150])
    num_layers = trial.suggest_categorical('num_layers', [1, 2, 3, 4])
    activation = trial.suggest_categorical('activation', ['relu', 'sigmoid', 'tanh'])
    if num_layers == 1:
        dropout = 0
    else:    
        dropout = trial.suggest_categorical('dropout', [0, 0.5, 1])

    model = foo.LSTM(input_size= data["train"].x.shape[2], hidden_size= hidden_size, num_layers= num_layers, output_size= 1, dropout= dropout, activation= activation)
    optimizer = torch.optim.Adam(model.parameters(), lr= LEARNING_RATE)
    model.train()

    for epoch in range(NUM_EPOCHS):
        loop = tqdm(enumerate(data["train"].loader), total=len(data["train"].loader), leave=True)
        for batch_idx, (features, target) in loop:
            optimizer.zero_grad(set_to_none= True)
            output = model(features)
            loss = torch.nn.MSELoss()(output, target)
            loss.backward()
            optimizer.step()

            loop.set_description(f"Epoch [{epoch+1}/{NUM_EPOCHS}]")
            loop.set_postfix(loss=loss.item(), lr= LEARNING_RATE)
            loop.update()

        trial.report(loss.item(), epoch)

        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return loss.item()

study = optuna.create_study(direction= 'minimize', study_name= 'LSTM')
study.optimize(objective, n_trials= 100)

best_params = study.best_params
best_loss = study.best_value
trials = study.trials_dataframe()
trials.to_csv(FILEPATH_STUDY)

logging.info(f"Beste Hyperparameter: {best_params}")
logging.info(f"Niedrigster Verlust: {best_loss}")

In [9]:
model = foo.LSTM(input_size= data["train"].x.shape[2], hidden_size= 100, num_layers= 3, output_size= 3, dropout= 0, activation= 'sigmoid')

In [10]:
from tqdm import tqdm

num_epochs = 10
learning_rate = 0.0001
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_loader = data["train"].loader

model.train()
for epoch in range(num_epochs):
    loop = tqdm(enumerate(train_loader), total=len(train_loader), leave=True)
    for i, (features, target) in loop:
        optimizer.zero_grad(set_to_none=True)
        output = model(features)
        #target = target.unsqueeze(1)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # Update progress bar
        loop.set_description(f"Epoch [{epoch+1}/{num_epochs}]")
        loop.set_postfix(loss=loss.item(), lr= learning_rate)


# Final output
print(f"Training completed. Final loss: {loss.item()}, final learning rate: {learning_rate}")

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
Epoch [1/10]: 100%|██████████| 443/443 [00:28<00:00, 15.48it/s, loss=0.549, lr=0.0001]
Epoch [2/10]: 100%|██████████| 443/443 [00:28<00:00, 15.37it/s, loss=0.506, lr=0.0001]
Epoch [3/10]: 100%|██████████| 443/443 [00:28<00:00, 15.75it/s, loss=0.403, lr=0.0001]
Epoch [4/10]: 100%|██████████| 443/443 [00:30<00:00, 14.64it/s, loss=0.62, lr=0.0001] 
Epoch [5/10]: 100%|██████████| 443/443 [00:28<00:00, 15.61it/s, loss=0.457, lr=0.0001]
Epoch [6/10]: 100%|██████████| 443/443 [00:28<00:00, 15.68it/s, loss=0.567, lr=0.0001]
Epoch [7/10]: 100%|██████████| 443/443 [00:28<00:00, 15.33it/s, loss=0.471, lr=0.0001]
Epoch [8/10]: 100%|██████████| 443/443 [00:28<00:00, 15.51it/s, loss=0.598, lr=0.0001]
Epoch [9/10]: 100%|██████████| 443/443 [00:28<00:00, 15.56it/s, loss=0.712, lr=0.0001]
Epoch [10/10]: 100%|██████████| 443/443 [00:28<00:00, 15.43it/s, loss=0.188, lr=0.0001]

Training completed. Final loss: 0.18818385899066925, final learning rate: 0.0001





In [11]:
model.eval()  
test_loader = data["test"].loader

test_features, test_targets = next(iter(test_loader)) 

with torch.no_grad():  
    predictions = model(test_features)  

from sklearn.preprocessing import StandardScaler
feature_index = 0
scaler = data["test"].scaler


feature_scaler = StandardScaler()
feature_scaler.mean_ = scaler.mean_[feature_index]
feature_scaler.scale_ = scaler.scale_[feature_index]

inversed_predictions = feature_scaler.inverse_transform(predictions)
inversed_targets = feature_scaler.inverse_transform(test_targets)


train_loss = criterion(predictions, test_targets)
print('Test Loss: {:.4f}'.format(train_loss.item()))

Test Loss: 0.1356


  return F.mse_loss(input, target, reduction=self.reduction)


In [12]:
import plotly.graph_objects as go

def plt_pred(test_targets, predictions):
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=test_targets, mode='lines', name='test_targets'))
    fig.add_trace(go.Scatter(y=predictions, mode='lines', name='Predictions'))
    fig.update_layout(title='test_targets vs predictions', xaxis_title='Index', yaxis_title='Value')
    fig.show()

plt_pred(inversed_targets.reshape(-1).tolist(), inversed_predictions.reshape(-1).tolist())