In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import torch.nn.functional as F
import numpy as np
import pickle



In [2]:
torch.cuda.is_available()

False

In [3]:
train_x = pd.read_parquet("../../data/preprocessed/holdout_1.pq") # , engine='pyarrow'
train_y = pd.read_parquet("../../data/preprocessed/holdout_1_y.pq")
test_x = pd.read_parquet("../../data/preprocessed/thursdays.pq")
test_y = pd.read_parquet("../../data/preprocessed/thursdays_y.pq")

In [8]:
len(train_y)

478807

In [33]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
target_column = 'ProzessData_ActData_AB1_Temperature_DR1_MassMixingStage'

# Normalize features in train_x (excluding the timestamp column)
scaler_x = MinMaxScaler()
train_x_scaled = scaler_x.fit_transform(train_x.iloc[:, 1:])  # Adjust if the first column isn't the timestamp

# Normalize the target column in train_y
scaler_y = MinMaxScaler()
train_y_scaled = scaler_y.fit_transform(train_y[[target_column]])

# Convert to PyTorch tensors
train_x_tensor = torch.tensor(train_x_scaled).float().unsqueeze(1)  # Add time step dimension
train_y_tensor = torch.tensor(train_y_scaled).float()

# DataLoader
train_dataset = TensorDataset(train_x_tensor, train_y_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        out = self.dropout(lstm_out[:, -1, :])
        out = self.fc(out)
        return out

# Model initialization
input_dim = train_x_tensor.shape[2]  # Number of features
hidden_dim = 50  # Example value
output_dim = train_y_tensor.shape[1]  # Should be 1 as we're predicting a single target

model = LSTMModel(input_dim, hidden_dim, output_dim).to(device)

# Loss and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop

num_epochs = 30
for epoch in range(num_epochs):
    total_loss = 0
    total_mae = 0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        
        # Calculate loss (MSE)
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # Calculate MAE
        #print(type(F.l1_loss(outputs, labels, reduction='sum').item()))
        list1 = outputs.tolist()
        list2 = labels.tolist()
        list1 = np.array(list1).flatten()
        list2 = np.array(list2).flatten()
        abweichung = []
        for a, b in zip(list1, list2):
            
            abweichung.append(abs(a - b))    # Give points
        points = []
        for diff in abweichung:
            if diff < 0.05:
                points.append(1.0)
            elif 0.05 <= diff < 0.1:
                points.append(0.5)
            elif 0.1 <= diff < 0.5:
                points.append(0.25)
            else:
                points.append(0) 

        list1 = points

        points2 = [] 
        consecutive_count =0      
        for i in range(0, len(list1)):
            
            if list1[i] >=0.5 and consecutive_count == 0 :

                points2.append (1)

            elif list1[i] >=0.5 and consecutive_count != 0:

                if consecutive_count == 1:
                    points2.append (.5)
                    points2.append(1)
        
                elif 2 <= consecutive_count <= 10:
                    points2.extend([0.25] * consecutive_count)
                    points2.append(1)
                else:
                    points2.extend([0] * consecutive_count)
                    points2.append(1)

                consecutive_count = 0# Reset consecutive count

            else:
                consecutive_count+=1

        
        if consecutive_count == 1:
            points2.append (.5)
        elif 2 <= consecutive_count <= 0.01:
            points2.extend([0.25] * consecutive_count)
        else:
            points2.extend([0] * consecutive_count)





        total = sum(points)+sum(points2) 
        maxpoint = len(points)*2

        #print(" Punkte aus abweichungslist:", sum(points), "Punkte aus Zeitabweichung:", sum(points2), "max Punkte:",len(points))

        #print(total/maxpoint)
        
        mae = ((maxpoint/total)-1)*100
        total_mae += mae

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

    # Average loss and MAE over the epoch
    avg_loss = total_loss / len(train_loader)
    avg_mae = total_mae / len(train_loader.dataset)

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, MAE: {avg_mae:.4f}')
    print(total/maxpoint)

Epoch 1/30, Loss: 0.0244, MAE: 0.1211
0.967391304347826
Epoch 2/30, Loss: 0.0219, MAE: 0.0601
0.967391304347826
Epoch 3/30, Loss: 0.0214, MAE: 0.0733
0.9510869565217391
Epoch 4/30, Loss: 0.0211, MAE: 0.0960
0.9782608695652174
Epoch 5/30, Loss: 0.0209, MAE: 0.1094
0.9782608695652174
Epoch 6/30, Loss: 0.0209, MAE: 0.1170
0.9891304347826086
Epoch 7/30, Loss: 0.0208, MAE: 0.1204
0.9510869565217391
Epoch 8/30, Loss: 0.0208, MAE: 0.1232
0.9891304347826086
Epoch 9/30, Loss: 0.0207, MAE: 0.1234
0.9782608695652174
Epoch 10/30, Loss: 0.0206, MAE: 0.1251
0.9782608695652174
Epoch 11/30, Loss: 0.0207, MAE: 0.1268
0.907608695652174
Epoch 12/30, Loss: 0.0206, MAE: 0.1273
0.9782608695652174
Epoch 13/30, Loss: 0.0206, MAE: 0.1279
0.9293478260869565
Epoch 14/30, Loss: 0.0206, MAE: 0.1289
0.967391304347826
Epoch 15/30, Loss: 0.0206, MAE: 0.1298
0.9891304347826086
Epoch 16/30, Loss: 0.0206, MAE: 0.1296
0.9239130434782609
Epoch 17/30, Loss: 0.0206, MAE: 0.1298
0.9619565217391305
Epoch 18/30, Loss: 0.0206, 

In [32]:
import torch

# Assuming your model is named 'model'
torch.save(model, '../../models/lstm_philip2.pth')

In [28]:
with open('../../models/lstm_philip2.pkl', 'wb') as file:
    pickle.dump(model, file)

NameError: name 'pickle' is not defined