In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import torch.nn.functional as F

In [3]:
# Sanity check: show whether PyTorch reports a usable CUDA device in this environment.
torch.cuda.is_available()

True

In [7]:
# Read the four preprocessed parquet files: the holdout_1 x/y pair becomes
# train_x/train_y and the thursdays x/y pair becomes test_x/test_y.
train_x, train_y = (
    pd.read_parquet("../../data/preprocessed/holdout_1.pq"),
    pd.read_parquet("../../data/preprocessed/holdout_1_y.pq"),
)
test_x, test_y = (
    pd.read_parquet("../../data/preprocessed/thursdays.pq"),
    pd.read_parquet("../../data/preprocessed/thursdays_y.pq"),
)

In [8]:
# Number of rows in the training target frame.
train_y.shape[0]

478807

In [9]:

# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the notebook still runs on machines without a GPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Column of train_y that the model is trained to predict.
target_column = 'ProzessData_ActData_AB1_Temperature_DR1_MassMixingStage'

# Min-max scale the input features, skipping the first column of train_x.
# NOTE(review): this assumes column 0 is the timestamp -- confirm against the
# parquet schema and adjust the slice if it is not.
scaler_x = MinMaxScaler()
train_x_scaled = scaler_x.fit_transform(train_x.iloc[:, 1:])

# Min-max scale the target column. The double brackets keep it 2-D
# (n_samples, 1), which is the layout the scaler expects.
scaler_y = MinMaxScaler()
train_y_scaled = scaler_y.fit_transform(train_y[[target_column]])

# Build float32 tensors directly from the scaled arrays (avoids the extra
# float64 copy that torch.tensor(...).float() would create).
# unsqueeze(1) inserts a length-1 axis at position 1, giving
# (n_samples, 1, n_features); the model below reads axis 1 as the time axis.
train_x_tensor = torch.tensor(train_x_scaled, dtype=torch.float32).unsqueeze(1)
train_y_tensor = torch.tensor(train_y_scaled, dtype=torch.float32)

# Shuffled mini-batches of 32 (features, target) pairs.
train_dataset = TensorDataset(train_x_tensor, train_y_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# LSTM model
class LSTMModel(nn.Module):
    """Single-layer LSTM followed by dropout and a linear output layer.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of values predicted per sample.
        dropout: Dropout probability applied to the last time step's LSTM
            output before the linear layer. Defaults to 0.2, the value that
            was previously hard-coded.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.2):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Predict from the LSTM output at the final time step.

        Args:
            x: Tensor laid out as (batch, seq_len, input_dim), since the LSTM
                is built with batch_first=True.

        Returns:
            Tensor of shape (batch, output_dim).
        """
        lstm_out, _ = self.lstm(x)
        # Only the last time step feeds the prediction head.
        last_step = lstm_out[:, -1, :]
        return self.fc(self.dropout(last_step))

# Layer sizes: the input and output widths are read from the last axis of the
# prepared tensors; the hidden width is a hand-picked example value.
input_dim = train_x_tensor.size(-1)
hidden_dim = 50
output_dim = train_y_tensor.size(-1)

# Build the network and move its parameters to the selected device.
model = LSTMModel(input_dim, hidden_dim, output_dim)
model = model.to(device)

# Mean absolute error objective, optimised with Adam.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Training loop
num_epochs = 150

# Explicitly enable training mode so dropout is active even when this cell is
# re-run after the model was switched to eval mode earlier in the session.
model.train()

for epoch in range(num_epochs):
    total_loss = 0.0  # sum of per-batch mean losses
    total_mae = 0.0   # sum of absolute errors over every element seen
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)

        # Optimisation objective. `criterion` is nn.L1Loss, i.e. mean absolute
        # error (not MSE), averaged over the batch.
        loss = criterion(outputs, labels)
        total_loss += loss.item()

        # Reporting metric: summed absolute error for this batch. It is
        # computed on detached outputs so no autograd graph is built for it.
        mae = F.l1_loss(outputs.detach(), labels, reduction='sum').item()
        total_mae += mae

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

    # avg_loss averages the per-batch means; avg_mae divides the summed
    # absolute error by the number of samples in the dataset.
    avg_loss = total_loss / len(train_loader)
    avg_mae = total_mae / len(train_loader.dataset)

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, MAE: {avg_mae:.4f}')

Epoch 1/10, Loss: 0.0249, MAE: 0.0249
Epoch 2/10, Loss: 0.0221, MAE: 0.0221
Epoch 3/10, Loss: 0.0214, MAE: 0.0214
Epoch 4/10, Loss: 0.0211, MAE: 0.0211


KeyboardInterrupt: 

In [None]:
# `pickle` is not among the imports in the notebook's import cell, so it is
# imported here; without it this cell raises NameError.
import pickle

# Serialise the whole model object to disk.
# NOTE(review): unpickling this file needs the LSTMModel class definition to be
# available at load time; PyTorch's docs recommend saving model.state_dict()
# instead -- consider switching if the file is consumed outside this notebook.
with open('../../models/lstm_1.pkl', 'wb') as file:
    pickle.dump(model, file)