In [1]:
import pandas as pd
import dataprep as dp

In [2]:
df = pd.read_parquet("output.parquet")
a1 = ["a101", "a102", "a103", "a106", "a107", "a108", "a111", "a112"]
df_a1 = df[df["device_id"].isin(a1)]

In [3]:
df = dp.group_data(df_a1, "h")

In [4]:
import plotly.express as px


px.scatter(df, x= "date_time", y= "tmp", color= "device_id")

In [5]:
df_cutoff = df.copy
df_cutoff = dp.cutoff_data(df, "2023-09-04", "2023-10-01")
df_mean = dp.build_lvl_df(df_cutoff, a1, ["tmp", "hum", "CO2", "VOC"], reset_ind= True)
df_mean

Unnamed: 0,tmp,hum,CO2,VOC
0,24.964333,50.809000,438.800000,595.400000
1,25.578571,49.506286,436.800000,665.800000
2,26.663306,46.451139,460.833333,625.000000
3,26.909722,43.444583,446.333333,595.000000
4,27.170694,39.356250,423.000000,613.166667
...,...,...,...,...
635,24.424524,48.633810,463.000000,1031.142857
636,24.261905,48.739167,461.000000,1003.714286
637,24.130048,48.716929,465.714286,974.571429
638,24.031310,48.627381,469.571429,956.857143


In [6]:
from sklearn.preprocessing import StandardScaler
from copy import deepcopy

scaler = StandardScaler()

df_mean_scaled = deepcopy(df_mean)
df_mean_scaled["target"] = df_mean_scaled["tmp"].shift(-1)
df_mean_scaled = scaler.fit_transform(df_mean_scaled)

X = df_mean_scaled[:, :-1]
y = df_mean_scaled[:, -1]

X.shape, y.shape

((640, 4), (640,))

In [7]:
X_train, X_test, y_train, y_test = dp.train_test_split(X, y)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

(torch.Size([630, 4]),
 torch.Size([630]),
 torch.Size([10, 4]),
 torch.Size([10]))

In [8]:
X_train_new, X_test_new = dp.format_tensor(X_train), dp.format_tensor(X_test)
X_train = X_train_new
X_test = X_test_new
y_train = y_train[:-1]
y_test = y_test[:-1]
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([629, 48, 4]),
 torch.Size([9, 48, 4]),
 torch.Size([629]),
 torch.Size([9]))

In [9]:
from torch.utils.data import DataLoader, TensorDataset

batch_size = 120

train_data = TensorDataset(X_train, y_train)
test_data = TensorDataset(X_test, y_test)

train_loader = DataLoader(train_data, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

AttributeError: 'DataLoader' object has no attribute 'shape'

In [10]:
import torch, torch.nn as nn, torch.optim as optim


class LSTM(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

        self.optimizer = optim.Adam(self.parameters(), lr=0.0001)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size)

        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim

class LSTM_1(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.5, activation='relu'):
        super(LSTM_1, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.activation_function = activation

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        else:
            raise ValueError("Invalid activation function, choose from 'relu', 'tanh', or 'sigmoid'")

        self.optimizer = optim.Adam(self.parameters(), lr=0.001)  # Erhöhte Lernrate für schnellere Konvergenz

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))
        out = self.dropout(out[:, -1, :])  # Dropout für Regularisierung
        out = self.fc(out)
        out = self.activation(out)  # Aktivierungsfunktion anwenden
        return out


In [12]:
model = LSTM(input_size=X_train.shape[2], hidden_size=100, num_layers=1, output_size=1)

In [18]:
model = LSTM_1(input_size=X_train.shape[2], hidden_size=100, num_layers=1, output_size=1, dropout=0, activation='relu')

In [19]:
from tqdm import tqdm

num_epochs = 100
learning_rate = 0.0001
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
#scheduler = lr_scheduler.ExponentialLR(optimizer, gamma= 0.99)

model.train()
for epoch in range(num_epochs):
    loop = tqdm(enumerate(train_loader), total=len(train_loader), leave=True)
    for i, (features, target) in loop:
        optimizer.zero_grad(set_to_none=True)
        output = model(features)
        target = target.unsqueeze(1)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # Update progress bar
        loop.set_description(f"Epoch [{epoch+1}/{num_epochs}]")
        loop.set_postfix(loss=loss.item(), lr=0.001)


# Final output
print(f"Training completed. Final loss: {loss.item()}, final learning rate: {learning_rate}")

Epoch [1/100]: 100%|██████████| 6/6 [00:00<00:00, 37.52it/s, loss=0.479, lr=0.001]
Epoch [2/100]: 100%|██████████| 6/6 [00:00<00:00, 39.74it/s, loss=0.472, lr=0.001]
Epoch [3/100]: 100%|██████████| 6/6 [00:00<00:00, 43.68it/s, loss=0.468, lr=0.001]
Epoch [4/100]: 100%|██████████| 6/6 [00:00<00:00, 43.28it/s, loss=0.466, lr=0.001]
Epoch [5/100]: 100%|██████████| 6/6 [00:00<00:00, 40.70it/s, loss=0.466, lr=0.001]
Epoch [6/100]: 100%|██████████| 6/6 [00:00<00:00, 41.89it/s, loss=0.466, lr=0.001]
Epoch [7/100]: 100%|██████████| 6/6 [00:00<00:00, 40.61it/s, loss=0.466, lr=0.001]
Epoch [8/100]: 100%|██████████| 6/6 [00:00<00:00, 41.77it/s, loss=0.466, lr=0.001]
Epoch [9/100]: 100%|██████████| 6/6 [00:00<00:00, 38.79it/s, loss=0.466, lr=0.001]
Epoch [10/100]: 100%|██████████| 6/6 [00:00<00:00, 42.84it/s, loss=0.466, lr=0.001]
Epoch [11/100]: 100%|██████████| 6/6 [00:00<00:00, 27.33it/s, loss=0.466, lr=0.001]
Epoch [12/100]: 100%|██████████| 6/6 [00:00<00:00, 43.36it/s, loss=0.466, lr=0.001]
E

Training completed. Final loss: 0.4651678800582886, final learning rate: 0.0001





In [20]:
model.eval()  # Set the model to evaluation mode

train_features, train_targets = next(iter(train_loader))  # Get a batch of train data
train_targets = train_targets.unsqueeze(1)  # Expand target to match the output shape

with torch.no_grad():  # Disable gradient computation
    predictions = model(train_features)  # Make predictions

# Calculate the mean squared error of the predictions
train_loss = criterion(predictions, train_targets)
print('Test Loss: {:.4f}'.format(train_loss.item()))

Test Loss: 0.1856


In [26]:
predictions.shape

torch.Size([120, 1])

In [23]:
# Angenommen, Ihr `StandardScaler` wurde auf Daten mit 5 Features trainiert
# Und Sie möchten nur das erste Feature Ihrer Vorhersagen zurücktransformieren
feature_index = 0

# Erstellen Sie einen neuen `StandardScaler` für das entsprechende Feature
feature_scaler = StandardScaler()
feature_scaler.mean_ = scaler.mean_[feature_index]
feature_scaler.scale_ = scaler.scale_[feature_index]

# Verwenden Sie den `feature_scaler` um die Vorhersagen zurück zu transformieren
inversed_predictions = feature_scaler.inverse_transform(predictions)

# Tun Sie dasselbe für die Ziele
inversed_targets = feature_scaler.inverse_transform(train_targets)

In [24]:
inversed_predictions.shape, inversed_targets.shape

((120, 1), (120, 1))

In [32]:
import plotly.graph_objects as go

def plt_pred(test_targets, predictions):
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=test_targets, mode='lines', name='test_targets'))
    fig.add_trace(go.Scatter(y=predictions, mode='lines', name='Predictions'))
    fig.update_layout(title='test_targets vs predictions', xaxis_title='Index', yaxis_title='Value')
    fig.show()

plt_pred(inversed_targets.reshape(-1).tolist(), inversed_predictions.reshape(-1).tolist())