In [1]:
import pandas as pd, dataprep as dp
import torch

In [60]:
# Load the hourly-aggregated sensor export and normalise the device identifiers.
# NOTE(review): this is an absolute, machine-specific path; consider moving it to
# a configurable data directory so the notebook runs on other machines.
filename = "/Users/florian/Documents/github/study/IoT/IoT/main/aggregated_hourly.csv"

# Build the frame in one chain (no inplace mutation) so re-running the cell is idempotent.
df = (
    pd.read_csv(filename)
    .drop(columns=["Unnamed: 0"])  # leftover index column written by an earlier to_csv
    .assign(date_time=lambda frame: pd.to_datetime(frame["date_time"]))
)

# Exclude every device whose id contains "hka-aqm-am". The explicit .copy() makes the
# filtered frame independent, so the column assignment below does not write into a
# slice of the original frame (which triggers SettingWithCopyWarning).
is_am_device = df["device_id"].str.contains("hka-aqm-am", regex=False)
df = df.loc[~is_am_device].copy()

# Strip the common prefix; regex=False pins literal matching across pandas versions.
df["device_id"] = df["device_id"].str.replace("hka-aqm-", "", regex=False)
#dp.save_to_parquet(df, "agg_hourly.parquet")

INFO:root:Data saved to agg_hourly.parquet


In [31]:
# Build the model inputs from the CSV via the project's dataprep helper.
# Later cells read data["train"] / data["test"] and their .x, .loader and .scaler attributes.
data = dp.prepare_data(
    filename,
    window_size=50,
    train_ratio=0.8,
    batch_size=64,
    features=["tmp", "CO2", "hum", "VOC"],
    target="tmp",
    scaling=True,
)

In [32]:
from foo import LSTM_1

# Size the network input from the training tensor.
# assumes data["train"].x is (samples, window, features), so axis 2 is the feature count — TODO confirm
n_input_features = data["train"].x.shape[2]
model = LSTM_1(
    input_size=n_input_features,
    hidden_size=100,
    num_layers=1,
    output_size=1,
    dropout=0,
)

In [33]:
from tqdm import tqdm

# --- Training configuration -------------------------------------------------
num_epochs = 10
learning_rate = 0.0001
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_loader = data["train"].loader

# Loss of the most recent batch; stays NaN only if the loader yields no batches,
# so the summary line below can never fail on an undefined name.
final_loss = float("nan")

# --- Optimisation -----------------------------------------------------------
model.train()
for epoch in range(num_epochs):
    # The epoch label is constant for the whole pass, so set it once on the bar
    # instead of re-setting (and re-rendering) it on every batch.
    loop = tqdm(train_loader, desc=f"Epoch [{epoch+1}/{num_epochs}]", leave=True)
    for features, target in loop:
        optimizer.zero_grad(set_to_none=True)
        output = model(features)
        # MSELoss compares element-wise; presumably the loader already yields
        # targets with the same shape as the model output — TODO confirm.
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        # The value shown is the loss of the current batch, not an epoch average.
        final_loss = loss.item()
        loop.set_postfix(loss=final_loss, lr=learning_rate)


# Final output
print(f"Training completed. Final loss: {final_loss}, final learning rate: {learning_rate}")

Epoch [1/10]: 100%|██████████| 1547/1547 [00:29<00:00, 51.70it/s, loss=0.327, lr=0.0001]
Epoch [2/10]: 100%|██████████| 1547/1547 [00:30<00:00, 51.32it/s, loss=0.296, lr=0.0001]
Epoch [3/10]: 100%|██████████| 1547/1547 [00:29<00:00, 51.62it/s, loss=0.577, lr=0.0001]
Epoch [4/10]: 100%|██████████| 1547/1547 [00:30<00:00, 51.53it/s, loss=0.209, lr=0.0001]
Epoch [5/10]: 100%|██████████| 1547/1547 [00:29<00:00, 51.59it/s, loss=0.376, lr=0.0001]
Epoch [6/10]: 100%|██████████| 1547/1547 [00:30<00:00, 50.51it/s, loss=0.408, lr=0.0001] 
Epoch [7/10]: 100%|██████████| 1547/1547 [00:30<00:00, 51.52it/s, loss=0.152, lr=0.0001]
Epoch [8/10]: 100%|██████████| 1547/1547 [00:30<00:00, 51.52it/s, loss=1.18, lr=0.0001] 
Epoch [9/10]: 100%|██████████| 1547/1547 [00:29<00:00, 51.80it/s, loss=0.678, lr=0.0001]
Epoch [10/10]: 100%|██████████| 1547/1547 [00:30<00:00, 50.11it/s, loss=0.18, lr=0.0001] 

Training completed. Final loss: 0.17992085218429565, final learning rate: 0.0001





In [34]:
# Switch the model to inference mode before evaluating.
model.eval()
test_loader = data["test"].loader

# NOTE: only the first batch of the test loader is evaluated here, so the number
# printed below is a single-batch loss, not a loss over the whole test split.
test_features, test_targets = next(iter(test_loader))

# Forward pass and metric are both computed without building an autograd graph.
with torch.no_grad():
    predictions = model(test_features)
    # presumably targets already match the prediction shape — TODO confirm
    test_loss = criterion(predictions, test_targets)

print('Test Loss: {:.4f}'.format(test_loss.item()))

Test Loss: 0.0128


In [35]:
from sklearn.preprocessing import StandardScaler
# Column of the fitted scaler whose statistics are used to undo the scaling.
# presumably 0 because "tmp" (the target) is listed first among the features
# passed to prepare_data — verify that the scaler columns follow that order.
feature_index = 0
scaler = data["test"].scaler

# Standard scaling is z = (x - mean) / scale, so the inverse for one column is
# x = z * scale + mean. Applying it directly avoids hand-assembling a "fitted"
# StandardScaler from scalar attributes. Plain Python floats keep the result in
# the dtype of the input values.
target_mean = float(scaler.mean_[feature_index])
target_scale = float(scaler.scale_[feature_index])


def _to_original_units(values):
    """Map a tensor of scaled values back to original units as a new NumPy array.

    The input tensor is not modified; the result has the same shape as the input.
    """
    return values.detach().cpu().numpy() * target_scale + target_mean


inversed_predictions = _to_original_units(predictions)
inversed_targets = _to_original_units(test_targets)

In [36]:
import plotly.graph_objects as go

def plt_pred(test_targets, predictions):
    """Display two sequences as line traces in a single Plotly figure.

    Args:
        test_targets: y-values of the first trace (legend name 'test_targets').
        predictions: y-values of the second trace (legend name 'Predictions').

    Returns:
        None. The figure is shown via ``fig.show()``.
    """
    labelled_series = (
        (test_targets, 'test_targets'),
        (predictions, 'Predictions'),
    )

    fig = go.Figure()
    for values, label in labelled_series:
        fig.add_trace(go.Scatter(y=values, mode='lines', name=label))

    fig.update_layout(
        title='test_targets vs predictions',
        xaxis_title='Index',
        yaxis_title='Value',
    )
    fig.show()

# Flatten both arrays to plain 1-D lists before handing them to the plot helper.
plt_pred(
    test_targets=inversed_targets.reshape(-1).tolist(),
    predictions=inversed_predictions.reshape(-1).tolist(),
)

In [37]:
# Persist the model's state_dict (not the module object itself) to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, "lstm_gut.pth")