In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch 

In [3]:
df = pd.read_csv('aggregated_hourly.csv')
#for every distinct device_id create a new column target with the value of WIFI shiftied by 1
df['target'] = 0
for i in df['device_id'].unique():
    df.loc[df['device_id'] == i, 'target'] = df.loc[df['device_id'] == i, 'WIFI'].shift(-1)
df.sort_values(by=['date_time'], inplace=True)
#drop
df = df.dropna()   
#onehot encode device_id into int
df = pd.get_dummies(df, columns=['device_id'], prefix = 'device_id')
#delete date_time column and device_id column
df = df.drop(columns=['date_time'])
df

Unnamed: 0,tmp,hum,snr,CO2,VOC,vis,IR,WIFI,BLE,rssi,...,device_id_hka-aqm-am201a,device_id_hka-aqm-am201b,device_id_hka-aqm-am204,device_id_hka-aqm-am205,device_id_hka-aqm-am209,device_id_hka-aqm-am210,device_id_hka-aqm-am211,device_id_hka-aqm-am301,device_id_hka-aqm-am307,device_id_hka-aqm-am308
46711,25.080000,44.9700,-16.800000,754,558,379,64,4,0,-131,...,False,False,False,False,False,False,False,False,False,False
54194,23.900000,52.1100,-15.200000,686,593,255,35,5,0,-135,...,False,False,False,False,False,False,False,False,False,False
54195,24.137500,51.8300,-11.625000,800,633,256,36,3,0,-125,...,False,False,False,False,False,False,False,False,False,False
54196,24.432500,52.0000,-15.650000,902,825,289,55,5,2,-125,...,False,False,False,False,False,False,False,False,False,False
24671,24.908000,52.3160,-2.550000,1128,450,213,91,3,1,-115,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24669,24.595000,45.5850,-14.900000,469,790,5,1,2,1,-125,...,False,False,False,False,False,False,False,False,False,False
84148,27.086667,38.9300,9.066667,441,836,32,4,5,0,-102,...,False,False,False,False,False,False,False,False,False,False
61166,24.035000,48.9550,6.000000,10026,9997,12,3,2,2,-96,...,False,False,False,False,False,False,False,False,False,False
7473,24.492500,45.9275,-9.700000,445,865,8,2,1,4,-120,...,False,False,False,False,False,False,False,False,False,False


In [4]:
X = df.drop(columns=['target']).astype('float32')
y = df['target'].astype('float32')
X = torch.tensor(X.values).float()
y = torch.tensor(y.values).float()
print(X.shape)  
print(y.shape)

torch.Size([143802, 58])
torch.Size([143802])


In [5]:
X_new = []
window_size = 500
#for i in range(0, len(X), window_size):
for i in range(0, len(X)):
    if i < window_size:
        number_of_padding = window_size - i
        padding = torch.zeros(number_of_padding, X.shape[1])
        X_new.append(torch.cat((padding, X[:i])))
    else:
        X_new.append(X[i-window_size:i])
X_new = torch.stack(X_new)
X_new = X_new[1:]
X_new.shape

torch.Size([143801, 500, 58])

In [22]:
X_new.shape

torch.Size([143801, 500, 58])

In [6]:
# Create a DataLoader
from torch.utils.data import DataLoader, TensorDataset

# Create a TensorDataset
y = y[1:]
data = TensorDataset(X_new, y)
data

<torch.utils.data.dataset.TensorDataset at 0x28c684247f0>

In [20]:
class PositionalEncoding(torch.nn.Module):
    """
    compute sinusoid encoding.
    """
    def __init__(self, d_model, max_len,device):
        """
        constructor of sinusoid encoding class

        :param d_model: dimension of model
        :param max_len: max sequence length
        :param device: hardware device setting
        """
        super(PositionalEncoding, self).__init__()

        # same size with input matrix (for adding with input matrix)
        self.encoding = torch.zeros(max_len, d_model, device=device)
        self.encoding.requires_grad = False  # we don't need to compute gradient
        pos = torch.arange(0, max_len,device=device)
        pos = pos.float().unsqueeze(dim=1)
        # # 1D => 2D unsqueeze to represent word's position

        _2i = torch.arange(0, d_model, step=2, device=device).float()
        # 'i' means index of d_model (e.g. embedding size = 50, 'i' = [0,50])
        # "step=2" means 'i' multiplied with two (same with 2 * i)

        self.encoding[:, 0::2] = torch.sin(pos / (10000 ** (_2i / d_model)))
        self.encoding[:, 1::2] = torch.cos(pos / (10000 ** (_2i / d_model)))
        # compute positional encoding to consider positional information of words

    def forward(self, x):
        # self.encoding
        # [max_len = 512, d_model = 512]

        #batch_size, seq_len = x.size()
        # [batch_size = 128, seq_len = 30]

        return self.encoding
        #return self.encoding[:seq_len, :]
        # [seq_len = 30, d_model = 512]
        # it will add with tok_emb : [128, 30, 512]         

In [8]:
class ScaledDotProduct(torch.nn.Module):
    """
    scaled dot product attention class
    """
    def __init__(self):
        """
        constructor of scaled dot product attention class
        """
        super(ScaledDotProduct, self).__init__()
        
    def forward(self, Q, K, V, mask=None):
        """
        forward pass of scaled dot product attention
        :param Q: query tensor
        :param K: key tensor
        :param V: value tensor
        :param mask: mask tensor
        :return: output tensor
        """
        d_k = K.size(-1)
        # get dimension of key
        scores = (Q @ K.transpose(-2, -1)) / np.sqrt(d_k)
        # compute attention score
        # if mask is not None:
        #     scores = scores.masked_fill(mask == 0, -1e9)
        #     # apply mask to score
        attention = torch.nn.functional.softmax(scores, dim=-1)
        # apply softmax to score
        output = attention @ V
        # compute output tensor
        return output, attention

In [9]:
class MultiHeadAttention(torch.nn.Module):
    """
    multihead attention class
    """
    def __init__(self, d_model, num_heads):
        """
        constructor of multihead attention class

        :param d_model: dimension of model
        :param num_heads: number of head in multihead attention
        """
        super(MultiHeadAttention, self).__init__()

        self.d_model = d_model
        self.num_heads = num_heads

        self.d_k = d_model // num_heads
        # get dimension of key

        self.W_Q = torch.nn.Linear(d_model, d_model)
        self.W_K = torch.nn.Linear(d_model, d_model)
        self.W_V = torch.nn.Linear(d_model, d_model)
        # linear transformation for query, key, value

        self.scaled_dot_product = ScaledDotProduct()
        # scaled dot product attention

        self.linear = torch.nn.Linear(d_model, d_model)
        # linear transformation for output

    def forward(self, Q, K, V, mask=None):
        """
        forward pass of multihead attention

        :param Q: query tensor
        :param K: key tensor
        :param V: value tensor
        :param mask: mask tensor
        :return: output tensor
        """
        batch_size = Q.size(0)
        # get batch size

        Q = self.W_Q(Q).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
        K = self.W_K(K).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
        V = self.W_V(V).view(batch_size, -1, self.num_heads, self.d_k).transpose(1, 2)
        # linear transformation and split into multihead

        if mask is not None:
            mask = mask.unsqueeze(1)
            # unsqueeze mask

        output, attention = self.scaled_dot_product(Q, K, V, mask)
        # scaled dot product attention

        output = output.transpose(1, 2).contiguous().view(batch_size, -1, self.d_model)
        # concatenate multihead attention

        return self.linear(output), attention

In [81]:
class Decoder(torch.nn.Module):
    """
    decoder layer class
    """
    def __init__(self, input,d_model,max_len,num_heads,d_ff,device):
        """
        constructor of decoder layer

        :param d_model: dimension of model
        :param num_heads: number of head in multihead attention
        :param d_ff: dimension of feed forward layer
        :param dropout: dropout rate
        """
        super(Decoder, self).__init__()

        self.embed = torch.nn.Linear(input, d_model).to(device)
        self.positonal_encoding = PositionalEncoding(d_model, max_len=max_len,device=device).to(device)
        self.norm = torch.nn.LayerNorm(d_model).to(device)
        self.attn = MultiHeadAttention(d_model, num_heads).to(device)

        self.ff = torch.nn.Sequential(
            torch.nn.LayerNorm(d_model),
            torch.nn.Linear(d_model, d_ff),
            torch.nn.ReLU(),
            torch.nn.Linear(d_ff, 1)
        ).to(device)

    def forward(self, x):
        """
        forward pass of decoder layer

        :param x: input tensor (query)
        :param memory: input tensor (key, value)
        :param src_mask: source mask
        :param tgt_mask: target mask
        :return: output tensor
        """
        x = self.embed(x)
        x = x + self.positonal_encoding(x)
        x_norm = self.norm(x)
        x_att, _ = self.attn(x_norm, x_norm, x_norm)
        x = x + x_att
        x = self.ff(x) #shape am ende noch (500,1), soll des so ?
        return x[:, -1].squeeze()

In [82]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Decoder1 = Decoder(data[0][0].shape[1], 32,max_len=data[0][0].shape[0],num_heads=4,d_ff=128,device=device)
d = Decoder1.forward(x)

In [58]:
x.shape

torch.Size([25, 500, 58])

In [85]:
# Train Decoder
from torch.optim import Adam
from torch.nn import MSELoss

# Create a DataLoader
from torch.utils.data import DataLoader, TensorDataset
# Create a DataLoader
data_loader = DataLoader(data, batch_size=32, shuffle=True)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create a Decoder
Decoder1 = Decoder(data[0][0].shape[1], 32,max_len=data[0][0].shape[0],num_heads=2,d_ff=64,device=device)

# Create a Loss Function
criterion = MSELoss()

# Create an Optimizer
optimizer = Adam(Decoder1.parameters(), lr=0.001)

In [87]:
# Train the Decoder
import torch
import time

# Move model to GPU
Decoder1 = Decoder1.to(device)

# Define the number of epochs
num_epochs = 10

# Define the loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(Decoder1.parameters(), lr=0.001)

Decoder1.train()
for epoch in range(num_epochs):
    for x, y in data_loader:
        # Move tensors to GPU
        x = x.to(device)
        y = y.to(device)
        # print(x.shape)
        # print(y.shape)
        # break

        # Forward pass
        output = Decoder1(x)
        loss = criterion(output, y)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'Epoch: {epoch + 1:03d}, Loss: {loss.item():.4f}')

Epoch: 001, Loss: 3.8604
Epoch: 002, Loss: 1.5877
Epoch: 003, Loss: 1.9125
Epoch: 004, Loss: 1.6431
Epoch: 005, Loss: 2.4121
Epoch: 006, Loss: 4.3164
Epoch: 007, Loss: 4.3205
Epoch: 008, Loss: 1.9703
Epoch: 009, Loss: 1.5076
Epoch: 010, Loss: 5.3296


In [28]:
x.shape

torch.Size([32, 500, 58])

In [188]:
data_loader

<torch.utils.data.dataloader.DataLoader at 0x24c002eb4f0>

In [67]:
d_model = 512
num_heads = 8
multihead_attention = MultiHeadAttention(d_model, num_heads)


batch_size = 64
seq_len = 10
Q = torch.rand(batch_size, seq_len, d_model)
K = torch.rand(batch_size, seq_len, d_model)
V = torch.rand(batch_size, seq_len, d_model)
output, attention = multihead_attention(Q, K, V)

In [69]:
output.shape

torch.Size([64, 10, 512])

In [55]:

scaled_dot_product = ScaledDotProduct()
Q = torch.randn(10, 20, 30)
K = torch.randn(10, 20, 30)
V = torch.randn(10, 20, 40)


output, attention = scaled_dot_product(Q, K, V)

In [63]:
output[0].shape, attention[0].shape

(torch.Size([20, 40]), torch.Size([20, 20]))

In [64]:
attention[0][0]

tensor([0.0163, 0.0329, 0.0278, 0.1355, 0.0135, 0.0550, 0.0034, 0.0812, 0.0678,
        0.0717, 0.0605, 0.0011, 0.0840, 0.1410, 0.0687, 0.0337, 0.0112, 0.0352,
        0.0548, 0.0048])

In [46]:
Decoder = Decoder(data[0][0].shape[1], 25)

In [51]:
data[0][0]

tensor([[  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        ...,
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [ 25.0800,  44.9700, -16.8000,  ...,   0.0000,   0.0000,   0.0000]])

In [52]:
Decoder.forward(data[0][0]).shape

torch.Size([500, 25])

In [56]:
class TransformerModel(torch.nn.Module):
    """
    Transformer model class
    """
    def __init__(self, input_size, d_model, nhead, num_encoder_layers, dim_feedforward, max_len, device, src_pad_idx):
        """
        constructor of TransformerModel class

        :param input_size: input size
        :param d_model: dimension of model
        :param nhead: number of head
        :param num_encoder_layers: number of encoder layer
        :param dim_feedforward: dimension of feedforward
        :param max_len: max sequence length
        :param device: hardware device setting
        :param src_pad_idx: the index of the source padding token
        """
        super(TransformerModel, self).__init__()

        self.src_pad_idx = src_pad_idx
        self.embedding = torch.nn.Linear(input_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len, device)

        encoder_layer = torch.nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward)
        encoder_norm = torch.nn.LayerNorm(d_model)
        self.transformer_encoder = torch.nn.TransformerEncoder(encoder_layer, num_encoder_layers, encoder_norm)

        self.fc = torch.nn.Linear(d_model, 1)

    def make_src_mask(self, src):
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask

    def forward(self, src):
        """
        forward method

        :param src: source input
        :return: output
        """
        src_mask = self.make_src_mask(src)
        src = self.embedding(src)
        src = src + self.pos_encoder(src)

        output = self.transformer_encoder(src, src_key_padding_mask=src_mask.squeeze())
        output = self.fc(output)

        return output

In [53]:

d_model = 512
max_len = 1000
pe = PositonalEncoding(d_model, max_len)
x = torch.zeros((1, max_len, d_model))
output1 = pe.forward(x)
output1

tensor([[[ 0.0000e+00,  1.0000e+00,  0.0000e+00,  ...,  1.0000e+00,
           0.0000e+00,  1.0000e+00],
         [ 8.4147e-01,  5.4030e-01,  8.2186e-01,  ...,  1.0000e+00,
           1.0366e-04,  1.0000e+00],
         [ 9.0930e-01, -4.1615e-01,  9.3641e-01,  ...,  1.0000e+00,
           2.0733e-04,  1.0000e+00],
         ...,
         [-8.9797e-01, -4.4006e-01,  4.2620e-01,  ...,  9.9427e-01,
           1.0317e-01,  9.9466e-01],
         [-8.5547e-01,  5.1785e-01,  9.8628e-01,  ...,  9.9425e-01,
           1.0327e-01,  9.9465e-01],
         [-2.6461e-02,  9.9965e-01,  6.9756e-01,  ...,  9.9424e-01,
           1.0337e-01,  9.9464e-01]]])