In [37]:
import pandas as pd
import numpy as np
import copy 
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [4]:
df = pd.read_csv('Data/sensor1.csv')

In [5]:
df

Unnamed: 0,sensor_id,timestamp,temperature,humidity,ohms,moisture,weather_humidity,weather_pressure,weather_temp_dew,weather_temp_dry,weather_wind_dir,weather_wind_speed,weather_wind_max,weather_wind_min,weather_precip_past10min
0,1,2020-08-28 03:00:00+00:00,15.012970,77.00,270.067017,12.990156,95.579013,1004.827315,11.475756,12.131467,108.966365,1.825696,2.818346,,0.000000
1,1,2020-08-28 04:00:00+00:00,15.198558,77.00,261.323128,13.038244,96.114617,1004.332653,12.136401,12.751486,103.050999,2.153497,3.074299,,0.000000
2,1,2020-08-28 05:00:00+00:00,15.384146,77.00,252.579239,13.086331,95.666685,1003.678711,12.646634,13.377194,102.429338,1.645717,2.843965,,0.000000
3,1,2020-08-28 06:00:00+00:00,15.569734,77.00,243.835350,13.134418,94.448831,1003.407445,13.056790,13.834935,111.662148,2.000634,3.346310,,0.000000
4,1,2020-08-28 07:00:00+00:00,15.755322,77.00,235.091461,13.182506,93.943871,1003.311881,13.650739,14.700486,88.541827,1.934695,3.406582,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57967,42,2020-10-20 19:00:00+00:00,10.695000,99.99,55.961068,15.403181,83.363973,1002.234081,7.042912,9.740160,191.395958,2.868811,5.950202,,0.068418
57968,42,2020-10-20 20:00:00+00:00,10.660000,99.99,56.017153,15.401460,89.845917,1002.017034,7.713235,9.304126,191.618384,2.302741,4.824224,,0.031582
57969,42,2020-10-20 21:00:00+00:00,10.666667,99.99,55.524990,15.416776,92.324956,1001.807850,8.107616,9.218550,188.959264,2.140578,4.475191,,0.000000
57970,42,2020-10-20 22:00:00+00:00,10.673333,99.99,55.032826,15.432091,93.548737,1001.611697,8.526925,9.522056,191.684353,2.046183,4.293633,,0.000000


In [85]:
def split_train_test_val(df, test_split = 0.2, val_split = 0.2, RANDOM_SEED = 42):
    
    idx_train, idx_test = train_test_split([i for i in range(1,43)], test_size=test_split, random_state = RANDOM_SEED)
    idx_only_train, idx_val = train_test_split(idx_train, test_size=val_split, random_state = RANDOM_SEED)
    
    df['Train'] = False
    df['Validation'] = False
    df['Test'] = False
    
    df.loc[df['sensor_id'].isin(idx_train), 'Train'] = True
    df.loc[df['sensor_id'].isin(idx_test), 'Test'] = True
    df.loc[df['sensor_id'].isin(idx_val), 'Validation'] = True
    
    
    return df
    
def set_index(df):
    df['idx']=0
    for idx, grp in enumerate(df.groupby('sensor_id')):
        df.loc[df.sensor_id == grp[0], "idx"] = idx
        
    
    

In [86]:

class data(Dataset):
    
    def __init__(self, path_to_csv, test, list_features, fixed_points = False):
        super(data).__init__()
        df = pd.read_csv(path_to_csv)
        df = split_train_test_val(df)
        
        if test:
            self.df = df[df['Test']==True].reset_index()
            del df
            set_index(self.df)

        else:
            self.df = df[df['Train']==True].reset_index()
            del df
            set_index(self.df)

        self.fixed_points = fixed_points
        self.list_features = list_features
    
    def __getitem__(self, index):
        
        if self.fixed_points:
            nb_point=200
            start = np.random.randint(0,len(self.df[self.df['idx']==index]) - nb_point +1)
            _input = torch.from_numpy(np.array(self.df[self.df['idx']==index][self.list_features][start:start + nb_point]).transpose())[:,:-1]
            target = torch.from_numpy(np.array(self.df[self.df['idx']==index][self.list_features][start:start + nb_point]).transpose())[:,1:]
            
            return _input, target
            
            
        else:
            _input = torch.from_numpy(np.array(self.df[self.df['idx']==index][self.list_features]).transpose())[:,:-1]
            target = torch.from_numpy(np.array(self.df[self.df['idx']==index][self.list_features]).transpose())[:,1:]
            
            return _input, target
            
            
        
    
    def __len__(self):
        return len(self.df.groupby('sensor_id'))
        
        
        
        

In [87]:
dataset = data('Data/sensor1.csv', test = False, list_features = ['humidity'], fixed_points=False)

In [88]:
loader = DataLoader(dataset, batch_size=1)

In [89]:
# Shape : [Batch, Features, Time Steps]

test, train = next(iter(loader))
print("Train | ", train.shape, "|", train)
print("Test | ", test.shape, "|", test)

Train |  torch.Size([1, 1, 1292]) | tensor([[[77.0000, 77.0000, 77.0000,  ..., 69.6000, 69.8000, 70.0000]]],
       dtype=torch.float64)
Test |  torch.Size([1, 1, 1292]) | tensor([[[77.0000, 77.0000, 77.0000,  ..., 69.4000, 69.6000, 69.8000]]],
       dtype=torch.float64)


In [90]:

# # input: array-like of shape (n_samples, n_features)

print(train.shape)
train = train.permute(0,2,1)
train = torch.squeeze(train, 0)
print(train.shape)

test = test.permute(0,2,1)
test = torch.squeeze(test, 0)

# Fit to data to range 0 - 1, then transform it.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(train)
X_test = scaler.fit_transform(test)




torch.Size([1, 1, 1292])
torch.Size([1292, 1])


In [78]:
X_train = torch.unsqueeze(torch.from_numpy(X_train), 0)
X_test = torch.unsqueeze(torch.from_numpy(X_test), 0)

In [79]:
class Autoencoder(nn.Module):

    def __init__(self):
        super(Autoencoder,self).__init__()

        input_size = 1292
        latent_1 = 400
        bottleneck = 16

        self.encoder = nn.Sequential(
            nn.LSTM(input_size, latent_1),
            nn.ReLU(True),
            nn.LSTM(latent_1,bottleneck),
            nn.ReLU(True))
        self.decoder = nn.Sequential(             
            nn.LSTM(bottleneck,latent_1),
            nn.ReLU(True),
            nn.LSTM(latent_1,input_size),
            nn.ReLU(True))
    def forward(self,x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

    model = Autoencoder()
    print(model)

Autoencoder(
  (encoder): Sequential(
    (0): LSTM(1292, 400)
    (1): ReLU(inplace=True)
    (2): LSTM(400, 16)
    (3): ReLU(inplace=True)
  )
  (decoder): Sequential(
    (0): LSTM(16, 400)
    (1): ReLU(inplace=True)
    (2): LSTM(400, 1292)
    (3): ReLU(inplace=True)
  )
)


In [80]:
def train_model(model, train_dataset, val_dataset, n_epochs):
    
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.L1Loss(reduction='sum')
    history = dict(train=[], val=[])
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 10000.0
    
    for epoch in range(1, n_epochs + 1):
        model = model.train()
        train_losses = []

        for seq_true in train_dataset:

            optimizer.zero_grad()
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        val_losses = []
        model = model.eval()

        with torch.no_grad():
            for seq_true in val_dataset:

                seq_true = seq_true
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())
            
        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        
        if val_loss < best_loss:
            best_loss = val_loss
            best_model_wts = copy.deepcopy(model.state_dict())

        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')

    model.load_state_dict(best_model_wts)
    return model.eval(), history

In [81]:
model, history = train_model(model, X_train, X_test, n_epochs=150)

RuntimeError: input must have 3 dimensions, got 2