In [48]:
import pandas as pd
import numpy as np
import torch
from torch import nn 
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from pathlib import Path

In [11]:
df = pd.read_csv('solarenergy.csv')

  df = pd.read_csv('solarenergy.csv')


In [12]:
df.head()

Unnamed: 0,Datetime,solar_mw,wind-direction,wind-speed,humidity,average-wind-speed-(period),average-pressure-(period),temperature
0,03/08/2020 0:00,11385,27,7.5,75.0,8.0,29.82,69.0
1,02/08/2020 23:00,12390,27,7.5,77.0,5.0,29.85,69.0
2,02/08/2020 22:00,13312,27,7.5,70.0,0.0,29.89,69.0
3,02/08/2020 21:00,13832,27,7.5,33.0,0.0,29.91,69.0
4,02/08/2020 20:00,14038,27,7.5,21.0,3.0,29.89,69.0


In [13]:
df.columns

Index(['Datetime', 'solar_mw', 'wind-direction', 'wind-speed', 'humidity',
       'average-wind-speed-(period)', 'average-pressure-(period)',
       'temperature'],
      dtype='object')

In [18]:
df.values

array([['03/08/2020 0:00', 11385, '27', ..., 8.0, 29.82, 69.0],
       ['02/08/2020 23:00', 12390, '27', ..., 5.0, 29.85, 69.0],
       ['02/08/2020 22:00', 13312, '27', ..., 0.0, 29.89, 69.0],
       ...,
       ['01/05/2007 3:00', 6583, nan, ..., nan, nan, nan],
       ['01/05/2007 2:00', 6803, nan, ..., nan, nan, nan],
       ['01/05/2007 1:00', 7190, nan, ..., nan, nan, nan]],
      shape=(116117, 8), dtype=object)

In [32]:
df.dtypes

Datetime                        object
solar_mw                         int64
wind-direction                  object
wind-speed                     float64
humidity                       float64
average-wind-speed-(period)    float64
average-pressure-(period)      float64
temperature                    float64
dtype: object

In [34]:
df['wind-direction'].values

array(['27', '27', '27', ..., nan, nan, nan],
      shape=(116117,), dtype=object)

In [41]:
class TimeSeriesDataset(Dataset):

    def __init__(self, root_path, mode = 'train', data_path = 'solarenergy.csv',
                 size = None, scale = True, features = 'M' ,target = None):
        
        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]

        assert mode in ['train', 'val', 'test']
        mode_map = {'train': 0, 'val': 1, "test": 2}
        self.set_mode = mode_map[mode]
        self.features = features
        self.target = target

        self.root_path = root_path
        self.data_path = data_path
        self.scale = scale
        self.__read_data__()

    
    def __read_data__(self):
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(Path(self.root_path) / self.data_path)

       


        num_train = int(len(df_raw) * 0.7)
        num_test = int(len(df_raw) * 0.2)
        num_val = int(len(df_raw) * 0.1)

        borders1 = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
        borders2 = [num_train, num_train + num_val, len(df_raw)]

        border1 = borders1[self.set_mode]
        border2 = borders2[self.set_mode]

        if self.features == 'M':
            cols_data = df_raw.columns[1:]
            df_data = df_raw[cols_data]
        elif self.features == 'S':
            df_data = df_raw[[self.target]]

        df_data =  df_data.apply(pd.to_numeric, errors='coerce').fillna(method='ffill').fillna(method='bfill').values

        
        if self.scale:
            train_data = df_data[borders1[0] : borders2[0]]
            self.scaler.fit(train_data)
            data  = self.scaler.transform(df_data)
        else:
            data = df_data
        
        self.X = data[border1:border2]
        self.y = data[border1:border2]
    
    def __getitem__(self, index):
        x_start  = index
        x_end = x_start + self.seq_len
        y_start  = x_end - self.label_len
        y_end = y_start + self.label_len + self.pred_len

        seq_x = self.X[x_start:x_end]
        seq_y = self.y[y_start:y_end]

        return seq_x, seq_y
    
    def __len__(self):
        return len(self.X) - self.seq_len - self.pred_len + 1
    
    def reverse_transform(self, data):
        return self.scaler.inverse_transform(data)

In [22]:
def create_windows_multivariate(data, lookback, horizon):
    X, Y = [], []
    for i in range(len(data) - lookback - horizon + 1):
        X.append(data[i: i + lookback])
        Y.append(data[i + lookback: i + lookback + horizon])
    
    return np.asarray(X), np.asarray(Y)

In [10]:
def patch_sequence(x, patch_size):
    L = x.shape[0]
    n_patches =  L // patch_size
    x = x[:n_patches * patch_size]

    return x.reshape(n_patches, patch_size)

In [42]:
L = 10
T = 5 
size = (L,0,T)

dataset = TimeSeriesDataset(root_path='.', mode = 'train', size= size)

  df_raw = pd.read_csv(Path(self.root_path) / self.data_path)
  df_data =  df_data.apply(pd.to_numeric, errors='coerce').fillna(method='ffill').fillna(method='bfill').values


In [46]:
x, y = dataset[0]
print(x.shape)
print(y.shape)

(10, 7)
(5, 7)


In [53]:
batch_size = 32
dataloader = DataLoader(dataset, batch_size= batch_size, shuffle= True)
sample = next(iter(dataloader))
print(sample[0].shape,sample[1].shape)

torch.Size([32, 10, 7]) torch.Size([32, 5, 7])


In [None]:
class sLSTMcell(nn.Module):
    def __init__(self, input_dim, hidden_dim):

        self.Wi = nn.Linear(input_dim, hidden_dim, bias= True)
        self.Ri = nn.Linear(hidden_dim, hidden_dim, bias= True)