<h1>Applying a weighted loss function in time series forecasting</h1>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

import sys
from tqdm import tqdm

# Apply matplotlib's 'dark_background' style sheet to all figures created afterwards.
plt.style.use('dark_background')

In [None]:
# IPython magic: change the working directory to the project folder,
# so that the relative 'data/...' paths used below are resolved from there.
%cd project/dl_phd_project

/home/jupyter/work/resources/dl_phd_project


In [None]:
# Load the raw CSV into a DataFrame.
# Alternative relative path, kept for reference (one directory level up):
# df = pd.read_csv('../data/small_data.csv')
df = pd.read_csv('data/small_data.csv')

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler

# Creating the data loader

In [None]:
class TimeSeriesDataset(Dataset):
    '''
    Sliding-window dataset over the 'temp_ice' and 'temp_inside' columns of a CSV file.

    The rows are split in file order into train / val / test parts. Every item
    is a tuple ``(x, y, w)`` of float32 tensors:

    * ``x`` - ``seq_length`` consecutive rows of both (scaled) features;
    * ``y`` - the scaled 'temp_ice' value ``step_size`` rows after the last row of ``x``;
    * ``w`` - the loss weight of that target row: the 4th power of the first
      difference of the *unscaled* 'temp_ice' inside the split (0 for the
      first row of the split, which has no predecessor).
    '''

    def __init__(self, path, seq_length, step_size, split='train', train_size=0.7, val_size=0.15, scaler=None):
        '''
        Args:
            path (str): path to csv file with data
            seq_length (int): sequence length for input data
            step_size (int): steps count forward for prediction (horizon)
            split (str): type of sample ('train', 'val', 'test')
            train_size (float): data proportion for train sample
            val_size (float): data proportion for val sample -> test_size = 1 - train_size - val_size
            scaler (sklearn.preprocessing): fitted scaler exposing ``transform``;
                required for 'val' and 'test'. For 'train' the argument is
                ignored and a new MinMaxScaler is fitted on the train rows.

        Raises:
            ValueError: if ``split`` is not one of 'train', 'val', 'test', or
                if a non-train split is requested without a scaler.
        '''
        self.features = ['temp_ice', 'temp_inside']
        self.seq_length = seq_length
        self.step_size = step_size
        self.scaler = scaler

        # Validate the arguments before touching the file system.
        if split not in ('train', 'val', 'test'):
            raise ValueError('split must be "train", "val" or "test"')
        if split != 'train' and scaler is None:
            raise ValueError('Scaler not defined, create a train_dataset instance for it')

        full_data = pd.read_csv(path, usecols=['time'] + self.features)

        # split train, validation and test sampling
        n = len(full_data)
        train_end = int(n * train_size)
        val_end = train_end + int(n * val_size)
        bounds = {
            'train': (0, train_end),
            'val': (train_end, val_end),
            'test': (val_end, n),
        }
        start, stop = bounds[split]

        # Copy the slice: the scaled values are written back into it below, and
        # assigning into a view of the full frame triggers pandas'
        # chained-assignment warnings.
        self.data = full_data.iloc[start:stop].copy()

        if split == 'train':
            # The scaler is always fitted on the train rows only.
            self.scaler = MinMaxScaler()
            self.scaler.fit(self.data[self.features])

        # define weights for loss function (computed on the unscaled values)
        self.weights = (self.data['temp_ice'].diff(1).fillna(0) ** 4).to_numpy()

        self.data[self.features] = self.scaler.transform(self.data[self.features])

        # Cache the scaled features as a plain array so that __getitem__ does
        # not pay for DataFrame indexing on every sample. Changes made to
        # `self.data` after construction are not reflected in this cache.
        self._values = self.data[self.features].to_numpy(dtype=float)
        self._target_col = self.features.index('temp_ice')

    def __len__(self):
        '''Return the number of complete (window, target) pairs; 0 if the split is too short.'''
        return max(0, len(self.data) - self.seq_length - self.step_size + 1)

    def __getitem__(self, idx):
        '''
        Return the ``(x, y, w)`` float32 tensors for window number ``idx``.

        Negative indices count from the end, as for sequences.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        '''
        n = len(self)
        if idx < 0:
            # Not `+=`: that would modify a tensor index in place.
            idx = idx + n
        if idx < 0 or idx >= n:
            raise IndexError(f'index {idx} is out of range for dataset of length {n}')

        window_end = idx + self.seq_length
        target = window_end + self.step_size - 1

        x = self._values[idx:window_end]
        y = self._values[target, self._target_col]
        w = self.weights[target]

        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32), torch.tensor(w, dtype=torch.float32)
    

# CSV location relative to the current working directory.
# Alternative relative path, kept for reference:
# path = '../data/small_data.csv'
path = 'data/small_data.csv'

# Input window length and forecast horizon, both counted in rows.
seq_length, step_size = 60, 30

# The train split goes first: it fits the scaler that val and test reuse.
train_dataset = TimeSeriesDataset(
    path=path,
    seq_length=seq_length,
    step_size=step_size,
    split='train',
    train_size=0.7,
    val_size=0.15,
)
val_dataset = TimeSeriesDataset(
    path=path,
    seq_length=seq_length,
    step_size=step_size,
    split='val',
    train_size=0.7,
    val_size=0.15,
    scaler=train_dataset.scaler,
)
test_dataset = TimeSeriesDataset(
    path=path,
    seq_length=seq_length,
    step_size=step_size,
    split='test',
    train_size=0.7,
    val_size=0.15,
    scaler=train_dataset.scaler,
)

In [None]:
# One loader per split with identical settings: batches of 16 in dataset order
# (no shuffling), and the final incomplete batch of every split is dropped.
train_loader, val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=16, shuffle=False, drop_last=True)
    for split_dataset in (train_dataset, val_dataset, test_dataset)
)

In [None]:
# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# Bare expression as the last line of the cell, so the notebook echoes the chosen device.
device

'cpu'