In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## 1 Load Dataset


In [5]:
# Read the temperature CSV into a DataFrame, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer='./data/temperature_data.csv')
df.head(n=5)

Unnamed: 0,Day,Temperature
0,1,0.133553
1,2,0.061635
2,3,0.175963
3,4,0.302001
4,5,0.08297


##### Goal of generation:

- look at roughly the first 30–60 days of temperature data
- then, from the observed pattern, generate the next n days using time-series forecasting
- i.e., multi-step (range) prediction rather than one-step-ahead prediction


In [6]:
# Per-column count of missing (NaN) entries.
df.isna().sum(axis=0)

Day            0
Temperature    0
dtype: int64

In [7]:
# Pull the 'Temperature' column out as a NumPy array.
# `Series.to_numpy()` is the accessor pandas recommends over `.values`.
temp_data = df['Temperature'].to_numpy()

### 1.1 Data Preprocessing (Time-series)

- specify custom dataset (prep time-series data for training)
- efficient data handling
- compatible with data loaders
- customized preprocessing
  - converting data to tensors

### Usage steps of Custom Dataset class

1. pass the temperature data to the custom dataset so that the sequences can be created
2. create a dataloader from the custom dataset
3. feed the dataloader to the model


In [17]:
# Sequence length: number of time steps in each input window.
T = 30
# Number of sequences placed in each batch (one batch per training iteration).
batch_size = 32

# Dataset -> handling, managing data in structured way
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for one-step-ahead prediction on a 1-D series.

    Sample ``i`` pairs the window ``data[i : i + sequence_length]`` with the
    value that immediately follows it, ``data[i + sequence_length]``.
    Consecutive windows overlap: each one starts one step after the previous.

    Args:
        data: Indexable 1-D sequence supporting ``len()`` and slicing
            (this notebook passes a NumPy array of temperatures).
        sequence_length: Number of time steps in each input window.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """

    def __init__(self, data, sequence_length):  # init custom dataset
        if sequence_length < 1:
            raise ValueError(
                f"sequence_length must be >= 1, got {sequence_length}"
            )
        self.data = data  # the raw series (temperature data here)
        self.sequence_length = sequence_length  # No. of time steps per window

    def __len__(self):
        """Return the number of (window, target) pairs that can be built.

        Every window needs one further value as its target, so the last valid
        start index is ``len(data) - sequence_length - 1``.
        E.g. 3 values with sequence_length = 2 give exactly one sample:
        window (1, 2) with target 3.
        """
        # Clamp at 0: a series that is too short yields an empty dataset
        # rather than a negative length (which would make len() raise).
        return max(0, len(self.data) - self.sequence_length)

    def __getitem__(self, i):
        """Return the ``i``-th sample as ``(x, y)`` float32 tensors.

        ``x`` has shape ``(sequence_length, 1)`` and ``y`` is a scalar tensor.
        The batch dimension is added later, when a DataLoader collates samples.
        Negative indices count from the end, as with a list.

        Raises:
            IndexError: If ``i`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        start = i + n_samples if i < 0 else i
        if not 0 <= start < n_samples:
            raise IndexError(
                f"index {i} out of range for dataset of length {n_samples}"
            )

        stop = start + self.sequence_length
        window = self.data[start:stop]  # sequence data
        target = self.data[stop]        # value right after the window

        # unsqueeze(-1) adds the trailing feature axis: (sequence_length, 1)
        x = torch.tensor(window, dtype=torch.float32).unsqueeze(-1)
        y = torch.tensor(target, dtype=torch.float32)

        return x, y


In [18]:
# Wrap the temperature series in the sliding-window dataset (window length T).
temp_dataset = TimeSeriesDataset(data=temp_data, sequence_length=T)

# shuffle=False: batches are drawn in the original (chronological) order.
temp_dataloader = DataLoader(
    dataset=temp_dataset,
    batch_size=batch_size,
    shuffle=False,
)