In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from torch.autograd import Variable
from torch import Tensor, optim, nn
import wandb
from tqdm import tqdm


In [2]:
device = (
    "cuda" if torch.cuda.is_available()
    else "cpu"
)
if torch.cuda.is_available():
    print(f'PyTorch version: {torch.__version__}')
    print('*' * 10)
    print(f'_CUDA version: ')
    !nvcc --version
    print('*' * 10)
    print(f'CUDNN version: {torch.backends.cudnn.version()}')
    print(f'Available GPU devices: {torch.cuda.device_count()}')
    print(f'Device Name: {torch.cuda.get_device_name()}')
print(f"Using {device} device")

PyTorch version: 2.1.0
**********
_CUDA version: 
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:09:35_Pacific_Daylight_Time_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
**********
CUDNN version: 8801
Available GPU devices: 1
Device Name: NVIDIA GeForce RTX 2080
Using cuda device


In [41]:
class StaticDataset(Dataset):
    def __init__(self, data_dir, sequence_length, transform=None, target_transform=None):
        self.data = torch.tensor(np.load(data_dir)[:, :-1], dtype=torch.float32, device=device)
        self.labels = torch.tensor(np.load(data_dir)[:, -1], dtype=torch.int64, device=device)
        self.transform = transform
        self.target_transform = target_transform
        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        end_idx = idx + self.sequence_length
        sample = self.data[idx:end_idx, :]
        print(sample.shape)
        label = self.labels[idx]
        if self.transform:
            sample = self.transform(sample)
        if self.target_transform:
            label = self.target_transform(label)
        return sample, label

In [42]:
def make_loader(dataset, batch_size=64):
    return DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [43]:
train_dataset = StaticDataset('../data/processed/train_timeline.npy', 16)

In [44]:
train_dataset.__getitem__(1)[0].shape

17
torch.Size([16, 381])


torch.Size([16, 381])

shape of X_train: (n_samples, sequence_length, n_features)

In [45]:
train_loader = make_loader(train_dataset, batch_size=1)

In [46]:
train_dataset.__getitem__(0)

16
torch.Size([16, 381])


(tensor([[-1.6270, -0.3796, -1.0531,  ..., -0.9372, -1.2317, -1.3332],
         [-1.4100, -0.3796, -0.8200,  ..., -0.9372, -1.2317, -1.3332],
         [-1.1931, -0.3796, -0.8200,  ..., -0.9372, -1.2141, -1.3332],
         ...,
         [ 1.1934, -0.3796,  0.8507,  ...,  1.4548,  1.2806,  1.3303],
         [ 1.4103, -0.3796,  1.0061,  ...,  1.6499,  1.5063,  1.4828],
         [ 1.6271, -0.3796,  1.0061,  ...,  1.6852,  1.7649,  1.7559]],
        device='cuda:0'),
 tensor(0, device='cuda:0'))