In [None]:
import numpy as np
import torch

from typing import Optional

from src.tensor import Tensor

In [None]:
from typing import Self
from src.tensor import Tensor

class DataLoader:
    """Serve the rows of a dataset as consecutive mini-batches.

    The input is copied into an internal ``np.ndarray`` (optionally shuffled
    once, at construction time) and handed out in slices of ``batchsize``
    rows, each wrapped in a ``Tensor``. The last batch is shorter when the
    number of rows is not a multiple of ``batchsize``.

    Batches can be fetched by position (``loader[i]``) or by iteration
    (``for batch in loader``). Every call to ``iter()`` rewinds the loader,
    so the same instance can be looped over repeatedly (e.g. once per epoch).
    """

    def __init__(self, data: np.ndarray | Tensor | list, batchsize: int, shuffle: bool = False, shuffle_seed: Optional[int] = None):
        """Copy ``data`` and prepare it for batching.

        Args:
            data: The dataset as a ``Tensor``, an ``np.ndarray`` or a list.
                The loader always works on its own copy, so the caller's
                object is never shuffled or otherwise modified.
            batchsize: Number of rows per batch; must be at least 1.
            shuffle: If true, shuffle the copied rows once, in place.
            shuffle_seed: Seed for the shuffling generator; ``None`` lets
                NumPy pick fresh entropy.

        Raises:
            TypeError: If ``data`` is not one of the supported types.
            ValueError: If ``batchsize`` is smaller than 1.
        """
        if isinstance(data, Tensor):
            # assumes Tensor.value is an np.ndarray - TODO confirm against src.tensor
            self.data: np.ndarray = data.value.copy()
        elif isinstance(data, np.ndarray):
            self.data = data.copy()
        elif isinstance(data, list):
            self.data = np.array(data)
        else:
            raise TypeError(f"Data type {type(data)} not supported")

        # A non-positive batch size would never advance the cursor in
        # __next__ (infinite loop), so reject it up front.
        if batchsize < 1:
            raise ValueError(f"batchsize must be a positive integer, got {batchsize}")

        if shuffle:
            _rng: np.random.Generator = np.random.default_rng(shuffle_seed)
            # Generator.shuffle reorders along the first axis, in place.
            _rng.shuffle(self.data)

        self.batchsize: int = batchsize
        # Row offset of the next batch handed out by __next__.
        self.idx = 0

    def __len__(self) -> int:
        """Return the number of batches, counting a trailing partial batch."""
        # Ceiling division without importing math.
        return -(-len(self.data) // self.batchsize)

    def __getitem__(self, idx) -> Tensor:
        """Return the ``idx``-th batch (not the ``idx``-th row).

        Negative values count from the last batch, as with sequences.

        Raises:
            IndexError: If ``idx`` does not address an existing batch.
        """
        n_batches = len(self)
        position = idx + n_batches if idx < 0 else idx
        if position < 0 or position >= n_batches:
            raise IndexError(f"Index out of bounds, you're picking the {idx}. batch not the {idx} element of the data!")
        start = position * self.batchsize
        # No need to clamp the end of the slice: numpy truncates it for us.
        return Tensor(self.data[start:start + self.batchsize])

    def __iter__(self) -> Self:
        """Rewind to the first batch and return the loader as its own iterator."""
        # Without the rewind an exhausted loader would yield nothing on the
        # next ``for`` loop.
        self.idx = 0
        return self

    def __next__(self) -> Tensor:
        """Return the next batch, or raise ``StopIteration`` once all rows were served."""
        if self.idx >= len(self.data):
            raise StopIteration
        batch = self.data[self.idx:self.idx + self.batchsize]
        self.idx += self.batchsize
        return Tensor(batch)

In [None]:
# Reproducible toy dataset: 16 rows x 4 columns sampled from a normal
# distribution with mean 3 and standard deviation 2, stored as float32.
_rng = np.random.default_rng(0x42)
# Annotated as plain np.ndarray: this annotation is evaluated when the cell
# runs, and ndarray's first type parameter is the shape, not the dtype, so
# the previous single-argument subscript `np.ndarray[np.float32]` was wrong.
data: np.ndarray = _rng.normal(3, 2, size=(16, 4)).astype(np.float32)
batchsize = 4

# Loader over a shuffled copy of `data`; the shuffle uses its own seeded generator.
dl = DataLoader(data, batchsize, shuffle=True, shuffle_seed=0x42)

In [None]:
# Iterate over the loader and print every batch it yields.
for batch in dl:
    print(batch)

In [None]:
import numpy as np
import torch

from typing import Optional, Self

from src.tensor import Tensor
from src.data import DataLoader

# Single seed shared by the experiments in the cells below.
SEED = 0x42

# Reproducible toy dataset: 18 rows x 6 columns sampled from a normal
# distribution with mean 3 and standard deviation 2, stored as float32.
_rng = np.random.default_rng(SEED)
# Annotated as plain np.ndarray: this annotation is evaluated when the cell
# runs, and ndarray's first type parameter is the shape, not the dtype, so
# the previous single-argument subscript `np.ndarray[np.float32]` was wrong.
data: np.ndarray = _rng.normal(3, 2, size=(18, 6)).astype(np.float32)
# Deliberately larger than the 18 available rows - presumably to check how the
# loader handles a batch size that exceeds the dataset.
batchsize = 32

dl_not_shuffled = DataLoader(data, batchsize, shuffle=False)

# Print each batch together with its position in the iteration.
for idx, batch in enumerate(dl_not_shuffled):
    print(idx)
    print(batch)

In [None]:
# Reproducible toy dataset: 16 rows x 6 columns sampled from a normal
# distribution with mean 3 and standard deviation 2, stored as float32.
# SEED comes from the previous cell.
_rng = np.random.default_rng(SEED)
# Annotated as plain np.ndarray: this annotation is evaluated when the cell
# runs, and ndarray's first type parameter is the shape, not the dtype, so
# the previous single-argument subscript `np.ndarray[np.float32]` was wrong.
data: np.ndarray = _rng.normal(3, 2, size=(16, 6)).astype(np.float32)
# 16 rows in batches of 4: the rows divide evenly into batches.
batchsize = 4

dl_not_shuffled = DataLoader(data, batchsize, shuffle=False)

# Print each batch with its position, separated by a blank line.
for idx, batch in enumerate(dl_not_shuffled):
    print(idx)
    print(batch)
    print()

: 

In [None]:
from enum import StrEnum

from src.data import Data

class StatlearningNames(StrEnum):
    Advertising = "Advertising"
    Auto = "Auto"
    College = "College"
    Ch12Ex13 = "Ch12Ex13"
    Credit = "Credit"
    Heart = "Heart"
    Income1 = "Income1"
    Incomev = "Incomev"


class Statlearning(Data):
    """A ``Data`` set restricted to the names listed in ``StatlearningNames``."""

    def __init__(self, data: np.ndarray | Tensor, name: Optional[str] = None):
        """Validate ``name``, initialise the base class and derive the CSV URL.

        Args:
            data: Payload forwarded unchanged to ``Data.__init__``.
            name: Dataset name; must equal one of the ``StatlearningNames``
                values. The default ``None`` is not one of them, so omitting
                the argument raises.

        Raises:
            ValueError: If ``name`` is not a known dataset name.
        """
        known_names = [member.value for member in StatlearningNames]
        name_is_known = name in known_names
        if not name_is_known:
            raise ValueError(f"Name {name} not in Statlearning datasets")

        super().__init__(data, name)

        # presumably Data.__init__ stores the name as self.name - TODO confirm
        base_url = "https://www.statlearning.com/s"
        self.url = f"{base_url}/{self.name}.csv"