# dataset

> This module provides a dataset manager that is used to retrieve PyTorch datasets or to register new ones. It also defines the dataset for the CAT scans.

In [None]:
#| default_exp dataset

In [None]:
#| hide
from nbdev.showdoc import *
from fastcore.test import *
from fastcore.utils import *

In [None]:
#| export
from torch.utils.data import Dataset

## PyTorch dataset of CAT scans

In [None]:
#| export
class CATDataset(Dataset):
    "Pytorch dataset to handle CAT scans data (placeholder: data loading is not implemented yet)"

    def __init__(self):
        # Nothing to set up yet: the scan-loading logic still has to be written.
        pass

    def __len__(self):
        "Number of samples in the dataset; not implemented yet."
        # Fail explicitly: returning `None` would make `len(ds)` die with an obscure `TypeError`.
        raise NotImplementedError("CATDataset.__len__ is not implemented yet.")

    def __getitem__(self, idx):  # Index of the requested sample
        "Sample stored at position `idx`; not implemented yet."
        # The index parameter is required by the indexing protocol: without it `ds[i]`
        # raises a `TypeError` about the number of positional arguments.
        raise NotImplementedError("CATDataset.__getitem__ is not implemented yet.")

## The dataset manager class

Now that the `CATDataset` is defined, we need a manager class that lets us retrieve specific instances of that dataset and, if needed, add new ones.

In [None]:
#| export
class DatasetManager:
    "Registry of pytorch dataset classes, stored in a dictionary together with the kwargs used to build them."

    def __init__(self) -> None:
        # Maps a dataset key to a `(dataset_class, kwargs)` pair; a new manager starts empty.
        self.dataset_dict = dict()

A `DatasetManager` instance is meant to already contain the reference to the `CATDataset`, in two flavours: one complete version and one small dataset for testing purposes. For now the dataset dictionary is still empty, as the check below shows.

In [None]:
#|echo: false
manager = DatasetManager()
# Iterating a dict yields its keys, so they can be joined directly.
print("The existing keys are:\n" + "\n".join(manager.dataset_dict))

# A freshly created manager has no registered dataset.
test_eq(manager.dataset_dict.keys(), [])

The existing keys are:



Expand the handled datasets either by adding them to an instance of the `DatasetManager` via the `add_dataset` method, or by inheriting from the class and extending the dataset dictionary.

In [None]:
#| export
@patch
def get_dataset(self:DatasetManager,
                dataset:str): # A key of the dataset dictionary
    "Instantiate and return the dataset registered under the key `dataset`."
    if dataset not in self.dataset_dict:
        # Raised explicitly rather than through `assert`, so the check is not stripped
        # by `python -O`; the exception type and message stay the same for callers.
        raise AssertionError(f'{dataset} is not an existing dataset, choose one from {self.dataset_dict.keys()}.')
    # Each entry is a `(dataset_class, kwargs)` pair; the class is built with its kwargs.
    ds_class, kwargs = self.dataset_dict[dataset]
    return ds_class(**kwargs)

TODO: add an example of retrieving the `CATDataset`.

In [None]:
# TODO: add an example that retrieves the `CATDataset` through `get_dataset`.
pass

In [None]:
#| export
@patch
def add_dataset(self:DatasetManager,
                dataset_key:str,        # The key to access the dataset
                dataset_class:Dataset,  # The class of the pytorch dataset
                dataset_kwargs:dict):   # The parameters needed to instantiate the dataset
    "Register a dataset class and its kwargs under `dataset_key`; an existing entry with that key is replaced."
    # Entries are stored as `(class, kwargs)` pairs.
    entry = (dataset_class, dataset_kwargs)
    self.dataset_dict[dataset_key] = entry

In [None]:
#| hide
# Run nbdev's export step.
import nbdev

nbdev.nbdev_export()