In [30]:
import os
import gymnos
import numpy as np

In [48]:
from pprint import pprint
from gymnos.utils.data import DataLoader
from gymnos.datasets.dataset import HDF5Dataset
from gymnos.services.download_manager import DownloadManager

In [3]:
# Get the Boston Housing dataset object from gymnos by name.
# The data itself is fetched by the download_and_prepare() call in a later cell.
boston_housing = gymnos.datasets.load("boston_housing")

In [10]:
# Show the dataset's feature and label specification (shape and dtype).
boston_housing.info()

Features <Array <shape=[13], dtype=<class 'numpy.float64'>>>, Labels <Array <shape=[], dtype=<class 'numpy.float32'>>>

In [5]:
# Download and prepare the dataset through a DownloadManager.
# NOTE(review): download_dir is presumably where fetched files are stored,
# relative to the notebook's working directory — confirm against gymnos docs.
dl_manager = DownloadManager(download_dir="downloads")
boston_housing.download_and_prepare(dl_manager)

In [24]:
# Number of samples in the dataset.
len(boston_housing)

506

Load a single sample (features and label):

In [49]:
# Indexing the dataset returns one sample as a (features, label) tuple.
pprint(boston_housing[0])

(array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
       6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
       4.980e+00]),
 24.0)


## Load data into memory

In [6]:
# Materialize the whole dataset at once: X holds the features, y the labels.
X, y = boston_housing.as_numpy()

In [7]:
# Features: one row per sample, one column per feature.
X.shape

(506, 13)

In [8]:
# Labels: one scalar per sample.
y.shape

(506,)

## Load data in batches

In [12]:
# Wrap the dataset in a loader that serves batches of 32 samples.
# drop_last=False keeps the final batch even though it is smaller than 32.
data_loader = DataLoader(boston_housing, batch_size=32, drop_last=False)

In [14]:
# The loader supports indexing: fetch the first batch as (features, labels).
X_batch, y_batch = data_loader[0]

In [15]:
# Shapes of the first batch: batch_size rows of features and batch_size labels.
X_batch.shape, y_batch.shape

((32, 13), (32,))

In [22]:
# Iterate over every batch and report the shape of its features and labels.
# The loop uses its own variable names so it does not rebind X_batch / y_batch
# (the first batch fetched earlier); otherwise those names would end up holding
# the last, shorter batch and re-running the shape cell would show stale values.
print("Batch\tFeatures\tLabels")
for batch_idx, (batch_features, batch_labels) in enumerate(data_loader):
    print("{}.\t{}\t{}".format(batch_idx, batch_features.shape, batch_labels.shape))

Batch	Features	Labels
0.	(32, 13)	(32,)
1.	(32, 13)	(32,)
2.	(32, 13)	(32,)
3.	(32, 13)	(32,)
4.	(32, 13)	(32,)
5.	(32, 13)	(32,)
6.	(32, 13)	(32,)
7.	(32, 13)	(32,)
8.	(32, 13)	(32,)
9.	(32, 13)	(32,)
10.	(32, 13)	(32,)
11.	(32, 13)	(32,)
12.	(32, 13)	(32,)
13.	(32, 13)	(32,)
14.	(32, 13)	(32,)
15.	(26, 13)	(26,)


In [17]:
# Floor division: number of *full* 32-sample batches in the 506-sample dataset.
# (In Python 3, `/` is true division and would give 15.8125.)
# The 26 leftover samples form one extra, shorter batch because drop_last=False.
506 // 32

15

## HDF5

### Export dataset

In [33]:
# Directory (relative to the working directory) used for the exported HDF5 files.
HDF5_DATASETS_DIR = "hdf5_datasets"

In [34]:
# Create the export directory; exist_ok=True makes this cell safe to re-run.
os.makedirs(HDF5_DATASETS_DIR, exist_ok=True)

In [35]:
# Export the dataset to a single HDF5 file inside the export directory.
boston_housing.to_hdf5(os.path.join(HDF5_DATASETS_DIR, "boston_housing.h5"))

100%|██████████| 1/1 [00:01<00:00,  1.53s/it]


### Import dataset

In [36]:
# Remove the original dataset object; the next cell rebinds the name to the
# copy read back from the HDF5 file.
del boston_housing

In [38]:
# Reopen the exported file as a dataset (same path used for the export).
boston_housing = HDF5Dataset(os.path.join(HDF5_DATASETS_DIR, "boston_housing.h5"))

In [39]:
# Feature/label specification of the HDF5-backed dataset; it should match the
# info() output of the original dataset shown earlier.
boston_housing.info()

Features <Array <shape=[13], dtype=<class 'numpy.float64'>>>, Labels <Array <shape=[], dtype=<class 'numpy.float32'>>>

In [40]:
# Sample count after the round trip; should equal the original 506.
len(boston_housing)

506

In [41]:
# First sample read back from HDF5 as a (features, label) tuple.
boston_housing[0]

(array([6.320e-03, 1.800e+01, 2.310e+00, 0.000e+00, 5.380e-01, 6.575e+00,
        6.520e+01, 4.090e+00, 1.000e+00, 2.960e+02, 1.530e+01, 3.969e+02,
        4.980e+00]), 24.0)

In [42]:
# Load the HDF5-backed dataset fully into memory.
# Note: this rebinds X and y from the earlier "Load data into memory" section.
X, y = boston_housing.as_numpy()

In [43]:
# Feature matrix shape after the HDF5 round trip.
X.shape

(506, 13)

In [44]:
# Label vector shape after the HDF5 round trip.
y.shape

(506,)