# Datasets

In [1]:
from torchhk import *

## 1. Without a Validation Set

In [2]:
# Build the dataset wrapper for MNIST using only the dataset name
# (no validation indices or label filter are passed in this section).
mnist = Datasets("MNIST")

Data Loaded!
Train Data Length : 60000
Test Data Length : 10000


In [3]:
# Unpack the (train, test) pairs exposed by the wrapper: datasets, sizes, loaders.
train_data, test_data = mnist.get_data()
train_len, test_len = mnist.get_len()
train_loader, test_loader = mnist.get_loader(batch_size=100)

# Print a header with the dataset name, then one identically formatted
# section per split.
separator = "------------------------"
print(separator)
print(mnist.data_name)
for split_name, split_data, split_len, split_loader in (
    ("[Train]", train_data, train_len, train_loader),
    ("[Test]", test_data, test_len, test_loader),
):
    print(separator)
    print(split_name)
    print("* Data :", split_data)
    print("* Length :", split_len)
    print("* Loader :", split_loader)

------------------------
MNIST
------------------------
[Train]
* Data : Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor()
* Length : 60000
* Loader : <torch.utils.data.dataloader.DataLoader object at 0x0000019AC5D2B2E8>
------------------------
[Test]
* Data : Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()
* Length : 10000
* Loader : <torch.utils.data.dataloader.DataLoader object at 0x0000019AC5D2B358>


## 2. With a Validation Set

In [4]:
# Indices 1000..1999 are handed to the wrapper as the validation split
# (presumably positions in the original training set — confirm against torchhk).
val_indices = list(range(1000, 2000))
mnist_val = Datasets("MNIST", val_idx=val_indices)

Data Loaded (w/ Validation Set)!
Train Data Length : 59000
Val Data Length : 1000
Test Data Length : 10000


In [5]:
# With val_idx set, each getter returns a (train, val, test) triple.
train_data, val_data, test_data = mnist_val.get_data()
train_len, val_len, test_len = mnist_val.get_len()
train_loader, val_loader, test_loader = mnist_val.get_loader(batch_size=100)

# Print a header with the dataset name, then one identically formatted
# section per split.
separator = "------------------------"
print(separator)
# Read the name from `mnist_val`, the object this cell actually inspects.
# The cell previously printed `mnist.data_name`, which silently depended on
# the unrelated `mnist` variable from section 1.
print(mnist_val.data_name)
for split_name, split_data, split_len, split_loader in (
    ("[Train]", train_data, train_len, train_loader),
    ("[Val]", val_data, val_len, val_loader),
    ("[Test]", test_data, test_len, test_loader),
):
    print(separator)
    print(split_name)
    print("* Data :", split_data)
    print("* Length :", split_len)
    print("* Loader :", split_loader)

------------------------
MNIST
------------------------
[Train]
* Data : <torch.utils.data.dataset.Subset object at 0x0000019AC5D29978>
* Length : 59000
* Loader : <torch.utils.data.dataloader.DataLoader object at 0x0000019AC5D4D7F0>
------------------------
[Val]
* Data : <torch.utils.data.dataset.Subset object at 0x0000019AC5D29940>
* Length : 1000
* Loader : <torch.utils.data.dataloader.DataLoader object at 0x0000019AC5D4D860>
------------------------
[Test]
* Data : Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()
* Length : 10000
* Loader : <torch.utils.data.dataloader.DataLoader object at 0x0000019AC5D4D908>


## 3. Label Filtering

In [6]:
# NOTE(review): this rebinds `mnist` to a CIFAR10 wrapper, replacing the MNIST
# object from section 1; the name is kept because the following cell reads it.
# The dict presumably maps original class labels to new labels
# (0 -> 0, 1 -> 1, 7 -> 2) and drops every other class — confirm against torchhk.
label_map = {0: 0, 1: 1, 7: 2}
mnist = Datasets("CIFAR10", label_filter=label_map)

Files already downloaded and verified
Files already downloaded and verified
Data Loaded! (w/ Label Filtering)
Train Data Length : 15000
Test Data Length : 3000


In [7]:
# `mnist` here is the label-filtered CIFAR10 wrapper created in the previous cell.
train_data, test_data = mnist.get_data()
train_len, test_len = mnist.get_len()
train_loader, test_loader = mnist.get_loader(batch_size=100)

# Print a header with the dataset name, then one identically formatted
# section per split.
separator = "------------------------"
print(separator)
print(mnist.data_name)
for split_name, split_data, split_len, split_loader in (
    ("[Train]", train_data, train_len, train_loader),
    ("[Test]", test_data, test_len, test_loader),
):
    print(separator)
    print(split_name)
    print("* Data :", split_data)
    print("* Length :", split_len)
    print("* Loader :", split_loader)

------------------------
CIFAR10
------------------------
[Train]
* Data : <torch.utils.data.dataset.Subset object at 0x0000019AC5D29B00>
* Length : 15000
* Loader : <torch.utils.data.dataloader.DataLoader object at 0x0000019AC5D4D710>
------------------------
[Test]
* Data : <torch.utils.data.dataset.Subset object at 0x0000019AC5D29A90>
* Length : 3000
* Loader : <torch.utils.data.dataloader.DataLoader object at 0x0000019ACCA1B358>
