# Dataset and DataLoader

In [1]:
from sklearn.datasets import make_classification
import torch

### Step 1: Create a Classification Dataset using Sklearn

In [2]:
# Tiny synthetic two-class problem: 10 samples, 2 features (both informative,
# none redundant). The fixed random_state keeps the generated data repeatable.
X, y = make_classification(
    random_state=42,
    n_classes=2,
    n_samples=10,
    n_features=2,
    n_redundant=0,
    n_informative=2,
)

In [3]:
# Check the dimensions of the generated feature array.
X.shape

(10, 2)

In [4]:
# Show the raw feature values (the array has only 10 rows).
X

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

### Step 2: Convert NumPy arrays to tensors

In [5]:
# Bind the converted tensors to names; calling torch.from_numpy without an
# assignment discards the result, leaving nothing for later cells to use.
X_tensor = torch.from_numpy(X)
y_tensor = torch.from_numpy(y)

# Last expression of the cell: display the label tensor.
y_tensor

tensor([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

### Step 3: Import the `Dataset` and `DataLoader` classes

In [7]:
from torch.utils.data import Dataset, DataLoader

### Step 4: Create a custom `Dataset` class

In [8]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        features: Indexable collection of samples. Array-likes exposing
            ``.shape`` are measured along their first axis; other sequences
            are measured with ``len``.
        labels: Indexable collection holding one target per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` hold a different number
            of entries.
    """

    def __init__(self, features, labels):
        n_features = self._num_samples(features)
        n_labels = self._num_samples(labels)
        # Fail fast: a mismatch would otherwise only show up later as an
        # IndexError in the middle of iteration, or as silently unused labels.
        if n_features != n_labels:
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {n_features} and {n_labels}"
            )
        self.features = features
        self.labels = labels

    @staticmethod
    def _num_samples(data):
        """Return the number of entries along the first axis of ``data``."""
        shape = getattr(data, "shape", None)
        # Prefer shape[0] for array-likes; fall back to len() for plain sequences.
        return shape[0] if shape is not None else len(data)

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self._num_samples(self.features)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

In [9]:
# Wrap the feature array and the label array in a CustomDataset instance.
dataset = CustomDataset(features=X, labels=y)

In [10]:
# len() is answered by CustomDataset.__len__.
print(len(dataset))

10


In [15]:
# Indexing is answered by CustomDataset.__getitem__, which returns a (features, label) pair.
print(dataset[3])

(array([-0.72063436, -0.96059253]), 0)


### Step 5: Create a `DataLoader` object

In [16]:
# Seed the RNG used for shuffling so the batch order can be reproduced after a
# kernel restart; without it, shuffle=True gives a different order on each run.
shuffle_rng = torch.Generator().manual_seed(42)

# Batches of two samples, drawn in shuffled order.
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, generator=shuffle_rng)

In [23]:
# Walk through the loader once, printing every mini-batch followed by a divider.
for batch_features, batch_labels in dataloader:
    print(
        f"Features: {batch_features}",
        f"Labels: {batch_labels}",
        "-" * 75,
        sep="\n",
    )

Features: tensor([[ 1.7273, -1.1858],
        [ 1.8997,  0.8344]], dtype=torch.float64)
Labels: tensor([1, 1])
---------------------------------------------------------------------------
Features: tensor([[-1.9629, -0.9923],
        [-1.1402, -0.8388]], dtype=torch.float64)
Labels: tensor([0, 0])
---------------------------------------------------------------------------
Features: tensor([[ 1.0683, -0.9701],
        [-0.7206, -0.9606]], dtype=torch.float64)
Labels: tensor([1, 0])
---------------------------------------------------------------------------
Features: tensor([[ 1.7774,  1.5116],
        [-0.5872, -1.9717]], dtype=torch.float64)
Labels: tensor([1, 0])
---------------------------------------------------------------------------
Features: tensor([[-2.8954,  1.9769],
        [-0.9382, -0.5430]], dtype=torch.float64)
Labels: tensor([0, 1])
---------------------------------------------------------------------------
