In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sn
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

In [2]:
iris = sn.load_dataset("iris")
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
data = torch.tensor(iris[iris.columns[0:4]].values)

labels = torch.zeros(len(iris), dtype=torch.long)
labels[iris.species == "versicolor"] = 1
labels[iris.species == "virginica"] = 2

labels

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2])

# Pytorch's DataLoader
Basically, there are a few steps we should follow to create a DataLoader:
1. Split our data into train and test sets.
2. Convert those sets into pytorch's tensors.
3. Construct a `TensorDataset` using the created tensors from the above step. A TensorDataset for train data and another for test data.
4. Wrap the `TensorDataset`s in `DataLoader`s.

## Split our data into train and test sets
Note: Our data has been converted to pytorch's Tensor from previous code cell

In [4]:
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, train_size=0.8
)

In [5]:
print(f"Shape of train data: {train_data.shape}")
print(f"Shape of test data: {test_data.shape}")
print(f"Shape of train labels: {train_labels.shape}")
print(f"Shape of test labels: {test_labels.shape}")

Shape of train data: torch.Size([120, 4])
Shape of test data: torch.Size([30, 4])
Shape of train labels: torch.Size([120])
Shape of test labels: torch.Size([30])


## Create TensorDatasets

In [6]:
train_data = TensorDataset(train_data, train_labels)
test_data = TensorDataset(test_data, test_labels)

In [7]:
test_data.tensors

(tensor([[4.4000, 3.0000, 1.3000, 0.2000],
         [6.9000, 3.1000, 4.9000, 1.5000],
         [6.3000, 3.4000, 5.6000, 2.4000],
         [4.9000, 2.4000, 3.3000, 1.0000],
         [6.0000, 2.2000, 5.0000, 1.5000],
         [6.1000, 3.0000, 4.9000, 1.8000],
         [7.2000, 3.0000, 5.8000, 1.6000],
         [5.9000, 3.0000, 5.1000, 1.8000],
         [7.2000, 3.2000, 6.0000, 1.8000],
         [5.7000, 2.9000, 4.2000, 1.3000],
         [5.6000, 3.0000, 4.1000, 1.3000],
         [6.7000, 3.1000, 4.4000, 1.4000],
         [6.6000, 2.9000, 4.6000, 1.3000],
         [7.1000, 3.0000, 5.9000, 2.1000],
         [5.8000, 2.8000, 5.1000, 2.4000],
         [5.7000, 2.8000, 4.1000, 1.3000],
         [5.0000, 3.5000, 1.6000, 0.6000],
         [5.7000, 2.8000, 4.5000, 1.3000],
         [5.4000, 3.9000, 1.3000, 0.4000],
         [6.0000, 3.0000, 4.8000, 1.8000],
         [6.9000, 3.1000, 5.4000, 2.1000],
         [6.9000, 3.1000, 5.1000, 2.3000],
         [4.9000, 3.1000, 1.5000, 0.2000],
         [4

In [8]:
len(test_data.tensors[0])

30

## Create DataLoaders

In [9]:
train_loader = DataLoader(train_data, batch_size=12, shuffle=True)
test_loader = DataLoader(test_data, batch_size=len(test_data.tensors[0]))

In [10]:
for X, y in train_loader:
    print(f"Train data: {X}, train labels: {y}")

Train data: tensor([[5.6000, 2.9000, 3.6000, 1.3000],
        [7.3000, 2.9000, 6.3000, 1.8000],
        [6.8000, 3.2000, 5.9000, 2.3000],
        [6.5000, 3.0000, 5.5000, 1.8000],
        [5.9000, 3.0000, 4.2000, 1.5000],
        [6.2000, 3.4000, 5.4000, 2.3000],
        [4.7000, 3.2000, 1.6000, 0.2000],
        [5.2000, 3.5000, 1.5000, 0.2000],
        [6.6000, 3.0000, 4.4000, 1.4000],
        [5.0000, 2.3000, 3.3000, 1.0000],
        [5.5000, 3.5000, 1.3000, 0.2000],
        [5.0000, 3.2000, 1.2000, 0.2000]], dtype=torch.float64), train labels: tensor([1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 0, 0])
Train data: tensor([[5.1000, 3.8000, 1.5000, 0.3000],
        [5.5000, 2.6000, 4.4000, 1.2000],
        [6.4000, 3.2000, 4.5000, 1.5000],
        [6.7000, 3.3000, 5.7000, 2.5000],
        [4.6000, 3.6000, 1.0000, 0.2000],
        [5.4000, 3.9000, 1.7000, 0.4000],
        [4.3000, 3.0000, 1.1000, 0.1000],
        [6.8000, 2.8000, 4.8000, 1.4000],
        [5.5000, 2.3000, 4.0000, 1.3000],
        [6.3

In [11]:
for X, y in test_loader:
    print(f"Test data: {X}, test labels: {y}")

Test data: tensor([[4.4000, 3.0000, 1.3000, 0.2000],
        [6.9000, 3.1000, 4.9000, 1.5000],
        [6.3000, 3.4000, 5.6000, 2.4000],
        [4.9000, 2.4000, 3.3000, 1.0000],
        [6.0000, 2.2000, 5.0000, 1.5000],
        [6.1000, 3.0000, 4.9000, 1.8000],
        [7.2000, 3.0000, 5.8000, 1.6000],
        [5.9000, 3.0000, 5.1000, 1.8000],
        [7.2000, 3.2000, 6.0000, 1.8000],
        [5.7000, 2.9000, 4.2000, 1.3000],
        [5.6000, 3.0000, 4.1000, 1.3000],
        [6.7000, 3.1000, 4.4000, 1.4000],
        [6.6000, 2.9000, 4.6000, 1.3000],
        [7.1000, 3.0000, 5.9000, 2.1000],
        [5.8000, 2.8000, 5.1000, 2.4000],
        [5.7000, 2.8000, 4.1000, 1.3000],
        [5.0000, 3.5000, 1.6000, 0.6000],
        [5.7000, 2.8000, 4.5000, 1.3000],
        [5.4000, 3.9000, 1.3000, 0.4000],
        [6.0000, 3.0000, 4.8000, 1.8000],
        [6.9000, 3.1000, 5.4000, 2.1000],
        [6.9000, 3.1000, 5.1000, 2.3000],
        [4.9000, 3.1000, 1.5000, 0.2000],
        [4.9000, 2.5000