# <div style="text-align: center; color: cyan">Data</div>

## <div style="text-align: center; color: lime">Imports</div>

In [18]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split

import numpy as np

from sklearn.datasets import load_iris

## <div style="text-align: center; color: lime">Load iris</div>

In [2]:
iris = load_iris()

In [3]:
print("feature names:")
print(iris.feature_names)

feature names:
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [4]:
print("target names:")
print(iris.target_names)

target names:
['setosa' 'versicolor' 'virginica']


In [5]:
print("Number of samples:", len(iris.data))

Number of samples: 150


In [6]:
chosen_indexes = np.linspace(0, len(iris.data), 10, dtype=int, endpoint=False)
print("Chosen indices:")
print(chosen_indexes)
print()

print("10 samples of data:")
print(iris.data[chosen_indexes])
print()

print("10 samples of target:")
print(iris.target[chosen_indexes])
print()

Chosen indices:
[  0  15  30  45  60  75  90 105 120 135]

10 samples of data:
[[5.1 3.5 1.4 0.2]
 [5.7 4.4 1.5 0.4]
 [4.8 3.1 1.6 0.2]
 [4.8 3.  1.4 0.3]
 [5.  2.  3.5 1. ]
 [6.6 3.  4.4 1.4]
 [5.5 2.6 4.4 1.2]
 [7.6 3.  6.6 2.1]
 [6.9 3.2 5.7 2.3]
 [7.7 3.  6.1 2.3]]

10 samples of target:
[0 0 0 0 1 1 1 2 2 2]



## <div style="text-align: center; color: lime">Make the data Ready for the model</div>

In [7]:
data = torch.tensor(iris.data).to(torch.float)
target = torch.tensor(iris.target).to(torch.float)

In [8]:
class IRISClassifier(nn.Module):
    def __init__(self):
        super().__init__()

        self.layers = nn.Sequential(
            nn.Linear(4, 16),
            nn.Linear(16, 8),
            nn.Linear(8, 3),
        )

    def forward(self, x):
        return self.layers(x)

In [9]:
iris_classifier = IRISClassifier()
print(iris_classifier)

IRISClassifier(
  (layers): Sequential(
    (0): Linear(in_features=4, out_features=16, bias=True)
    (1): Linear(in_features=16, out_features=8, bias=True)
    (2): Linear(in_features=8, out_features=3, bias=True)
  )
)


In [10]:
logits = iris_classifier(data[chosen_indexes])
print(logits)

tensor([[1.2440, 0.8722, 0.5158],
        [1.5019, 0.9612, 0.5241],
        [1.1093, 0.8960, 0.5363],
        [1.0947, 0.8504, 0.5192],
        [0.8081, 1.3979, 0.7693],
        [1.2004, 1.7546, 0.8913],
        [0.9556, 1.6719, 0.8633],
        [1.2313, 2.3932, 1.1527],
        [1.1910, 2.1471, 1.0338],
        [1.2615, 2.2873, 1.1066]], grad_fn=<AddmmBackward0>)


In [11]:
predictions = logits.argmax(dim=1)
for prediction, true_label in zip(predictions, target[chosen_indexes]):
    print(prediction.item(), true_label.item())

0 0.0
0 0.0
0 0.0
0 0.0
1 1.0
1 1.0
1 1.0
1 2.0
1 2.0
1 2.0


## <div style="text-align: center; color: lime">Dataset</div>

In [12]:
class IRISDataset(Dataset):
    def __init__(self, data, target):
        super().__init__()
        self.data = data
        self.target = target

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.target[idx]

In [13]:
iris_dataset = IRISDataset(data, target)

In [14]:
for one_data, one_target in iris_dataset:
    print(one_data)
    print(one_target)
    break

tensor([5.1000, 3.5000, 1.4000, 0.2000])
tensor(0.)


## <div style="text-align: center; color: lime">DataLoader</div>

In [15]:
iris_loader = DataLoader(iris_dataset, batch_size=10, shuffle=True)

In [17]:
for batch_of_data, batch_of_target in iris_loader:
    print(batch_of_data)
    print(batch_of_target)
    break

tensor([[6.4000, 2.9000, 4.3000, 1.3000],
        [6.4000, 3.1000, 5.5000, 1.8000],
        [7.7000, 2.6000, 6.9000, 2.3000],
        [4.8000, 3.4000, 1.9000, 0.2000],
        [4.6000, 3.2000, 1.4000, 0.2000],
        [6.7000, 3.1000, 4.4000, 1.4000],
        [6.2000, 2.8000, 4.8000, 1.8000],
        [6.1000, 3.0000, 4.6000, 1.4000],
        [5.7000, 2.8000, 4.1000, 1.3000],
        [5.4000, 3.9000, 1.3000, 0.4000]])
tensor([1., 2., 2., 0., 0., 1., 2., 1., 1., 0.])


## <div style="text-align: center; color: lime">Train, Validation, Test data</div>

In [22]:
g1 = torch.Generator().manual_seed(20)
train_data, val_data, test_data = random_split(iris_dataset, [0.7, 0.2, 0.1], g1)

In [23]:
print("train_data length:", len(train_data))
print("val_data length:", len(val_data))
print("test_data length:", len(test_data))

train_data length: 105
val_data length: 30
test_data length: 15


In [20]:
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
val_loader = DataLoader(val_data, batch_size=10, shuffle=False)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

In [21]:
for batch_of_data, batch_of_target in train_loader:
    logits = iris_classifier(batch_of_data)

    predictions = logits.argmax(dim=1)
    for prediction, true_label in zip(predictions, batch_of_target):
        print(prediction.item(), true_label.item())
    break

1 1.0
1 2.0
0 0.0
1 1.0
0 0.0
1 1.0
1 1.0
0 0.0
1 2.0
1 2.0


<div style="text-align: center">

<div>
    @LiterallyTheOne — PhD Candidate in Artificial Intelligence
</div>

<a style="margin: 1em" href="https://literallytheone.github.io">
https://literallytheone.github.io
</a>

</div>
