<div style="text-align: center">
    <a
     href="https://colab.research.google.com/github/LiterallyTheOne/Pytorch_Tutorial/blob/main/src/3_data.ipynb"
     target="_parent">
        <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
    </a>
</div>


# <div style="text-align: center; color: cyan">Data</div>

## <div style="text-align: center; color: lime">Imports</div>

In [1]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split

import numpy as np

from sklearn.datasets import load_iris

## <div style="text-align: center; color: lime">Load iris</div>

In [2]:
# Load the Iris dataset that ships with scikit-learn. The cells below read its
# `data`, `target`, `feature_names` and `target_names` attributes.
iris = load_iris()

In [3]:
# Show the name of each feature column, with the title on its own line.
print("feature names:", iris.feature_names, sep="\n")

feature names:
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [4]:
# Show the class names that the integer targets stand for.
print("target names:", iris.target_names, sep="\n")

target names:
['setosa' 'versicolor' 'virginica']


In [5]:
# The first axis of the feature array counts the samples (rows).
print("Number of samples:", iris.data.shape[0])

Number of samples: 150


In [6]:
# Pick 10 evenly spaced row positions across the whole dataset
# (endpoint=False keeps the last position inside the valid index range).
chosen_indexes = np.linspace(0, len(iris.data), 10, dtype=int, endpoint=False)

# Each call prints a title, the values, and a trailing blank line.
print("Chosen indices:", chosen_indexes, "", sep="\n")
print("10 samples of data:", iris.data[chosen_indexes], "", sep="\n")
print("10 samples of target:", iris.target[chosen_indexes], "", sep="\n")

Chosen indices:
[  0  15  30  45  60  75  90 105 120 135]

10 samples of data:
[[5.1 3.5 1.4 0.2]
 [5.7 4.4 1.5 0.4]
 [4.8 3.1 1.6 0.2]
 [4.8 3.  1.4 0.3]
 [5.  2.  3.5 1. ]
 [6.6 3.  4.4 1.4]
 [5.5 2.6 4.4 1.2]
 [7.6 3.  6.6 2.1]
 [6.9 3.2 5.7 2.3]
 [7.7 3.  6.1 2.3]]

10 samples of target:
[0 0 0 0 1 1 1 2 2 2]



## <div style="text-align: center; color: lime">Make the data ready for the model</div>

In [7]:
# Turn the arrays into tensors. Features are stored as 32-bit floats;
# the targets keep the integer dtype they arrive with.
data = torch.tensor(iris.data, dtype=torch.float)
target = torch.tensor(iris.target)

In [8]:
class IRISClassifier(nn.Module):
    """Stack of three linear layers mapping 4 input features to 3 class scores.

    The layer widths are 4 -> 16 -> 8 -> 3. No activation function is placed
    between the linear layers, and the output is returned as raw scores
    (no softmax is applied).
    """

    def __init__(self):
        super().__init__()

        # Consecutive pairs of widths define one Linear layer each; the layers
        # are created in order, from input side to output side.
        widths = (4, 16, 8, 3)
        stack = [
            nn.Linear(n_in, n_out)
            for n_in, n_out in zip(widths[:-1], widths[1:])
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the scores produced by passing `x` through the layer stack."""
        scores = self.layers(x)
        return scores

In [9]:
# Create the model and print it to inspect the layer stack it contains.
iris_classifier = IRISClassifier()
print(iris_classifier)

IRISClassifier(
  (layers): Sequential(
    (0): Linear(in_features=4, out_features=16, bias=True)
    (1): Linear(in_features=16, out_features=8, bias=True)
    (2): Linear(in_features=8, out_features=3, bias=True)
  )
)


In [10]:
# Forward pass on the 10 sampled rows. No training step appears before this
# cell, so the scores come from the layers' initial weights.
logits = iris_classifier(data[chosen_indexes])
print(logits)

tensor([[-0.2037,  0.2888, -0.8635],
        [-0.2014,  0.3559, -1.0159],
        [-0.2132,  0.2667, -0.8344],
        [-0.2306,  0.2433, -0.8410],
        [-0.4830,  0.1544, -1.2539],
        [-0.6414,  0.1873, -1.7033],
        [-0.5178,  0.2197, -1.4673],
        [-0.8885,  0.1719, -2.2540],
        [-0.7698,  0.2043, -2.0934],
        [-0.9115,  0.1397, -2.2729]], grad_fn=<AddmmBackward0>)


In [11]:
# The predicted class of each row is the position of its largest score.
predictions = torch.argmax(logits, dim=1)

# Print "<predicted> <true>" for every sampled row.
for prediction, true_label in zip(predictions, target[chosen_indexes]):
    print(f"{prediction.item()} {true_label.item()}")

1 0
1 0
1 0
1 0
1 1
1 1
1 1
1 2
1 2
1 2


## <div style="text-align: center; color: lime">Dataset</div>

In [12]:
class IRISDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        data: Indexable collection of feature rows (e.g. a NumPy array or a
            tensor). `data[idx]` must be convertible to a tensor.
        target: Indexable collection of labels with the same length as `data`.

    Raises:
        ValueError: If `data` and `target` have different lengths.
    """

    def __init__(self, data, target):
        super().__init__()
        # A length mismatch would otherwise only surface later as an
        # IndexError, or silently leave some labels unused.
        if len(data) != len(target):
            raise ValueError(
                f"data and target must have the same length, "
                f"got {len(data)} and {len(target)}"
            )
        self.data = data
        self.target = target

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return `(features, label)` for position `idx` as tensors.

        Features are returned as 32-bit floats; the label keeps the dtype of
        the stored target. When the stored values are already tensors of the
        requested dtype, the returned tensors may share memory with them.
        """
        # torch.as_tensor accepts arrays and tensors alike. torch.tensor()
        # would emit a copy-construct UserWarning when given a tensor.
        data = torch.as_tensor(self.data[idx], dtype=torch.float)
        target = torch.as_tensor(self.target[idx])
        return data, target

In [13]:
# Reload the raw dataset and wrap its feature and label arrays in IRISDataset,
# which converts each item to tensors on access.
iris = load_iris()
iris_dataset = IRISDataset(data=iris.data, target=iris.target)

In [14]:
# Fetch only the first (features, label) pair to see what the dataset yields.
one_data, one_target = next(iter(iris_dataset))
print(one_data, one_target, sep="\n")

tensor([5.1000, 3.5000, 1.4000, 0.2000])
tensor(0)


## <div style="text-align: center; color: lime">DataLoader</div>

In [15]:
# Serve the dataset in batches of 10 samples, drawn in shuffled order.
iris_loader = DataLoader(iris_dataset, batch_size=10, shuffle=True)

In [16]:
# Take a single batch from the loader to inspect the batched features and labels.
batch_of_data, batch_of_target = next(iter(iris_loader))
print(batch_of_data, batch_of_target, sep="\n")

tensor([[6.3000, 2.5000, 4.9000, 1.5000],
        [6.7000, 3.0000, 5.2000, 2.3000],
        [6.7000, 3.3000, 5.7000, 2.5000],
        [6.1000, 3.0000, 4.6000, 1.4000],
        [4.5000, 2.3000, 1.3000, 0.3000],
        [5.8000, 2.7000, 5.1000, 1.9000],
        [6.7000, 3.1000, 5.6000, 2.4000],
        [6.0000, 2.7000, 5.1000, 1.6000],
        [7.7000, 3.0000, 6.1000, 2.3000],
        [5.0000, 3.5000, 1.3000, 0.3000]])
tensor([1, 2, 2, 1, 0, 2, 2, 1, 2, 0])


## <div style="text-align: center; color: lime">Train, Validation, Test data</div>

In [17]:
# A dedicated, seeded generator makes the split membership repeatable.
g1 = torch.Generator()
g1.manual_seed(20)

# Fractions of the dataset assigned to train / validation / test.
train_data, val_data, test_data = random_split(
    dataset=iris_dataset,
    lengths=[0.7, 0.2, 0.1],
    generator=g1,
)

In [18]:
# Report how many samples ended up in each split, one line per split.
print(
    f"train_data length: {len(train_data)}",
    f"val_data length: {len(val_data)}",
    f"test_data length: {len(test_data)}",
    sep="\n",
)

train_data length: 105
val_data length: 30
test_data length: 15


In [19]:
# One loader per split, all using batches of 10. Only the training loader
# shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
val_loader = DataLoader(val_data, batch_size=10, shuffle=False)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

In [20]:
# Run the classifier on a single training batch (no training has happened in
# the cells above) and print "<predicted> <true>" for each sample in it.
batch_of_data, batch_of_target = next(iter(train_loader))
logits = iris_classifier(batch_of_data)
predictions = torch.argmax(logits, dim=1)
for prediction, true_label in zip(predictions, batch_of_target):
    print(f"{prediction.item()} {true_label.item()}")

1 1
1 0
1 0
1 1
1 1
1 1
1 0
1 2
1 1
1 0


<div style="text-align: center">

<div>
    @LiterallyTheOne — PhD Candidate in Artificial Intelligence
</div>

<a style="margin: 1em" href="https://literallytheone.github.io">
https://literallytheone.github.io
</a>

</div>
