# <div style="text-align: center; color: cyan">Train 2</div>

## <div style="text-align: center; color: lime">Imports</div>

In [1]:
import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, random_split

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy as np

## <div style="text-align: center; color: lime">Modular train step and validation step</div>

In [None]:
# Execute the external script train_v1.py with IPython's %run magic
# (the script's source is not part of this notebook).
%run train_v1.py

In [None]:
# Execute the external script train_v2.py with IPython's %run magic
# (the script's source is not part of this notebook).
%run train_v2.py

## <div style="text-align: center; color: lime">Better splitting</div>

In [2]:
class IRISDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Every item is returned as a ``(features, label)`` tuple of tensors with
    explicit dtypes: ``torch.float32`` features and ``torch.long`` labels.

    Args:
        data: Indexable collection of feature rows (e.g. a 2-D NumPy array);
            ``data[idx]`` must be convertible with ``torch.tensor``.
        target: Indexable collection of integer class labels aligned with
            ``data`` (same length, same order).
    """

    def __init__(self, data, target):
        super().__init__()
        self.data = data
        self.target = target

    def __len__(self):
        """Return the number of samples, i.e. ``len(data)``."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` tensor pair stored at ``idx``."""
        # Build the float32 tensor directly instead of converting afterwards.
        data = torch.tensor(self.data[idx], dtype=torch.float)
        # Pin labels to int64: class-index losses such as nn.CrossEntropyLoss
        # require long targets, while the source array may be int32 (NumPy's
        # default integer is 32-bit on some platforms).
        target = torch.tensor(self.target[idx], dtype=torch.long)
        return data, target


# -------------------[ Load the data ]-------------------
iris = load_iris()

# Pair the feature matrix with the label vector in a single Dataset object
iris_dataset = IRISDataset(data=iris.data, target=iris.target)

# -------------------[ Split the data to train, validation, and test ]-------------------
# A dedicated, seeded generator keeps the random 70/20/10 partition repeatable
g1 = torch.Generator()
g1.manual_seed(20)
train_data, val_data, test_data = random_split(
    dataset=iris_dataset,
    lengths=[0.7, 0.2, 0.1],
    generator=g1,
)

In [3]:
# Tally how many samples of each class (0, 1, 2) ended up in the training split
label_count = {label: 0 for label in range(3)}

for data, target in train_data:
    class_id = target.item()  # scalar tensor -> plain Python int usable as a dict key
    label_count[class_id] = label_count[class_id] + 1

print(f"train label count: {label_count}")

train label count: {0: 33, 1: 39, 2: 33}


In [4]:
# Tally how many samples of each class (0, 1, 2) ended up in the validation split
label_count = {label: 0 for label in range(3)}

for data, target in val_data:
    class_id = target.item()  # scalar tensor -> plain Python int usable as a dict key
    label_count[class_id] = label_count[class_id] + 1

print(f"validation label count: {label_count}")

validation label count: {0: 13, 1: 6, 2: 11}


In [5]:
# Tally how many samples of each class (0, 1, 2) ended up in the test split
label_count = {label: 0 for label in range(3)}

for data, target in test_data:
    class_id = target.item()  # scalar tensor -> plain Python int usable as a dict key
    label_count[class_id] = label_count[class_id] + 1

print(f"test label count: {label_count}")

test label count: {0: 4, 1: 5, 2: 6}


In [6]:
iris = load_iris()

# First pass: hold out 30% of the samples, stratified on the class labels
train_subset, val_subset, train_target, val_target = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=42, stratify=iris.target
)
# Second pass: carve the test set out of that held-out portion, again stratified;
# the remainder stays as the validation set
val_subset, test_subset, val_target, test_target = train_test_split(
    val_subset, val_target, test_size=0.33, random_state=42, stratify=val_target
)

# Group the splits by name so both reports below iterate in the same order
split_features = {"train": train_subset, "val": val_subset, "test": test_subset}
split_targets = {"train": train_target, "val": val_target, "test": test_target}

print("size of each subset: ")
for split_name, features in split_features.items():
    print(f"\t{split_name}: {features.shape[0]}")

print("target distribution:")
for split_name, targets in split_targets.items():
    print(f"\t{split_name}: {np.unique(targets, return_counts=True)}")

size of each subset: 
	train: 105
	val: 30
	test: 15
target distribution:
	train: (array([0, 1, 2]), array([35, 35, 35]))
	val: (array([0, 1, 2]), array([10, 10, 10]))
	test: (array([0, 1, 2]), array([5, 5, 5]))


In [7]:
# Wrap each (features, labels) pair from the stratified split in an IRISDataset
train_data = IRISDataset(data=train_subset, target=train_target)
val_data = IRISDataset(data=val_subset, target=val_target)
test_data = IRISDataset(data=test_subset, target=test_target)

In [8]:
# Execute the external script train_v3.py; the training log printed below comes from it.
# NOTE(review): plain %run (no -i) executes the script in a fresh namespace, so it
# cannot see the train_data / val_data / test_data objects built in this notebook --
# presumably the script rebuilds its own splits; confirm, or use `%run -i`.
%run train_v3.py

mps
--------------------
epoch: 0
train: 
	loss: 1.5452
	accuracy: 0.3333
validation: 
	loss: 1.3648
	accuracy: 0.3333
--------------------
epoch: 1
train: 
	loss: 1.2579
	accuracy: 0.3333
validation: 
	loss: 1.1434
	accuracy: 0.3333
--------------------
epoch: 2
train: 
	loss: 1.0770
	accuracy: 0.2952
validation: 
	loss: 1.0207
	accuracy: 0.5333
--------------------
epoch: 3
train: 
	loss: 0.9885
	accuracy: 0.6095
validation: 
	loss: 0.9602
	accuracy: 0.6333
--------------------
epoch: 4
train: 
	loss: 0.9380
	accuracy: 0.6286
validation: 
	loss: 0.9168
	accuracy: 0.6333
--------------------
test: 
	loss: 0.8766
	accuracy: 0.6667


## <div style="text-align: center; color: lime">Standard Scaler</div>

In [None]:
# Per-feature statistics of the raw training features.
# Use the NumPy matrix `train_subset`: by this point `train_data` is an IRISDataset,
# which has no `.mean` / `.std` methods.
print(f"Mean of the features:\n\t {train_subset.mean(axis=0)}")
print(f"Standard deviation of the features:\n\t {train_subset.std(axis=0)}")

In [None]:
# Standardise the features using statistics learned from the training split only,
# then apply that same transformation to the validation and test splits.
# The scaler is given the NumPy feature matrices (`*_subset`), not the IRISDataset
# wrappers bound to `train_data` / `val_data` / `test_data`.
scaler = StandardScaler()
scaler.fit(train_subset)

train_data_normalized = scaler.transform(train_subset)
val_data_normalized = scaler.transform(val_subset)
test_data_normalized = scaler.transform(test_subset)

# Only the training split is expected to come out at exactly mean 0 / std 1;
# validation and test are scaled with the training statistics.
print("Mean of the features after scaling:")
print(f"\ttrain: {train_data_normalized.mean(axis=0)}")
print(f"\tval: {val_data_normalized.mean(axis=0)}")
print(f"\ttest: {test_data_normalized.mean(axis=0)}")
print("Standard deviation of the features after scaling:")
print(f"\ttrain: {train_data_normalized.std(axis=0)}")
print(f"\tval: {val_data_normalized.std(axis=0)}")
print(f"\ttest: {test_data_normalized.std(axis=0)}")

<div style="text-align: center">

<div>
    @LiterallyTheOne — PhD Candidate in Artificial Intelligence
</div>

<a style="margin: 1em" href="https://literallytheone.github.io">
https://literallytheone.github.io
</a>

</div>
