# <div style="text-align: center; color: cyan">Train 2</div>

## <div style="text-align: center; color: lime">Setup</div>

In [1]:
import os

if os.getenv("COLAB_RELEASE_TAG"):
    !git clone https://github.com/LiterallyTheOne/Pytorch_Tutorial.git

    %cd src/6_train

## <div style="text-align: center; color: lime">Imports</div>

In [None]:
import torch
from torch.utils.data import Dataset, random_split

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy as np

## <div style="text-align: center; color: lime">Modular train step and validation step</div>

In [2]:
# Execute the train_v1.py script with IPython's %run magic. Per the saved output it
# prints a device tag and, for each epoch, the average training loss, the average
# validation loss, and the validation accuracy.
%run train_v1.py

mps
--------------------
epoch: 0
training average_loss: 1.1738866567611694
validation average_loss: 1.1526515483856201
validation accuracy: 0.16666666666666666
--------------------
epoch: 1
training average_loss: 1.0959971980615095
validation average_loss: 1.076720118522644
validation accuracy: 0.13333333333333333
--------------------
epoch: 2
training average_loss: 1.0411990230733699
validation average_loss: 1.0212507049242656
validation accuracy: 0.5333333333333333
--------------------
epoch: 3
training average_loss: 0.9927879897030917
validation average_loss: 0.9612311323483785
validation accuracy: 0.6
--------------------
epoch: 4
training average_loss: 0.9457751187411222
validation average_loss: 0.8915376663208008
validation accuracy: 0.5333333333333333


In [3]:
# Execute the train_v2.py script with IPython's %run magic. Per the saved output it
# reports loss and accuracy for both the train and validation passes of every epoch,
# followed by a final test loss/accuracy.
%run train_v2.py

mps
--------------------
epoch: 0
train: 
	loss: 1.0087
	accuracy: 0.4762
validation: 
	loss: 0.8719
	accuracy: 0.8000
--------------------
epoch: 1
train: 
	loss: 0.9184
	accuracy: 0.6286
validation: 
	loss: 0.8224
	accuracy: 0.8000
--------------------
epoch: 2
train: 
	loss: 0.8568
	accuracy: 0.6286
validation: 
	loss: 0.7920
	accuracy: 0.8000
--------------------
epoch: 3
train: 
	loss: 0.8101
	accuracy: 0.6762
validation: 
	loss: 0.7389
	accuracy: 0.8000
--------------------
epoch: 4
train: 
	loss: 0.7575
	accuracy: 0.7429
validation: 
	loss: 0.6811
	accuracy: 0.8333
--------------------
test: 
	loss: 0.7422
	accuracy: 0.8667


## <div style="text-align: center; color: lime">Better splitting</div>

In [4]:
class IRISDataset(Dataset):
    """Map-style dataset that pairs feature rows with integer class labels.

    Args:
        data: Indexable, sized collection of feature rows (for example a 2-D
            NumPy array). ``data[idx]`` must be convertible by ``torch.tensor``.
        target: Indexable, sized collection of class labels aligned with ``data``.

    Raises:
        ValueError: If ``data`` and ``target`` do not have the same length.
    """

    def __init__(self, data, target):
        super().__init__()
        # Fail early instead of raising an IndexError (or silently ignoring
        # surplus labels) in the middle of an epoch.
        if len(data) != len(target):
            raise ValueError(
                f"data and target must have the same length, "
                f"got {len(data)} and {len(target)}"
            )
        self.data = data
        self.target = target

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``.

        Features are returned as a ``torch.float32`` tensor and the label as a
        ``torch.int64`` tensor.
        """
        # Build the float32 tensor directly rather than creating a tensor in the
        # source dtype first and converting it afterwards.
        data = torch.tensor(self.data[idx], dtype=torch.float)
        # Force int64: label arrays can be int32 on some platforms, and PyTorch
        # classification losses expect class indices as int64 (long).
        target = torch.tensor(self.target[idx], dtype=torch.long)
        return data, target


# -------------------[ Load the data ]-------------------
iris = load_iris()
iris_dataset = IRISDataset(data=iris.data, target=iris.target)

# -------------------[ Split the data to train, validation, and test ]-------------------
# A seeded generator keeps the 70% / 20% / 10% split repeatable between runs.
# random_split shuffles indices without regard to the labels, so the class
# balance inside each split is not controlled.
g1 = torch.Generator()
g1.manual_seed(20)
train_data, val_data, test_data = random_split(
    dataset=iris_dataset,
    lengths=[0.7, 0.2, 0.1],
    generator=g1,
)

In [5]:
# Tally how many samples of each class (0, 1, 2) landed in the training split.
label_count = dict.fromkeys(range(3), 0)

for data, target in train_data:
    class_id = target.item()  # 0-d label tensor -> plain Python int usable as a dict key
    label_count[class_id] = label_count[class_id] + 1

print(f"train label count: {label_count}")

train label count: {0: 33, 1: 39, 2: 33}


In [6]:
# Tally how many samples of each class (0, 1, 2) landed in the validation split.
label_count = dict.fromkeys(range(3), 0)

for data, target in val_data:
    class_id = target.item()  # 0-d label tensor -> plain Python int usable as a dict key
    label_count[class_id] = label_count[class_id] + 1

print(f"validation label count: {label_count}")

validation label count: {0: 13, 1: 6, 2: 11}


In [7]:
# Tally how many samples of each class (0, 1, 2) landed in the test split.
label_count = dict.fromkeys(range(3), 0)

for data, target in test_data:
    class_id = target.item()  # 0-d label tensor -> plain Python int usable as a dict key
    label_count[class_id] = label_count[class_id] + 1

print(f"test label count: {label_count}")

test label count: {0: 4, 1: 5, 2: 6}


In [8]:
# Reload the raw iris arrays.
iris = load_iris()

# Stage 1: keep 70% for training and hold out 30%; `stratify` preserves the
# class proportions on both sides of the split.
train_subset, holdout_subset, train_target, holdout_target = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=42,
    stratify=iris.target,
)

# Stage 2: divide the hold-out part into validation and test, again stratified
# (with test_size=0.33 the saved output shows 30 validation and 15 test samples).
val_subset, test_subset, val_target, test_target = train_test_split(
    holdout_subset,
    holdout_target,
    test_size=0.33,
    random_state=42,
    stratify=holdout_target,
)

# Report how many samples each split received ...
print("size of each subset: ")
print(f"\ttrain: {len(train_subset)}")
print(f"\tval: {len(val_subset)}")
print(f"\ttest: {len(test_subset)}")

# ... and how the class labels are distributed inside each split.
print("target distribution:")
print(f"\ttrain: {np.unique(train_target, return_counts=True)}")
print(f"\tval: {np.unique(val_target, return_counts=True)}")
print(f"\ttest: {np.unique(test_target, return_counts=True)}")

size of each subset: 
	train: 105
	val: 30
	test: 15
target distribution:
	train: (array([0, 1, 2]), array([35, 35, 35]))
	val: (array([0, 1, 2]), array([10, 10, 10]))
	test: (array([0, 1, 2]), array([5, 5, 5]))


In [9]:
# Wrap every stratified split in an IRISDataset. This rebinds train_data,
# val_data and test_data.
train_data, val_data, test_data = (
    IRISDataset(features, labels)
    for features, labels in (
        (train_subset, train_target),
        (val_subset, val_target),
        (test_subset, test_target),
    )
)

In [10]:
# Execute the train_v3.py script with IPython's %run magic; per the saved output it
# reports per-epoch train/validation loss and accuracy plus final test metrics.
# NOTE(review): %run without -i executes the script in a fresh namespace, so it does
# not see the train_data / val_data / test_data built in this notebook — presumably
# train_v3.py creates its own stratified split; confirm.
%run train_v3.py

mps
--------------------
epoch: 0
train: 
	loss: 1.3017
	accuracy: 0.3333
validation: 
	loss: 1.1956
	accuracy: 0.3333
--------------------
epoch: 1
train: 
	loss: 1.1484
	accuracy: 0.3333
validation: 
	loss: 1.1054
	accuracy: 0.3333
--------------------
epoch: 2
train: 
	loss: 1.0816
	accuracy: 0.3619
validation: 
	loss: 1.0478
	accuracy: 0.3333
--------------------
epoch: 3
train: 
	loss: 1.0252
	accuracy: 0.3524
validation: 
	loss: 0.9994
	accuracy: 0.5000
--------------------
epoch: 4
train: 
	loss: 0.9773
	accuracy: 0.6000
validation: 
	loss: 0.9490
	accuracy: 0.6667
--------------------
test: 
	loss: 0.9289
	accuracy: 0.6667


## <div style="text-align: center; color: lime">Standard Scaler</div>

In [11]:
# Per-feature statistics of the raw (unscaled) training split.
feature_mean = np.mean(train_subset, axis=0)
feature_std = np.std(train_subset, axis=0)

print(f"Mean of the features:\n\t {feature_mean}")
print(f"Standard deviation of the features:\n\t {feature_std}")

Mean of the features:
	 [5.87333333 3.0552381  3.7847619  1.20571429]
Standard deviation of the features:
	 [0.85882164 0.45502087 1.77553646 0.77383751]


In [12]:
# Fit the scaler on the training split only, then apply those same statistics
# to all three splits.
scaler = StandardScaler().fit(train_subset)

train_subset_normalized, val_subset_normalized, test_subset_normalized = (
    scaler.transform(split) for split in (train_subset, val_subset, test_subset)
)

# Per-feature mean of each split after scaling.
print("Mean of the features after scaling:")
print(f"\ttrain: {train_subset_normalized.mean(axis=0)}")
print(f"\tval: {val_subset_normalized.mean(axis=0)}")
print(f"\ttest: {test_subset_normalized.mean(axis=0)}")

# Per-feature standard deviation of each split after scaling.
print("Standard deviation of the features after scaling:")
print(f"\ttrain: {train_subset_normalized.std(axis=0)}")
print(f"\tval: {val_subset_normalized.std(axis=0)}")
print(f"\ttest: {test_subset_normalized.std(axis=0)}")

Mean of the features after scaling:
	train: [ 2.38327876e-15 -1.12145742e-15 -1.37456184e-16 -6.97854473e-17]
	val: [-0.14360762  0.06174494 -0.04398402 -0.02030696]
	test: [-0.06210059 -0.07744281 -0.06275769 -0.04184464]
Standard deviation of the features after scaling:
	train: [1. 1. 1. 1.]
	val: [0.88306745 0.81063775 0.97257027 0.93027831]
	test: [0.80131426 0.8871022  0.96009651 0.9513319 ]


In [None]:
# Wrap every standardized split in an IRISDataset. This rebinds train_data,
# val_data and test_data.
train_data, val_data, test_data = (
    IRISDataset(features, labels)
    for features, labels in (
        (train_subset_normalized, train_target),
        (val_subset_normalized, val_target),
        (test_subset_normalized, test_target),
    )
)

In [2]:
# Execute the train_v4.py script with IPython's %run magic; per the saved output it
# reports per-epoch train/validation loss and accuracy plus final test metrics.
# NOTE(review): %run without -i executes the script in a fresh namespace, so it does
# not see the normalized datasets built in this notebook — presumably train_v4.py
# applies its own scaling; confirm.
%run train_v4.py

mps
--------------------
epoch: 0
train: 
	loss: 1.0162
	accuracy: 0.3810
validation: 
	loss: 0.9660
	accuracy: 0.5667
--------------------
epoch: 1
train: 
	loss: 0.8919
	accuracy: 0.6476
validation: 
	loss: 0.8754
	accuracy: 0.6667
--------------------
epoch: 2
train: 
	loss: 0.7995
	accuracy: 0.6857
validation: 
	loss: 0.7918
	accuracy: 0.7000
--------------------
epoch: 3
train: 
	loss: 0.7132
	accuracy: 0.7238
validation: 
	loss: 0.7197
	accuracy: 0.7333
--------------------
epoch: 4
train: 
	loss: 0.6421
	accuracy: 0.7524
validation: 
	loss: 0.6616
	accuracy: 0.7333
--------------------
test: 
	loss: 0.6261
	accuracy: 0.7333


<div style="text-align: center">

<div>
    @LiterallyTheOne — PhD Candidate in Artificial Intelligence
</div>

<a style="margin: 1em" href="https://literallytheone.github.io">
https://literallytheone.github.io
</a>

</div>
