In [40]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

import lightning as L # additional tooling for neural networks
from torch.utils.data import TensorDataset, DataLoader # needed for training data in a more efficient manner

import pandas as pd
from sklearn.model_selection import train_test_split

In [41]:
url = "https://raw.githubusercontent.com/StatQuest/signa/main/chapter_03/iris.txt"
df = pd.read_table(url, sep = ',', header = None)

In [42]:
df.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [43]:
df.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']

df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [44]:
# See how many rows and columns we have
df.shape

(150, 5)

In [45]:
# see how many classes we have
df['class'].nunique()

3

In [46]:
# print the number in each class
for class_name in df['class'].unique():
    print(class_name, ':', sum(df['class'] == class_name), sep = '')

Iris-setosa:50
Iris-versicolor:50
Iris-virginica:50


In [47]:
df[['petal_width', 'sepal_width']].head()

Unnamed: 0,petal_width,sepal_width
0,0.2,3.5
1,0.2,3.0
2,0.2,3.2
3,0.2,3.1
4,0.2,3.6


In [48]:
input_values = df[['petal_width', 'sepal_width']]
input_values.head()

Unnamed: 0,petal_width,sepal_width
0,0.2,3.5
1,0.2,3.0
2,0.2,3.2
3,0.2,3.1
4,0.2,3.6


In [49]:
label_values = df['class']
label_values.head()

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: class, dtype: object

In [50]:
classes_as_numbers = label_values.factorize()[0] # This is label encoding not good cause the data isn't ordinal

classes_as_numbers

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [51]:
input_train, input_test, label_train, label_test = train_test_split(
    input_values,
    classes_as_numbers,
    test_size = 0.25,
    stratify = classes_as_numbers # used to make sure a good split of classes among both the train and test data
)

In [52]:
input_train.shape

(112, 2)

In [53]:
label_train.shape

(112,)

In [54]:
input_test.shape

(38, 2)

In [55]:
input_train.shape

(112, 2)

In [56]:
# use one hot encoding on the classes
one_hot_label_train = F.one_hot(torch.tensor(label_train)).type(torch.float32)

In [57]:
one_hot_label_train[:10]

tensor([[0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]])

### Feature scaling by normalizing the data (min max scaling)

In [58]:
max_vals_in_input_train = input_train.max()

max_vals_in_input_train

petal_width    2.5
sepal_width    4.4
dtype: float64

In [59]:
min_vals_in_input_train = input_train.min()

min_vals_in_input_train

petal_width    0.1
sepal_width    2.0
dtype: float64

In [60]:
# normalize the input train data
input_train = (input_train - min_vals_in_input_train) / (max_vals_in_input_train - min_vals_in_input_train)
input_train.head()

Unnamed: 0,petal_width,sepal_width
113,0.791667,0.208333
76,0.541667,0.333333
72,0.583333,0.208333
31,0.125,0.583333
100,1.0,0.541667


In [61]:
# normalize the input test data
input_test = (input_test - min_vals_in_input_train) / (max_vals_in_input_train - min_vals_in_input_train)
input_test.head()

Unnamed: 0,petal_width,sepal_width
33,0.041667,0.916667
39,0.041667,0.583333
138,0.708333,0.416667
132,0.875,0.333333
134,0.541667,0.25


### Put our data into a dataloader

In [62]:
# They are good for large datasets cause they make it easy to access data in batches

In [63]:
## Convert input_train into tensors
input_train_tensors = torch.tensor(input_train.values).type(torch.float32)

input_train_tensors[:5]

tensor([[0.7917, 0.2083],
        [0.5417, 0.3333],
        [0.5833, 0.2083],
        [0.1250, 0.5833],
        [1.0000, 0.5417]])

In [64]:
## Convert input_test into tensors
input_test_tensors = torch.tensor(input_test.values).type(torch.float32)

input_test_tensors[:5]

tensor([[0.0417, 0.9167],
        [0.0417, 0.5833],
        [0.7083, 0.4167],
        [0.8750, 0.3333],
        [0.5417, 0.2500]])

In [65]:
train_dataset = TensorDataset(input_train_tensors, one_hot_label_train)
train_dataloader = DataLoader(train_dataset)

## Building a neural network with mulitple inputs and outputs 

In [69]:
class MultipleInsOuts(L.LightningModule):

    def __init__(self):
        super().__init__()
        L.seed_everything(seed=42)

        self.input_to_hidden = nn.Linear(in_features = 2, out_features = 2, bias = True)

        self.hidden_to_output = nn.Linear(in_features = 2, out_features = 3, bias = True)

        self.loss = nn.MSELoss(reduction = 'sum')

    def forward(self, input):

        hidden = self.input_to_hidden(input)

        output_values = self.hidden_to_output(torch.relu(hidden))

        return(output_values)

    def configure_optimizers(self):

        return Adam(self.parameters(), lr = 0.001)

    def training_step(self, batch, batch_idx):

        inputs, labels = batch

        outputs = self.forward(inputs)

        loss = self.loss(outputs, labels)

        return loss


In [70]:
model = MultipleInsOuts()

for name, param in model.named_parameters():
    print(name, torch.round(param.data, decimals = 2))

Seed set to 42


input_to_hidden.weight tensor([[ 0.5400,  0.5900],
        [-0.1700,  0.6500]])
input_to_hidden.bias tensor([-0.1500,  0.1400])
hidden_to_output.weight tensor([[-0.3400,  0.4200],
        [ 0.6200, -0.5200],
        [ 0.6100,  0.1300]])
hidden_to_output.bias tensor([0.5200, 0.1000, 0.3400])


In [71]:
model = MultipleInsOuts()

Seed set to 42


In [None]:
# Train the neural network
trainer = L.Trainer(max_epochs = 10)
trainer.fit(model, train_dataloaders = train_dataloader)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name             | Type    | Params | Mode 
-----------------------------------------------------
0 | input_to_hidden  | Linear  | 6      | train
1 | hidden_to_output | Linear  | 9      | train
2 | loss             | MSELoss | 0      | train
-----------------------------------------------------
15        Trainable params
0         Non-trainable params
15        Total params
0.000     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode
c:\Users\Taylor.Odhiambo\OneDrive - Kenya Airways PLC\Documents\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many 

Epoch 9: 100%|██████████| 112/112 [00:00<00:00, 416.33it/s, v_num=0]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 112/112 [00:00<00:00, 416.33it/s, v_num=0]


In [74]:
predictions = model(input_test_tensors)

In [75]:
# because the outputs(classifications) are 3 we should get 3 values for each row
predictions[0:4]

tensor([[ 0.9072, -0.0385,  0.0388],
        [ 0.7924,  0.0157,  0.0315],
        [ 0.0666,  0.4828,  0.5488],
        [-0.1292,  0.6064,  0.6772]], grad_fn=<SliceBackward0>)

In [76]:
predicted_labels = torch.argmax(predictions, dim = 1) # dim = 0 applies to argmax to rows, dim = 1 applies argmax to columms
predicted_labels[0:4]

tensor([0, 0, 2, 2])

In [77]:
# look at the percentage of correctly predicted values by comparing with test tensor
# convert label_test to a tensor first as it is current just a numpy array
# add up all cases where predicted labels and the test labels are the same class

torch.sum(torch.eq(torch.tensor(label_test), predicted_labels)) / len(predicted_labels)

tensor(0.7105)

# Training the model further to get higher accuracy as 71% is quite low

In [78]:
# find the path to the checkpoint file so we can pick up where we left off.

path_to_checkpoint = trainer.checkpoint_callback.best_model_path # by default bests = most recent

In [79]:
# Create a new trainer
trainer = L.Trainer(max_epochs = 100)

trainer.fit(model, train_dataloaders = train_dataloader, ckpt_path = path_to_checkpoint)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at c:\Users\Taylor.Odhiambo\OneDrive - Kenya Airways PLC\Documents\NeuralNetworks\lightning_logs\version_0\checkpoints\epoch=9-step=1120.ckpt
c:\Users\Taylor.Odhiambo\OneDrive - Kenya Airways PLC\Documents\.venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:445: The dirpath has changed from 'c:\\Users\\Taylor.Odhiambo\\OneDrive - Kenya Airways PLC\\Documents\\NeuralNetworks\\lightning_logs\\version_0\\checkpoints' to 'c:\\Users\\Taylor.Odhiambo\\OneDrive - Kenya Airways PLC\\Documents\\NeuralNetworks\\lightning_logs\\version_1\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_mo

Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 318.71it/s, v_num=1]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 316.60it/s, v_num=1]


In [80]:
predictions = model(input_test_tensors)

predicted_labels = torch.argmax(predictions, dim = 1)

torch.sum(torch.eq(torch.tensor(label_test), predicted_labels)) / len(predicted_labels)

tensor(0.8421)

Train with new data

In [81]:
normalized_values = ([0.2, 3.0] - min_vals_in_input_train) / (max_vals_in_input_train - min_vals_in_input_train)
normalized_values


petal_width    0.041667
sepal_width    0.416667
dtype: float64

In [82]:
model(torch.tensor(normalized_values).type(torch.float32))

  model(torch.tensor(normalized_values).type(torch.float32))


tensor([ 0.6896,  0.3253, -0.0843], grad_fn=<ViewBackward0>)