In [4]:
import torch #allows us to use tensors
import torch.nn as nn # Allows us to create neural networks
import torch.nn.functional as F # gives us access to activation and loss functions
from torch.optim import Adam #optim has access to many optimizers

import lightning as L # tooling for neural networks
from torch.utils.data import TensorDataset, DataLoader

import pandas as pd
from sklearn.model_selection import train_test_split # used to create train and test data

In [5]:
# get data from github

url = "https://raw.githubusercontent.com/StatQuest/signa/main/chapter_04/iris.txt"
df = pd.read_table(url, sep=",", header=None)

In [6]:
df.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [7]:
df.columns = ['sepal_length',
            'sepal_width',
            'petal_length',
            'petal_width',
            'class']

df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [8]:
df.shape

(150, 5)

In [9]:
df['class'].nunique()

3

In [10]:
for class_name in df['class'].unique():
    print(class_name, ':', sum(df['class'] == class_name), sep = "")

Iris-setosa:50
Iris-versicolor:50
Iris-virginica:50


In [11]:
df[['petal_width', 'sepal_width']].head()

Unnamed: 0,petal_width,sepal_width
0,0.2,3.5
1,0.2,3.0
2,0.2,3.2
3,0.2,3.1
4,0.2,3.6


In [12]:
input_values = df[['petal_width', 'sepal_width']]
input_values.head()

Unnamed: 0,petal_width,sepal_width
0,0.2,3.5
1,0.2,3.0
2,0.2,3.2
3,0.2,3.1
4,0.2,3.6


In [13]:
label_values = df['class']
label_values.head()

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: class, dtype: object

In [14]:
classes_as_numbers = label_values.factorize()[0]
classes_as_numbers

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [15]:
input_train, input_test, label_train, label_test = train_test_split(
                                                                    input_values,
                                                                    classes_as_numbers,
                                                                    train_size = 0.75,
                                                                    stratify = classes_as_numbers
                                                                    )

Checking that 75% of the data went to train data

In [16]:
input_train.shape

(112, 2)

In [17]:
label_train.shape

(112,)

In [18]:
input_test.shape

(38, 2)

In [19]:
label_test.shape

(38,)

Use one hot encoding on the labels to prevent the network thinking the data is ordinal

In [20]:
one_hot_label_train = F.one_hot(torch.tensor(label_train)).type(torch.float32)

In [21]:
one_hot_label_train[:10]

tensor([[0., 0., 1.],
        [1., 0., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 0., 1.],
        [1., 0., 0.],
        [0., 1., 0.]])

In [22]:
max_vals_in_input_train = input_train.max()
max_vals_in_input_train

petal_width    2.5
sepal_width    4.1
dtype: float64

In [23]:
min_vals_in_input_train = input_train.min()
min_vals_in_input_train

petal_width    0.1
sepal_width    2.0
dtype: float64

In [24]:
input_train = (input_train - min_vals_in_input_train) / (max_vals_in_input_train - min_vals_in_input_train)
input_train.head()

Unnamed: 0,petal_width,sepal_width
121,0.791667,0.380952
43,0.208333,0.714286
110,0.791667,0.571429
24,0.041667,0.666667
57,0.375,0.190476


In [25]:
input_test = (input_test - min_vals_in_input_train) / (max_vals_in_input_train - min_vals_in_input_train)
input_test.head()

Unnamed: 0,petal_width,sepal_width
33,0.041667,1.047619
49,0.041667,0.619048
137,0.708333,0.52381
17,0.083333,0.714286
2,0.041667,0.571429


Put the data into a dataloader

In [26]:
input_train_tensors = torch.tensor(input_train.values).type(torch.float32)
input_train_tensors[:5]

tensor([[0.7917, 0.3810],
        [0.2083, 0.7143],
        [0.7917, 0.5714],
        [0.0417, 0.6667],
        [0.3750, 0.1905]])

In [27]:
input_test_tensors = torch.tensor(input_test.values).type(torch.float32)
input_test_tensors[:5]

tensor([[0.0417, 1.0476],
        [0.0417, 0.6190],
        [0.7083, 0.5238],
        [0.0833, 0.7143],
        [0.0417, 0.5714]])

In [31]:
train_dataset = TensorDataset(input_train_tensors, one_hot_label_train)
train_dataloader = DataLoader(train_dataset)

## Build the neural network


In [35]:
class MultipleInsOuts(L.LightningModule):
    
    def __init__(self):
        super().__init__()

        # in the constructor define layers and loss function
        L.seed_everything(seed=42)

        self.input_to_hidden = nn.Linear(in_features = 2, out_features = 2, bias = True)
        self.hidden_to_output = nn.Linear(in_features = 2, out_features = 3, bias = True)

        self.loss = nn.CrossEntropyLoss()

    def forward(self, input): #data goes here when passed to the model
        # pass the data to the hidden layer
        hidden = self.input_to_hidden(input)
        # pass the data to the final layer with activation functions
        output_values = self.hidden_to_output(F.relu(hidden))

        return output_values
    
    def configure_optimizers(self):
        # pass the weights and biases (parametets) to the optimizer
        return Adam(self.parameters(), lr = 0.001)
    
    def training_step(self, batch, batch_idx):
        # split train and batch into input and label values
        inputs, labels = batch
        # run the inputs through the neural networks
        outputs = self.forward(inputs)
        # calculate the loss
        loss = self.loss(outputs,labels)

        #log the loss to observe later to see if we've done enough training
        # self.log(loss)

        return loss


## Train the Neural Network

In [36]:
model = MultipleInsOuts()

Seed set to 42


In [37]:
# Train the model
trainer = L.Trainer(max_epochs = 10)
trainer.fit(model, train_dataloaders = train_dataloader)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name             | Type             | Params | Mode 
--------------------------------------------------------------
0 | input_to_hidden  | Linear           | 6      | train
1 | hidden_to_output | Linear           | 9      | train
2 | loss             | CrossEntropyLoss | 0      | train
--------------------------------------------------------------
15        Trainable params
0         Non-trainable params
15        Total params
0.000     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode
c:\Users\Taylor.Odhiambo\OneDrive - Kenya Airways PLC\Documents\.venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connec

Epoch 9: 100%|██████████| 112/112 [00:00<00:00, 439.42it/s, v_num=5]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 112/112 [00:00<00:00, 432.08it/s, v_num=5]


## Use the model

In [38]:
predictions = model(input_test_tensors)

In [39]:
predictions[0:4]

tensor([[ 1.1645, -0.2852,  0.1368],
        [ 0.9872, -0.1462,  0.1079],
        [-0.0686,  0.6718,  0.9825],
        [ 0.9631, -0.1279,  0.1694]], grad_fn=<SliceBackward0>)

In [40]:
predicted_labels = torch.argmax(predictions, dim = 1) # dim applies it to columns
predicted_labels[0:4]

tensor([0, 0, 2, 0])

In [42]:
torch.sum(torch.eq(predicted_labels, torch.tensor(label_test))) / len(predicted_labels)

tensor(0.6579)

## Train the model further from where we left off

In [None]:
# get the path to a file that shows where out model stopped in its previous training
path_to_checkpoint = trainer.checkpoint_callback.best_model_path # best by default is most recent

In [44]:
# Create a new trainer
trainer = L.Trainer(max_epochs = 100)

trainer.fit(model, train_dataloaders = train_dataloader, ckpt_path = path_to_checkpoint)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at c:\Users\Taylor.Odhiambo\OneDrive - Kenya Airways PLC\Documents\NeuralNetworks\lightning_logs\version_5\checkpoints\epoch=9-step=1120.ckpt
c:\Users\Taylor.Odhiambo\OneDrive - Kenya Airways PLC\Documents\.venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:445: The dirpath has changed from 'c:\\Users\\Taylor.Odhiambo\\OneDrive - Kenya Airways PLC\\Documents\\NeuralNetworks\\lightning_logs\\version_5\\checkpoints' to 'c:\\Users\\Taylor.Odhiambo\\OneDrive - Kenya Airways PLC\\Documents\\NeuralNetworks\\lightning_logs\\version_6\\checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_mo

Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 371.91it/s, v_num=6]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 364.65it/s, v_num=6]


In [45]:
predictions = model(input_test_tensors)
predicted_labels = torch.argmax(predictions, dim = 1)

torch.sum(torch.eq(predicted_labels, torch.tensor(label_test))) / len (predicted_labels)

tensor(0.8947)

# Make a prediction with new data

In [46]:

normalized_values = ([0.2, 3.0] - min_vals_in_input_train) / (max_vals_in_input_train - min_vals_in_input_train)
normalized_values

petal_width    0.041667
sepal_width    0.476190
dtype: float64

In [47]:
torch.argmax(model(torch.tensor(normalized_values).type(torch.float32)))

  torch.argmax(model(torch.tensor(normalized_values).type(torch.float32)))


tensor(0)