In [1]:
import torch # torch will allow us to create tensors.
import torch.nn as nn # torch.nn allows us to create a neural network.
import torch.nn.functional as F # nn.functional give us access to the activation and loss functions.
from torch.optim import Adam # optim contains many optimizers. This time we're using Adam

import lightning as L # lightning has tons of cool tools that make neural networks easier
from torch.utils.data import TensorDataset, DataLoader # these are needed for the training data

import pandas as pd # We'll use pandas to read in the data and normalize it
from sklearn.model_selection import train_test_split # train_test_split will help us split the data into training and validation sets

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class MultiInputModel(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=2, bias=True)
        self.fc2 = nn.Linear(in_features=2, out_features=3, bias=True)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, input):
        hidden = self.fc1(input)
        output_values = self.fc2(F.relu(hidden))
        
        return output_values
    
    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)
    
    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        output_values = self.forward(inputs)
        loss = self.loss(output_values, labels)
                
        return loss


In [3]:
class MultiInOut2(L.LightningModule):

    L.seed_everything(42)

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=4, out_features=2, bias=True)
        self.fc2 = nn.Linear(in_features=2, out_features=3, bias=True)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, input):
        hidden = self.fc1(input)
        logits = self.fc2(F.relu(hidden))
        return logits
    
    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)
    
    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        output_values = self.forward(inputs)
        loss = self.loss(output_values, labels)
                
        return loss

Seed set to 42


In [4]:
from sklearn.datasets import load_iris
from torch.utils.data import DataLoader, TensorDataset

In [5]:
iris = load_iris()
X = torch.tensor(iris.data, dtype=torch.float32)
y = torch.tensor(iris.target, dtype=torch.long)

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
iris_dataset = TensorDataset(X_train, y_train)
iris_loader = DataLoader(iris_dataset, batch_size=32, shuffle=True)

In [8]:
model = MultiInOut2()
trainer = L.Trainer(max_epochs=100, accelerator="cpu", devices=1)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:76: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable 

In [9]:
trainer.fit(model, iris_loader)


  | Name | Type             | Params | Mode 
--------------------------------------------------
0 | fc1  | Linear           | 10     | train
1 | fc2  | Linear           | 9      | train
2 | loss | CrossEntropyLoss | 0      | train
--------------------------------------------------
19        Trainable params
0         Non-trainable params
19        Total params
0.000     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower val

Epoch 99: 100%|██████████| 4/4 [00:00<00:00, 500.01it/s, v_num=35]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 4/4 [00:00<00:00, 248.03it/s, v_num=35]



In [10]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)

fc1.weight tensor([[ 0.3036,  0.3176, -0.1817,  0.3952],
        [-0.1096,  0.1009, -0.2434,  0.2936]])
fc1.bias tensor([ 0.3556, -0.3668])
fc2.weight tensor([[ 0.4407,  0.1323],
        [ 0.4834,  0.0958],
        [ 0.5372, -0.0998]])
fc2.bias tensor([ 0.3268,  0.1346, -0.1055])
tensor([[ 0.3036,  0.3176, -0.1817,  0.3952],
        [-0.1096,  0.1009, -0.2434,  0.2936]])
fc1.bias tensor([ 0.3556, -0.3668])
fc2.weight tensor([[ 0.4407,  0.1323],
        [ 0.4834,  0.0958],
        [ 0.5372, -0.0998]])
fc2.bias tensor([ 0.3268,  0.1346, -0.1055])


In [11]:
pred = model(X_test)
predicted_classes = torch.argmax(pred, dim=1)
accuracy = (predicted_classes == y_test).float().mean()
print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")

Test Accuracy: 33.33%


In [12]:
path_to_checkpoint = trainer.checkpoint_callback.best_model_path

In [13]:
trainer1 = L.Trainer(max_epochs=200)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [14]:
trainer1.fit(model, iris_loader, ckpt_path=path_to_checkpoint)

Restoring states from the checkpoint path at /Users/archbaer/projects/nn_experiments/nn/lightning_logs/version_35/checkpoints/epoch=99-step=400.ckpt
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:366: The dirpath has changed from '/Users/archbaer/projects/nn_experiments/nn/lightning_logs/version_35/checkpoints' to '/Users/archbaer/projects/nn_experiments/nn/lightning_logs/version_36/checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.

  | Name | Type             | Params | Mode 
--------------------------------------------------
0 | fc1  | Linear           | 10     | train
1 | fc2  | Linear           | 9      | train
2 | loss | CrossEntropyLoss | 0      | train
--------------------------------------------------
19        Trainable params
0         Non-trainable params
19        Total params
0.000     Total est

Epoch 199: 100%|██████████| 4/4 [00:00<00:00, 213.07it/s, v_num=36]

`Trainer.fit` stopped: `max_epochs=200` reached.


Epoch 199: 100%|██████████| 4/4 [00:00<00:00, 158.75it/s, v_num=36]



In [15]:
predictions = model(X_test)
predicted_classes = torch.argmax(predictions, dim=1)

torch.sum(torch.eq(predicted_classes, y_test)).item() / len(y_test)

0.36666666666666664

* Now let's try with a modified dataset

In [16]:
url = "https://raw.githubusercontent.com/StatQuest/signa/main/chapter_04/iris.txt"
df = pd.read_table(url, sep=",", header=None)

In [17]:
df.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [18]:
df.columns = ["sepal_length",
              "sepal_width",
              "petal_length",
              "petal_width",
              "class"]

In [19]:
input_values = df[["sepal_length", "sepal_width"]]
label_values = df["class"]

In [20]:
classes_as_numbers = label_values.factorize()[0] ## NOTE: factorize() returns a list of lists,
                                                 ## and since we only need the first list of values,
                                                 ## we index the output of factorize() with [0].
classes_as_numbers ## print out the numbers

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [21]:
input_train, input_test, label_train, label_test = train_test_split(input_values,
                                                                    classes_as_numbers,
                                                                    test_size=0.25,
                                                                    stratify=classes_as_numbers)

In [22]:
one_hot_label_train = F.one_hot(torch.tensor(label_train)).type(torch.float32)

In [23]:
one_hot_label_train[:5]

tensor([[0., 0., 1.],
        [0., 0., 1.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.]])

In [24]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
input_train_scaled = scaler.fit_transform(input_train)
input_test_scaled = scaler.transform(input_test)

In [25]:
tensor_input_train = torch.tensor(input_train_scaled, dtype=torch.float32)
tensor_input_test = torch.tensor(input_test_scaled, dtype=torch.float32)

In [26]:
train_dataset = TensorDataset(tensor_input_train, one_hot_label_train)
train_dataloader = DataLoader(train_dataset)

In [27]:
trainer = L.Trainer(accelerator="cpu", devices=1, max_epochs=100)
model = MultiInputModel()

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
HPU available: False, using: 0 HPUs
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


In [28]:
trainer.fit(model, train_dataloader)


  | Name | Type             | Params | Mode 
--------------------------------------------------
0 | fc1  | Linear           | 6      | train
1 | fc2  | Linear           | 9      | train
2 | loss | CrossEntropyLoss | 0      | train
--------------------------------------------------
15        Trainable params
0         Non-trainable params
15        Total params
0.000     Total estimated model params size (MB)
3         Modules in train mode
0         Modules in eval mode
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/opt/anaconda3/envs/nn/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing th

Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 372.97it/s, v_num=37]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 369.00it/s, v_num=37]



In [29]:
predictions = model(tensor_input_test) 
predicted_classes = torch.argmax(predictions, dim=1)  

# To compare with true labels and get accuracy:
accuracy = (predicted_classes == torch.tensor(label_test)).float().mean()
print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")

Test Accuracy: 81.58%


In [None]:
class MultiInputModel(L.LightningModule):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=8, bias=True)
        self.fc2 = nn.Linear(in_features=8, out_features=16, bias=True)
        self.fc3 = nn.Linear(in_features=16, out_features=32, bias=True)
        self.fc4 = nn.Linear(in_features=32, out_features=3, bias=True)
        self.loss = nn.CrossEntropyLoss()

    def forward(self, input):
        hidden = self.fc1(input)
        hidden2 = self.fc2(F.relu(hidden))
        hidden3 = self.fc3(F.relu(hidden2))
        output_values = self.fc4(F.relu(hidden3))

        return output_values
    
    def configure_optimizers(self):
        return Adam(self.parameters(), lr=0.001)
    
    def training_step(self, batch, batch_idx):
        inputs, labels = batch
        output_values = self.forward(inputs)
        loss = self.loss(output_values, labels)
        
        return loss

In [32]:
model = MultiInputModel()
trainer = L.Trainer(accelerator="cpu", devices=1, max_epochs=100)
trainer.fit(model, train_dataloader)

💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name | Type             | Params | Mode 
--------------------------------------------------
0 | fc1  | Linear           | 24     | train
1 | fc2  | Linear           | 144    | train
2 | fc3  | Linear           | 544    | train
3 | fc4  | Linear           | 99     | train
4 | loss | CrossEntropyLoss | 0      | train
--------------------------------------------------
811       Trainable params
0         Non-trainable params
811       Total params
0.003     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HP

Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 322.23it/s, v_num=38]

`Trainer.fit` stopped: `max_epochs=100` reached.


Epoch 99: 100%|██████████| 112/112 [00:00<00:00, 318.81it/s, v_num=38]



In [33]:
predictions = model(tensor_input_test)  
predicted_classes = torch.argmax(predictions, dim=1) 

# To compare with true labels and get accuracy:
accuracy = (predicted_classes == torch.tensor(label_test)).float().mean()
print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")

Test Accuracy: 78.95%
