Libraries and Imports

In [1]:
import json
import os
import sys

import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.utils.data as data
import torchmetrics
from ncps.torch import LTC
from ncps.wirings import AutoNCP
from plyer import notification
# from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger

sys.path.append(os.path.abspath("funcs"))

from config_reading import read_configs
from timer_callback import TimingCallback

Opening configuration file

In [2]:
configs = read_configs()
processing_configs = configs["processing"]
training_configs = configs["training"]

Defining Model Name

In [3]:
model_name = f"c_{configs["model_name"]}"

Setting the CUDA float32 precision.

In [4]:
torch.set_float32_matmul_precision(training_configs["float_precision"])

Set the seed manually to ensure reproducibility

In [5]:
seed = training_configs["seed"]

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
pl.seed_everything(seed, workers=True)

Seed set to 1234


1234

Reading the dataset.

In [6]:
has_validation = processing_configs["validation_proportion"] > 0

In [7]:
train_x = torch.load(os.path.join(processing_configs["save_path"], "tensor_train_x.pt"))
train_y = torch.load(os.path.join(processing_configs["save_path"], "tensor_train_y.pt"))
if has_validation:
	val_x = torch.load(os.path.join(processing_configs["save_path"], "tensor_val_x.pt"))
	val_y = torch.load(os.path.join(processing_configs["save_path"], "tensor_val_y.pt"))
test_x = torch.load(os.path.join(processing_configs["save_path"], "tensor_test_x.pt"))
test_y = torch.load(os.path.join(processing_configs["save_path"], "tensor_test_y.pt"))

Defining loaders, models phases and model configuration

In [8]:
train_batch, val_batch, test_batch = training_configs["batch_sizes"]

if train_batch == "all":
  train_batch = train_x.shape[0]
if has_validation and val_batch == "all":
  val_batch = val_x.shape[0]
if test_batch == "all":
  test_batch = test_x.shape[0]

In [9]:
train_dataloader = data.DataLoader(data.TensorDataset(train_x, train_y), shuffle=True, num_workers=16, persistent_workers=True, batch_size=train_batch)
if has_validation:
	val_dataloader = data.DataLoader(data.TensorDataset(val_x, val_y), num_workers=16, persistent_workers=True, batch_size=val_batch)
test_dataloader = data.DataLoader(data.TensorDataset(test_x, test_y), num_workers=16, persistent_workers=True, batch_size=test_batch)

In [10]:
class SequenceLearner(pl.LightningModule):
	def __init__(self, model, lr):
		super().__init__()
		self.model = model
		self.lr = lr
		self.loss_fn = nn.CrossEntropyLoss()
		self.acc_fn = torchmetrics.Accuracy(task="multiclass", num_classes=6)

	def training_step(self, batch):
		x, y = batch
		y_hat, _ = self.model.forward(x)
		loss = self.loss_fn(y_hat, y)
		y_pred = y_hat.argmax(dim=-1)
		self.log("train_loss", loss, on_step=False, on_epoch=True)
		self.log("train_acc", self.acc_fn(y_pred, y), on_step=False, on_epoch=True)
		return {"loss": loss}

	def validation_step(self, batch):
		x, y = batch
		y_hat, _ = self.model.forward(x)
		loss = self.loss_fn(y_hat, y)
		y_pred = y_hat.argmax(dim=-1)
		self.log("val_loss", loss, on_step=False, on_epoch=True)
		self.log("val_acc", self.acc_fn(y_pred, y), on_step=False, on_epoch=True)
		return {"loss": loss}
	
	def test_step(self, batch):
		x, y = batch
		y_hat, _ = self.model.forward(x)
		loss = self.loss_fn(y_hat, y)
		y_pred = y_hat.argmax(dim=-1)
		self.log("test_loss", loss)
		self.log("test_acc", self.acc_fn(y_pred, y))
		return {"loss": loss}

	def configure_optimizers(self):
		return torch.optim.Adam(self.model.parameters(), lr=self.lr)

In [11]:
out_features = 6 # Output
in_features = 561 # Input

In [12]:
wiring = AutoNCP(training_configs["num_neurons"], out_features)

In [13]:
ltc_model = LTC(in_features, wiring, batch_first=True)
learn = SequenceLearner(ltc_model, lr=training_configs["learning_rate"])

log_dir = f"logs"
logger = CSVLogger(log_dir, name=model_name)

# checkpoint_dir = f"{log_dir}/{model_name}/checkpoints"
# last_checkpoint_path = f"{checkpoint_dir}/last.ckpt"
# checkpoint_callback = ModelCheckpoint(
# 	dirpath=checkpoint_dir,
# 	filename="{epoch}-{val_loss:.2f}",
# 	save_top_k=1,
# 	monitor="val_loss",
# 	mode="min",
# 	save_last=True
# )

trainer = pl.Trainer(
	logger=logger,
	max_epochs=training_configs["max_epochs"],
  check_val_every_n_epoch=1,
  callbacks=[TimingCallback()],
	gradient_clip_val=1  # Clip gradient to stabilize training
)

GPU available: True (cuda), used: True


alloc!


TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Training

In [14]:
if has_validation:
	trainer.fit(learn, train_dataloader, val_dataloader)
else:
	trainer.fit(learn, train_dataloader)

Missing logger folder: logs\c_ex003
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type               | Params
-----------------------------------------------
0 | model   | LTC                | 442 K 
1 | loss_fn | CrossEntropyLoss   | 0     
2 | acc_fn  | MulticlassAccuracy | 0     
-----------------------------------------------
354 K     Trainable params
88.2 K    Non-trainable params
442 K     Total params
1.770     Total estimated model params size (MB)


                                                                           

c:\Users\Gusta\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\loops\fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 1:   0%|          | 0/1 [00:00<?, ?it/s, v_num=0]        

Testing

In [None]:
trainer.test(learn, test_dataloader)

Saving the trained model

In [None]:
if not os.path.exists("models"):
	os.makedirs("models")

torch.save(ltc_model, f"models/{model_name}.pt")

Saving the configuration used

In [None]:
config_file_path = os.path.join(logger.log_dir, "config.json")
with open(config_file_path, 'w') as config_file:
	json.dump(configs, config_file, indent=2)

Notification for finishing the training

In [None]:
notification.notify(
	title="Training ended",
	message=f"The training of the model {model_name} with {training_configs["max_epochs"]} epochs has been completed.",
	timeout=10
)