You can run this notebook directly on Google Colab

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/DaniAffCH/Vessel-Geometric-Transformers/blob/main/main.ipynb)

In [None]:
import sys
import warnings

warnings.filterwarnings('ignore')

COLAB_RUNTIME = 'google.colab' in sys.modules
!nvidia-smi &> /dev/null || echo -e "\e[31mWarning: No GPU found. Please check your runtime settings.\e[0m"
if COLAB_RUNTIME:
    !git config --global init.defaultBranch main
    !git init
    !git remote add origin https://github.com/DaniAffCH/Vessel-Geometric-Transformers.git
    !git pull origin main
    !pip install -q -r requirements.txt
else: # Development mode, setting precommit checks 
    !pip install -r requirements.txt
    !pre-commit autoupdate
    !pre-commit install


Loading the configuration (customizable by editing `config/config.yaml`)

In [2]:
from src.utils import load_config
import os

# Location of the configuration file, relative to the working directory.
config_path = os.path.join("config", "config.yaml")
# Load it into the `config` object used by the following cells.
config = load_config(config_path)

---

Loading the dataset and showing statistics

In [None]:
from src.data import VesselDataModule
from src.utils.data_analysis import data_info

# Build the data module from the `dataset` section of the configuration.
data = VesselDataModule(config.dataset)

# Show dataset statistics (what exactly is reported is defined by data_info).
data_info(data)

Showing the label distribution to check whether the train, validation, and test sets are balanced

In [None]:
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Labels of each split, as returned by the datasets.
train_labels = data.train_set.getLabels()
val_labels = data.val_set.getLabels()
test_labels = data.test_set.getLabels()

# One row per sample: its label and the name of the split it belongs to.
combined_labels = np.concatenate([train_labels, val_labels, test_labels])
subsets = (
    ['train'] * len(train_labels)
    + ['val'] * len(val_labels)
    + ['test'] * len(test_labels)
)

# Attach to every row the size of its (subset, label) group, the size of its
# subset, and their ratio, i.e. the label frequency within the subset.
df = pd.DataFrame({'label': combined_labels, 'subset': subsets}).assign(
    count=lambda frame: frame.groupby(['subset', 'label'])['label'].transform('count'),
    total=lambda frame: frame.groupby('subset')['label'].transform('count'),
    frequency=lambda frame: frame['count'] / frame['total'],
)

# Keep a single row per (label, subset) pair: one bar each in the plot.
df_normalized = df.drop_duplicates(subset=['label', 'subset'])

sns.set_theme(style='whitegrid')

# Grouped bar chart: one group per subset, one bar per label.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x='subset', y='frequency', hue='label', data=df_normalized, ax=ax)

ax.set_title('Normalized Label Distribution Across Train, Validation, and Test Sets')
ax.set_xlabel('Dataset Subset')
ax.set_ylabel('Normalized Frequency')
ax.legend(title='Label', loc='upper right')
ax.tick_params(axis='x', labelrotation=0)
fig.tight_layout()
plt.show()


Plotting the data distribution projected in 2D using Principal Component Analysis.

In [None]:
from src.utils.definitions import Feature, Category
from src.utils.data_analysis import plot_data

# Extract every feature. Each call also returns labels and rebinds `labels`,
# so the value left after this group is the one returned for Feature.FACE.
wss, labels = data.extract_feature(Feature.WSS)
pos, labels = data.extract_feature(Feature.POS)
pressure, labels = data.extract_feature(Feature.PRESSURE)
face, labels = data.extract_feature(Feature.FACE)

# NOTE(review): all plots share the labels of the last extraction; this assumes
# extract_feature returns the same labels for every feature -- TODO confirm.
for values, title in (
    (pos, "Position"),
    (wss, "Wall Shear Stress"),
    (pressure, "Pressure"),
    (face, "Face"),
):
    plot_data(values, labels, Category, title)

---

Performing an equivariance check using random samples from the dataset.  
The Geometric Layer is expected to fail due to the lack of a distance-aware dot product.

In [None]:
from src.lib.geometricAlgebraElements import GeometricAlgebraBase
from src.test.test_equivariance import TestEquivariance
import unittest

# Take element 0 of the first training batch's `data`, reshape it into rows of
# GeometricAlgebraBase.GA_size entries and keep the first 10 rows.
dl = data.train_dataloader()
batch = next(iter(dl)).data[0].view(-1, GeometricAlgebraBase.GA_size)[:10]

# Hand the samples to the test case through its class attribute.
TestEquivariance.INPUT_DATA = batch

# Collect every TestEquivariance test into a suite and run it with the
# lowest verbosity level.
suite = unittest.TestSuite(
    unittest.TestLoader().loadTestsFromTestCase(TestEquivariance)
)
test_runner = unittest.TextTestRunner(verbosity=0)
restResult = test_runner.run(suite)

---

# Attention based Baseline

Running hyperparameter optimization to find the hyperparameters that maximize the validation accuracy

In [None]:
from src.utils.hpo import baseline_hpo

# Hyperparameter optimization for the baseline: writes the config file with
# the best hyperparameters.
# NOTE(review): the training cell below reads the in-memory `config`; confirm
# that baseline_hpo also updates it, otherwise reload the config file first.
baseline_hpo(config, data)

Training the model using the best hyperparameters

In [None]:
from src.trainer import VesselTrainer
from src.models import BaselineTransformer

# Build the baseline model from the `baseline` section of the configuration.
model = BaselineTransformer(config.baseline)
# Build the trainer from the `trainer` section of the configuration.
trainer = VesselTrainer(config.trainer)
# Fit the model on the data module created earlier in the notebook.
trainer.fit(model, data)

Test the model performance on unseen test data.

In [None]:
# Run the trainer's test routine on the `model` and `trainer` currently bound
# in the notebook (the baseline ones when the cells are executed in order).
trainer.test(model, data)

---

# GATr

Running hyperparameter optimization to find the hyperparameters that maximize the validation accuracy

In [None]:
from src.models import Gatr
from src.utils.hpo import gatr_hpo

# Hyperparameter optimization for GATr: writes the config file with the best
# hyperparameters.
# NOTE(review): the training cell below reads the in-memory `config`; confirm
# that gatr_hpo also updates it, otherwise reload the config file first.
gatr_hpo(config, data)

Training the model using the best hyperparameters

In [None]:
from src.trainer import VesselTrainer

# Build the GATr model from the `gatr` section of the configuration.
# This rebinds `model` and `trainer`, replacing the baseline objects.
model = Gatr(config.gatr)
# Build a fresh trainer from the `trainer` section of the configuration.
trainer = VesselTrainer(config.trainer)
# Fit the model on the data module created earlier in the notebook.
trainer.fit(model, data)

Test the model performance on unseen test data.

In [None]:
# Run the trainer's test routine on the `model` and `trainer` currently bound
# in the notebook (the GATr ones when the cells are executed in order).
trainer.test(model, data)

----