In [18]:
# setup_and_imports.ipynb
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch_geometric as pyg
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from tqdm import tqdm
from lab_gatr import PointCloudPoolingScales, LaBGATr
import matplotlib.pyplot as plt

# Ensure reproducibility
torch.manual_seed(42)
np.random.seed(42)

# Check for GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


In [19]:
# data_loading.ipynb
import os
from scipy.spatial import ConvexHull

class ConvexHullDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.file_names = sorted([f for f in os.listdir(data_dir) if f.endswith('.txt')])
        self.samples = []
        self._prepare_dataset()
    
    def _prepare_dataset(self):
        for file_name in tqdm(self.file_names, desc="Loading data"):
            file_path = os.path.join(self.data_dir, file_name)
            with open(file_path, 'r') as f:
                lines = f.readlines()[1:]  # Skip header
                points = []
                for line in lines:
                    x, y, z = map(float, line.strip().split())
                    points.append([x, y, z])
                points = np.array(points)
                if points.shape[0] < 4:
                    # Convex hull in 3D requires at least 4 non-coplanar points
                    volume = 0.0
                else:
                    try:
                        hull = ConvexHull(points)
                        volume = hull.volume
                    except:
                        # In case points are coplanar or singular
                        volume = 0.0
                self.samples.append({'points': points, 'volume': volume})
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        sample = self.samples[idx]
        points = sample['points']
        volume = sample['volume']
        return {'points': torch.tensor(points, dtype=torch.float32), 'volume': torch.tensor(volume, dtype=torch.float32)}

In [20]:
# dataloader_definition.ipynb
def collate_fn(batch):
    points = torch.stack([item['points'] for item in batch], dim=0)  # Shape: [batch_size, num_points, 3]
    volumes = torch.stack([item['volume'] for item in batch], dim=0)  # Shape: [batch_size]
    return {'points': points, 'volume': volumes}

batch_size = 32

data_dir = '3d_point_cloud_dataset'  # Ensure this path is correct
dataset = ConvexHullDataset(data_dir)

train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

print(f"Training samples: {len(train_dataset)}")
print(f"Testing samples: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

Loading data: 100%|██████████| 5000/5000 [00:00<00:00, 6895.92it/s]

Training samples: 4000
Testing samples: 1000





In [21]:
# custom_data.py
import torch

class CustomData:
    def __init__(self, points: torch.Tensor, volume: torch.Tensor, device: torch.device):
        """
        Custom data class to include necessary attributes for LaBGATr.

        Parameters
        ----------
        points : torch.Tensor
            Tensor of shape [batch_size, num_points, 3]
        volume : torch.Tensor
            Tensor of shape [batch_size]
        device : torch.device
            Device to move tensors to
        """
        self.points = points.to(device)        # [batch_size, num_points, 3]
        self.volume = volume.to(device)        # [batch_size]
        self.batch = torch.arange(self.points.size(0)).repeat_interleave(self.points.size(1)).to(device)  # [batch_size * num_points]
        self.scale0_sampling_index = torch.zeros(self.points.size(0) * self.points.size(1), dtype=torch.long).to(device)  # [batch_size * num_points]
        self.pos = self.points

In [22]:
import torch
from gatr.interface.point import embed_point, extract_point

class GeometricAlgebraInterface:
    num_input_channels = 1
    num_output_channels = 1
    num_input_scalars = 1
    num_output_scalars = 1

    @staticmethod
    @torch.no_grad()
    def embed(data):
        """
        Embeds 3D points into multivectors.

        Parameters
        ----------
        data : CustomData
            An instance of CustomData containing 'points' and 'volume'.

        Returns
        -------
        multivectors : torch.Tensor
            Embedded multivectors of shape [batch_size * num_points, 1, 16]
        scalars : torch.Tensor
            Corresponding scalar features of shape [batch_size * num_points, 1]
        """
        points = data.points  # [batch_size, num_points, 3]
        volumes = data.volume  # [batch_size]

        print(f"[Embed] Points shape: {points.shape}")      # Debug
        print(f"[Embed] Volumes shape: {volumes.shape}")  # Debug

        # Flatten points for embedding
        batch_size, num_points, _ = points.shape
        points_flat = points.view(-1, 3)  # [batch_size * num_points, 3]
        print(f"[Embed] Points_flat shape: {points_flat.shape}")  # Debug

        # Embed points into multivectors
        multivectors = embed_point(points_flat)  # [batch_size * num_points, 16]
        print(f"[Embed] Multivectors shape after embedding: {multivectors.shape}")  # Debug

        # Reshape to [batch_size * num_points, 1, 16]
        multivectors = multivectors.unsqueeze(1)  # [160, 1, 16]
        print(f"[Embed] Multivectors reshaped: {multivectors.shape}")  # Debug

        # Replicate volume for each point
        scalars = volumes.view(batch_size, 1).repeat(1, num_points)  # [batch_size, num_points]
        scalars = scalars.view(-1, 1)  # [160, 1]
        print(f"[Embed] Scalars shape: {scalars.shape}")  # Debug

        return multivectors, scalars

    @staticmethod
    def dislodge(multivectors, scalars):
        """
        Extracts 3D points from multivectors.

        Parameters
        ----------
        multivectors : torch.Tensor
            Embedded multivectors of shape [batch_size * num_points, 1, 16]
        scalars : torch.Tensor
            Corresponding scalar features of shape [batch_size * num_points, 1]

        Returns
        -------
        points : torch.Tensor
            Extracted 3D points of shape [batch_size, num_points, 3]
        """
        # Remove the singleton dimension
        multivectors = multivectors.squeeze(1)  # [160, 16]
        print(f"[Dislodge] Multivectors shape after squeezing: {multivectors.shape}")  # Debug

        # Extract points from multivectors
        points = extract_point(multivectors)  # [160, 3]
        print(f"[Dislodge] Points shape after extraction: {points.shape}")  # Debug

        # Reshape back to [batch_size, num_points, 3]
        batch_size = scalars.shape[0] // 5  # Assuming num_points = 5
        num_points = 5
        points = points.view(batch_size, num_points, 3)
        print(f"[Dislodge] Points reshaped: {points.shape}")  # Debug

        return points

In [23]:
# transformer_model.ipynb
class TransformerRegressor(nn.Module):
    def __init__(self, input_dim=3, embed_dim=64, num_heads=8, num_layers=3, dropout=0.1):
        super(TransformerRegressor, self).__init__()
        self.embedding = nn.Linear(input_dim, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, dropout=dropout)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.pooling = nn.AdaptiveAvgPool1d(1)
        self.regressor = nn.Sequential(
            nn.Linear(embed_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )
    
    def forward(self, x):
        """
        x: [batch_size, num_points, 3]
        """
        x = self.embedding(x)  # [batch_size, num_points, embed_dim]
        x = x.permute(1, 0, 2)  # [num_points, batch_size, embed_dim] for Transformer
        x = self.transformer(x)  # [num_points, batch_size, embed_dim]
        x = x.permute(1, 2, 0)  # [batch_size, embed_dim, num_points]
        x = self.pooling(x).squeeze(-1)  # [batch_size, embed_dim]
        x = self.regressor(x).squeeze(-1)  # [batch_size]
        return x

In [24]:
# gatr_model_initialization.ipynb
#from geometric_algebra_interface import GeometricAlgebraInterface
#from custom_data import CustomData  # Ensure correct import path

# Initialize GATr model with the updated interface
gatr_model = LaBGATr(
    GeometricAlgebraInterface,
    d_model=8,
    num_blocks=10,
    num_attn_heads=4,
    use_class_token=False
).to(device)

print(f"LaB-GATr (number of parameters): {sum(p.numel() for p in gatr_model.parameters())}")

LaB-GATr (261761 parameters)
LaB-GATr (number of parameters): 261761


In [25]:
# train_model_transformer.ipynb
def train_model_transformer(model, train_loader, test_loader, epochs=50, lr=1e-3):
    """
    Training loop for the TransformerRegressor model.

    Parameters
    ----------
    model : torch.nn.Module
        The TransformerRegressor model.
    train_loader : torch.utils.data.DataLoader
        DataLoader for the training set.
    test_loader : torch.utils.data.DataLoader
        DataLoader for the testing set.
    epochs : int
        Number of training epochs.
    lr : float
        Learning rate.

    Returns
    -------
    tuple
        Training and testing losses.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    train_losses = []
    test_losses = []
    
    for epoch in range(1, epochs + 1):
        model.train()
        running_loss = 0.0
        for batch in train_loader:
            # Extract 'points' and 'volume' from the batch
            points = batch['points'].to(device)    # [batch_size, num_points, 3]
            volumes = batch['volume'].to(device)  # [batch_size]
            
            optimizer.zero_grad()
            outputs = model(points)                # [batch_size]
            loss = criterion(outputs, volumes)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * points.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        train_losses.append(epoch_loss)
        
        # Evaluation
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for batch in test_loader:
                points = batch['points'].to(device)
                volumes = batch['volume'].to(device)
                outputs = model(points)
                loss = criterion(outputs, volumes)
                test_loss += loss.item() * points.size(0)
        test_loss /= len(test_loader.dataset)
        test_losses.append(test_loss)
        
        print(f"Epoch {epoch}/{epochs} - Train Loss: {epoch_loss:.4f}, Test Loss: {test_loss:.4f}")
    
    return train_losses, test_losses

In [26]:
# train_model_gatr.ipynb
#from custom_data import CustomData  # Ensure correct import path

def train_model_gatr(model, train_loader, test_loader, epochs=50, lr=1e-3):
    """
    Training loop for the GATr model.

    Parameters
    ----------
    model : torch.nn.Module
        The GATr model.
    train_loader : torch.utils.data.DataLoader
        DataLoader for the training set.
    test_loader : torch.utils.data.DataLoader
        DataLoader for the testing set.
    epochs : int
        Number of training epochs.
    lr : float
        Learning rate.

    Returns
    -------
    tuple
        Training and testing losses.
    """
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    train_losses = []
    test_losses = []
    
    for epoch in range(1, epochs + 1):
        model.train()
        running_loss = 0.0
        for batch in train_loader:
            # Instantiate CustomData
            custom_data = CustomData(batch['points'], batch['volume'], device)
            
            optimizer.zero_grad()
            outputs = model(custom_data)  # Pass the CustomData object
            loss = criterion(outputs, custom_data.volume)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * custom_data.points.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        train_losses.append(epoch_loss)
        
        # Evaluation
        model.eval()
        test_loss = 0.0
        with torch.no_grad():
            for batch in test_loader:
                custom_data = CustomData(batch['points'], batch['volume'], device)
                outputs = model(custom_data)
                loss = criterion(outputs, custom_data.volume)
                test_loss += loss.item() * custom_data.points.size(0)
        test_loss /= len(test_loader.dataset)
        test_losses.append(test_loss)
        
        print(f"Epoch {epoch}/{epochs} - Train Loss: {epoch_loss:.4f}, Test Loss: {test_loss:.4f}")
    
    return train_losses, test_losses

In [27]:
# test_forward_pass.ipynb
#from custom_data import CustomData  # Ensure correct import path

# Fetch a single batch from the train_loader
batch = next(iter(train_loader))
print(f"Batch 'points' shape: {batch['points'].shape}")   # Expected: [32, 5, 3]
print(f"Batch 'volume' shape: {batch['volume'].shape}") # Expected: [32]

# Create CustomData instance
custom_data = CustomData(batch['points'], batch['volume'], device)
print(f"CustomData.batch shape: {custom_data.batch.shape}")  # Expected: [160]
print(f"CustomData.scale0_sampling_index shape: {custom_data.scale0_sampling_index.shape}")  # Expected: [160]

# Pass through the model
try:
    outputs = gatr_model(custom_data)
    print(f"Model outputs shape: {outputs.shape}")           # Expected: [32]
except Exception as e:
    print(f"Error during forward pass: {e}")

Batch 'points' shape: torch.Size([32, 5, 3])
Batch 'volume' shape: torch.Size([32])
CustomData.batch shape: torch.Size([160])
CustomData.scale0_sampling_index shape: torch.Size([160])
[Embed] Points shape: torch.Size([32, 5, 3])
[Embed] Volumes shape: torch.Size([32])
[Embed] Points_flat shape: torch.Size([160, 3])
[Embed] Multivectors shape after embedding: torch.Size([160, 16])
[Embed] Multivectors reshaped: torch.Size([160, 1, 16])
[Embed] Scalars shape: torch.Size([160, 1])
Error during forward pass: 


In [28]:

# train_transformer.ipynb
# Initialize TransformerRegressor model
transformer_model = TransformerRegressor().to(device)

# Train the Transformer model
transformer_train_losses, transformer_test_losses = train_model_transformer(
    transformer_model,
    train_loader,
    test_loader,
    epochs=50,
    lr=1e-3
)




Epoch 1/50 - Train Loss: 879.0779, Test Loss: 623.7451
Epoch 2/50 - Train Loss: 583.0120, Test Loss: 616.7980
Epoch 3/50 - Train Loss: 574.8533, Test Loss: 583.0727
Epoch 4/50 - Train Loss: 539.6826, Test Loss: 608.2105
Epoch 5/50 - Train Loss: 587.2325, Test Loss: 617.1289
Epoch 6/50 - Train Loss: 588.2393, Test Loss: 616.6116
Epoch 7/50 - Train Loss: 587.8776, Test Loss: 617.1104
Epoch 8/50 - Train Loss: 586.4686, Test Loss: 617.0470
Epoch 9/50 - Train Loss: 569.8059, Test Loss: 551.1265
Epoch 10/50 - Train Loss: 500.9128, Test Loss: 474.3313
Epoch 11/50 - Train Loss: 478.6215, Test Loss: 505.7599
Epoch 12/50 - Train Loss: 464.4069, Test Loss: 454.4986
Epoch 13/50 - Train Loss: 439.7087, Test Loss: 449.5160
Epoch 14/50 - Train Loss: 432.3169, Test Loss: 428.3028
Epoch 15/50 - Train Loss: 400.9624, Test Loss: 423.5585
Epoch 16/50 - Train Loss: 382.0725, Test Loss: 352.9266
Epoch 17/50 - Train Loss: 349.6240, Test Loss: 346.4014
Epoch 18/50 - Train Loss: 337.4675, Test Loss: 315.7315
E

In [30]:
# train_gatr.ipynb
#from geometric_algebra_interface import GeometricAlgebraInterface  # Update the path accordingly
#from custom_data import CustomData  # Ensure correct import path

# Initialize GATr model with the updated interface
gatr_model = LaBGATr(
    GeometricAlgebraInterface,
    d_model=8,
    num_blocks=10,
    num_attn_heads=4,
    use_class_token=False
).to(device)

# Verify the model with a single forward pass (if not done in previous cell)
outputs = gatr_model(custom_data)
print(f"Model outputs shape: {outputs.shape}")  # Expected: [32]

# Train the GATr model
gatr_train_losses, gatr_test_losses = train_model_gatr(
    gatr_model,
    train_loader,
    test_loader,
    epochs=50,
    lr=1e-3
)

LaB-GATr (261761 parameters)
[Embed] Points shape: torch.Size([32, 5, 3])
[Embed] Volumes shape: torch.Size([32])
[Embed] Points_flat shape: torch.Size([160, 3])
[Embed] Multivectors shape after embedding: torch.Size([160, 16])
[Embed] Multivectors reshaped: torch.Size([160, 1, 16])
[Embed] Scalars shape: torch.Size([160, 1])


AssertionError: 