In [5]:
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"
!pip install torch-geometric-temporal

2.0.1+cu118
11.8
Collecting torch-geometric-temporal
  Downloading torch_geometric_temporal-0.54.0.tar.gz (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pandas<=1.3.5 (from torch-geometric-temporal)
  Downloading pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m80.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse (from torch-geometric-temporal)
  Downloading torch_sparse-0.6.17.tar.gz (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch_scatter (from torch-geometric-temporal)
  Downloading torch_scatter-2.1.1.tar.gz (107 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━

In [7]:
# prompt: mount google drive

from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [21]:
import torch
import os
import json
import numpy as np
from torch_geometric.data import Data


class ASLDatasetLoader:
    def __init__(self, directory_path):
        self.directory_path = directory_path
        self.sign_to_label = self._create_sign_to_label_map()

    def _create_sign_to_label_map(self):
        signs = [os.path.splitext(filename)[0] for filename in os.listdir(self.directory_path)]
        return {sign: i for i, sign in enumerate(signs)}

    def _read_file_data(self, file_path):
        with open(file_path, 'r') as f:
            return json.load(f)

    def _augment_data(self, frame_data, rotation_range=10, translation_range=0.05, scaling_range=0.1):
        """
        Augment the frame data with random rotation, translation, and scaling.

        :param frame_data: Dictionary containing frame landmarks and deltas.
        :param rotation_range: Maximum rotation angle in degrees.
        :param translation_range: Maximum translation as a fraction of landmark range.
        :param scaling_range: Maximum scaling factor.
        :return: Augmented frame data.
        """
        landmarks = np.array(frame_data["landmarks"])
        centroid = np.mean(landmarks, axis=0)

        # Random rotation
        theta = np.radians(np.random.uniform(-rotation_range, rotation_range))
        rotation_matrix = np.array([
            [np.cos(theta), -np.sin(theta)],
            [np.sin(theta), np.cos(theta)]
        ])
        landmarks = np.dot(landmarks - centroid, rotation_matrix) + centroid

        # Random translation
        max_translation = translation_range * (landmarks.max(axis=0) - landmarks.min(axis=0))
        translations = np.random.uniform(-max_translation, max_translation)
        landmarks += translations

        # Random scaling
        scale = np.random.uniform(1 - scaling_range, 1 + scaling_range)
        landmarks = centroid + scale * (landmarks - centroid)

        frame_data["landmarks"] = landmarks.tolist()
        return frame_data

    def _create_graph_from_frame(self, sign_name, frame_data):
        landmarks = np.array(frame_data["landmarks"])
        deltas = np.array(frame_data["deltas"])

        # Adjust lengths for concatenation
        n_landmarks = len(landmarks)
        landmarks = landmarks[:n_landmarks-1]
        deltas = deltas[:n_landmarks-1]

        # Create edges based on the number of available landmarks (or nodes)
        edges = [[i, i+1] for i in range(len(landmarks) - 1)]

        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        x = torch.tensor(np.hstack((landmarks, deltas)), dtype=torch.float)
        y = torch.tensor([self.sign_to_label[sign_name]], dtype=torch.long)

        return Data(x=x, edge_index=edge_index, y=y)


    def get_dataset(self, augment=False):
        dataset = []

        for filename in os.listdir(self.directory_path):
            sign_name = os.path.splitext(filename)[0]
            file_path = os.path.join(self.directory_path, filename)
            sign_data = self._read_file_data(file_path)

            for frame_data in sign_data["frames"]:
                if augment:
                  frame_data = self._augment_data(frame_data)
                graph_data = self._create_graph_from_frame(sign_name, frame_data)

                dataset.append(graph_data)

        return dataset

    def number_of_classes(self):
        return len(self.sign_to_label)

In [22]:
from torch_geometric.nn import GCNConv, global_max_pool, BatchNorm  # Notice the change in the import

class GraphClassifier(torch.nn.Module):
    def __init__(self, num_node_features, num_classes, dropout_rate=0.5):
        super(GraphClassifier, self).__init__()
        self.conv1 = GCNConv(num_node_features, 128)
        self.bn1 = BatchNorm(128)
        self.dropout1 = torch.nn.Dropout(dropout_rate)  # Dropout after first layer
        self.conv2 = GCNConv(128, 64)
        self.bn2 = BatchNorm(64)
        self.dropout2 = torch.nn.Dropout(dropout_rate)  # Dropout after second layer
        self.fc = torch.nn.Linear(64, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # First GCN layer
        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = F.leaky_relu(x)  # Use LeakyReLU
        x = self.dropout1(x)

        # Second GCN layer
        x = self.conv2(x, edge_index)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.dropout2(x)

        # Global pooling across nodes
        x = global_max_pool(x, data.batch)  # Here's the change from mean pooling to max pooling

        # Final classification layer
        x = self.fc(x)

        return F.log_softmax(x, dim=1)

In [23]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_max_pool, global_mean_pool

class ExtendedGraphClassifier(torch.nn.Module):

    def __init__(self, num_features, num_classes):
        super(ExtendedGraphClassifier, self).__init__()
        self.conv1 = GCNConv(num_features, 128)
        self.conv2 = GCNConv(128, 256)
        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, num_classes)
        self.dropout = torch.nn.Dropout(p=0.2)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = self.dropout(F.relu(self.conv1(x, edge_index)))
        x = self.dropout(F.relu(self.conv2(x, edge_index)))
        x = global_max_pool(x, batch)
        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)


In [26]:
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch_geometric.loader import DataLoader
from collections import Counter
import random

EPOCHS = 250
LEARNING_RATE = 0.001


def stratified_data_split(data_list, test_size=0.2):
    # Extract labels from data list
    labels = [data.y.item() for data in data_list]

    # Use sklearn's train_test_split with stratify option
    train_data, test_data = train_test_split(data_list, test_size=test_size, stratify=labels, random_state=42)

    return train_data, test_data


def validate(loader, model, device):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        with torch.no_grad():
            out = model(data)
        pred = out.argmax(dim=1)
        correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)

def train():
    directory_path = "/content/drive/MyDrive/Colab Notebooks/DGMD E-14 Project/Datasets/ASL"
    loader = ASLDatasetLoader(directory_path)

    # Create the entire dataset without augmentation and then perform stratified split
    data_list = loader.get_dataset()
    train_dataset, test_dataset = stratified_data_split(data_list, test_size=0.2)

    # Now augment only the training dataset
    augmented_train_dataset = loader.get_dataset(augment=True)

    num_classes = loader.number_of_classes()

    train_labels = [data.y.item() for data in train_dataset]
    test_labels = [data.y.item() for data in test_dataset]

    print("Training label distribution:", Counter(train_labels))
    print("Test label distribution:", Counter(test_labels))

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = ExtendedGraphClassifier(num_features=4, num_classes=num_classes).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=5e-4)
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.7, patience=5, verbose=True)

    max_epochs_without_improvement = 20
    epochs_without_improvement = 0
    best_val_accuracy = 0

    model.train()
    for epoch in range(EPOCHS):
        total_loss = 0
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            out = model(batch)
            loss = F.nll_loss(out, batch.y)
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

            optimizer.step()
            total_loss += loss.item()

            # Check for NaN loss
            if np.isnan(loss.item()):
                print("Warning: NaN loss detected!")

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch}, Loss: {avg_loss}")

        val_accuracy = validate(test_loader, model, device)
        scheduler.step(val_accuracy)

        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= max_epochs_without_improvement:
            print("Early stopping triggered.")
            break

    model.eval()
    correct = 0
    all_preds = []
    all_labels = []

    for batch in test_loader:
        batch = batch.to(device)
        with torch.no_grad():
            pred = model(batch).max(dim=1)[1]
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(batch.y.cpu().numpy())
            correct += pred.eq(batch.y).sum().item()

    print(f"Accuracy: {correct / len(test_dataset)}")
    print("Sample predictions:", all_preds[:20])
    print("Sample true labels:", all_labels[:20])

In [27]:
train()

Training label distribution: Counter({15: 402, 7: 363, 9: 298, 3: 282, 13: 269, 16: 265, 19: 258, 2: 247, 8: 246, 4: 243, 5: 243, 10: 236, 14: 223, 18: 220, 17: 215, 12: 214, 1: 210, 0: 203, 11: 199, 6: 199})
Test label distribution: Counter({15: 100, 7: 91, 9: 75, 3: 70, 13: 67, 16: 66, 19: 65, 2: 62, 8: 62, 4: 61, 5: 60, 10: 59, 14: 56, 18: 55, 17: 54, 12: 53, 1: 52, 0: 51, 6: 50, 11: 50})
Epoch 0, Loss: 2.9783951134621343
Epoch 1, Loss: 2.7325360246851473
Epoch 2, Loss: 2.5616929244391526
Epoch 3, Loss: 2.5553990859019606
Epoch 4, Loss: 2.530746081207372
Epoch 5, Loss: 2.523849629148652
Epoch 6, Loss: 2.511371390729011
Epoch 7, Loss: 2.52093997484521
Epoch 8, Loss: 2.5140357289133193
Epoch 9, Loss: 2.4862496596348436
Epoch 10, Loss: 2.482298280619368
Epoch 11, Loss: 2.4806352280363253
Epoch 12, Loss: 2.4762717953211144
Epoch 13, Loss: 2.4677184608918203
Epoch 14, Loss: 2.4751703890064096
Epoch 00015: reducing learning rate of group 0 to 7.0000e-04.
Epoch 15, Loss: 2.456407485128958
