### Setup

In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# force_remount=True is passed so the mount is redone even if Drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


#### Install PyTorch Geometric Temporal

In [None]:
# Print the installed PyTorch version and the CUDA version reported by torch.version.cuda.
!python -c "import torch; print(torch.__version__)"
!python -c "import torch; print(torch.version.cuda)"
# Source the activate script stored on Drive (presumably a virtualenv -- TODO confirm) and run
# pip in the same shell invocation, so the install targets whatever environment that script selects.
!source /content/drive/MyDrive/dgmd-e-14-colab-env/bin/activate; pip install torch-geometric-temporal

2.0.1+cu118
11.8


### `ASLDatasetLoader` Class

The `ASLDatasetLoader` class is designed for loading and processing the ASL dataset. Given a directory, it reads sign language data from JSON files and constructs graph representations suitable for graph-based neural networks. Crucially, the class converts JSON data into PyTorch Geometric (PyG) `Data` objects comprising `x` (node features), `edge_index` (graph connectivity), and `y` (labels) attributes.

**Methods**:

- `_create_sign_to_label_map`: Generates a mapping from sign names to unique labels.

- `_read_file_data`: Reads data from a given JSON file.

- `_augment_data`: Implements data augmentation by applying random rotation, translation, and scaling to landmarks, which can enhance the model's robustness.

- `_create_graph_from_frame`: Constructs a PyG `Data` object from frame data, concentrating on hand and face landmarks. Edges are created between consecutive landmarks and between left and right hand landmarks. Additional features, like hand-to-face distances, are also computed.

- `get_dataset`: Assembles the dataset, optionally incorporating data augmentation. The function outputs a list of PyG `Data` objects ready for graph neural network processing.

In [None]:
import torch
import os
import json
import numpy as np
from torch_geometric.data import Data

# A hand landmark whose Euclidean distance to the nearest face landmark is below this value
# is flagged as "in contact" with the face (same units as the landmark coordinates).
HAND_TO_FACE_THRESHOLD = 0.05
# Directory scanned by ASLDatasetLoader; each file name (minus extension) is used as a sign name.
DATA_DIRECTORY = "/content/drive/MyDrive/Colab Notebooks/DGMD E-14 Project/Datasets/ASL"

class ASLDatasetLoader:
    """
    Load ASL sign recordings from a directory and convert every frame into a PyG ``Data`` graph.

    Each regular, non-hidden file in ``directory_path`` is treated as one sign: the file name
    without its extension is the sign name and the file content is parsed as JSON. Files are
    visited in sorted order so that the sign-to-label mapping is reproducible across runs.
    """

    def __init__(self, directory_path):
        """
        :param directory_path: Directory containing one JSON file per sign.
        """
        self.directory_path = directory_path
        self.sign_to_label = self._create_sign_to_label_map()

    def _list_sign_files(self):
        """
        List the data files of the dataset directory.

        Hidden entries (e.g. ``.DS_Store``) and sub-directories (e.g. ``.ipynb_checkpoints``)
        are skipped because they are not sign recordings and cannot be parsed as JSON.

        :return: Sorted list of file names (not full paths).
        """
        return sorted(
            name for name in os.listdir(self.directory_path)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(self.directory_path, name))
        )

    def _create_sign_to_label_map(self):
        """
        Build the mapping from sign name to integer class label.

        Sign names are de-duplicated and sorted so labels are contiguous (0..n-1) and stable
        regardless of the order in which the file system lists the directory.

        :return: Dictionary ``{sign_name: label}``.
        """
        signs = sorted({os.path.splitext(filename)[0] for filename in self._list_sign_files()})
        return {sign: i for i, sign in enumerate(signs)}

    def _read_file_data(self, file_path):
        """
        Parse one JSON data file.

        :param file_path: Path of the file to read.
        :return: The decoded JSON content.
        """
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _augment_data(self, frame_data, rotation_range=10, translation_range=0.05, scaling_range=0.1):
        """
        Augment the frame data with random rotation, translation, and scaling.

        The input dictionary is left untouched; a shallow copy carrying the transformed
        landmarks is returned. Only ``"landmarks"`` is transformed -- every other entry
        (including ``"deltas"``) is passed through unchanged.

        :param frame_data: Dictionary containing frame landmarks and deltas.
        :param rotation_range: Maximum rotation angle in degrees.
        :param translation_range: Maximum translation as a fraction of landmark range.
        :param scaling_range: Maximum scaling factor.
        :return: Augmented frame data (a new dictionary).
        """
        augmented = dict(frame_data)
        landmarks = np.array(frame_data["landmarks"], dtype=float)
        if landmarks.size == 0:
            # Nothing to transform; mean/min/max are undefined on an empty array.
            return augmented

        centroid = np.mean(landmarks, axis=0)

        # Random rotation about the centroid (2x2 matrix, so landmarks must be 2-D points)
        theta = np.radians(np.random.uniform(-rotation_range, rotation_range))
        rotation_matrix = np.array([
            [np.cos(theta), -np.sin(theta)],
            [np.sin(theta), np.cos(theta)]
        ])
        landmarks = np.dot(landmarks - centroid, rotation_matrix) + centroid

        # Random translation, bounded per axis by a fraction of the landmark extent
        max_translation = translation_range * (landmarks.max(axis=0) - landmarks.min(axis=0))
        translations = np.random.uniform(-max_translation, max_translation)
        landmarks = landmarks + translations

        # Random scaling relative to the original (pre-translation) centroid
        scale = np.random.uniform(1 - scaling_range, 1 + scaling_range)
        landmarks = centroid + scale * (landmarks - centroid)

        augmented["landmarks"] = landmarks.tolist()
        return augmented

    def _create_graph_from_frame(self, sign_name, frame_data, landmark_types):
        """
        Build a PyG ``Data`` graph for a single frame.

        Node features are, in order: landmark coordinates, deltas, an importance weight
        (2 for hand landmarks, 1 otherwise) and a hand-to-face contact flag.

        :param sign_name: Name of the sign; must be a key of ``self.sign_to_label``.
        :param frame_data: Dictionary with ``"landmarks"`` and ``"deltas"`` entries.
        :param landmark_types: One type code per landmark ("L", "R", "F", ...).
        :return: ``Data`` object with ``x``, ``edge_index`` and ``y``.
        :raises ValueError: If ``landmark_types`` does not have one entry per landmark.
        """
        landmarks = np.array(frame_data["landmarks"])
        deltas = np.array(frame_data["deltas"])

        if len(landmark_types) != len(landmarks):
            # The per-type features below are stacked row-wise with the landmarks, so the
            # two sequences must line up one-to-one.
            raise ValueError(
                f"landmark_types has {len(landmark_types)} entries but the frame has "
                f"{len(landmarks)} landmarks"
            )

        left_hand_indices = [i for i, t in enumerate(landmark_types) if t == "L"]
        right_hand_indices = [i for i, t in enumerate(landmark_types) if t == "R"]
        face_indices = [i for i, t in enumerate(landmark_types) if t == "F"]

        # Create weights based on landmark importance (hands count double)
        weights = [2 if t in ("L", "R") else 1 for t in landmark_types]

        # Chain consecutive landmarks together
        edges = [[i, i + 1] for i in range(len(landmarks) - 1)]

        # Add edges between the left and right hand landmarks
        edges.extend([i, j] for i in left_hand_indices for j in right_hand_indices)

        # NOTE(review): each edge is stored in one direction only; add the reverse edges if
        # an undirected graph is intended -- confirm.
        # reshape(-1, 2) keeps the (2, E) layout even when there are no edges at all.
        edge_index = torch.tensor(edges, dtype=torch.long).reshape(-1, 2).t().contiguous()

        # Flag hand landmarks that lie within HAND_TO_FACE_THRESHOLD of any face landmark
        hand_to_face_contact = np.zeros(len(landmark_types), dtype=int)
        if face_indices:
            face_points = landmarks[face_indices]
            for idx in left_hand_indices + right_hand_indices:
                nearest = np.linalg.norm(face_points - landmarks[idx], axis=1).min()
                if nearest < HAND_TO_FACE_THRESHOLD:
                    hand_to_face_contact[idx] = 1

        # Reshape the 1D arrays to 2D for concatenation
        weights_2d = np.array(weights)[:, np.newaxis]
        hand_to_face_contact_2d = hand_to_face_contact[:, np.newaxis]

        # Concatenate landmarks, deltas, importance weights, and hand-to-face contact features
        x = torch.tensor(np.hstack((landmarks, deltas, weights_2d, hand_to_face_contact_2d)), dtype=torch.float)
        y = torch.tensor([self.sign_to_label[sign_name]], dtype=torch.long)

        return Data(x=x, edge_index=edge_index, y=y)

    def get_dataset(self, augment=False):
        """
        Build one graph per frame for every sign file in the directory.

        :param augment: When True, each frame is passed through ``_augment_data`` before
            being converted (the augmented frame replaces the original one).
        :return: List of PyG ``Data`` objects.
        """
        dataset = []

        for filename in self._list_sign_files():
            sign_name = os.path.splitext(filename)[0]
            file_path = os.path.join(self.directory_path, filename)
            sign_data = self._read_file_data(file_path)

            # The type list belongs to the file, not the frame, so look it up once.
            landmark_types = sign_data.get("landmark_types", ["F", "L", "P", "R"])  # defaulting to all types

            for frame_data in sign_data["frames"]:
                if augment:
                    frame_data = self._augment_data(frame_data)
                dataset.append(self._create_graph_from_frame(sign_name, frame_data, landmark_types))

        return dataset

    def number_of_classes(self):
        """Return the number of distinct signs (class labels) found in the directory."""
        return len(self.sign_to_label)

In [None]:
import imgaug.augmenters as iaa

class ASLDataset(torch.utils.data.Dataset):
    """
    Map-style dataset over the graphs produced by ``ASLDatasetLoader``.

    The graphs are built once (without loader-side augmentation) and cached in ``data_list``.
    When ``augment`` is True, ``__getitem__`` returns an augmented *copy* of the cached graph,
    so the cached originals are never modified and augmentations do not accumulate.
    """

    def __init__(self, directory_path, augment=False):
        """
        :param directory_path: Directory handed to ``ASLDatasetLoader``.
        :param augment: Whether ``__getitem__`` should run the augmentation pipeline.
        """
        self.loader = ASLDatasetLoader(directory_path)
        self.data_list = self.loader.get_dataset()
        self.augment = augment

        # Define the augmentation pipeline
        self.aug_pipeline = iaa.Sequential([
            iaa.Affine(translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)},  # Random translation
                       rotate=(-10, 10),  # Random rotation
                       scale={"x": (0.9, 1.1), "y": (0.9, 1.1)}),  # Random scaling
            iaa.AdditiveGaussianNoise(scale=(0, 0.05*100))  # Add Gaussian Noise
        ])

    def __len__(self):
        """Return the number of cached graphs."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """
        Return the graph at ``idx``, augmented on the fly when ``self.augment`` is set.

        :param idx: Index into ``data_list``.
        :return: The cached graph itself (no augmentation) or an augmented clone of it.
        """
        data = self.data_list[idx]
        if not self.augment:
            return data

        # Work on a copy: writing into the cached graph would make every later access
        # start from already-augmented coordinates.
        data = data.clone()

        # The first two feature columns are the landmark coordinates.
        landmarks = np.array(data.x[:, :2].tolist())
        # NOTE(review): the (N, 2) coordinate matrix is handed to imgaug via `images=`, i.e. it
        # is processed as an image rather than as a set of points -- confirm this is intended.
        augmented_landmarks = self.aug_pipeline(images=[landmarks])[0]
        data.x[:, :2] = torch.tensor(augmented_landmarks, dtype=torch.float)
        return data

    def number_of_classes(self):
        """Return the number of distinct signs known to the underlying loader."""
        return len(self.loader.sign_to_label)


### `ASLGraphClassifier` Class

The `ASLGraphClassifier` features deeper GCN layers and additional channels, which may help it capture intricate patterns in the data. It takes a PyG `Data` object as input, and its forward pass emits class logits.

**Methods**:

- `forward`: Details the forward pass, accepting a PyG `Data` object. Two GCN layers with subsequent batch normalization and dropout layers process the input. Post global max-pooling, two linear layers coupled with dropout ensure final classification, leading to log-softmax outputs.

In [None]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_max_pool, global_mean_pool, BatchNorm

class ASLGraphClassifier(torch.nn.Module):
    """
    Graph-level classifier: two GCN layers, global max pooling, and a two-layer MLP head.

    :param num_node_features: Width of the per-node feature vector (``data.x.shape[1]``).
    :param num_classes: Number of output classes.
    """

    def __init__(self, num_node_features, num_classes):
        super().__init__()
        nn = torch.nn

        # Graph convolution stack: num_node_features -> 256 -> 512, each followed by batch norm.
        self.conv1 = GCNConv(num_node_features, 256)
        self.bn1 = nn.BatchNorm1d(256)
        self.conv2 = GCNConv(256, 512)
        self.bn2 = nn.BatchNorm1d(512)

        # Classification head applied to the pooled graph embedding.
        self.lin1 = nn.Linear(512, 256)
        self.lin2 = nn.Linear(256, num_classes)

        # One dropout module shared by all stages.
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, data):
        """
        Compute per-graph class log-probabilities.

        :param data: PyG ``Data``/``Batch`` providing ``x``, ``edge_index`` and ``batch``.
        :return: Tensor of log-softmax scores, one row per graph.
        """
        node_feats = data.x
        connectivity = data.edge_index

        # conv -> batch norm -> ReLU -> dropout, applied twice
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            node_feats = self.dropout(F.relu(norm(conv(node_feats, connectivity))))

        # Collapse node embeddings into one vector per graph
        graph_feats = global_max_pool(node_feats, data.batch)

        hidden = self.dropout(F.relu(self.lin1(graph_feats)))
        logits = self.lin2(hidden)
        return F.log_softmax(logits, dim=1)

In [None]:
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch_geometric.loader import DataLoader
from collections import Counter
import random

# Number of full passes over the training set performed by train().
EPOCHS = 1
# Learning rate passed to the Adam optimizer in train().
LEARNING_RATE = 0.001
# Number of worker processes used by the training and validation DataLoaders.
NUM_WORKERS = 2

def stratified_data_split(data, test_size=0.2):
    """
    Split graphs into train and test parts while preserving the class proportions.

    :param data: Sequence of graphs whose ``y`` holds a single class label.
    :param test_size: Share of the samples assigned to the test part.
    :return: Tuple ``(train_data, test_data)``.
    """
    # Collect one scalar label per graph to drive the stratification.
    labels = []
    for sample in data:
        labels.append(sample.y.item())

    # The split labels are returned as well but are not needed by callers.
    parts = train_test_split(
        data,
        labels,
        test_size=test_size,
        stratify=labels,
        random_state=42,
    )
    train_data, test_data = parts[0], parts[1]
    return train_data, test_data

def evaluate(model, loader, device, detailed=False):
    """
    Measure classification accuracy of ``model`` over all batches of ``loader``.

    Accuracy is computed over the samples that were actually evaluated, so it stays correct
    when the loader yields fewer samples than ``len(loader.dataset)`` (e.g. ``drop_last`` or a
    sub-sampling sampler). An empty loader yields an accuracy of 0.0 instead of raising.

    The model is switched to eval mode and is left in that mode on return.

    :param model: Callable module mapping a batch to per-class scores of shape (N, C).
    :param loader: Iterable of batches exposing ``to(device)`` and ``y``.
    :param device: Device the batches are moved to before inference.
    :param detailed: When True, also return the predictions and the true labels.
    :return: ``accuracy`` or ``(accuracy, all_preds, all_labels)`` when ``detailed`` is set.
    """
    model.eval()
    correct = 0
    all_preds = []
    all_labels = []

    # No gradients are needed for inference; disable them once for the whole pass.
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            pred = model(batch).max(dim=1)[1]
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(batch.y.cpu().numpy())
            correct += pred.eq(batch.y).sum().item()

    evaluated = len(all_labels)
    accuracy = correct / evaluated if evaluated else 0.0

    if detailed:
        return accuracy, all_preds, all_labels
    return accuracy


def train():
    """
    Train an ``ASLGraphClassifier`` on the dataset under ``DATA_DIRECTORY`` and report results.

    Steps: build the (augmented) graph dataset, split it 80/20 with stratification, train for
    ``EPOCHS`` epochs with Adam, gradient clipping and a ReduceLROnPlateau schedule driven by
    the mean training loss, then print the validation accuracy per epoch and a sample of
    validation predictions next to the true labels.

    Batches whose loss is not finite are reported and skipped, so a single bad batch cannot
    overwrite the weights with NaN values.
    """
    loader = ASLDatasetLoader(DATA_DIRECTORY)

    # Splitting data using the stratified_data_split method
    # NOTE(review): augmentation happens before the split, so the validation graphs are
    # augmented as well -- confirm this is intended.
    train_dataset, val_dataset = stratified_data_split(loader.get_dataset(augment=True), test_size=0.2)

    num_classes = loader.number_of_classes()

    # Take the feature width from the data instead of hard-coding it, so the model stays in
    # sync with whatever features the loader produces.
    num_node_features = train_dataset[0].x.size(1)

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=NUM_WORKERS)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = ASLGraphClassifier(num_node_features=num_node_features, num_classes=num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=5e-4)

    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=True)

    for epoch in range(EPOCHS):
        model.train()  # evaluate() leaves the model in eval mode
        total_loss = 0.0
        counted_batches = 0
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            out = model(batch)
            loss = F.nll_loss(out, batch.y)

            # Check for a non-finite loss *before* backpropagating: stepping on NaN/inf
            # gradients would corrupt every parameter.
            if not torch.isfinite(loss):
                print(f"Warning: non-finite loss ({loss.item()}) detected; skipping batch.")
                continue

            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)

            optimizer.step()
            total_loss += loss.item()
            counted_batches += 1

        # Average over the batches that actually contributed to the update.
        avg_loss = total_loss / counted_batches if counted_batches else float("nan")
        val_accuracy = evaluate(model, val_loader, device)

        print(f"Epoch {epoch}, Loss: {avg_loss}, Validation Accuracy: {val_accuracy:.4f}")

        scheduler.step(avg_loss)

    # Evaluating on validation set to get sample predictions
    _, all_preds, all_labels = evaluate(model, val_loader, device, detailed=True)
    print("Sample predictions:", all_preds[:20])
    print("Sample true labels:", all_labels[:20])

In [None]:
# Run the full training pipeline (EPOCHS epochs) and print the per-epoch metrics and sample predictions.
train()

Epoch 0, Loss: 5.126807749170425, Validation Accuracy: 0.0710
Sample predictions: [216, 216, 121, 113, 150, 2, 82, 82, 145, 93, 215, 192, 145, 216, 133, 160, 216, 216, 147, 189]
Sample true labels: [88, 21, 173, 38, 175, 64, 171, 41, 72, 88, 196, 145, 119, 212, 1, 195, 163, 12, 16, 16]
