In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.optim import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
import torch.nn as nn
import torch_geometric.nn as pyg_nn

In [9]:
print("Ages shape:", ages.shape)
print("Brain characteristics shape:", brain_characteristics.shape)
print("Connectivity shape:", connectivity.shape)
print("Number of unique ages:", len(np.unique(ages)))

Ages shape: (587,)
Brain characteristics shape: (2, 587, 360)
Connectivity shape: (587, 360, 360)
Number of unique ages: 410


In [15]:
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GATConv, global_mean_pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

# Step 1: Load and preprocess the data
# Load files
age_labels = np.load("Downloads/DL_Project/age_labels_592_sbj_filtered.npy")[:587]  # Adjust to the smallest dimension
features = np.load("Downloads/DL_Project/cam_can_thicks_myelins_tensor_592_filtered.npy")[:, :587, :]  # Match samples
connectivity = np.load("Downloads/DL_Project/plv_tensor_592_sbj_filtered.npy")[:587, :, :]
brain_regions = pd.read_csv("Downloads/DL_Project/HCP-MMP1_UniqueRegionList.csv")

# Transpose features to match (num_samples, num_regions, num_features)
features = np.transpose(features, (1, 2, 0))  # Shape: (587, 360, 2)

# Create edges and weights
def create_edges(plv_tensor):
    avg_connectivity = np.mean(plv_tensor, axis=0)  # Shape: (360, 360)
    row, col = np.where(avg_connectivity > 0)  # Extract non-zero connections
    edge_weights = avg_connectivity[row, col]
    edge_index = torch.tensor([row, col], dtype=torch.long)  # Shape: (2, num_edges)
    edge_weights = torch.tensor(edge_weights, dtype=torch.float)  # Shape: (num_edges,)
    return edge_index, edge_weights
    
edge_weights = (edge_weights - edge_weights.mean()) / edge_weights.std()
edge_index, edge_weights = create_edges(connectivity)
node_features = torch.tensor(features, dtype=torch.float)
labels = torch.tensor(age_labels, dtype=torch.float)

# Prepare PyTorch Geometric dataset
dataset = []
for i in range(len(labels)):
    data = Data(x=node_features[i], edge_index=edge_index, edge_attr=edge_weights, y=labels[i])
    dataset.append(data)

# Split the dataset
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)  # 0.25 * 0.8 = 0.2

train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(val_data, batch_size=16, shuffle=False)
test_loader = DataLoader(test_data, batch_size=16, shuffle=False)

# Step 2: Define the GNN Model
class WeightedHyperbolicGNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, edge_dim=1)
        self.gat2 = GATConv(hidden_dim, hidden_dim, edge_dim=1)
        self.gat3 = GATConv(hidden_dim, hidden_dim, edge_dim=1)  # New Layer
        self.fc1 = torch.nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc2 = torch.nn.Linear(hidden_dim // 2, output_dim)
        self.dropout = torch.nn.Dropout(p=0.5)

    def forward(self, x, edge_index, edge_attr, batch):
        x = torch.relu(self.gat1(x, edge_index, edge_attr))
        x = torch.relu(self.gat2(x, edge_index, edge_attr))
        x = torch.relu(self.gat3(x, edge_index, edge_attr))
        x = global_mean_pool(x, batch)
        x = self.dropout(torch.relu(self.fc1(x)))
        return self.fc2(x)

# Step 3: Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = WeightedHyperbolicGNN(input_dim=2, hidden_dim=64, output_dim=1).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
loss_fn = torch.nn.MSELoss()

def train(loader):
    model.train()
    total_loss = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch).squeeze()
        loss = loss_fn(pred, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def evaluate(loader):
    model.eval()
    total_loss = 0
    preds, truths = [], []
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch).squeeze()
            loss = loss_fn(pred, batch.y)
            total_loss += loss.item()
            preds.extend(pred.cpu().numpy())
            truths.extend(batch.y.cpu().numpy())
    return total_loss / len(loader), preds, truths

for epoch in range(50):  # Adjust epochs as needed
    train_loss = train(train_loader)
    val_loss, _, _ = evaluate(val_loader)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Step 4: Test the model and compute MAE
_, preds, truths = evaluate(test_loader)
mae = mean_absolute_error(truths, preds)
print(f"Test MAE: {mae:.4f}")



Epoch 1, Train Loss: 1947.2012, Val Loss: 1045.7918
Epoch 2, Train Loss: 757.2323, Val Loss: 446.6912
Epoch 3, Train Loss: 695.2520, Val Loss: 481.8288
Epoch 4, Train Loss: 577.7260, Val Loss: 477.8602
Epoch 5, Train Loss: 581.4380, Val Loss: 430.8341
Epoch 6, Train Loss: 620.1038, Val Loss: 477.0501
Epoch 7, Train Loss: 605.3998, Val Loss: 514.4586
Epoch 8, Train Loss: 621.5810, Val Loss: 589.5982
Epoch 9, Train Loss: 611.6444, Val Loss: 419.7325
Epoch 10, Train Loss: 630.2848, Val Loss: 413.6206
Epoch 11, Train Loss: 605.7658, Val Loss: 452.1151
Epoch 12, Train Loss: 665.5902, Val Loss: 504.2598
Epoch 13, Train Loss: 575.3516, Val Loss: 414.7028
Epoch 14, Train Loss: 611.6040, Val Loss: 403.9059
Epoch 15, Train Loss: 599.1761, Val Loss: 545.0967
Epoch 16, Train Loss: 680.9378, Val Loss: 442.9479
Epoch 17, Train Loss: 537.1679, Val Loss: 452.5381
Epoch 18, Train Loss: 505.3424, Val Loss: 424.0148
Epoch 19, Train Loss: 510.6759, Val Loss: 411.8120
Epoch 20, Train Loss: 590.4283, Val Lo

In [17]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GATConv, global_mean_pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# 1. Data Preprocessing with Advanced Normalization
# Load files
age_labels = np.load("Downloads/DL_Project/age_labels_592_sbj_filtered.npy")[:587]
features = np.load("Downloads/DL_Project/cam_can_thicks_myelins_tensor_592_filtered.npy")[:, :587, :]
connectivity = np.load("Downloads/DL_Project/plv_tensor_592_sbj_filtered.npy")[:587, :, :]
brain_regions = pd.read_csv("Downloads/DL_Project/HCP-MMP1_UniqueRegionList.csv")

# Advanced Normalization
age_scaler = StandardScaler()
normalized_ages = age_scaler.fit_transform(age_labels.reshape(-1, 1)).squeeze()

# Normalize features per subject and per feature dimension
features = np.transpose(features, (1, 2, 0))  # Shape: (587, 360, 2)
normalized_features = np.zeros_like(features, dtype=float)
for i in range(features.shape[0]):
    for j in range(features.shape[2]):
        normalized_features[i, :, j] = StandardScaler().fit_transform(features[i, :, j].reshape(-1, 1)).squeeze()

# Create edges and weights with robust connectivity calculation
def create_edges(plv_tensor):
    avg_connectivity = np.mean(plv_tensor, axis=0)
    threshold = np.percentile(avg_connectivity[avg_connectivity > 0], 50)  # Dynamic thresholding
    row, col = np.where(avg_connectivity >= threshold)
    edge_weights = avg_connectivity[row, col]
    
    # Normalize edge weights
    edge_weights = (edge_weights - edge_weights.mean()) / (edge_weights.std() + 1e-7)
    
    edge_index = torch.tensor([row, col], dtype=torch.long)
    edge_weights = torch.tensor(edge_weights, dtype=torch.float)
    return edge_index, edge_weights

edge_index, edge_weights = create_edges(connectivity)

# Prepare PyTorch Geometric dataset
dataset = []
for i in range(len(normalized_ages)):
    node_features = torch.tensor(normalized_features[i], dtype=torch.float)
    label = torch.tensor(normalized_ages[i], dtype=torch.float)
    data = Data(x=node_features, edge_index=edge_index, edge_attr=edge_weights, y=label)
    dataset.append(data)

# Split the dataset
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)

train_loader = DataLoader(train_data, batch_size=16, shuffle=True)
val_loader = DataLoader(val_data, batch_size=16, shuffle=False)
test_loader = DataLoader(test_data, batch_size=16, shuffle=False)

# 2. Advanced GNN Model with Multiple Optimizations
class RobustWeightedHyperbolicGNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Graph Attention Layers
        self.gat1 = GATConv(input_dim, hidden_dim, heads=4, concat=True, edge_dim=1)
        self.gat2 = GATConv(hidden_dim * 4, hidden_dim * 2, heads=4, concat=True, edge_dim=1)
        self.gat3 = GATConv(hidden_dim * 8, hidden_dim, heads=2, concat=False, edge_dim=1)
        
        # Batch Normalization
        self.batch_norm1 = torch.nn.BatchNorm1d(hidden_dim * 4)
        self.batch_norm2 = torch.nn.BatchNorm1d(hidden_dim * 8)

        # Fully Connected Layers
        self.fc1 = torch.nn.Linear(hidden_dim, hidden_dim // 2)
        self.fc2 = torch.nn.Linear(hidden_dim // 2, output_dim)

        # Dropout layers
        self.dropout1 = torch.nn.Dropout(p=0.3)
        self.dropout2 = torch.nn.Dropout(p=0.2)

    def forward(self, x, edge_index, edge_attr, batch):
        # First GAT layer
        x = self.gat1(x, edge_index, edge_attr)
        x = self.batch_norm1(x)
        x = F.leaky_relu(x)
        x = self.dropout1(x)

        # Second GAT layer
        x = self.gat2(x, edge_index, edge_attr)
        x = self.batch_norm2(x)
        x = F.leaky_relu(x)
        x = self.dropout2(x)

        # Third GAT layer
        x = self.gat3(x, edge_index, edge_attr)

        # Global Mean Pooling
        x = global_mean_pool(x, batch)

        # Fully connected layers
        x = F.leaky_relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 3. Device and Model Initialization
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RobustWeightedHyperbolicGNN(input_dim=2, hidden_dim=64, output_dim=1).to(device)

# 4. Multiple Loss Functions and Optimizers
# Huber Loss for robustness
loss_fn1 = torch.nn.SmoothL1Loss()  # Huber Loss
loss_fn2 = torch.nn.MSELoss()

# Multiple Optimizers
optimizer1 = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
optimizer2 = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)

# Learning Rate Schedulers
scheduler1 = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer1, 'min', patience=3, factor=0.5)
scheduler2 = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer2, T_max=10)

# 5. Training Function with Multiple Strategies
def train_and_evaluate(model, train_loader, val_loader, optimizer, loss_fn, scheduler, epochs=10):
    print(f"\nTraining with {type(optimizer).__name__} and {type(loss_fn).__name__}")
    model.train()
    
    for epoch in range(epochs):
        model.train()
        total_loss = 0
        
        for batch in train_loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            
            pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch).squeeze()
            loss = loss_fn(pred, batch.y)
            
            # Gradient Clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
            
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
        
        # Validation
        model.eval()
        val_loss = 0
        val_preds, val_truths = [], []
        
        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch).squeeze()
                val_loss += loss_fn(pred, batch.y).item()
                val_preds.extend(pred.cpu().numpy())
                val_truths.extend(batch.y.cpu().numpy())
        
        val_loss /= len(val_loader)
        val_mae = mean_absolute_error(val_truths, val_preds)
        
        # Learning Rate Scheduling
        if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
            scheduler.step(val_loss)
        else:
            scheduler.step()
        
        print(f"Epoch {epoch+1}, Val Loss: {val_loss:.4f}, Val MAE: {val_mae:.4f}")
    
    return val_mae

# 6. Comparative Experiments
experiments = [
    (model, train_loader, val_loader, optimizer1, loss_fn1, scheduler1),
    (model, train_loader, val_loader, optimizer2, loss_fn2, scheduler2)
]

results = []
for exp in experiments:
    # Reset model weights before each experiment
    for layer in model.children():
        if hasattr(layer, 'reset_parameters'):
            layer.reset_parameters()
    
    mae = train_and_evaluate(*exp)
    results.append(mae)

print("\nFinal Results:")
for i, mae in enumerate(results, 1):
    print(f"Experiment {i} MAE: {mae:.4f}")




Training with Adam and SmoothL1Loss
Epoch 1, Val Loss: 0.5321, Val MAE: 0.9597
Epoch 2, Val Loss: 0.5590, Val MAE: 0.9737
Epoch 3, Val Loss: 0.4262, Val MAE: 0.8209
Epoch 4, Val Loss: 0.4644, Val MAE: 0.8594
Epoch 5, Val Loss: 0.3739, Val MAE: 0.7593
Epoch 6, Val Loss: 0.2855, Val MAE: 0.6250
Epoch 7, Val Loss: 0.3038, Val MAE: 0.6654
Epoch 8, Val Loss: 0.2868, Val MAE: 0.6266
Epoch 9, Val Loss: 0.3191, Val MAE: 0.6852
Epoch 10, Val Loss: 0.2210, Val MAE: 0.5472

Training with AdamW and MSELoss
Epoch 1, Val Loss: 1.5766, Val MAE: 1.0431
Epoch 2, Val Loss: 3.3631, Val MAE: 1.5983
Epoch 3, Val Loss: 1.4247, Val MAE: 0.9806
Epoch 4, Val Loss: 0.5368, Val MAE: 0.5885
Epoch 5, Val Loss: 0.4805, Val MAE: 0.5078
Epoch 6, Val Loss: 0.4998, Val MAE: 0.5307
Epoch 7, Val Loss: 0.5043, Val MAE: 0.5378
Epoch 8, Val Loss: 0.4766, Val MAE: 0.5124
Epoch 9, Val Loss: 0.4752, Val MAE: 0.5128
Epoch 10, Val Loss: 0.4711, Val MAE: 0.5151

Final Results:
Experiment 1 MAE: 0.5472
Experiment 2 MAE: 0.5151
