In [1]:
import torch

dataset_path = "/home/rafael/Área de trabalho/Linux/graph_dataset.pt"
dataset = torch.load(dataset_path)

print(f"Graphs loaded: {len(dataset)}")

  dataset = torch.load(dataset_path)


Graphs loaded: 880


In [2]:
from sklearn.model_selection import KFold
from orguel_ml import BalanceClassWeights

# Device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Setup
k_folds = 5
epochs = 20
batch_size = 2
learning_rate = 0.007

class_weights = BalanceClassWeights(dataset, device)

# K-Fold cross validation
kf = KFold(n_splits=k_folds, shuffle=True, random_state=42)

In [3]:
import torch.nn.functional as F
from orguel_ml import GraphGPSNetwork
from torch_geometric.loader import DataLoader
from torch.utils.tensorboard import SummaryWriter

for fold, (train_indices, validation_indices) in enumerate(kf.split(dataset)):
    print(f"\nStarting Fold {fold}")
    writer = SummaryWriter(log_dir=f"runs/kfold/fold_{fold}")

    trainData = [dataset[i] for i in train_indices]
    validationData = [dataset[i] for i in validation_indices]

    trainLoader = DataLoader(trainData, batch_size=batch_size, shuffle=True)
    validationLoader = DataLoader(validationData, batch_size=batch_size)
    
    model = GraphGPSNetwork().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.001, max_lr=0.01, step_size_up=5, mode="triangular")
    
    # Train
    for epoch in range(epochs):
        model.train()
        acumulateLoss = 0
        correctPredictions = 0
        totalNodesProcessed = 0

        for batch in trainLoader:
            batch = batch.to(device)
            optimizer.zero_grad()
            output = model(batch)
            loss = F.cross_entropy(output, batch.y, weight=class_weights, label_smoothing=0.1)
            loss.backward()
            optimizer.step()
            scheduler.step()

            acumulateLoss += loss.item()
            prediction = output.argmax(dim=1)
            correctPredictions += (prediction == batch.y).sum().item()
            totalNodesProcessed += batch.num_nodes

        trainAccuracy = correctPredictions / totalNodesProcessed
        averageTrainLoss = acumulateLoss / len(trainLoader)

        # Validation
        model.eval()
        acumulateLoss = 0
        correctPredictions = 0
        totalNodesProcessed = 0

        with torch.no_grad():
            for batch in validationLoader:
                batch = batch.to(device)
                output = model(batch)
                loss = F.cross_entropy(output, batch.y, weight=class_weights)
                acumulateLoss += loss.item()
                prediction = output.argmax(dim=1)
                correctPredictions += (prediction == batch.y).sum().item()
                totalNodesProcessed += batch.num_nodes

        validationAccuracy = correctPredictions / totalNodesProcessed
        averageValidationLoss = acumulateLoss / len(validationLoader)

        # Logging
        writer.add_scalar("Loss/train", averageTrainLoss, epoch)
        writer.add_scalar("Loss/val", averageValidationLoss, epoch)
        writer.add_scalar("Accuracy/train", trainAccuracy, epoch)
        writer.add_scalar("Accuracy/val", validationAccuracy, epoch)
        print(f"Fold {fold} | Epoch {epoch+1} | Train Loss: {averageTrainLoss:.4f} | Val Loss: {averageValidationLoss:.4f} | Train Acc: {trainAccuracy:.2f} | Val Acc: {validationAccuracy:.2f}")

    writer.close()

print("\nAll folds complete. You can now launch TensorBoard:")


Starting Fold 0
Fold 0 | Epoch 1 | Train Loss: 0.9528 | Val Loss: 0.3885 | Train Acc: 0.71 | Val Acc: 0.90
Fold 0 | Epoch 2 | Train Loss: 0.6424 | Val Loss: 0.2839 | Train Acc: 0.91 | Val Acc: 0.93
Fold 0 | Epoch 3 | Train Loss: 0.5940 | Val Loss: 0.2381 | Train Acc: 0.93 | Val Acc: 0.95
Fold 0 | Epoch 4 | Train Loss: 0.5635 | Val Loss: 0.2493 | Train Acc: 0.95 | Val Acc: 0.95
Fold 0 | Epoch 5 | Train Loss: 0.5646 | Val Loss: 0.2105 | Train Acc: 0.95 | Val Acc: 0.96
Fold 0 | Epoch 6 | Train Loss: 0.5565 | Val Loss: 0.2474 | Train Acc: 0.95 | Val Acc: 0.96
Fold 0 | Epoch 7 | Train Loss: 0.5451 | Val Loss: 0.2072 | Train Acc: 0.95 | Val Acc: 0.96
Fold 0 | Epoch 8 | Train Loss: 0.5353 | Val Loss: 0.1931 | Train Acc: 0.96 | Val Acc: 0.97
Fold 0 | Epoch 9 | Train Loss: 0.5251 | Val Loss: 0.2186 | Train Acc: 0.96 | Val Acc: 0.96
Fold 0 | Epoch 10 | Train Loss: 0.5243 | Val Loss: 0.2071 | Train Acc: 0.96 | Val Acc: 0.96
Fold 0 | Epoch 11 | Train Loss: 0.5208 | Val Loss: 0.1790 | Train Acc: 0

In [7]:
# Start tensorboard
%load_ext tensorboard
%tensorboard --logdir runs/kfold/

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 6211), started 0:00:08 ago. (Use '!kill 6211' to kill it.)

In [6]:
# Save model to a file
save_path = "/home/rafael/Área de trabalho/Linux/GraphGPSNetwork.pt"
torch.save(model.state_dict(), save_path)

print(f"Model saved to {save_path}")

Model saved to /home/rafael/Área de trabalho/Linux/GraphGPSNetwork.pt


In [7]:
from collections import Counter

# number of labels of each class
labels = [data.y.tolist() for data in dataset]
flat_labels = [item for sublist in labels for item in sublist]
print(Counter(flat_labels))

Counter({0: 152448, 1: 133504, 3: 130224, 2: 7648})
