In [134]:
import os
import torch
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [135]:
# Define the root directory containing your dataset
root_dir = "D:/Projects/ML/GCN/Dataset_GCN_classifier_mobilenetv3"

In [136]:
def label_mapping(label):
    return label

In [137]:
# Helper function to load data
def load_data(folder):
    data_list = []
    for filename in os.listdir(folder):
        if filename.endswith(".txt"):
            filepath = os.path.join(folder, filename)
            with open(filepath, 'r') as f:
                lines = f.readlines()
                coords = []
                features = []
                labels = []
                for line in lines:
                    parts = line.strip().split()
                    label = int(parts[0])
                    x1, y1, x2, y2 = map(int, parts[1:5])
                    feature_vector = list(map(float, parts[5:]))
                    coords.append([x1, y1, x2, y2])
                    features.append(feature_vector)
                    labels.append(label)
                
                coords = np.array(coords)
                features = np.array(features)
                
        # Compute edges based on distance threshold
        edges = []
        distances = []
        # mean_length = np.mean(np.linalg.norm(coords[:, :2] - coords[:, 2:]))
        mean_length = 10000
        
        for i in range(len(coords)):
            for j in range(i+1, len(coords)):
                distance = np.linalg.norm(
                    (coords[i][:2] + coords[i][2:]) / 2 - 
                    (coords[j][:2] + coords[j][2:]) / 2)
                distances.append(distance)
        
        if len(distances) == 0:
            continue
        mean_distance = sum(distances) / len(distances)
        
        for i in range(len(coords)):
            for j in range(i+1, len(coords)):
                distance = np.linalg.norm(
                    (coords[i][:2] + coords[i][2:]) / 2 - 
                    (coords[j][:2] + coords[j][2:]) / 2)
                if distance < mean_distance:
                    edges.append([i, j])
                    
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        x = torch.tensor(features, dtype=torch.float)
        y = torch.tensor(labels, dtype=torch.long)
        
        data = Data(x=x, edge_index=edge_index, y=y)
        data_list.append(data)
    return data_list

In [138]:
# Load datasets
valid_data = load_data(os.path.join(root_dir, 'valid'))
print("valid.OK")
test_data = load_data(os.path.join(root_dir, 'test'))
print("test.OK")
train_data = load_data(os.path.join(root_dir, 'train'))
print("train.OK")

valid.OK
test.OK
train.OK


In [139]:
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

Define the model

In [140]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

In [141]:
class GCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [142]:
model = GCN(in_channels=256, hidden_channels=82, out_channels=4)

Train

In [143]:
import torch.optim as optim

In [144]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [145]:
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = torch.nn.CrossEntropyLoss()

In [146]:
def train():
    model.train()
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()

In [147]:
def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        out = model(data)
        pred = out.argmax(dim=1)
        correct += (pred == data.y).sum().item()
    return correct / len(loader.dataset)

In [148]:
for epoch in range(1, 500):
    train()
    train_acc = test(train_loader)
    val_acc = test(valid_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.6f}, '
          f'Val Acc: {val_acc:.6f}')
    torch.save(model.state_dict(), os.path.join(
        "D:/Projects/ML/GCN/Framework/cuda_tree_model/gcn_model_v8", 
        f'gcn_epoch_{epoch}.pth'))


Epoch: 001, Train Acc: 24.989309, Val Acc: 25.573333
Epoch: 002, Train Acc: 42.435855, Val Acc: 40.726667
Epoch: 003, Train Acc: 49.762336, Val Acc: 48.506667
Epoch: 004, Train Acc: 51.881579, Val Acc: 51.206667
Epoch: 005, Train Acc: 52.567434, Val Acc: 51.913333
Epoch: 006, Train Acc: 52.934211, Val Acc: 52.300000
Epoch: 007, Train Acc: 53.174342, Val Acc: 52.813333
Epoch: 008, Train Acc: 53.363487, Val Acc: 53.040000
Epoch: 009, Train Acc: 53.473684, Val Acc: 53.373333
Epoch: 010, Train Acc: 53.566612, Val Acc: 53.660000
Epoch: 011, Train Acc: 53.639803, Val Acc: 53.873333
Epoch: 012, Train Acc: 53.708882, Val Acc: 53.960000
Epoch: 013, Train Acc: 53.800164, Val Acc: 53.993333
Epoch: 014, Train Acc: 53.847862, Val Acc: 54.060000
Epoch: 015, Train Acc: 53.958882, Val Acc: 54.173333
Epoch: 016, Train Acc: 54.039474, Val Acc: 54.226667
Epoch: 017, Train Acc: 54.097862, Val Acc: 54.213333
Epoch: 018, Train Acc: 54.190789, Val Acc: 54.266667
Epoch: 019, Train Acc: 54.202303, Val Acc: 54.

KeyboardInterrupt: 