# GPU-Accelerated GNN

## Install dependencies

In [None]:
%pip install torch

## Declare dependencies

In [None]:
import torch
import time
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.datasets as Planetoid
import torch_geometric.nn as GCNConv


## Check for CUDA

In [None]:
# Run on the GPU when CUDA is usable, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


## Define the Baseline GCN Model

In [None]:
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing log-softmax scores.

    Args:
        input_dim: Input feature size handed to the first convolution.
        hidden_dim: Output size of the first convolution and input size of
            the second.
        output_dim: Output size of the second convolution.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Import the layer class locally: the notebook-level
        # `import torch_geometric.nn as GCNConv` binds the *module* to that
        # name, and calling a module raises TypeError.
        from torch_geometric.nn import GCNConv

        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Return log-softmax scores (taken over dim 1) for ``data``.

        ``data`` must expose ``x`` and ``edge_index`` attributes; both are
        passed to each convolution, with a ReLU between the two layers.
        """
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

## Load Dataset

In [None]:
# Load the Cora dataset.
# Import the dataset class here: the notebook-level
# `import torch_geometric.datasets as Planetoid` binds the module to that
# name, and a module cannot be called like a class.
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='/tmp/Cora', name='Cora')
# Take the dataset's first entry and move it to the selected device.
data = dataset[0].to(device)

print(f'Dataset: {dataset}')
print(f'Number of classes: {dataset.num_classes}')
print(f'Number of node features: {dataset.num_node_features}')

## Train the Baseline GCN Model

In [None]:
# Build the network from the dataset's dimensions, then move it to `device`.
model = GCN(
    input_dim=dataset.num_node_features,
    hidden_dim=16,
    output_dim=dataset.num_classes,
)
model = model.to(device)

# Adam over every model parameter, with weight decay.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

def train():
    """Run one optimisation step on the training-mask nodes.

    Uses the notebook-level ``model``, ``optimizer`` and ``data``.

    Returns:
        The negative log-likelihood loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    mask = data.train_mask
    log_probs = model(data)
    step_loss = F.nll_loss(log_probs[mask], data.y[mask])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

# Train for 201 epochs (0-200), reporting the loss on every 20th epoch.
for epoch in range(201):
    loss = train()
    if epoch % 20 != 0:
        continue
    print(f'Epoch {epoch}, Loss: {loss:.4f}')


## Evaluate the Baseline GCN Model

In [None]:
def test():
    """Return the model's accuracy on the test-mask nodes.

    Uses the notebook-level ``model`` and ``data``.

    Returns:
        Fraction of entries selected by ``data.test_mask`` whose arg-max
        prediction equals ``data.y``.
    """
    model.eval()
    # Evaluation needs no gradients; disabling autograd tracking avoids
    # recording the forward pass.
    with torch.no_grad():
        pred = model(data).argmax(dim=1)

    mask = data.test_mask
    correct = pred[mask] == data.y[mask]
    return int(correct.sum()) / int(mask.sum())


accuracy = test()
print(f'Test Accuracy: {accuracy:.4f}')

## CUDA Kernel Optimization

In [None]:
def custom_cuda_operation():
    """Placeholder for custom CUDA operations; currently does nothing."""
    # Custom CUDA kernel will be implemented here
    return None


print("Placeholder for custom CUDA kernel.")

## Profiling Inference Time

In [None]:
def profile_model():
    """Time one forward pass of ``model`` on ``data`` and print the result.

    Uses the notebook-level ``model`` and ``data``. Prints the elapsed
    wall-clock time in seconds; returns nothing.
    """
    model.eval()
    on_gpu = data.x.is_cuda

    # Inference only: skip autograd bookkeeping so it is not part of the timing.
    with torch.no_grad():
        # CUDA kernels are launched asynchronously, so wait for pending work
        # before starting the clock and for the forward pass before stopping
        # it; otherwise only the launch overhead would be measured.
        if on_gpu:
            torch.cuda.synchronize()
        start_time = time.perf_counter()
        model(data)
        if on_gpu:
            torch.cuda.synchronize()
        end_time = time.perf_counter()

    print(f'Inference time: {end_time - start_time:.6f} seconds')


profile_model()