In [343]:
import numpy as np
import pandas as pd

import networkx as nx

## Setting up graph

In [344]:
# Read the node table and the edge list (in that order) from the Kaggle input directory.
node_df, edge_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/ue20cs344-nam-assignment-2/node_data.csv',
        '/kaggle/input/ue20cs344-nam-assignment-2/edgelist.csv',
    )
)

In [345]:
# Preview the first rows of the node table (id, branch and the train/test mask columns).
node_df.head()

Unnamed: 0,id,branch,train_mask,test_mask
0,0,ECE,True,False
1,1,ECE,True,False
2,2,CSE,True,False
3,3,EEE,True,False
4,4,,False,True


In [346]:
# Build an undirected graph whose nodes and edges carry every column of their
# source row as attributes.
#
# `to_dict(orient='records')` is used instead of `iterrows()`: iterrows builds a
# Series per row, which is slow and coerces all values of a row to one common
# dtype (e.g. integer ids become floats as soon as the frame has a float column).
G = nx.Graph()

# Add nodes to the graph, keyed by the 'id' column, in node_df row order.
G.add_nodes_from(
    (node_attrs['id'], node_attrs)
    for node_attrs in node_df.to_dict(orient='records')
)

# Add edges to the graph between the 'source' and 'target' ids of each row.
G.add_edges_from(
    (edge_attrs['source'], edge_attrs['target'], edge_attrs)
    for edge_attrs in edge_df.to_dict(orient='records')
)

In [347]:
# Print the graph summary (node and edge counts) as a sanity check.
print(G)

Graph with 19717 nodes and 44324 edges


## GNN

In [348]:
!pip install torch-geometric

[0m

In [349]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch_geometric.utils import to_undirected

In [352]:
# Load node features from file.
# NOTE(review): torch.load unpickles its input, which can execute arbitrary
# code -- only point it at files from a trusted source.
node_features = torch.load('/kaggle/input/ue20cs344-nam-assignment-2/features.pt')

# Convert networkx graph to PyTorch Geometric graph.
# Row i of every tensor built below describes the i-th node in G's iteration order.
node_indices = list(G.nodes())
node_position = {node_id: position for position, node_id in enumerate(node_indices)}

# Feature rows are looked up by node id and stacked in node order.
node_feature_list = [node_features[int(node_id)] for node_id in node_indices]
x = torch.stack(node_feature_list)

# Express edges as row positions into `x` rather than raw node ids, so the two
# stay aligned even if ids are not exactly 0..N-1 in order.
edge_pairs = [(node_position[u], node_position[v]) for u, v in G.edges()]
edge_index = (
    torch.tensor(edge_pairs, dtype=torch.long)
    .reshape(-1, 2)  # keeps a well-formed (0, 2) shape when there are no edges
    .t()
    .contiguous()
)
# G.edges() yields each undirected edge only once; message passing treats
# edge_index as directed, so the reverse direction has to be added explicitly.
edge_index = to_undirected(edge_index, num_nodes=len(node_indices))

# Class ids: 1 = CSE, 2 = ECE, 3 = EEE, 0 = anything else (including a missing branch).
BRANCH_TO_LABEL = {"CSE": 1, "ECE": 2, "EEE": 3}
y = torch.tensor(
    [BRANCH_TO_LABEL.get(G.nodes[node].get('branch'), 0) for node in node_indices],
    dtype=torch.long,
)

# Boolean node masks taken from the node table's train_mask / test_mask columns,
# in the same node order as `x` and `y`. Nodes without the attribute are excluded.
train_mask = torch.tensor(
    [bool(G.nodes[node].get('train_mask', False)) for node in node_indices],
    dtype=torch.bool,
)
test_mask = torch.tensor(
    [bool(G.nodes[node].get('test_mask', False)) for node in node_indices],
    dtype=torch.bool,
)

data = Data(x=x, edge_index=edge_index, y=y,
            train_mask=train_mask, test_mask=test_mask)


In [None]:
# Define GCN model
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    Args:
        in_channels: Size of each input node feature vector. When ``None``
            (the default) it is read from the notebook-level ``node_features``
            tensor, so the zero-argument call ``GCN()`` keeps working.
        hidden_channels: Output width of the first graph convolution.
        num_classes: Number of output values (logits) produced per node.
    """

    def __init__(self, in_channels=None, hidden_channels=16, num_classes=4):
        super().__init__()
        if in_channels is None:
            in_channels = node_features.shape[1]
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> conv2 and return the result.

        No softmax is applied here; the output is passed as-is to the loss
        (training) or to ``argmax`` (prediction).
        """
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# Train GCN model
SEED = 42          # fixes weight initialisation so reruns are comparable
NUM_EPOCHS = 200   # full-batch passes over the graph

torch.manual_seed(SEED)  # must run before the model is constructed

model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Nothing inside the loop leaves training mode, so it is enabled once up front.
model.train()
for epoch in range(NUM_EPOCHS):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    # The loss is computed only on the nodes selected by `train_mask`
    # (a boolean node mask aligned with the rows of data.x).
    loss = criterion(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()



In [None]:
# Predict on test nodes
model.eval()
# Inference needs no gradients; no_grad avoids building the autograd graph.
with torch.no_grad():
    out = model(data.x, data.edge_index)
# Keep only the rows selected by `test_mask` and take the highest-scoring class id.
pred = out[test_mask].argmax(dim=1)


## Sorting out the output

In [None]:
# Convert the predicted class ids from a tensor into a plain Python list.
tmp = pred.tolist()

In [None]:
# Containers for the output table: predicted branch names and their row labels.
result = []
index = []

In [None]:
# `tmp` holds one class id per *test* node (pred was already filtered by the
# test mask), so position j in `tmp` belongs to the j-th test node -- it is not
# itself a node id. Collect the test node ids in graph order to label the rows.
test_indices = [
    node for node in G.nodes() if G.nodes[node].get('test_mask', False)
]
assert len(tmp) == len(test_indices), (
    "number of predictions does not match number of test nodes"
)

# Class id -> branch name. As before, any id other than 1 or 2 is written as
# "EEE" (this includes class 0, the 'no branch' label).
LABEL_TO_BRANCH = {1: "CSE", 2: "ECE"}

# Assign instead of appending so re-running the cell does not duplicate rows.
index = list(test_indices)
result = [LABEL_TO_BRANCH.get(class_id, "EEE") for class_id in tmp]

In [None]:
# Number of rows that will be written to the output table.
len(index)

In [None]:
# Tabulate the predicted branch names, using the collected ids as row labels,
# and preview the first rows.
df = pd.DataFrame(data=result, index=index)
df.head(n=5)

In [None]:
# Write the prediction table (including its index) to Output.csv in the working directory.
df.to_csv("Output.csv")