# 1 Prepare Data

## 1.1 Import Library and Load Data

In [1]:
import torch
from torch import nn
import torch.nn.functional as F

In [2]:
# import the lib and load the data

import scipy.sparse as sp # we need to go to the gnn environment to install scipy - conda activate gnn, pip3 install scipy
import numpy as np
import json
adj = sp.load_npz('/Users/xinyun/Desktop/2023Spring/CSE881/data_2023/adj.npz')
feat  = np.load('/Users/xinyun/Desktop/2023Spring/CSE881/data_2023/features.npy')
labels = np.load('/Users/xinyun/Desktop/2023Spring/CSE881/data_2023/labels.npy')
splits = json.load(open('/Users/xinyun/Desktop/2023Spring/CSE881/data_2023/splits.json'))
idx_train, idx_test = splits['idx_train'], splits['idx_test']

In [3]:
feat_train = feat[splits['idx_train']]
feat_train

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [739]:
#from numpy.linalg import matrix_rank
#matrix_rank(feat)

1385

In [752]:
#A=np.corrcoef(feat, rowvar=False)

In [None]:
#for i in range(0,1390):
#    for j in range(0,1390):
#        if (i!=j) & (A[i,j]>0.9):
#            print(i,j,A[i,j])

In [4]:
# ! structure
# load structure 
from torch_geometric.utils import from_scipy_sparse_matrix
edge_index = from_scipy_sparse_matrix(adj)

## 1.2 Done with Structure

In [5]:
# ! structure
edge_index #edges with index pair
edge_index = edge_index[0]
# 1) show what edge_index looks like - a tensor; 2) show the size of the edge index, 2 row, 10100 colums -> 10100 edges 
# for the whole graph
# edge_index,edge_index.shape

## 1.3 Done with Feature

In [6]:
# feat
# ! for our project, we only use the feature of the first 496 nodes/rows 
# ! the reason: the rest is used by TA to test our performance.
# feat.shape 
# shape of feat is 2480 x 1390; there is 2480 nodes; each node has 1390 features. 
feat_tsr = torch.from_numpy(feat) # data type: 2480 x 1390 tensor

In [7]:
# convert all features to float type
feat_tsr = feat_tsr.float()

## 1.4 Done with Labels

In [8]:
 labels.shape
# size of training: 496; we only have labels for training set 
# y = torch.from_numpy(labels)
# labels.max()

(496,)

In [9]:
# labels
# test_size = len(splits['idx_test'])

# I use 9, because we have 7 classes (0:6), hence, any value >= 7 is ok for value of other rows (i.e., test set) 
y = np.array([-1 for _ in range(feat.shape[0])])

# set labels for trainning set 
y[idx_train] = labels

# convert labels as tensor
y_tsr = torch.from_numpy(y)

In [10]:
len(splits['idx_train']), len(splits['idx_test'])
# train VS. test
# splits['idx_train']

(496, 1984)

## 1.5 Create Train and Validation Set 

In [11]:
# create validation set 
validation_split = 0.2 # 80% for validation
train_size = len(labels)

# I split 20% of the traning set for validation. 
mysplit = int(np.floor(validation_split*train_size))

#np.random.shuffle(indices)

# I set train_indices and val_indices
train_indices, val_indices = idx_train[mysplit:], idx_train[:mysplit]


# that is 397 for training and 99 for testing
train_size, mysplit, len(train_indices), len(val_indices), 
#len(y_train), len(y_test)

(496, 99, 397, 99)

## 1.6 Create Tensor for Data

In [12]:
# get a true or false tensor
def getTensorBool(indices):
    result = np.zeros(feat.shape[0], dtype=bool)
    result[indices] = True
    result = torch.from_numpy(result)
    return result

In [13]:
train_tsr = getTensorBool(train_indices)
val_tsr = getTensorBool(val_indices)
#test_tsr = getTensorBool(idx_test)

## 1.7  Create Data 

In [14]:
from torch_geometric.data import Data

# create input
# x: all the attributes for all nodes !!!
# edge_index: graph structure !!!
# y: all the labels 
# ...
data = Data(x=feat_tsr, edge_index = edge_index, y = y_tsr, train_mask = train_tsr, val_mask=val_tsr, test_mask=False)#like a dictionary

data, data.x.dtype, data.y.dtype, data.edge_index.dtype
#data

(Data(x=[2480, 1390], edge_index=[2, 10100], y=[2480], train_mask=[2480], val_mask=[2480], test_mask=False),
 torch.float32,
 torch.int64,
 torch.int64)

# 2 Set our Model

a model considering both network structure and features 

## 2.1 Main Model

In [63]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GraphConv, SAGEConv, APPNP

class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_hidden, num_classes):
        super().__init__()
        
        # structure
        self.conv1 = GCNConv(num_node_features, num_hidden) # two-layer GCN
        self.conv2 = GCNConv(num_hidden, num_classes)
        
        # optional 
        #self.conv1 = APPNP(50, 0.1)
        #self.conv2 = APPNP(20, 0.2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index # feature and structure
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x,training=self.training) # randomly dropout some features.
        x = self.conv2(x, edge_index)
        #x = F.dropout(x, p=0.5, training=self.training)
        
        return F.log_softmax(x, dim=1)#calculate loss.

## 2.2 Optional Models

In [129]:
class MLP(nn.Module):
    def __init__(self, in_features, hidden_features, out_features):
        super(MLP, self).__init__()
        self.in_features = in_features
        self.layer1 = nn.Linear(in_features, hidden_features) # nn.Linear is a class in PyTorch
        self.layer2 = nn.Linear(hidden_features, out_features)
        
    def forward(self, x):
        #x = x.view(-1, self.in_features)
        x = self.layer1(x)
        x = F.relu(x)
        return self.layer2(x)

In [130]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GraphConv, SAGEConv, APPNP

class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_hidden, num_classes):
        super().__init__()
        
        # optional       
        self.MLP = MLP(num_node_features, num_hidden, num_classes)
        self.conv = APPNP(5,0.2) # k: 5, 10, 20, alpha: 0, 0.1, 0.2, 0.3
        # k: 5; alpha: 0.2
    def forward(self, data):
        x, edge_index = data.x, data.edge_index # feature and structure
        # appnp
        x = self.MLP(x)
        x = self.conv(x, edge_index) # appnp
        return F.log_softmax(x, dim=1)#calculate loss.

In [819]:
class GCN1(torch.nn.Module):
    def __init__(self, num_node_features, num_hidden, num_classes):
        super().__init__()
        
        # structure
        self.conv1 = GraphConv(num_node_features, num_hidden) # two-layer GCN
        self.conv2 = GraphConv(num_hidden, num_classes)
        

    def forward(self, data):
        x, edge_index = data.x, data.edge_index # feature and structure
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x,training=self.training) # randomly dropout some features.
        x = self.conv2(x, edge_index)
        #x = F.dropout(x, p=0.5, training=self.training)
        
        return F.log_softmax(x, dim=1)#calculate loss.

In [1057]:
class GCN2(torch.nn.Module):
    def __init__(self, num_node_features, num_hidden, num_classes):
        super().__init__()
        
        # structure
        self.conv1 = SAGEConv(num_node_features, num_hidden) # two-layer GCN
        self.conv2 = SAGEConv(num_hidden, num_classes)
        

    def forward(self, data):
        x, edge_index = data.x, data.edge_index # feature and structure
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x,training=self.training) # randomly dropout some features.
        x = self.conv2(x, edge_index)
        #x = F.dropout(x, p=0.5, training=self.training)
        
        return F.log_softmax(x, dim=1)#calculate loss.

## 2.3 Train Our Model 

##### Hyperparameters we tuned: 

1) loss rate, 2) weight decay, and 3) num_hidden

##### After tuning, we decide to choose:

1) 0.01, 2) 0.001, and 3) 64

In [54]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)

# setting the model
model = GCN(num_node_features=data.x.shape[1], 
            num_hidden=64, #46?
            num_classes=(data.y.max()+1).item() #label
           ).to(device)

# set learning rate 
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-3)
# ...
# training the model
model.train() # train model
for epoch in range(200):
    optimizer.zero_grad() #clear the gradient 
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        acc = (out.argmax(axis=1)[data.val_mask].eq(data.y[data.val_mask]).sum().item())/(data.val_mask.sum().item())
        print('Epoch {0}: {1}'.format(epoch, loss.item()), 'ACC:', acc)
        

Epoch 0: 1.9260088205337524 ACC: 0.2828282828282828
Epoch 10: 0.4641801416873932 ACC: 0.797979797979798
Epoch 20: 0.06438255310058594 ACC: 0.7878787878787878
Epoch 30: 0.035822365432977676 ACC: 0.7878787878787878
Epoch 40: 0.046079929918050766 ACC: 0.8181818181818182
Epoch 50: 0.04308245703577995 ACC: 0.8282828282828283
Epoch 60: 0.03629332780838013 ACC: 0.8181818181818182
Epoch 70: 0.03363652899861336 ACC: 0.8181818181818182
Epoch 80: 0.03139854967594147 ACC: 0.8181818181818182
Epoch 90: 0.02937909960746765 ACC: 0.8080808080808081
Epoch 100: 0.027875317260622978 ACC: 0.8080808080808081
Epoch 110: 0.026622600853443146 ACC: 0.8080808080808081
Epoch 120: 0.025604259222745895 ACC: 0.8080808080808081
Epoch 130: 0.024743717163801193 ACC: 0.8080808080808081
Epoch 140: 0.024038663133978844 ACC: 0.8080808080808081
Epoch 150: 0.023449977859854698 ACC: 0.8080808080808081
Epoch 160: 0.022945530712604523 ACC: 0.8080808080808081
Epoch 170: 0.02251851186156273 ACC: 0.8080808080808081
Epoch 180: 0.02

### 2.3.1 Evaluate our Model Using Validation Set

In [24]:
model.eval()
# evaluate validation group
pred = model(data).argmax(dim=1)
correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
acc = int(correct) / int(data.val_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.7980


## 2.4 Prepare for Submission

In [146]:
preds = pred[idx_test]
np.savetxt('submission0.txt', preds, fmt='%d')

In [147]:
preds

tensor([6, 2, 6,  ..., 1, 1, 5])

In [148]:
for i in preds.numpy():
    print(i)

6
2
6
1
0
1
2
1
0
6
4
2
2
1
0
2
3
2
0
6
1
0
0
6
1
1
2
0
0
0
2
6
0
1
4
1
1
0
3
6
1
1
3
4
2
0
2
3
1
6
0
3
5
5
5
3
1
4
1
2
6
6
6
6
4
1
6
5
5
1
5
2
4
0
1
4
0
3
6
0
1
2
3
2
6
3
6
6
1
1
2
1
0
1
6
2
6
4
6
1
2
6
1
5
2
6
4
2
2
6
0
2
6
0
5
3
2
2
0
1
3
2
0
6
4
2
6
2
0
5
1
0
5
3
0
2
4
2
2
6
6
1
2
0
4
6
1
2
6
1
2
3
6
2
2
2
3
2
3
2
4
0
1
3
6
6
1
6
3
1
1
2
6
1
1
5
6
3
5
0
4
4
1
1
2
3
0
5
6
0
1
2
2
2
2
0
6
0
5
2
0
2
2
0
4
2
1
0
1
3
6
0
1
2
2
3
5
1
2
1
0
2
1
3
6
3
2
6
4
1
2
3
6
2
2
1
6
2
1
6
1
0
5
1
3
2
1
2
6
1
0
6
2
2
2
1
6
2
6
6
3
0
2
4
2
0
2
2
2
3
1
2
6
2
5
6
3
2
3
2
2
4
3
4
1
4
0
3
0
2
2
2
2
2
3
2
4
2
4
6
3
2
1
5
6
0
6
2
3
5
2
6
2
6
1
3
2
6
4
6
3
3
2
2
6
3
5
6
3
6
2
6
2
3
2
6
6
3
3
4
4
1
4
6
2
3
0
2
0
5
1
2
0
6
6
1
1
6
1
0
2
1
6
3
3
3
2
3
6
3
6
2
2
6
6
1
3
6
3
6
2
6
5
2
6
1
1
2
6
3
6
2
2
6
3
6
0
6
1
3
2
2
2
1
3
1
4
2
1
6
4
6
6
6
0
6
4
2
4
6
3
2
2
1
2
4
6
6
6
6
3
6
3
3
2
6
2
1
0
5
2
4
1
6
6
2
2
0
0
2
2
2
0
1
3
2
2
6
0
1
2
0
6
0
2
6
3
2
4
1
2
4
3
2
2
2
6
0
1
2
2
5
2
3
2
6
1
3
3
1
1
5
3
2
5
3
6
6
2
6


# 3 Improvement

## 3.1 Emsemble Methods

In [20]:
num_models = 5
models = []
for i in range(num_models):
    model = GCN(num_node_features=data.x.shape[1], 
            num_hidden=64, 
            num_classes=(data.y.max()+1).item() #label
           ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-3)
    model.train()
    for epoch in range(101):
            optimizer.zero_grad()
            out = model(data)
            loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
            loss.backward()
            optimizer.step()
    models.append(model)

In [102]:
for model in models:
    model.eval()
    # evaluate validation group
    pred = model(data).argmax(dim=1)
    correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
    acc = int(correct) / int(data.val_mask.sum())
    print(f'Validation Set Accuracy: {acc:.4f}')
    pred = model(data).argmax(dim=1)
    correct = (pred[data.train_mask] == data.y[data.train_mask]).sum()
    acc = int(correct) / int(data.train_mask.sum())
    print(f'Training Set Accuracy: {acc:.4f}')

Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9521
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9496
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9547
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9521
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9496
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9496
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9496
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9496
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9496
Validation Set Accuracy: 0.9697
Training Set Accuracy: 0.9496


In [133]:
# evaluate per model
model_predictions = []
for model in models:
    model.eval()
    # evaluate validation group
    pred = model(data).argmax(dim=1)
    result = pred[data.val_mask]
    print(result)
    model_predictions.append(result.cpu().numpy())
    #print((torch.from_numpy(result.numpy())==data.y[data.val_mask]).sum())
    correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
    acc = int(correct) / int(data.val_mask.sum())
    print(f'Accuracy: {acc:.4f}')

tensor([1, 0, 2, 4, 4, 6, 0, 6, 1, 3, 2, 2, 1, 3, 3, 0, 2, 3, 5, 0, 4, 1, 2, 5,
        1, 5, 1, 1, 1, 2, 2, 3, 3, 3, 1, 3, 2, 2, 6, 2, 2, 2, 4, 2, 2, 6, 1, 2,
        2, 2, 3, 4, 0, 2, 5, 2, 1, 6, 2, 2, 6, 2, 6, 4, 4, 4, 1, 3, 6, 0, 6, 3,
        0, 0, 6, 6, 0, 2, 2, 3, 3, 6, 5, 1, 1, 1, 1, 2, 3, 3, 3, 0, 3, 1, 4, 2,
        0, 1, 1])
Accuracy: 0.9899
tensor([1, 0, 2, 4, 4, 6, 0, 6, 1, 3, 2, 2, 1, 3, 3, 0, 2, 3, 5, 0, 4, 1, 2, 5,
        1, 5, 1, 1, 1, 2, 2, 3, 3, 3, 1, 3, 2, 2, 6, 2, 2, 2, 4, 2, 2, 6, 1, 2,
        2, 2, 3, 4, 0, 2, 5, 2, 1, 6, 2, 2, 6, 2, 6, 4, 4, 4, 1, 3, 6, 0, 6, 3,
        0, 0, 6, 6, 0, 2, 2, 3, 3, 6, 5, 1, 1, 1, 1, 2, 3, 3, 3, 0, 3, 1, 4, 2,
        0, 1, 1])
Accuracy: 0.9899
tensor([1, 0, 2, 4, 4, 6, 0, 6, 1, 3, 2, 2, 1, 3, 3, 0, 2, 3, 5, 0, 4, 1, 2, 5,
        1, 5, 1, 1, 1, 2, 2, 3, 3, 3, 1, 3, 2, 2, 6, 2, 2, 2, 4, 2, 2, 6, 1, 2,
        2, 2, 3, 4, 0, 2, 5, 2, 1, 6, 2, 2, 6, 2, 6, 4, 4, 4, 1, 3, 6, 0, 6, 3,
        0, 0, 6, 6, 0, 2, 2, 3, 3, 6, 5, 1, 1, 1, 

In [134]:
# esemble the result
from scipy.stats import mode
result = mode(np.stack(model_predictions), axis=0)[0][0]
acc = int((torch.from_numpy(result)==data.y[data.val_mask]).sum())/ int(data.val_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.9899


  result = mode(np.stack(model_predictions), axis=0)[0][0]


In [31]:
result

array([1, 0, 2, 4, 4, 6, 0, 6, 1, 3, 2, 2, 1, 3, 2, 0, 2, 3, 5, 0, 4, 1,
       2, 5, 1, 5, 1, 1, 1, 2, 2, 3, 3, 3, 1, 3, 2, 2, 6, 2, 2, 2, 4, 2,
       2, 6, 1, 2, 2, 2, 3, 4, 0, 2, 5, 2, 1, 6, 2, 2, 6, 2, 6, 4, 4, 4,
       1, 3, 6, 0, 6, 3, 0, 0, 6, 6, 0, 2, 2, 3, 3, 6, 5, 1, 1, 1, 1, 2,
       3, 3, 3, 0, 3, 1, 4, 2, 0, 1, 1])

## Result from Ensemble Method + Cross Validation

In [135]:
model_predictions = []
for model in models:
    model.eval()
    # evaluate validation group
    pred = model(data).argmax(dim=1)
    result = pred[idx_test]
    print(result)
    model_predictions.append(result.cpu().numpy())
    #print((torch.from_numpy(result.numpy())==data.y[data.val_mask]).sum())
    #correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
    #acc = int(correct) / int(data.val_mask.sum())
    #print(f'Accuracy: {acc:.4f}')

tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])
tensor([6, 2, 6,  ..., 1, 1, 5])


In [136]:
result = mode(np.stack(model_predictions), axis=0)[0][0]
#(torch.from_numpy(result)==data.y[data.val_mask]).sum()

  result = mode(np.stack(model_predictions), axis=0)[0][0]


In [137]:
for i in result:
    print(i)

6
2
6
1
0
1
2
1
0
6
4
2
2
1
0
2
3
2
0
3
1
0
0
6
1
1
2
0
0
0
6
6
0
1
4
1
1
0
3
6
1
1
3
4
2
0
2
3
1
6
0
3
0
5
5
3
1
4
1
2
6
6
6
6
4
1
6
5
5
1
6
2
4
0
1
4
0
3
6
0
1
2
3
2
6
3
6
6
1
1
2
1
0
1
6
2
6
1
6
1
2
6
1
5
2
3
4
2
2
3
0
2
6
0
5
3
2
2
0
1
3
2
0
6
2
2
6
2
0
0
1
0
5
3
0
2
4
2
2
2
2
1
2
0
4
6
1
2
6
0
2
3
2
2
2
2
3
2
3
2
4
0
1
3
6
6
1
0
3
1
1
2
6
1
1
0
6
3
6
0
4
4
1
1
2
2
0
2
6
0
1
2
0
2
2
0
6
0
0
2
0
2
2
0
4
2
1
0
1
2
6
0
1
2
2
3
5
1
2
1
0
2
1
3
6
3
2
2
4
1
2
3
6
2
2
1
6
2
1
6
1
0
5
1
3
2
1
2
6
1
0
6
2
2
2
1
6
2
6
0
2
0
2
4
3
0
2
2
2
2
1
2
6
2
5
6
3
2
3
2
2
4
3
4
1
4
0
3
0
2
2
4
2
2
3
2
4
2
4
6
2
2
1
5
0
0
6
2
3
5
2
6
2
2
1
3
2
6
4
6
3
3
2
2
6
3
5
6
3
6
2
6
2
3
2
6
6
6
3
4
4
2
4
0
2
3
0
2
0
0
3
2
0
6
6
1
1
1
1
0
4
1
6
3
3
3
2
3
6
3
6
2
2
6
6
1
3
6
3
6
2
6
5
2
6
1
1
2
6
2
6
2
2
2
3
2
0
6
1
3
2
2
2
2
3
1
4
2
1
6
4
6
6
6
0
2
4
2
4
2
3
1
2
1
2
4
6
6
6
6
3
6
3
3
2
0
2
1
0
5
2
4
1
2
6
2
4
0
0
2
2
2
0
1
3
2
2
6
0
1
3
0
6
0
2
6
3
2
4
1
2
4
3
2
2
2
6
0
1
2
2
0
2
3
2
6
1
2
3
0
0
5
3
2
5
3
6
6
3
6


In [138]:
result
np.savetxt('submission999.txt', result, fmt='%d')

## 3.2 Cross Validation + Hyperparameter Tuning 

In [64]:
from sklearn.model_selection import KFold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

In [105]:
def k_cv(lr, wdecay, num_hidden, data, train_indices_):
    avg = 0
    num_models = 5
    models = []
    for fold, (train_indices, val_indices) in enumerate(kfold.split(train_indices_)):  
    # setting the model
        model = GCN(num_node_features=data.x.shape[1], 
            num_hidden=num_hidden, #46?
            num_classes=7 #label
           ).to(device)
        data_k = Data(x=feat_tsr, 
                edge_index = edge_index, 
                y = y_tsr, 
                train_mask = getTensorBool(np.array(splits['idx_train'])[train_indices]), 
                val_mask = getTensorBool(np.array(splits['idx_train'])[val_indices]), test_mask=False)#like a dictionary
        device_k = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        data_k = data_k.to(device_k)
        # set learning rate 
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wdecay)
        for epoch in range(50):
            optimizer.zero_grad() #clear the gradient 
            out = model(data_k)
            loss = F.nll_loss(out[data_k.train_mask], data.y[data_k.train_mask])
            loss.backward() #calculate the gradients
            optimizer.step() # the step() is to update the param
    
            # evaluate validation group
            model.eval()
            pred = model(data).argmax(dim=1)
            correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
            acc = int(correct) / int(data.val_mask.sum())
            avg+=acc
            if epoch % 10 == 0:
                print('Epoch {0}: {1}'.format(epoch, loss.item()))
                print(f'Accuracy per 10 Epoch: {acc:.4f}')
        #print(model)
        models.append(model)
                
                
            
    avg = avg/250
    print(f'Accuracy: {avg:.4f}', lr, wdecay, num_hidden)
    return models


In [None]:
models = k_cv(0.01, 0.001, 128, data, train_indices)

In [None]:
for lr in [1e-2, 1e-3]:
    for wd in [1e-3, 1e-4, 5e-4]:
        for num_hidden in [16,32,64,128] :
            k_cv(lr, wd, num_hidden,data, train_indices)
#Accuracy: 0.8804 0.01 0.001 16
#Accuracy: 0.9102 0.01 0.001 32
#Accuracy: 0.9273 0.01 0.001 64             - good!
#Accuracy: 0.9293 0.01 0.001 128            - good!
#Accuracy: 0.8890 0.01 0.0001 16
#Accuracy: 0.9121 0.01 0.0001 32             
#Accuracy: 0.9257 0.01 0.0001 64             
#Accuracy: 0.9346 0.01 0.0001 128            - this is good
#Accuracy: 0.8768 0.01 0.0005 16
#Accuracy: 0.9114 0.01 0.0005 32
#Accuracy: 0.9259 0.01 0.0005 64
#Accuracy: 0.9304 0.01 0.0005 128
#Accuracy: 0.6095 0.001 0.001 16
#Accuracy: 0.6705 0.001 0.001 32
#Accuracy: 0.7539 0.001 0.001 64
#Accuracy: 0.7971 0.001 0.001 128
#Accuracy: 0.5186 0.001 0.0001 16
#Accuracy: 0.7233 0.001 0.0001 32
#Accuracy: 0.7406 0.001 0.0001 64
#Accuracy: 0.7904 0.001 0.0001 128
#Accuracy: 0.5942 0.001 0.0005 16
#Accuracy: 0.7038 0.001 0.0005 32
#Accuracy: 0.7662 0.001 0.0005 64
#Accuracy: 0.8017 0.001 0.0005 128

## Cross validation + Ensemble  (Version 2)

In [131]:
def k_cv(lr, wdecay, num_hidden, data, train_indices_):
    avg = 0
    num_models = 5
    model = GCN(num_node_features=data.x.shape[1], 
            num_hidden=num_hidden, #46?
            num_classes=7 #label
           ).to(device)
    max_acc = 0
    model_opt = None
    for fold, (train_indices, val_indices) in enumerate(kfold.split(train_indices_)):  
    # setting the model
        data_k = Data(x=feat_tsr, 
                edge_index = edge_index, 
                y = y_tsr, 
                train_mask = getTensorBool(np.array(splits['idx_train'])[train_indices]), 
                val_mask = getTensorBool(np.array(splits['idx_train'])[val_indices]), test_mask=False)#like a dictionary
        device_k = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        data_k = data_k.to(device_k)
        # set learning rate 
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wdecay)
        for epoch in range(50):
            optimizer.zero_grad() #clear the gradient 
            out = model(data_k)
            loss = F.nll_loss(out[data_k.train_mask], data.y[data_k.train_mask])
            loss.backward() #calculate the gradients
            optimizer.step() # the step() is to update the param
    
            # evaluate validation group
            model.eval()
            pred = model(data).argmax(dim=1)
            correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
            acc = int(correct) / int(data.val_mask.sum())
            avg+=acc
            if(acc>max_acc): 
                model_opt = model
                max_acc = acc
            #if epoch % 10 == 0:
                #print('Epoch {0}: {1}'.format(epoch, loss.item()))
                #print(f'Accuracy per 10 Epoch: {acc:.4f}')
        #print(model)
        #models.append(model)
                
                
            
    avg = avg/250
    print(f'Accuracy: {avg:.4f}', lr, wdecay, num_hidden)
    return model_opt


In [132]:
models = []
for i in range(1,12):
    model = k_cv(0.01, 0.001, 128, data, train_indices)
    print(model)
    models.append(model)

Accuracy: 0.9581 0.01 0.001 128
GCN(
  (MLP): MLP(
    (layer1): Linear(in_features=1390, out_features=128, bias=True)
    (layer2): Linear(in_features=128, out_features=7, bias=True)
  )
  (conv): APPNP(K=5, alpha=0.2)
)
Accuracy: 0.9604 0.01 0.001 128
GCN(
  (MLP): MLP(
    (layer1): Linear(in_features=1390, out_features=128, bias=True)
    (layer2): Linear(in_features=128, out_features=7, bias=True)
  )
  (conv): APPNP(K=5, alpha=0.2)
)
Accuracy: 0.9602 0.01 0.001 128
GCN(
  (MLP): MLP(
    (layer1): Linear(in_features=1390, out_features=128, bias=True)
    (layer2): Linear(in_features=128, out_features=7, bias=True)
  )
  (conv): APPNP(K=5, alpha=0.2)
)
Accuracy: 0.9594 0.01 0.001 128
GCN(
  (MLP): MLP(
    (layer1): Linear(in_features=1390, out_features=128, bias=True)
    (layer2): Linear(in_features=128, out_features=7, bias=True)
  )
  (conv): APPNP(K=5, alpha=0.2)
)
Accuracy: 0.9592 0.01 0.001 128
GCN(
  (MLP): MLP(
    (layer1): Linear(in_features=1390, out_features=128, bias

## Others

In [470]:
model = GCN(num_node_features=data.x.shape[1], 
            num_hidden=16, #46?
            num_classes=(data.y.max()+1).item() #label
           ).to(device)
def train(model, data, num_epochs=30, learning_rate=1e-2, weight_decay=1e-5):
    # define an optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate, 
                                 weight_decay=weight_decay) # weight_decay is the L2 Regularization
    
    # define loss function
    criterion = nn.CrossEntropyLoss()
    
    for epoch in range(num_epochs):
        optimizer.zero_grad() #clear the gradient 
            
        out = model(data) #foward
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward() #calculate the gradients
            
        optimizer.step() # the step() is to update the param
        if epoch % 10 == 0:
            print('Epoch {0}: {1}'.format(epoch, loss.item()))
            model.eval()
            # evaluate validation group
            pred = model(data).argmax(dim=1)
            correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
            acc = int(correct) / int(data.val_mask.sum())
            print(f'Accuracy: {acc:.4f}')
    

In [None]:
for nepoch in range(4,10):
    print(nepoch)
    train(model, data_, 2**nepoch)

In [471]:
train(model, data)

Epoch 0: 2.3089566230773926
Accuracy: 0.3145
Epoch 10: 1.1150527000427246
Accuracy: 0.7903
Epoch 20: 0.42526018619537354
Accuracy: 0.8548


# evaluation

In [1253]:
model.eval()
# evaluate validation group
pred = model(data).argmax(dim=1)
correct = (pred[data.val_mask] == data.y[data.val_mask]).sum()
acc = int(correct) / int(data.val_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.8687


In [1254]:
# evaluate training group
pred = model(data).argmax(dim=1)
correct = (pred[data.train_mask] == data.y[data.train_mask]).sum()
acc = int(correct) / int(data.train_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 1.0000


In [1085]:
pred = model(data).argmax(dim=1)
correct = (pred[data_.train_mask] == data_.y[data_.train_mask]).sum()
acc = int(correct) / int(data_.train_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.2015


In [1105]:
model.eval()
pred = model(data_).argmax(dim=1)
correct = (pred[data_.test_mask] == data_.y[data_.test_mask]).sum()
acc = int(correct) / int(data_.test_mask.sum())
print(f'Accuracy: {acc:.4f}')

Accuracy: 0.0000


In [30]:
pred[data.test_mask]

tensor([5, 2, 0,  ..., 1, 2, 6])

# check my new result with old

In [199]:
preds = pred[idx_test]
print(preds)
data.test_mask.shape
np.savetxt('submission.txt', preds, fmt='%d')

tensor([6, 2, 6,  ..., 1, 1, 5])


In [1015]:
result = mode(np.stack(model_predictions), axis=0)[0][0]
result = torch.from_numpy(result)
print(result)
np.savetxt('submission5.txt', result, fmt='%d')

tensor([5, 2, 0,  ..., 1, 2, 6])


  result = mode(np.stack(model_predictions), axis=0)[0][0]


In [200]:
today = np.loadtxt('/Users/xinyun/Desktop/2023Spring/CSE881/submission.txt')

In [201]:
today

array([6., 2., 6., ..., 1., 1., 5.])

In [1156]:
last = np.loadtxt('/Users/xinyun/Desktop/2023Spring/CSE881/submission7.txt')

In [852]:
last_ = np.loadtxt('/Users/xinyun/Desktop/2023Spring/CSE881/submission2.txt')

In [203]:
old = np.loadtxt('/Users/xinyun/Downloads/submission.txt')

In [204]:
old

array([5., 2., 0., ..., 1., 2., 6.])

In [205]:
np.count_nonzero(today==old)/old.size


0.17893145161290322

In [1157]:
np.count_nonzero(last==today)/old.size

1.0

In [602]:
params_to_optimize = {
    'num_hidden': [32, 64, 128],
    'lr': [0.001, 0.01, 0.1],
}

In [604]:
from sklearn.model_selection import KFold, GridSearchCV
grid_search = GridSearchCV(model, params_to_optimize, cv=kfold, scoring='accuracy')