In [None]:
pip install pyg

In [None]:
pip install torch_geometric

In [None]:
pip install torch_sparse

In [None]:
pip install torch_scatter

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch import nn
from torch.nn import Linear
from torch_geometric.data import Data
from torch_geometric.data import DataLoader
from torch_geometric.nn import GCNConv, ChebConv
from torch_geometric.nn import global_max_pool

def get_dataset(save_path):
    '''
    Load graph samples from a pickled .npy file.

    save_path: path of the .npy file; every record in it is unpacked as
               (node features, edge index, edge attributes, target)
    returns:   list holding one torch_geometric Data object per record,
               in file order
    '''
    records = np.load(save_path, allow_pickle=True)
    # Features, targets and edge attributes become float tensors;
    # the edge index becomes a long tensor. The target is wrapped in a
    # one-element list before conversion.
    return [
        Data(
            x=torch.tensor(feats, dtype=torch.float),
            y=torch.tensor([target], dtype=torch.float),
            edge_index=torch.tensor(links, dtype=torch.long),
            edge_attr=torch.tensor(link_feats, dtype=torch.float),
        )
        for feats, links, link_feats, target in records
    ]


class GraphNet(nn.Module):
    '''
    Graph Neural Network that outputs one value per graph.

    Pipeline: linear node embedding -> two GCNConv layers (each followed by
    ReLU and dropout) -> global max pooling over the nodes of each graph ->
    linear output layer with a single output unit.
    '''
    def __init__(self, n_features, hidden_dim=512):
        '''
        n_features: number of input features per node (the original note
                    says this should be 37 for the dataset)
        hidden_dim: width of the embedding and of both GCN layers;
                    defaults to 512
        '''
        super().__init__()
        self.embed = nn.Linear(n_features, hidden_dim)
        self.conv1 = GCNConv(hidden_dim, hidden_dim, normalize=True)
        self.conv2 = GCNConv(hidden_dim, hidden_dim, normalize=True)
        self.out_layer = nn.Linear(hidden_dim, 1)

    def forward(self, data):
        '''
        data: object exposing `x`, `edge_index` and `batch`
              (e.g. a batch produced by a torch_geometric DataLoader)
        returns: output of the final linear layer applied to the pooled
                 graph representations (last dimension of size 1)
        '''
        x, edge_index = data.x, data.edge_index
        # .float() only changes the dtype and keeps the tensor on its current
        # device; .type(torch.FloatTensor) would always produce a CPU tensor.
        x = self.embed(x.float())
        x = F.relu(self.conv1(x, edge_index))
        # Dropout (default probability) is only active in training mode.
        x = F.dropout(x, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, training=self.training)
        # Collapse node representations into one vector per graph.
        x = global_max_pool(x, data.batch)

        return self.out_layer(x)
    
    

def main():
    '''
    Train GraphNet on 'train_set.npy', evaluate it on 'test_set.npy',
    save the trained weights, and plot the loss history and the
    prediction-vs-label scatter.

    Training runs for at most 300 epochs and stops early once the reported
    test loss drops below 3.3.
    '''
    # load data and build the data loader
    train_set = get_dataset('train_set.npy')
    test_set = get_dataset('test_set.npy')
    train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

    # number of features in the dataset
    # no need to change the value
    n_features = 37

    # build the model, loss and optimizer
    model = GraphNet(n_features)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    loss_func = torch.nn.MSELoss()
    print(model)

    hist = {"train_loss": [], "test_loss": []}
    num_epoch = 300
    for epoch in range(1, 1 + num_epoch):
        model.train()
        loss_all = 0
        for data in train_loader:
            optimizer.zero_grad()
            pred = model(data)
            # squeeze(-1) drops only the trailing size-1 output dimension, so
            # a batch containing a single graph does not collapse to a scalar.
            loss = loss_func(pred.squeeze(-1), data.y)
            loss.backward()
            optimizer.step()
            # NOTE(review): the extra `len(data)` factor is not part of a
            # per-sample average (loss * num_graphs summed and divided by the
            # dataset size would be the mean). It is kept because the 3.3
            # early-stop threshold below was presumably tuned against this
            # scaled value -- fix both together.
            loss_all += loss.item() * data.num_graphs * len(data)
        train_loss = loss_all / len(train_set)

        # Evaluation: switch dropout off and leave the optimizer untouched so
        # that measuring the test loss cannot modify the weights.
        model.eval()
        with torch.no_grad():
            loss_all = 0
            for data in test_loader:
                pred = model(data)
                loss = loss_func(pred.squeeze(-1), data.y)
                # Same scaling as the training loss (see NOTE above).
                loss_all += loss.item() * data.num_graphs * len(data)
            test_loss = loss_all / len(test_set)

        hist["train_loss"].append(train_loss)
        hist["test_loss"].append(test_loss)
        print(f'Epoch: {epoch}, Train loss: {train_loss:.3}, Test loss: {test_loss:.3}')

        # Early stopping once the reported test loss is low enough.
        if test_loss < 3.3:
            break

    # Number of epochs actually run; defined whether or not early stopping
    # triggered, so the plotting code below always has a valid x-axis.
    plot_epoch = len(hist["train_loss"])

    # test on test set to get prediction
    model.eval()  # the last epoch may have left the model in training mode
    with torch.no_grad():
        prediction = np.zeros(len(test_set))
        label = np.zeros(len(test_set))
        idx = 0
        for data in test_loader:
            output = model(data).squeeze(-1)
            count = len(output)
            prediction[idx:idx + count] = output.numpy()
            label[idx:idx + count] = data.y.numpy()
            idx += count

    print(model)
    torch.save(model.state_dict(), 'p1_GNN_model3.ckpt')

    # Additionally copy the checkpoint to Google Drive when running in Colab.
    # Outside Colab the import fails; the local checkpoint above is still
    # written, so skip the Drive copy instead of crashing after training.
    try:
        from google.colab import drive
    except ImportError:
        print("google.colab is not available; skipping Google Drive checkpoint")
    else:
        drive.mount('/content/gdrive')
        model_save_name = 'p1_GNN_model_LearningV3.ckpt'
        path = f"/content/gdrive/MyDrive/{model_save_name}"
        torch.save(model.state_dict(), path)

    # visualization
    # plot loss function
    epochs = list(range(1, 1 + plot_epoch))
    ax = plt.subplot(1, 1, 1)
    ax.plot(epochs, hist["train_loss"], label="train loss")
    ax.plot(epochs, hist["test_loss"], label="test loss")
    plt.xlabel("epoch")
    plt.ylabel("loss")
    ax.legend()
    plt.show()

    # plot prediction vs. label, with the identity line as reference
    x = np.linspace(np.min(label), np.max(label))
    y = np.linspace(np.min(label), np.max(label))
    ax = plt.subplot(1, 1, 1)
    ax.scatter(prediction, label, marker='+', c='red')
    ax.plot(x, y, '--')
    plt.xlabel("prediction")
    plt.ylabel("label")
    plt.show()

    # Mean (not sum) of squared errors, matching the "MSE" label.
    print("MSE:", np.mean(np.square(prediction - label)))

# Run the training/evaluation pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()