In [29]:
import sys
import pickle
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import os
from torch_geometric.data import Dataset, download_url, Data
import pandas as pd
import shutil
import torch
from scipy.spatial import distance

from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch.nn import Linear

from torch_geometric.loader import NeighborLoader
from torch_geometric.sampler import BaseSampler


In [43]:
dataset = torch.load("C://Austin//Projects//MS_Thesis_Data//base_gnn_testing_root//processed//data_0.pt")

In [40]:
class MyOwnDataset(Dataset): #For this ds I have ald done all the required pre prcessing 
    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None, data_dict = None):
        self.data_dict = data_dict
        super().__init__(root, transform, pre_transform, pre_filter)
        print(self.raw_dir)
        
    @property
    def raw_file_names(self):
        return np.array([os.path.join(self.raw_dir, x) for x in self.data_dict["name"]])

    @property
    def processed_file_names(self):
        return "not_implemented.pt"
    
    def process(self):
        
        idx = -1

        # Process pre made data dictionary

        x = np.append(self.data_dict["features"], self.data_dict["centroids"], axis=0)
        
        y = np.append(self.data_dict["cluster"], np.array([0,1,2,3,4]), axis=0)
        
        edge_map = []


        i=-1
        for cluster in y[:127]:
            i+=1
            edge_map.append([np.int64(i), np.int64(cluster+self.data_dict["features"].shape[0])])
            #edge_map.append([np.int64(cluster+self.data_dict["features"].shape[0]), np.int64(i)])
        

        i+=1
        for centroid1 in y[i:-1]:
            for centroid2 in y[i+1:]:
                edge_map.append([np.int64(centroid1+self.data_dict["features"].shape[0]), np.int64(centroid2+self.data_dict["features"].shape[0])])
                #edge_map.append([np.int64(centroid2+self.data_dict["features"].shape[0]), np.int64(centroid1+self.data_dict["features"].shape[0])])
            i+=1

        edge_attrs = [] 


        for edge in edge_map:
            edge_attrs.append(distance.euclidean(x[edge[0]], x[edge[1]]))

        edge_attrs = np.array(edge_attrs)

        edge_attrs = edge_attrs/np.max(edge_attrs)

        edge_map_aux = [None,None]
        edge_map_aux[0] = [x[0] for x in edge_map]
        edge_map_aux[1] = [x[1] for x in edge_map]
        edge_map_aux = np.array(edge_map_aux)

        # Process pre made data dictionary

        x = torch.Tensor(x)
        y = torch.Tensor(y)
        edge_attrs = torch.Tensor(np.array(edge_attrs))
        edge_map_aux = torch.Tensor(np.array(edge_map_aux)).to(torch.int64)

        data = Data(x, edge_map_aux, edge_attrs, y)

        if self.pre_filter is not None and not self.pre_filter(data):
            pass

        if self.pre_transform is not None:
            data = self.pre_transform(data)
        
        idx += 1

        torch.save(data, os.path.join(self.processed_dir, f'data_{idx}.pt'))

    def len(self):
        return self.data_dict["name"].shape[0]

    def get(self, idx):
        data = torch.load(os.path.join(self.processed_dir, f'data_{idx}.pt'))
        return data

In [41]:
dataset = MyOwnDataset(root = "C://Austin//Projects//MS_Thesis_Data//base_gnn_testing_root", data_dict = data)

C:\Austin\Projects\MS_Thesis_Data\base_gnn_testing_root\raw


Processing...
Done!


In [44]:
dataset

Data(x=[132, 4096], edge_index=[2, 137], edge_attr=[137], y=[132])

In [45]:
dataset.num_features

4096

In [46]:
dataset

Data(x=[132, 4096], edge_index=[2, 137], edge_attr=[137], y=[132])

In [None]:
from sklearn.metrics import roc_auc_score
from torch_geometric.utils import negative_sampling


class Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def encode(self, x, edge_index):
        x = self.conv1(x, edge_index).relu()
        return self.conv2(x, edge_index)

    def decode(self, z, edge_label_index):
        return (z[edge_label_index[0]] * z[edge_label_index[1]]).sum(
            dim=-1
        )  # product of a pair of nodes on each edge

    def decode_all(self, z):
        prob_adj = z @ z.t()
        return (prob_adj > 0).nonzero(as_tuple=False).t()
    

def train_link_predictor(
    model, train_data, optimizer, criterion, n_epochs=100
    #, val_data
):

    for epoch in range(1, n_epochs + 1):

        model.train()
        optimizer.zero_grad()
        z = model.encode(train_data.x, train_data.edge_index)

        # sampling training negatives for every training epoch
        neg_edge_index = negative_sampling(
            edge_index=train_data.edge_index, num_nodes=train_data.num_nodes,
            num_neg_samples=train_data.edge_label_index.size(1), method='sparse')

        edge_label_index = torch.cat(
            [train_data.edge_label_index, neg_edge_index],
            dim=-1,
        )
        edge_label = torch.cat([
            train_data.edge_label,
            train_data.edge_label.new_zeros(neg_edge_index.size(1))
        ], dim=0)

        out = model.decode(z, edge_label_index).view(-1)
        loss = criterion(out, edge_label)
        loss.backward()
        optimizer.step()

        #val_auc = eval_link_predictor(model, val_data)

        if epoch % 10 == 0:
            print(f"Epoch: {epoch:03d}, Train Loss: {loss:.3f}")#, Val AUC: {val_auc:.3f}")

    return model


@torch.no_grad()
def eval_link_predictor(model, data):

    model.eval()
    z = model.encode(data.x, data.edge_index)
    out = model.decode(z, data.edge_label_index).view(-1).sigmoid()

    return roc_auc_score(data.edge_label.cpu().numpy(), out.cpu().numpy())

In [108]:
model = Net(dataset.num_features, dataset.num_features//2, dataset.num_features//8).to("cpu")
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
criterion = torch.nn.BCEWithLogitsLoss()
model = train_link_predictor(model, dataset, optimizer, criterion, n_epochs=1)

#test_auc = eval_link_predictor(model, test_data)
#print(f"Test: {test_auc:.3f}")

AttributeError: 'GlobalStorage' object has no attribute 'edge_label'

In [13]:
model = Net(dataset.num_features, dataset.num_features//2, dataset.num_features//8).to("cpu")
print(model)
print("Number of parameters: ", sum(p.numel() for p in model.parameters()))

Net(
  (conv1): GCNConv(4096, 2048)
  (conv2): GCNConv(2048, 512)
)
Number of parameters:  9439744


In [None]:
model.encode(dataset.x, dataset.edge_index)[0]
#Output a encoding vector for every node:
# Shape = [number_of_nodes, size_of_embedding] 

tensor([-9.8741e-01,  4.3697e-01,  1.6317e+00, -1.6259e+00, -5.1332e-04,
         3.6588e-01,  1.2880e-01,  2.2605e+00, -1.9703e-01,  2.4138e-01,
         1.7142e-01, -8.9749e-01, -4.9346e-01,  2.2433e+00,  2.2167e-02,
         1.3420e-01,  1.4354e+00, -4.2092e-01, -1.0728e+00,  1.3446e+00,
        -5.6346e-01, -2.1791e-01, -3.7950e-01, -1.1852e+00,  2.5461e-01,
         1.1167e+00,  1.3299e-01,  2.5837e-01,  3.4525e-01, -9.3704e-01,
         2.3742e+00,  1.9445e+00, -9.8799e-01, -2.3600e-02,  4.6015e-01,
        -1.1712e+00,  7.9486e-01, -7.1922e-01,  1.7959e+00,  2.5094e-01,
        -1.4819e+00, -4.9602e-01, -1.0645e+00,  1.3196e+00, -2.9403e-01,
         5.3875e-01,  1.9642e-01, -1.5188e+00,  7.1758e-01, -2.1441e-01,
        -1.4755e+00, -6.8343e-01, -1.1784e+00,  1.2444e+00, -5.0079e-01,
         6.6117e-01, -9.1243e-01,  6.1809e-01,  1.4038e+00, -1.0875e+00,
        -5.9860e-01, -9.6111e-01, -5.5544e-01, -2.6084e-01, -1.0332e+00,
         7.6523e-01, -1.0081e-02,  6.9663e-01, -2.6

In [20]:
neg_edge_index = negative_sampling(dataset.edge_index,dataset.x.shape[0], dataset.edge_index.shape[0], method='sparse')

RuntimeError: Numpy is not available