In [None]:
!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html
Collecting pyg_lib
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/pyg_lib-0.2.0%2Bpt20cu118-cp310-cp310-linux_x86_64.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/torch_scatter-2.1.1%2Bpt20cu118-cp310-cp310-linux_x86_64.whl (10.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.2/10.2 MB[0m [31m64.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.0.0%2Bcu118/torch_sparse-0.6.17%2Bpt20cu118-cp310-cp310-linux_x86_64.whl (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecti

In [None]:
!pip install torch_geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch_geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.6/661.6 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: torch_geometric
  Building wheel for torch_geometric (pyproject.toml) ... [?25l[?25hdone
  Created wheel for torch_geometric: filename=torch_geometric-2.3.1-py3-none-any.whl size=910476 sha256=c82641559b69e3f14c329fb83cb3bd9dbe41ea2dae8fbebbcf39315462754dc5
  Stored in directory: /root/.cache/pip/wheels/ac/dc/30/e2874821ff308ee67dcd7a66dbde912411e19e35a1addda028
Successfully built torch_geometric
Installing collected packages: torch_geometric
Successfully installed torch_geomet

## **GAE**

In [None]:
import torch
from torch_geometric.nn import GCNConv

class Encoder(torch.nn.Module):
    """Stack of GCN layers, each followed by a ReLU, that maps node features to embeddings.

    Args:
        hidden_features: Sequence of output widths, one entry per GCN layer, in
            the order the layers are applied. The last entry is the embedding size.
    """

    def __init__(self, hidden_features):
        super().__init__()

        # Only the first layer's input width is unknown up front: -1 lets PyG
        # infer it lazily on the first forward pass. Every later layer consumes
        # the previous layer's output, so its input width is known and is passed
        # explicitly (a width mismatch then fails at construction, not at runtime).
        in_features = -1
        convs = []
        for hidden in hidden_features:
            convs.append(GCNConv(in_features, hidden))
            in_features = hidden

        self.convs = torch.nn.ModuleList(convs)

    def forward(self, x, edge_index):
        """Apply every GCN layer in order and return the final node embeddings.

        Args:
            x: Node feature tensor passed to the first GCN layer.
            edge_index: Graph connectivity forwarded unchanged to every layer.

        Returns:
            The output of the last layer. ReLU is applied after *every* layer,
            including the last one, so the embeddings are non-negative.
        """
        for conv in self.convs:
            x = conv(x, edge_index).relu()

        return x

class Decoder(torch.nn.Module):
    """Edge scorer: a two-layer MLP over the concatenated embeddings of an edge's endpoints.

    Args:
        gnn_features: Width of a single node embedding; the MLP input is twice
            this because both endpoint embeddings are concatenated.
    """

    def __init__(self, gnn_features):
        super().__init__()

        hidden_width = 32
        self.linear1 = torch.nn.Linear(2 * gnn_features, hidden_width)
        self.linear2 = torch.nn.Linear(hidden_width, 1)

    def forward(self, z, pos_edge_index, sigmoid = True):
        """Score each edge listed in ``pos_edge_index``.

        Args:
            z: Node embedding matrix, indexed by node id along dim 0.
            pos_edge_index: Two-row index tensor; row 0 holds one endpoint of
                each edge and row 1 the other.
            sigmoid: When True (default) return probabilities, otherwise raw logits.

        Returns:
            A tensor with one row per edge and a single column.
        """
        first_endpoint = z[pos_edge_index[0]]
        second_endpoint = z[pos_edge_index[1]]
        pair = torch.cat([first_endpoint, second_endpoint], dim=1)

        logits = self.linear2(torch.tanh(self.linear1(pair)))

        if sigmoid:
            return torch.sigmoid(logits)
        return logits

In [None]:
from copy import deepcopy

def train_gae_one_epoch(model, optimizer, train_data):
    """Perform one full-batch optimisation step and return the loss as a Python float.

    Args:
        model: Callable as ``model(x, edge_index)`` and exposing ``train()`` and
            ``recon_loss(z, pos_edge_index, neg_edge_index)``.
        optimizer: Object exposing ``zero_grad()`` and ``step()``.
        train_data: Object with ``x``, ``edge_index``, ``edge_label_index`` and
            ``edge_label`` attributes.

    Returns:
        The reconstruction loss of this step, converted with ``float``.
    """
    model.train()
    optimizer.zero_grad()

    embeddings = model(train_data.x, train_data.edge_index)

    # Split the supervision edges by label: 1 marks positive edges, 0 negative ones.
    candidates = train_data.edge_label_index
    labels = train_data.edge_label
    positives = candidates[:, labels == 1]
    negatives = candidates[:, labels == 0]

    recon = model.recon_loss(embeddings, positives, negatives)
    recon.backward()
    optimizer.step()

    return float(recon)

def test_gae(model, train_data, test_data):
    with torch.no_grad():
        z = model(train_data.x, train_data.edge_index)

    pos_edge_index = test_data.edge_label_index[:, test_data.edge_label == 1]
    neg_edge_index = test_data.edge_label_index[:, test_data.edge_label == 0]

    return model.test(z, pos_edge_index, neg_edge_index)[0]

def train_gae(model, optimizer, train_data, val_data, patience, nepochs = 1500):
    """Train for a fixed number of epochs, recording loss and validation score each epoch.

    Args:
        model: Model forwarded to ``train_gae_one_epoch`` and ``test_gae``.
        optimizer: Optimizer forwarded to ``train_gae_one_epoch``.
        train_data: Training split; also used by ``test_gae`` to build embeddings.
        val_data: Split whose labelled edges are scored after every epoch.
        patience: Accepted for call-site compatibility but currently IGNORED:
            early stopping and best-checkpoint restoration are disabled, so
            training always runs for exactly ``nepochs`` epochs and the model
            keeps the weights of the last epoch.
        nepochs: Number of epochs to run.

    Returns:
        ``(loss_history, val_history)``: two lists with one entry per epoch.
    """
    loss_history = []
    val_history = []

    for _ in range(nepochs):
        loss = train_gae_one_epoch(model, optimizer, train_data)
        val = test_gae(model, train_data, val_data)

        loss_history.append(loss)
        val_history.append(val)

    return loss_history, val_history

## **Hyperparameters**

In [None]:
from sklearn.model_selection import ParameterGrid

# Search space: one list of candidate values per hyperparameter.
#   hidden -- layer widths handed to Encoder (its last entry sizes the Decoder input)
#   lr     -- learning rate handed to the Adam optimizer
HYPERPARAMETER_GRID = dict(
    hidden=[[64, 32]],
    lr=[1e-3],
)

# Expand the grid into one dict per hyperparameter combination.
hyperparameter_list = [*ParameterGrid(HYPERPARAMETER_GRID)]

# Preview (at most) the first six combinations as the cell output.
hyperparameter_list[:6]

[{'hidden': [64, 32], 'lr': 0.001}]

## **Graph Embedding**

In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive so the MyDrive paths used below resolve.
# force_remount=True asks Colab to mount again even if a mount is already present.
drive.mount(
    "/content/drive",
    force_remount=True,
)

Mounted at /content/drive


In [None]:
# Directory that is listed for input graph pickles (one pickled graph per file).
PATH_TO_PICKLE = "/content/drive/MyDrive/Multimodal_Fusion/grafos_reg"
# Root directory under which one sub-directory per graph is created for the GAE results.
PATH_TO_SAVE = "/content/drive/MyDrive/Multimodal_Fusion/grafos_gae"

In [None]:
import numpy as np
import random

# Single source of truth for every RNG seed used in this notebook.
SEED = 42

# Seed each library's generator separately: Python, PyTorch (CPU and CUDA), NumPy.
random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
np.random.seed(SEED)

# Ask cuDNN for deterministic kernels and switch off its benchmark mode,
# trading some speed for run-to-run reproducibility.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
import os
import pickle as pkl
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric.utils import from_networkx
from torch_geometric.nn import GAE
from tqdm import tqdm

# os.listdir returns entries in arbitrary order, so a positional pick such as
# pickles[6] is only reproducible on a sorted listing.
# NOTE(review): after sorting, index 6 is the 7th file in lexicographic order,
# which may differ from the file the recorded run happened to select.
pickles = sorted(os.listdir(PATH_TO_PICKLE))

# Use the GPU when one is attached, otherwise fall back to CPU instead of
# crashing on .to('cuda').
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for pickle_name in [pickles[6]]:
    graph_name = pickle_name.split('.')[0]
    print(f"Graph: {graph_name}")

    # SECURITY: unpickling can execute arbitrary code -- only load files from a
    # trusted source.
    with open(f"{PATH_TO_PICKLE}/{pickle_name}", "rb") as file:
        G = pkl.load(file)

    # Remember which original node sits at each position so embeddings (rows of z)
    # can be written back to the right node afterwards.
    index_to_node = {index: node for index, node in enumerate(G.nodes())}

    # Work on a copy stripped down to 'f_features': the original graph keeps all
    # of its attributes, and the conversion below only sees the feature vector.
    G_copy = deepcopy(G)
    for node in G_copy.nodes():
        for attr in list(G_copy.nodes[node].keys()):
            if attr != 'f_features':
                del G_copy.nodes[node][attr]

    data = from_networkx(G_copy, group_node_attrs = ['f_features'])
    data.x = data.x.to(torch.float32)

    # Hold out 20% of the edges for evaluation; no separate validation split.
    train_data, _, test_data = RandomLinkSplit(num_val = 0, num_test = 0.2, is_undirected = True)(data)

    train_data = train_data.to(device)
    test_data = test_data.to(device)

    # makedirs(exist_ok=True) also creates missing parents and has no
    # check-then-create race.
    os.makedirs(f"{PATH_TO_SAVE}/{graph_name}", exist_ok = True)

    for index, hyperparams in tqdm(enumerate(hyperparameter_list), total = len(hyperparameter_list)):
        model = GAE(Encoder(hyperparams['hidden']), Decoder(hyperparams['hidden'][-1])).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr = hyperparams['lr'])

        # NOTE(review): the held-out test split doubles as the monitoring set here,
        # and nepochs = 3 looks like a smoke-test setting -- confirm before a real run.
        loss_history, val_history = train_gae(model, optimizer, train_data, test_data, patience = 3, nepochs = 3)

        with torch.no_grad():
            z = model(train_data.x, train_data.edge_index).cpu().numpy()

        print(z, val_history[-1])

        # Attach each node's embedding to the ORIGINAL graph (all attributes intact).
        for i in range(z.shape[0]):
            G.nodes[index_to_node[i]]['gae_features'] = z[i]

        # Persisting the enriched graph is currently disabled:
        #with open(f"{PATH_TO_SAVE}/{graph_name}/{index}.pkl", "wb") as file:
        #    pkl.dump(G, file)

    print("")


Graph: music


100%|██████████| 1/1 [00:00<00:00, 20.29it/s]

[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.05407215 0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.19939551 0.         ... 0.         0.         0.        ]
 [0.         0.27265045 0.         ... 0.         0.         0.        ]
 [0.         0.02513844 0.         ... 0.         0.         0.11460975]] 0.6596279354349979




