In [51]:
import subprocess

def install_dependencies():
    commands = [
        "sudo apt install unzip -y",
        "pip install gdown",
        "pip install torch",
        "pip install torch-geometric",
        "pip install numpy",
        "pip install pandas",
        "pip install scikit-learn",
        "pip install pyarrow",
        "pip install tensorflow"
    ]

    for command in commands:
        process = subprocess.Popen(command.split(), stdout=subprocess.PIPE)
        output, error = process.communicate()
        if error:
            print(f"Error occurred: {error}")
        else:
            print(f"Output: {output}")

In [52]:
import torch
import torch.nn.functional as F
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from torch_geometric.data import Data
import random

In [46]:
import gcsfs
import torch

index = 0
path = f'gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/{1980+index}.pt'
fs = gcsfs.GCSFileSystem()

with fs.open(path, 'rb') as f:
    embeddings = torch.load(f)

print(len(embeddings))

36398


In [8]:
!gdown --fuzzy https://drive.google.com/file/d/1vm3n5-3aPs7LX4Vp0Z3UqhwgyiWkcKaI/view?usp=sharing

Downloading...
From (original): https://drive.google.com/uc?id=1vm3n5-3aPs7LX4Vp0Z3UqhwgyiWkcKaI
From (redirected): https://drive.google.com/uc?id=1vm3n5-3aPs7LX4Vp0Z3UqhwgyiWkcKaI&confirm=t&uuid=ef137808-741e-4b08-a8fc-3aa994de1d96
To: /content/graphs_7-9-24.zip
100% 518M/518M [00:07<00:00, 67.7MB/s]


In [None]:
!unzip graphs_7-9-24.zip

In [53]:
import json
import pandas as pd

all_nodes = []

for year in range(1980, 2024):
    file_path = f'graphs_for_corvic/{year}.json'
    with open(file_path, 'r') as file:
        data = json.load(file)

    nodes = pd.DataFrame(data['nodes'])
    all_nodes = all_nodes + nodes['id'].tolist()
    edges = pd.DataFrame(data['links'])

In [55]:
all_nodes = set(all_nodes)
print(len(all_nodes))

36398


In [57]:
print(all_nodes)

{'thbs2', 'carfilzomib', 'sonidegib phosphate', 'ubtf', 'maged2', 'period of infection', 'nrip1', 'aluminum chlorohydrate', 'ptpn14', 'nmnat2', 'homeoboxes', 'rpl18a', 'grap2', 'tmem123', 'stricture', 'vinflunine', 'irak4', 'cdca2', 'gpr137b', 'prelp', 'fesoterodine', 'branchio-oto-renal syndrome', 'cetyl alcohol', 'bms-962476', 'lymphatic diseases', 'cfh', 'retapamulin', 'finger clinodactyly', 'urofollitropin', 'cell cycle', 'flad1', 'cdca8', 'nfil3', 'bevenopran', 'cd36', 'mir149', 'lower extremity deformities, congenital', 'ichthyosis bullosa of siemens', 'kif5c', 'spns2', 'gator2 subcomplex', 'slc29a1', 'trichosporonosis', 'semg1', 'epb41l3', 'adaptor related protein complexes', 'znf540', 'bms-986120', 'dehydrocholic acid', 'gsk-2245035', 'gclc', 'vesicular stomatitis', 'hamartoma', 'ccdc170', 'trsup-tta1-1', 'lipidoses', 'prr34-as1', 'tubb2a', 'dourine', 'hemolytic-uremic syndrome', 'albumin:globulin ratio measurement', 'rny4', 'rpl36', 'wrn', 'nup85', 'sinusitis', 'padeliporfin p

In [59]:
print('benzene' in all_nodes)

False


In [42]:
!cd graphs_for_corvic; ls

1980.json  1985.json  1990.json  1995.json  2000.json  2005.json  2010.json  2015.json	2020.json
1981.json  1986.json  1991.json  1996.json  2001.json  2006.json  2011.json  2016.json	2021.json
1982.json  1987.json  1992.json  1997.json  2002.json  2007.json  2012.json  2017.json	2022.json
1983.json  1988.json  1993.json  1998.json  2003.json  2008.json  2013.json  2018.json	2023.json
1984.json  1989.json  1994.json  1999.json  2004.json  2009.json  2014.json  2019.json


In [41]:
print('benzene' in all_nodes)

False


In [34]:
edges.head()

Unnamed: 0,weight,source,target
0,2.3,mavorixafor,plerixafor
1,2.4,mavorixafor,vipr1
2,1.6,mavorixafor,tat
3,1.2,mavorixafor,slc5a2
4,9.6,mavorixafor,cxcr4


In [None]:
'''
import json

for year in range(1980, 2024):
    file_path = f'graphs_for_corvic/{year}.json'
    with open(file_path, 'r') as file:
        data = json.load(file)

    path = "gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/new_parquet_data"

    nodes = pd.DataFrame(data['nodes'])
    nodes = nodes.rename(columns={"node_type": "node_label", "id": "node_id"})

    edges = pd.DataFrame(data['links'])
    edges = edges.rename(columns={"source": "source_id", "target": "destination_id"})

    nodes.to_parquet(f"{path}/{year}/{year}_nodes.parquet", engine='pyarrow')
    edges.to_parquet(f"{path}/{year}/{year}_edges.parquet", engine='pyarrow')
'''

## Loading edge files with a percentile threshold on the edge weights. Higher percentile extracts stronger relations. This parameter can be adjusted to control the strength of trends that we want to predict for future.

In [None]:
def load_data(year, data_dir, percentile=0.0):
    edges = pd.read_parquet(f'{data_dir}/{year}/{year}_edges.parquet', engine='pyarrow')
    nodes = pd.read_parquet(f'{data_dir}/{year}/{year}_nodes.parquet', engine='pyarrow')
    weight_threshold = edges['weight'].quantile(percentile)
    filtered_edges = edges[edges['weight'] >= weight_threshold]
    return filtered_edges, nodes

data_dir = "gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/new_parquet_data"
years = range(1980, 2024)

all_node_ids = set()
id_to_label = {}
for i in years:
    _, n = load_data(i, data_dir)
    all_node_ids = all_node_ids.union(set(n['node_id'].tolist()))
    keys, vals = n['node_id'].tolist(), n['node_label'].tolist()
    entries = {key: value for key, value in zip(keys, vals)}
    id_to_label.update(entries)

def featurizer(edges, node_ids, id_to_label):
    label_order = ['phenotype', 'gene', 'compound']
    label_to_index = {label: i for i, label in enumerate(label_order)}

    node_features = np.zeros((len(node_ids), 3), dtype=float)
    out_degree_count = {node: {label: 0 for label in label_order} for node in node_ids}

    for src, dest in zip(edges['source_id'], edges['destination_id']):
        dest_label = id_to_label[dest]
        out_degree_count[src][dest_label] += 1

    for i, node in enumerate(node_ids):
        node_feature_vector = [out_degree_count[node][label] for label in label_order]
        node_features[i] = node_feature_vector

    return torch.tensor(node_features, dtype=torch.float)

node_ids = sorted(list(all_node_ids))
node_id_to_index = {node_id: idx for idx, node_id in enumerate(node_ids)}

graphs = []

for year in years:
    edges, _ = load_data(year, data_dir)
    node_feature = featurizer(edges, node_ids, id_to_label)
    edge_index = np.array([edges['source_id'].map(node_id_to_index).values,
                           edges['destination_id'].map(node_id_to_index).values])
    edge_index = torch.tensor(edge_index, dtype=torch.long)
    edge_weights = torch.tensor(edges['weight'].values, dtype=torch.float)
    g = Data(x=node_feature, edge_index=edge_index, edge_attr=edge_weights, y=edge_weights)
    graphs.append(g)

print(f"Number of graphs: {len(graphs)}")

Number of graphs: 44


In [43]:
import json

file_path = 'node_order_for_embedding_attribution.json'

with open(file_path, 'r') as file:
    data = json.load(file)

In [None]:
import json
with open("node_order_for_embedding_attribution.json", 'w') as file:
    json.dump(node_ids, file)

## TGN Network

In [None]:
from torch.nn import Parameter
from torch_geometric.nn import ChebConv
from torch_geometric.nn.inits import glorot, zeros

class GConvLSTM(torch.nn.Module):
    r"""An implementation of the Chebyshev Graph Convolutional Long Short Term Memory
    Cell. For details see this paper: `"Structured Sequence Modeling with Graph
    Convolutional Recurrent Networks." <https://arxiv.org/abs/1612.07659>`_

    Args:
        in_channels (int): Number of input features.
        out_channels (int): Number of output features.
        K (int): Chebyshev filter size :math:`K`.
        normalization (str, optional): The normalization scheme for the graph
            Laplacian (default: :obj:`"sym"`):

            1. :obj:`None`: No normalization
            :math:`\mathbf{L} = \mathbf{D} - \mathbf{A}`

            2. :obj:`"sym"`: Symmetric normalization
            :math:`\mathbf{L} = \mathbf{I} - \mathbf{D}^{-1/2} \mathbf{A}
            \mathbf{D}^{-1/2}`

            3. :obj:`"rw"`: Random-walk normalization
            :math:`\mathbf{L} = \mathbf{I} - \mathbf{D}^{-1} \mathbf{A}`

            You need to pass :obj:`lambda_max` to the :meth:`forward` method of
            this operator in case the normalization is non-symmetric.
            :obj:`\lambda_max` should be a :class:`torch.Tensor` of size
            :obj:`[num_graphs]` in a mini-batch scenario and a
            scalar/zero-dimensional tensor when operating on single graphs.
            You can pre-compute :obj:`lambda_max` via the
            :class:`torch_geometric.transforms.LaplacianLambdaMax` transform.
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        K: int,
        normalization: str = "sym",
        bias: bool = True,
    ):
        super(GConvLSTM, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.K = K
        self.normalization = normalization
        self.bias = bias
        self._create_parameters_and_layers()
        self._set_parameters()

    def _create_input_gate_parameters_and_layers(self):

        self.conv_x_i = ChebConv(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.conv_h_i = ChebConv(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.w_c_i = Parameter(torch.Tensor(1, self.out_channels))
        self.b_i = Parameter(torch.Tensor(1, self.out_channels))

    def _create_forget_gate_parameters_and_layers(self):

        self.conv_x_f = ChebConv(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.conv_h_f = ChebConv(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.w_c_f = Parameter(torch.Tensor(1, self.out_channels))
        self.b_f = Parameter(torch.Tensor(1, self.out_channels))

    def _create_cell_state_parameters_and_layers(self):

        self.conv_x_c = ChebConv(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.conv_h_c = ChebConv(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.b_c = Parameter(torch.Tensor(1, self.out_channels))

    def _create_output_gate_parameters_and_layers(self):

        self.conv_x_o = ChebConv(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.conv_h_o = ChebConv(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            K=self.K,
            normalization=self.normalization,
            bias=self.bias,
        )

        self.w_c_o = Parameter(torch.Tensor(1, self.out_channels))
        self.b_o = Parameter(torch.Tensor(1, self.out_channels))

    def _create_parameters_and_layers(self):
        self._create_input_gate_parameters_and_layers()
        self._create_forget_gate_parameters_and_layers()
        self._create_cell_state_parameters_and_layers()
        self._create_output_gate_parameters_and_layers()

    def _set_parameters(self):
        glorot(self.w_c_i)
        glorot(self.w_c_f)
        glorot(self.w_c_o)
        zeros(self.b_i)
        zeros(self.b_f)
        zeros(self.b_c)
        zeros(self.b_o)

    def _set_hidden_state(self, X, H):
        if H is None:
            H = torch.zeros(X.shape[0], self.out_channels).to(X.device)
        return H

    def _set_cell_state(self, X, C):
        if C is None:
            C = torch.zeros(X.shape[0], self.out_channels).to(X.device)
        return C

    def _calculate_input_gate(self, X, edge_index, edge_weight, H, C, lambda_max):
        I = self.conv_x_i(X, edge_index, edge_weight, lambda_max=lambda_max)
        I = I + self.conv_h_i(H, edge_index, edge_weight, lambda_max=lambda_max)
        I = I + (self.w_c_i * C)
        I = I + self.b_i
        I = torch.sigmoid(I)
        return I

    def _calculate_forget_gate(self, X, edge_index, edge_weight, H, C, lambda_max):
        F = self.conv_x_f(X, edge_index, edge_weight, lambda_max=lambda_max)
        F = F + self.conv_h_f(H, edge_index, edge_weight, lambda_max=lambda_max)
        F = F + (self.w_c_f * C)
        F = F + self.b_f
        F = torch.sigmoid(F)
        return F

    def _calculate_cell_state(self, X, edge_index, edge_weight, H, C, I, F, lambda_max):
        T = self.conv_x_c(X, edge_index, edge_weight, lambda_max=lambda_max)
        T = T + self.conv_h_c(H, edge_index, edge_weight, lambda_max=lambda_max)
        T = T + self.b_c
        T = torch.tanh(T)
        C = F * C + I * T
        return C

    def _calculate_output_gate(self, X, edge_index, edge_weight, H, C, lambda_max):
        O = self.conv_x_o(X, edge_index, edge_weight, lambda_max=lambda_max)
        O = O + self.conv_h_o(H, edge_index, edge_weight, lambda_max=lambda_max)
        O = O + (self.w_c_o * C)
        O = O + self.b_o
        O = torch.sigmoid(O)
        return O

    def _calculate_hidden_state(self, O, C):
        H = O * torch.tanh(C)
        return H

    def forward(
        self,
        X: torch.FloatTensor,
        edge_index: torch.LongTensor,
        edge_weight: torch.FloatTensor = None,
        H: torch.FloatTensor = None,
        C: torch.FloatTensor = None,
        lambda_max: torch.Tensor = None,
    ) -> torch.FloatTensor:
        """
        Making a forward pass. If edge weights are not present the forward pass
        defaults to an unweighted graph. If the hidden state and cell state
        matrices are not present when the forward pass is called these are
        initialized with zeros.

        Arg types:
            * **X** *(PyTorch Float Tensor)* - Node features.
            * **edge_index** *(PyTorch Long Tensor)* - Graph edge indices.
            * **edge_weight** *(PyTorch Long Tensor, optional)* - Edge weight vector.
            * **H** *(PyTorch Float Tensor, optional)* - Hidden state matrix for all nodes.
            * **C** *(PyTorch Float Tensor, optional)* - Cell state matrix for all nodes.
            * **lambda_max** *(PyTorch Tensor, optional but mandatory if normalization is not sym)* - Largest eigenvalue of Laplacian.

        Return types:
            * **H** *(PyTorch Float Tensor)* - Hidden state matrix for all nodes.
            * **C** *(PyTorch Float Tensor)* - Cell state matrix for all nodes.
        """
        H = self._set_hidden_state(X, H)
        C = self._set_cell_state(X, C)
        I = self._calculate_input_gate(X, edge_index, edge_weight, H, C, lambda_max)
        F = self._calculate_forget_gate(X, edge_index, edge_weight, H, C, lambda_max)
        C = self._calculate_cell_state(X, edge_index, edge_weight, H, C, I, F, lambda_max)
        O = self._calculate_output_gate(X, edge_index, edge_weight, H, C, lambda_max)
        H = self._calculate_hidden_state(O, C)
        return H, C

In [None]:
class TemporalGNN(torch.nn.Module):
    def __init__(self, num_nodes, node_features, hidden_channels, output_channels):
        super(TemporalGNN, self).__init__()
        self.recurrent = GConvLSTM(node_features, hidden_channels, 3)
        self.linear = torch.nn.Linear(hidden_channels, output_channels)
        self.edge_mlp = torch.nn.Sequential(
                torch.nn.Linear(2 * output_channels, hidden_channels),
                torch.nn.ReLU(),
                torch.nn.Linear(hidden_channels, 1)
            )

    def forward(self, seq):
        H, C = None, None
        for i in range(len(seq)):
            x = seq[i].x
            edge_index = seq[i].edge_index
            edge_attr = seq[i].edge_attr
            H, C = self.recurrent(x, edge_index, edge_attr, H, C)

        H = F.relu(H)
        H = self.linear(H)
        return F.log_softmax(H, dim=1)

    def predict_edge_weight(self, node_embeddings, edge_index):
        src, dst = edge_index
        edge_features = torch.cat([node_embeddings[src], node_embeddings[dst]], dim=1)
        probs = self.edge_mlp(edge_features)
        probs = torch.sigmoid(probs)
        return probs.squeeze()

    def get_edge_embeddings(self, node_embeddings, edge_index):
        src, dst = edge_index
        edge_features = torch.cat([node_embeddings[src], node_embeddings[dst]], dim=1)
        return edge_features

In [None]:
node_dim = graphs[0].x.shape[1]
num_nodes = graphs[0].x.shape[0]
hidden_channels = 64
output_channels = 64
learning_rate = 0.0001
epochs = 30
time_window = 10
weight_decay = 0.0001

model = TemporalGNN(num_nodes, node_dim, hidden_channels, output_channels)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
criterion = torch.nn.BCELoss()

## Training loop if you want to train the TGN from scratch

In [None]:
from sklearn.model_selection import train_test_split
import copy

def create_sequences(data, time_step):
    X, Y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step)])
        Y.append(data[i + time_step])
    return X, Y

x, y = create_sequences(graphs, time_window)

split_index = int(len(x) * 0.8)

x_train, x_test = copy.deepcopy(x[:split_index]), copy.deepcopy(x[split_index:])
y_train, y_test = copy.deepcopy(y[:split_index]), copy.deepcopy(y[split_index:])

print("Size of x_train:", len(x_train))
print("Size of x_test:", len(x_test))
print("Size of y_train:", len(y_train))
print("Size of y_test:", len(y_test))

In [None]:
import torch
import random
from torch_geometric.utils import negative_sampling

def add_negative_samples(data):
    num_pos_samples = data.edge_index.size(1)
    num_neg_samples = num_pos_samples
    neg_edge_index = negative_sampling(data.edge_index, num_nodes=data.num_nodes, num_neg_samples=num_neg_samples)
    pos_weights = torch.ones(num_pos_samples, device=data.edge_index.device)
    neg_weights = torch.zeros(num_neg_samples, device=data.edge_index.device)

    data.edge_index = torch.cat([data.edge_index, neg_edge_index], dim=1)
    data.y = torch.cat([pos_weights, neg_weights])
    data.edge_attr = torch.cat([data.edge_attr, neg_weights])

    perm = torch.randperm(data.edge_index.size(1))

    data.edge_index = data.edge_index[:, perm]
    data.edge_attr = data.edge_attr[perm]
    data.y = data.y[perm]

    return data

for i in range(len(y_train)):
    y_train[i] = add_negative_samples(y_train[i])

for i in range(len(y_test)):
    y_test[i] = add_negative_samples(y_test[i])

In [None]:
import matplotlib.pyplot as plt
import torch

TRAIN = True

if TRAIN:
  train_losses = []
  val_accuracies_combined = []
  val_accuracies_presence = []
  val_accuracies_absence = []

  model.train()
  for epoch in range(epochs):
      total_loss = 0
      model.train()
      for i in range(len(x_train)):
          optimizer.zero_grad()
          node_embeddings = model(x_train[i])
          probs = model.predict_edge_weight(node_embeddings, y_train[i].edge_index)
          loss = criterion(probs, y_train[i].y)
          loss.backward()
          optimizer.step()

          total_loss += loss.item()

      avg_train_loss = total_loss / len(x_train)
      train_losses.append(avg_train_loss)

      # Validation
      model.eval()
      total_correct_presence = 0
      total_correct_absence = 0
      total_presence = 0
      total_absence = 0
      total_correct = 0
      total_predictions = 0
      with torch.no_grad():
          for i in range(len(x_test)):
              node_embeddings = model(x_test[i])
              probs = model.predict_edge_weight(node_embeddings, y_test[i].edge_index)
              predictions = (probs > 0.5).int()

              correct_presence = ((predictions == y_test[i].y) & (y_test[i].y == 1)).sum().item()
              correct_absence = ((predictions == y_test[i].y) & (y_test[i].y == 0)).sum().item()

              total_correct_presence += correct_presence
              total_correct_absence += correct_absence

              total_presence += (y_test[i].y == 1).sum().item()
              total_absence += (y_test[i].y == 0).sum().item()

              total_correct += (predictions == y_test[i].y).sum().item()
              total_predictions += y_test[i].y.size(0)

      val_accuracy_presence = total_correct_presence / total_presence if total_presence > 0 else 0
      val_accuracy_absence = total_correct_absence / total_absence if total_absence > 0 else 0
      val_accuracy_combined = total_correct / total_predictions if total_predictions > 0 else 0

      val_accuracies_presence.append(val_accuracy_presence)
      val_accuracies_absence.append(val_accuracy_absence)
      val_accuracies_combined.append(val_accuracy_combined)

      print(f'Epoch {epoch+1}, Loss: {avg_train_loss:.4f}, Val Accuracy (Combined): {val_accuracy_combined:.4f}, Val Accuracy (Presence): {val_accuracy_presence:.4f}, Val Accuracy (Absence): {val_accuracy_absence:.4f}')

## Storing model weights to gs bucket

In [None]:
import gcsfs

if TRAIN:
  fs = gcsfs.GCSFileSystem()
  with fs.open('gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/model_weights/tgn_complete_model.pth', 'wb') as f:
      torch.save(model.state_dict(), f)

In [None]:
!ls

In [None]:
model.load_state_dict(torch.load('test_full_model.pth'))

<All keys matched successfully>

## Generating TGN embeddings and storing to gs storage

In [None]:
import torch
import gcsfs

path = 'gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/model_weights/tgn_complete_model.pth'
fs = gcsfs.GCSFileSystem()

with fs.open(path, 'rb') as f:
    state_dict = torch.load(f)

model.load_state_dict(state_dict)

<All keys matched successfully>

## Writing embeddings to gs bucket

In [None]:
import torch
import io
from google.cloud import storage

def upload_tensor_to_gcs(tensor, bucket_name, destination_blob_name):
    """Uploads a tensor to the GCS bucket."""
    client = storage.Client()
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    buffer = io.BytesIO()
    torch.save(tensor, buffer)
    buffer.seek(0)

    blob.upload_from_file(buffer, content_type='application/octet-stream')

bucket_name = 'datasets-dev-ded86f66'
prefix = 'benchmarks/scientific_trend_prediction/Tgn_embeddings_d5'

for year in range(len(graphs)):
    node_embeddings = model([graphs[year]])
    gcs_file = f'{prefix}/{1980 + year}.pt'

    upload_tensor_to_gcs(node_embeddings, bucket_name, gcs_file)

    print(f'Saved embeddings for year {1980 + year} to {gcs_file}')

Saved embeddings for year 1980 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1980.pt
Saved embeddings for year 1981 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1981.pt
Saved embeddings for year 1982 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1982.pt
Saved embeddings for year 1983 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1983.pt
Saved embeddings for year 1984 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1984.pt
Saved embeddings for year 1985 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1985.pt
Saved embeddings for year 1986 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1986.pt
Saved embeddings for year 1987 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1987.pt
Saved embeddings for year 1988 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1988.pt
Saved embeddings for year 1989 to benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1989.pt
Saved embe

In [None]:
import json
from google.cloud import storage

bucket_name = 'datasets-dev-ded86f66'
prefix = 'benchmarks/scientific_trend_prediction/Tgn_embeddings_d1'
file_name = 'node_order_for_embedding_attribution.json'
blob_name = f'{prefix}/{file_name}'

json_data = json.dumps(node_ids)

client = storage.Client()

bucket = client.bucket(bucket_name)

blob = bucket.blob(blob_name)
blob.upload_from_string(json_data, content_type='application/json')

print(f"The list has been written to gs://{bucket_name}/{blob_name}")

The list has been written to gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d1/node_order_for_embedding_attribution.json


In [None]:
!gsutil -m cp -r \
  "gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d5" \
  .

Copying gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1980.pt...
/ [0/41 files][    0.0 B/347.1 MiB]   0% Done                                   Copying gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1981.pt...
/ [0/41 files][    0.0 B/347.1 MiB]   0% Done                                   Copying gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1982.pt...
/ [0/41 files][    0.0 B/347.1 MiB]   0% Done                                   Copying gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1983.pt...
/ [0/41 files][    0.0 B/347.1 MiB]   0% Done                                   Copying gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_embeddings_d5/1984.pt...
/ [0/41 files][    0.0 B/347.1 MiB]   0% Done                                   Copying gs://datasets-dev-ded86f66/benchmarks/scientific_trend_prediction/Tgn_em

In [None]:
!ls

graph.py				   sampler.py		   Tgn_embeddings_d1
models.py				   TemporalDataHandler.py  Tgn_embeddings_d5
node_order_for_embedding_attribution.json  test_full_model.pth	   TGNEncoder.py
__pycache__				   test_model.pth


In [None]:
import shutil
from google.colab import files

shutil.make_archive('Tgn_embeddings_d5', 'zip', 'Tgn_embeddings_d5')
files.download('Tgn_embeddings_d5.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>