In [1]:
# Mount Google Drive inside the Colab runtime so the project folder stored there
# becomes reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Make the project folder the working directory: later cells refer to
# config.yaml, train.py and the src package by relative path.
%cd /content/drive/MyDrive/thesis/graph_neural_network/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/thesis/graph_neural_network


In [2]:
# Install torchmetrics first, then pin torch/torchvision/torchaudio to the
# versions served from the cu124 wheel index.
!pip install torchmetrics
!pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124

import torch
import os
print("PyTorch has version {}".format(torch.__version__))

# The PyG extension wheels are looked up on a find-links page (pip -f) whose
# name embeds the exact torch version string, so the URL is derived from the
# torch that was just imported rather than hard-coded.
srs_url = f"https://pytorch-geometric.com/whl/torch-{torch.__version__}.html"
!pip install torch_scatter -f $srs_url
!pip install torch_sparse -f $srs_url
!pip install torch_cluster -f $srs_url
!pip install torch_spline_conv -f $srs_url
# torch_geometric itself is installed without the find-links page and without
# a version pin.
!pip install torch_geometric

Looking in indexes: https://download.pytorch.org/whl/cu124
PyTorch has version 2.4.0+cu124
Looking in links: https://pytorch-geometric.com/whl/torch-2.4.0+cu124.html
Collecting torch_scatter
  Downloading https://data.pyg.org/whl/torch-2.4.0%2Bcu124/torch_scatter-2.1.2%2Bpt24cu124-cp312-cp312-linux_x86_64.whl (10.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 10.7/10.7 MB 55.0 MB/s eta 0:00:00
Installing collected packages: torch_scatter
Successfully installed torch_scatter-2.1.2+pt24cu124
Looking in links: https://pytorch-geometric.com/whl/torch-2.4.0+cu124.html
Collecting torch_sparse
  Downloading https://data.pyg.org/whl/torch-2.4.0%2Bcu124/torch_sparse-0.6.18%2Bpt24cu124-cp312-cp312-linux_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_sparse
Successfully installed torch_sparse-0.6.18+pt24cu124
Looking in

In [None]:
# !pip uninstall torch_scatter torch_sparse torch_cluster torch_spline_conv torch_geometric -y

In [3]:
import logging

import sys
import itertools
import yaml
import subprocess
import os
import logging

# Paths (relative to the working directory) of the base config file and of the
# training script that is launched as a subprocess further down.
config_path = "config.yaml"
train_script = "train.py"

# (Re)configure root logging for the notebook. force=True makes basicConfig
# remove and close any handlers already attached to the root logger before
# installing the new one, so re-running this cell does not stack handlers and
# a pre-existing handler cannot turn basicConfig into a no-op.
logging.basicConfig(
    level=logging.DEBUG,
    datefmt='%m-%d %H:%M:%S',
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    force=True,
)
# Notebook-level logger; records carry this module's __name__.
logger = logging.getLogger(__name__)


In [11]:
import torch
import torch_geometric
import torch_sparse
# Show the installed version of each core library, one per line, in the order
# torch, torch_geometric, torch_sparse.
for pkg in (torch, torch_geometric, torch_sparse):
    print(pkg.__version__)

2.4.0+cu124
2.6.1
0.6.18+pt24cu124


In [8]:
import argparse
import yaml
import pprint
from src.data_loader import GraphLoader
from src.batch_loader import BatchLoader
from src.utils import *
import pathlib


# The wildcard `from src.utils import *` in this cell's imports rebinds the
# notebook-level name `logger` to the logger exported by src.utils (the recorded
# output of this cell tags its records "src.utils"). Re-create the notebook's
# own logger so the messages below are attributed to the notebook itself.
logger = logging.getLogger(__name__)

# Resolve the config file path. parse_known_args() is used instead of
# parse_args() so unrecognised argv entries (presumably the kernel's own launch
# arguments) are collected in `unknown` rather than aborting the cell.
parser = argparse.ArgumentParser(description="Load configuration file")
parser.add_argument("--config", type=str, default="config.yaml", help="Path to YAML config file")
args, unknown = parser.parse_known_args()

with open(args.config, "r") as f:
    cfg = yaml.safe_load(f)

logger.info("Configuration:")
logger.info(pprint.pformat(cfg))

# Build the graph from the files named in the config. load() returns two
# objects; only `graph_data` is used further down in this cell.
graph_loader = GraphLoader(cfg)
graph_data, data = graph_loader.load()

# Split the data into train/val/test sets. random_link_split(cfg) returns a
# callable that is then applied to the graph.
train_val_test_split = random_link_split(cfg)
train_data, val_data, test_data = train_val_test_split(graph_data)

logger.debug(f"Train: {train_data}")
logger.debug(f"Val: {val_data}")
logger.debug(f"Test: {test_data}")

# Batch loader over the training split.
batch_loader = BatchLoader(cfg)
train_loader = batch_loader.load(train_data, shuffle=True)

# Use the notebook logger (not the root `logging` module functions) so every
# record emitted by this cell carries the same logger name.
logger.info(f"Number of training batches: {len(train_loader)}")

# Log the first batch as a quick sanity check of what the loader yields.
logger.info(next(iter(train_loader)))

09-12 13:49:01 - INFO - src.utils - Configuration:
09-12 13:49:01 - INFO - src.utils - {'batch_method': 'binary_link_neighbors',
 'batch_norm': False,
 'batch_size': 1024,
 'book_features': [{'name': 'textual_desc'},
                   {'aggr_fn': 'mean', 'name': 'textual_reviews'}],
 'books_filename': 'books_filtered.parquet',
 'coreness_k': 5,
 'decoder': 'inner_product_decoder',
 'descriptions_filename': 'descriptions_filtered.parquet',
 'dir': '../data_sample',
 'disjoint_train_ratio': 0.0,
 'dropout': 0.5,
 'edge_type': 'interactions',
 'embeddings_descriptions_filename': 'embeddings_descriptions_sbert_pt.parquet',
 'embeddings_reviews_filename': 'embeddings_reviews_sbert_pt.parquet',
 'encoder': 'sage_encoder',
 'epochs': 0,
 'eval_interval': 1,
 'feature_aggr_method': 'concat',
 'feature_linear_dim': 64,
 'heads': 2,
 'hidden_channels': 32,
 'interactions_filename': 'interactions.parquet',
 'item_emb_dim': 64,
 'item_feature_linear': True,
 'kl_beta': 1.0,
 'kl_warmup_epoch': 50

In [None]:
### Change config
# Overrides applied on top of the base config file; the merged result is written
# to a temporary YAML file that is handed to the training script.

new_config = {
    'epochs': 1,
    "val_size": 0.2,
    "test_size": 0.2,
    "seed": 100,
    'n_layer': 2,
    'learning_rate': 0.001,
    'item_emb_dim': 128,
    'user_emb_dim': 128,
    "negative_sampling_method": "batch_random",
    "recon_loss": "bpr", # "binary", "bpr", "bce"
    'feature_linear_dim': 128,
    'hidden_channels': 128,
    'latent_dim': 64,
    'kl_beta': 0.5,
    "kl_warmup_epoch": 30,
    'eval_interval': 1,
    'variational': False,
    'save_model': True
}


with open(config_path, "r") as f:
    config = yaml.safe_load(f)

# yaml.safe_load returns None for an empty file (and a list/scalar for
# non-mapping documents); fail with a clear message instead of an opaque
# AttributeError on .update().
if not isinstance(config, dict):
    raise ValueError(
        f"{config_path} must contain a YAML mapping, got {type(config).__name__}"
    )

# Keys in new_config win over the values read from the base file.
config.update(new_config)

temp_config_path = "temp_config.yaml"
with open(temp_config_path, "w") as f:
    yaml.dump(config, f)

In [None]:

# Run the training script in a child process and mirror its output into the
# notebook while it runs.
# - sys.executable launches the same interpreter as the kernel, so the child
#   sees the packages installed for this notebook; a bare "python" is resolved
#   through PATH and may be a different installation.
# - "-u" keeps the child's output unbuffered so lines appear as they are written.
# - stderr is merged into stdout so both streams are shown in order.
# - Using Popen as a context manager closes the pipe and waits for the child
#   even if the loop is interrupted.
with subprocess.Popen(
    [sys.executable, "-u", train_script, "--config", temp_config_path],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
) as process:
    # Stream output line by line
    for line in process.stdout:
        print(line, end="")  # live printing in notebook

# The child has already been waited on; wait() now just returns its exit code.
exit_code = process.wait()
if exit_code != 0:
    # Surface failures explicitly instead of leaving only a bare number as the
    # cell output.
    logger.error("%s exited with non-zero code %d", train_script, exit_code)

# Keep the exit code as the cell's displayed value.
exit_code


INFO:__main__:Configuration:
INFO:__main__:{'batch_method': 'binary_link_neighbors',
 'batch_norm': False,
 'batch_size': 128,
 'book_features': [{'name': 'textual_desc'},
                   {'aggr_fn': 'mean', 'name': 'textual_reviews'}],
 'books_filename': 'books_filtered.parquet',
 'coreness_k': 5,
 'decoder': 'inner_product_decoder',
 'descriptions_filename': 'descriptions_filtered.parquet',
 'dir': '../data_sample',
 'disjoint_train_ratio': 0.0,
 'dropout': 0.5,
 'edge_type': 'interactions',
 'embeddings_descriptions_filename': 'embeddings_descriptions_sbert_pt.parquet',
 'embeddings_reviews_filename': 'embeddings_reviews_sbert_pt.parquet',
 'encoder': 'sage_encoder',
 'epochs': 1,
 'eval_interval': 1,
 'feature_aggr_method': 'concat',
 'feature_linear_dim': 128,
 'heads': 2,
 'hidden_channels': 128,
 'interactions_filename': 'interactions.parquet',
 'item_emb_dim': 128,
 'item_feature_linear': True,
 'kl_beta': 0.5,
 'kl_warmup_epoch': 30,
 'language_filter': True,
 'latent_dim':

3221226356