In [None]:
import torch
from torch import Tensor
# Show the installed PyTorch version; the wheel index URLs below are built from it.
print(torch.__version__)

# Install required packages.
import os
# Export the version string so the shell commands below can read it as ${TORCH}.
os.environ['TORCH'] = torch.__version__

# Companion wheels are looked up on the index page named after this torch version.
!pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install pyg-lib -f https://data.pyg.org/whl/nightly/torch-${TORCH}.html
# PyTorch Geometric itself is installed from the current GitHub source (unpinned).
!pip install git+https://github.com/pyg-team/pytorch_geometric.git
# NOTE(review): presumably required for the `model_name` text embeddings used by the MovieLens dataset - confirm.
!pip install sentence-transformers

In [None]:
import os

import torch
import torch_geometric
import torch_geometric.transforms as T
from torch_geometric.datasets import MovieLens
from torch_geometric.loader import LinkNeighborLoader
import networkx as nx



In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Build the MovieLens dataset rooted at the current working directory,
# passing 'all-MiniLM-L6-v2' as the dataset's `model_name`.
dataset = MovieLens(
    root=os.getcwd(),
    model_name='all-MiniLM-L6-v2',
)

In [None]:
# The dataset holds a single heterogeneous graph.
data = dataset[0]

# Add one-hot encoded vectors to identify users
data['user'].x = torch.eye(data['user'].num_nodes)
# Drop the explicit node count now that the user store carries a feature matrix.
del data['user'].num_nodes
# Give every movie a consecutive integer id (consumed by the movie id embedding).
# The count is read from the graph instead of being hard-coded.
data['movie'].node_id = torch.arange(data['movie'].num_nodes)

# Add reverse ('movie', 'rev_rates', 'user') edges so messages flow in both directions.
data = T.ToUndirected()(data)
del data['movie', 'rev_rates', 'user'].edge_label  # Reverse labels are not needed

print(data['user', 'rates', 'movie'].edge_index)

In [None]:
# Run this to relabel users and movies with random index permutations.
# Only the edge indices are remapped; node feature matrices keep their order.

import numpy as np

permutation = False

if permutation:

    # *_origin[i] is the original index that ends up at position i.
    # Sizes come from the graph rather than hard-coded constants.
    user_origin = np.random.permutation(data['user'].num_nodes)
    movie_origin = np.random.permutation(data['movie'].num_nodes)

    # Inverse permutations as lookup tensors: mapping[original index] -> new index.
    user_permutation_mapping = torch.from_numpy(np.argsort(user_origin)).long()
    movie_permutation_mapping = torch.from_numpy(np.argsort(movie_origin)).long()

    # Remap all edges at once instead of looping over them one element at a time.
    edge_index = data['user', 'rates', 'movie'].edge_index
    user_lookup = user_permutation_mapping.to(edge_index.device)
    movie_lookup = movie_permutation_mapping.to(edge_index.device)
    data['user', 'rates', 'movie'].edge_index = torch.stack(
        [user_lookup[edge_index[0]], movie_lookup[edge_index[1]]]
    )

    # Reverse edges are (movie, user), so the lookups are applied in swapped order.
    rev_edge_index = data['movie', 'rev_rates', 'user'].edge_index
    data['movie', 'rev_rates', 'user'].edge_index = torch.stack(
        [movie_lookup[rev_edge_index[0]], user_lookup[rev_edge_index[1]]]
    )

print(data['user', 'rates', 'movie'].edge_index)

tensor([[ 434,  434,  434,  ...,  388,  388,  388],
        [ 556, 8763, 9587,  ..., 2138, 3439, 8182]])


In [None]:
# Move the whole graph (node features and edge tensors) onto the selected device.
data = data.to(device)

In [None]:
# Print a summary of the graph: each node/edge store with its attribute shapes.
print(data)

HeteroData(
  [1mmovie[0m={
    x=[9742, 404],
    node_id=[9742]
  },
  [1muser[0m={ x=[610, 610] },
  [1m(user, rates, movie)[0m={
    edge_index=[2, 100836],
    edge_label=[100836]
  },
  [1m(movie, rev_rates, user)[0m={ edge_index=[2, 100836] }
)


In [None]:
# Split the ('user', 'rates', 'movie') edges into train / validation / test graphs:
# 20% of the edges for validation, 20% for test, and no sampled negative edges.
# The rating stored under "edge_label" is carried along as the supervision target.
train_data, val_data, test_data = T.RandomLinkSplit(
    edge_types=[('user', 'rates', 'movie')],
    rev_edge_types=[('movie', 'rev_rates', 'user')],
    key="edge_label",
    is_undirected=True,
    neg_sampling_ratio=0.0,
    num_val=0.2,
    num_test=0.2,
)(data)

# Rating value counts: full graph first, then the test split.
print(torch.unique(data['user', 'rates', 'movie'].edge_label, return_counts=True))
print(torch.unique(test_data['user', 'rates', 'movie'].edge_label, return_counts=True))

# Summaries of the three splits, one per line group (train, test, val).
print(train_data, test_data, val_data, sep='\n')

(tensor([0, 1, 2, 3, 4, 5], device='cuda:0'), tensor([ 1370,  4602, 13101, 33183, 35369, 13211], device='cuda:0'))
(tensor([0, 1, 2, 3, 4, 5], device='cuda:0'), tensor([ 285,  940, 2626, 6695, 6997, 2624], device='cuda:0'))
HeteroData(
  [1mmovie[0m={
    x=[9742, 404],
    node_id=[9742]
  },
  [1muser[0m={ x=[610, 610] },
  [1m(user, rates, movie)[0m={
    edge_index=[2, 60502],
    edge_label=[60502],
    edge_label_index=[2, 60502]
  },
  [1m(movie, rev_rates, user)[0m={ edge_index=[2, 60502] }
)
HeteroData(
  [1mmovie[0m={
    x=[9742, 404],
    node_id=[9742]
  },
  [1muser[0m={ x=[610, 610] },
  [1m(user, rates, movie)[0m={
    edge_index=[2, 80669],
    edge_label=[20167],
    edge_label_index=[2, 20167]
  },
  [1m(movie, rev_rates, user)[0m={ edge_index=[2, 80669] }
)
HeteroData(
  [1mmovie[0m={
    x=[9742, 404],
    node_id=[9742]
  },
  [1muser[0m={ x=[610, 610] },
  [1m(user, rates, movie)[0m={
    edge_index=[2, 60502],
    edge_label=[20167],
    ed

In [None]:
from torch_geometric.nn import SAGEConv, to_hetero, GATConv, SAGEConv
from torch.nn import Linear, Dropout
from torch_geometric.nn.aggr import MultiAggregation, MaxAggregation, MinAggregation, MeanAggregation, SumAggregation, StdAggregation


class Encoder(torch.nn.Module):
    """Stack of three SAGEConv layers with attention-combined mean/std aggregation.

    Every layer infers its input width lazily (``-1``) and outputs
    ``hidden_dimension`` channels. The layers are applied back to back.
    """

    def __init__(self, hidden_dimension):
        super().__init__()

        # Aggregator combinations that were tried as alternatives to the one below:
        #   * aggr='add' with normalize=True
        #   * MultiAggregation([Mean, Sum]) with normalize=True
        #   * MultiAggregation([Mean, Min, Max, Std])
        def build_conv():
            """Create one SAGEConv whose mean and std aggregations are merged in 'attn' mode."""
            aggregation = MultiAggregation(
                aggrs=[MeanAggregation(), StdAggregation()],
                mode='attn',
                mode_kwargs={
                    "in_channels": -1,
                    "out_channels": hidden_dimension,
                    "num_heads": 1,
                },
            )
            return SAGEConv((-1, -1), hidden_dimension, aggr=aggregation)

        self.conv1 = build_conv()
        self.conv2 = build_conv()
        self.conv3 = build_conv()

    def forward(self, x, edge_index):
        """Pass ``x`` through the three convolutions in order over ``edge_index``."""
        hidden = self.conv1(x, edge_index)
        hidden = self.conv2(hidden, edge_index)
        return self.conv3(hidden, edge_index)


class RatingClassifier(torch.nn.Module):
    """Two-layer MLP head that scores (user, movie) pairs.

    The user and movie embeddings of each pair are concatenated, passed through
    a 64-unit ReLU layer with dropout (p=0.3), and reduced to one value per pair.
    """

    def __init__(self, hidden_dimension):
        super().__init__()

        # Input is a user embedding and a movie embedding side by side.
        self.linear1 = Linear(2 * hidden_dimension, 64)
        self.linear2 = Linear(64, 1)
        self.dropout1 = Dropout(p=0.3)

    def forward(self, x, edge_label_index):
        """Return a 1-D tensor with one prediction per column of ``edge_label_index``.

        ``x`` is indexed by the keys 'user' and 'movie'; row 0 of
        ``edge_label_index`` selects users and row 1 selects movies.
        """
        user_idx, movie_idx = edge_label_index[0], edge_label_index[1]

        pair_features = torch.cat((x['user'][user_idx], x['movie'][movie_idx]), dim=-1)

        hidden = torch.relu(self.linear1(pair_features))
        hidden = self.dropout1(hidden)

        return self.linear2(hidden).view(-1)


class RecommenderGNN(torch.nn.Module):
    """Rating predictor: input projections, a heterogeneous GNN encoder and an MLP head.

    Args:
        hidden_dimensions: Width of all node embeddings.
        graph: Graph used to size the input layers and to supply the metadata for
            ``to_hetero``. Defaults to the notebook-level ``data`` object, which
            keeps ``RecommenderGNN(D)`` working as before.
    """

    def __init__(self, hidden_dimensions, graph=None):
        super().__init__()

        if graph is None:
            graph = data  # fall back to the notebook-level graph

        # Feature widths are read from the graph instead of hard-coding 404 / 610.
        self.movie_embedding = torch.nn.Linear(graph['movie'].x.size(-1), hidden_dimensions)
        self.user_embedding = torch.nn.Linear(graph['user'].x.size(-1), hidden_dimensions)
        self.movie_id_embedding = torch.nn.Embedding(graph['movie'].num_nodes, hidden_dimensions)

        # Convert the homogeneous encoder into one that runs over every edge type.
        self.encoder = to_hetero(Encoder(hidden_dimensions), metadata=graph.metadata())

        self.classifier = RatingClassifier(hidden_dimensions)

    def forward(self, data):
        """Predict one value per column of the ('user', 'rates', 'movie') ``edge_label_index``."""
        user_embedding = self.user_embedding(data['user'].x)
        movie_embedding = self.movie_embedding(data['movie'].x)
        movie_id_embedding = self.movie_id_embedding(data['movie'].node_id)

        # Movies combine their projected features with a learned per-id embedding.
        x_dict = {
            "user": user_embedding,
            "movie": movie_embedding + movie_id_embedding,
        }

        x_dict = self.encoder(x_dict, data.edge_index_dict)

        pred = self.classifier(x_dict, data["user", "rates", "movie"].edge_label_index)

        return pred

# Build a 16-dimensional model and place it on the selected device.
model = RecommenderGNN(16)
model = model.to(device)

# One gradient-free forward pass on the training graph; the output is discarded.
# The encoder layers are declared with -1 input sizes, so this pass fixes their shapes.
with torch.no_grad():
    model(train_data)

In [None]:
def train(model, data, loss_func, optimizer):
    """Run one full-graph optimisation step.

    Returns the square root of the loss as a Python float (the RMSE when
    ``loss_func`` is a mean-squared-error loss).
    """
    model.train()
    graph = data.to(device)

    optimizer.zero_grad()
    predicted = model(graph)
    # Labels live on the user -> movie edges; cast them to float for the loss.
    actual = graph['user', 'movie'].edge_label.float()

    step_loss = loss_func(predicted, actual)
    step_loss.backward()
    optimizer.step()

    return step_loss.sqrt().item()

In [None]:
@torch.no_grad()
def evaluate(model, data):
    """Return the RMSE (as a 0-dim tensor) of the model's predictions on ``data``.

    Predictions are clamped to the range [0, 5] before the error is computed.
    """
    model.eval()
    graph = data.to(device)

    scores = model(graph).clamp(min=0, max=5)
    ratings = graph['user', 'movie'].edge_label.float()

    return torch.sqrt(torch.nn.functional.mse_loss(scores, ratings))


In [None]:

# Train one model per hidden size and report the metrics of the epoch with the
# best validation RMSE seen so far.
for D in [16, 32, 64]:

    model = RecommenderGNN(D)
    model.to(device)

    with torch.no_grad():  # lazy initialization of dimensions
        model(train_data)

    # Create the optimizer only after the dry run, once all lazily-sized
    # parameters have their final shapes.
    loss_func = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Metrics recorded at the best-validation epoch (names kept for compatibility).
    min_train_loss = float('inf')
    min_val_loss = float('inf')
    min_test_loss = float('inf')

    print()
    print(f"Dimension {D}")

    for epoch in range(1, 401):

        train_loss = train(model, train_data, loss_func, optimizer)
        # Convert to plain floats so no device tensors are kept between epochs.
        val_loss = float(evaluate(model, val_data))
        test_loss = float(evaluate(model, test_data))

        # Model selection uses validation only: when it improves, snapshot the
        # train/test values of this same epoch rather than their running minima,
        # which would report an optimistic test score.
        if val_loss < min_val_loss:
            min_val_loss = val_loss
            min_train_loss = train_loss
            min_test_loss = test_loss

        if epoch % 20 == 0:
            print(f"Epoch {epoch}  Train: {min_train_loss:.3f}   Val: {min_val_loss:.3f}   Test: {min_test_loss:.3f}")





Dimension 16
Epoch 20  Train: 1.524   Val: 1.315   Test: 1.307
Epoch 40  Train: 1.262   Val: 1.093   Test: 1.103
Epoch 60  Train: 1.208   Val: 1.085   Test: 1.094
Epoch 80  Train: 1.190   Val: 1.081   Test: 1.089
Epoch 100  Train: 1.175   Val: 1.079   Test: 1.086
Epoch 120  Train: 1.157   Val: 1.070   Test: 1.077
Epoch 140  Train: 1.139   Val: 1.058   Test: 1.065
Epoch 160  Train: 1.119   Val: 1.042   Test: 1.049
Epoch 180  Train: 1.093   Val: 1.022   Test: 1.030
Epoch 200  Train: 1.071   Val: 1.004   Test: 1.011
Epoch 220  Train: 1.049   Val: 0.992   Test: 1.000
Epoch 240  Train: 1.040   Val: 0.988   Test: 0.995
Epoch 260  Train: 1.032   Val: 0.986   Test: 0.993
Epoch 280  Train: 1.029   Val: 0.986   Test: 0.993
Epoch 300  Train: 1.021   Val: 0.986   Test: 0.992
Epoch 320  Train: 1.014   Val: 0.984   Test: 0.990
Epoch 340  Train: 1.008   Val: 0.981   Test: 0.986
Epoch 360  Train: 0.998   Val: 0.978   Test: 0.983
Epoch 380  Train: 0.988   Val: 0.975   Test: 0.979
Epoch 400  Train: 0.9