In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import torch
from torch_geometric.data import Data


In [3]:

# Load datasets
books = pd.read_csv('data/Books.csv')
ratings = pd.read_csv('data/Ratings.csv')
users = pd.read_csv('data/Users.csv')

# Encode User-ID and ISBN to numerical values
user_encoder = LabelEncoder()
book_encoder = LabelEncoder()


  books = pd.read_csv('data/Books.csv')


In [4]:

ratings['User-ID'] = user_encoder.fit_transform(ratings['User-ID'])
ratings['ISBN'] = book_encoder.fit_transform(ratings['ISBN'])

# Create node feature matrices (dummy features for now)
num_users = ratings['User-ID'].max() + 1
num_books = ratings['ISBN'].max() + 1

# Features: Initialize dummy features (1 for all nodes)
user_features = torch.ones((num_users, 1))
book_features = torch.ones((num_books, 1))


In [5]:

# Combine user and book features
x = torch.cat([user_features, book_features], dim=0)

# Create edge index and edge attributes (ratings as weights)
user_indices = torch.tensor(ratings['User-ID'].values, dtype=torch.long)
book_indices = torch.tensor(ratings['ISBN'].values + num_users, dtype=torch.long)  # Offset book indices
edge_index = torch.stack([user_indices, book_indices], dim=0)

# Edge weights: Ratings normalized between 0 and 1
edge_attr = torch.tensor(ratings['Book-Rating'].values / ratings['Book-Rating'].max(), dtype=torch.float)

# Build PyTorch Geometric graph
data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
print(data)


Data(x=[445839, 1], edge_index=[2, 1149780], edge_attr=[1149780])


In [6]:
import torch.nn as nn
from torch_geometric.nn import GCNConv

class BookGNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(BookGNN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index, edge_attr):
        x = self.conv1(x, edge_index, edge_weight=edge_attr)
        x = torch.relu(x)
        x = self.conv2(x, edge_index, edge_weight=edge_attr)
        return x


In [7]:
from torch.optim import Adam

# Define the model, loss, and optimizer
input_dim = 1  # Initial feature size
hidden_dim = 64
output_dim = 16  # Embedding size

model = BookGNN(input_dim, hidden_dim, output_dim)
optimizer = Adam(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()


In [9]:

# Training loop
epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    out = model(data.x, data.edge_index, data.edge_attr)
    
    # Compute loss (for example, reconstructing ratings)
    # Here we need to map predictions back to edges and compare with true ratings
    user_embeddings = out[data.edge_index[0]]
    book_embeddings = out[data.edge_index[1]]
    predicted_ratings = torch.sum(user_embeddings * book_embeddings, dim=1)
    loss = loss_fn(predicted_ratings, data.edge_attr)
    
    loss.backward()
    optimizer.step()
    
    print(f'Epoch {epoch + 1}, Loss: {loss.item()}')


Epoch 1, Loss: 4.79559326171875
Epoch 2, Loss: 0.606834352016449
Epoch 3, Loss: 0.22045959532260895
Epoch 4, Loss: 0.20439378917217255
Epoch 5, Loss: 0.2571750581264496
Epoch 6, Loss: 0.343279093503952
Epoch 7, Loss: 0.39156877994537354
Epoch 8, Loss: 0.37562716007232666
Epoch 9, Loss: 0.3234677314758301
Epoch 10, Loss: 0.27986496686935425
Epoch 11, Loss: 0.24868036806583405
Epoch 12, Loss: 0.23002441227436066
Epoch 13, Loss: 0.21993345022201538
Epoch 14, Loss: 0.21443592011928558
Epoch 15, Loss: 0.21061564981937408
Epoch 16, Loss: 0.2066454291343689
Epoch 17, Loss: 0.2017355114221573
Epoch 18, Loss: 0.19597670435905457
Epoch 19, Loss: 0.18998436629772186
Epoch 20, Loss: 0.1844744235277176
Epoch 21, Loss: 0.17996174097061157
Epoch 22, Loss: 0.1766582429409027
Epoch 23, Loss: 0.1745181381702423
Epoch 24, Loss: 0.1733422428369522
Epoch 25, Loss: 0.1728324443101883
Epoch 26, Loss: 0.17281672358512878
Epoch 27, Loss: 0.17308466136455536
Epoch 28, Loss: 0.17346632480621338
Epoch 29, Loss: 0

In [10]:
import time

modelName = 'book_gnn_model-' + str(int(time.time())) + '.pth'
torch.save(model.state_dict(), 'models/' + modelName)


In [11]:
# Test the model
model.eval()

BookGNN(
  (conv1): GCNConv(1, 64)
  (conv2): GCNConv(64, 16)
)