In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import cv2
import warnings
import copy
import random
from keras import layers, models
from keras.utils import to_categorical
from keras.datasets import mnist
from keras.optimizers import Adam
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
!pip install torch_geometric
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import zipfile
import requests
from io import BytesIO

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [2]:
# Set device: use GPU if available, otherwise fallback to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define base path to save trained models and other outputs
save_base_path = "/path/to/save"

In [90]:
url = 'https://files.grouplens.org/datasets/movielens/ml-latest-small.zip'

# Download the dataset
response = requests.get(url)
zip_file = zipfile.ZipFile(BytesIO(response.content))

# Extract the ratings and movies CSV files
ratings = pd.read_csv(zip_file.open('ml-latest-small/ratings.csv'))
movies = pd.read_csv(zip_file.open('ml-latest-small/movies.csv'))

# Preview the datasets
print(ratings.head())
print(movies.head())

   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  


In [91]:
# Get unique users and items from the ratings dataset
users = ratings['userId'].unique()
items = ratings['movieId'].unique()

# Create mappings from user/item IDs to indices (used for embedding)
user_to_idx = {user: idx for idx, user in enumerate(users)}
item_to_idx = {item: idx for idx, item in enumerate(items)}

# Convert user and item IDs in ratings to indices
ratings['user_idx'] = ratings['userId'].apply(lambda x: user_to_idx[x])
ratings['item_idx'] = ratings['movieId'].apply(lambda x: item_to_idx[x])

In [92]:
print(ratings)

        userId  movieId  rating   timestamp  user_idx  item_idx
0            1        1     4.0   964982703         0         0
1            1        3     4.0   964981247         0         1
2            1        6     4.0   964982224         0         2
3            1       47     5.0   964983815         0         3
4            1       50     5.0   964982931         0         4
...        ...      ...     ...         ...       ...       ...
100831     610   166534     4.0  1493848402       609      3120
100832     610   168248     5.0  1493850091       609      2035
100833     610   168250     5.0  1494273047       609      3121
100834     610   168252     5.0  1493846352       609      1392
100835     610   170875     3.0  1493846415       609      2873

[100836 rows x 6 columns]


In [93]:
print(users.size)
print(items.size)

610
9724


In [94]:
interaction_matrix = ratings.pivot(index='user_idx', columns='item_idx', values='rating').fillna(0)

In [95]:
interaction_array= np.array(interaction_matrix)
print(interaction_array)
print('matrix dimensions : ', interaction_array.shape)

[[4.  4.  4.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 [0.  0.  0.  ... 0.  0.  0. ]
 ...
 [2.5 2.  0.  ... 0.  0.  0. ]
 [3.  0.  0.  ... 0.  0.  0. ]
 [5.  0.  5.  ... 3.  3.5 3.5]]
matrix dimensions :  (610, 9724)


In [96]:
class UserItemDataset(Dataset):
    def __init__(self, ratings):
        self.ratings = ratings

    def __len__(self):
        return len(self.ratings)

    def __getitem__(self, idx):
        row = self.ratings.iloc[idx]
        return {
            'user_idx': torch.tensor(row['user_idx'], dtype=torch.long),
            'item_idx': torch.tensor(row['item_idx'], dtype=torch.long),
            'rating': torch.tensor(row['rating'], dtype=torch.float),
        }

# Create train, validation, and test splits (80% train, 10% validation, 10% test)
train_size = int(len(ratings))
val_size = int(0 * len(ratings))
# test_size = len(ratings) - train_size - val_size

train_dataset, val_dataset = torch.utils.data.random_split(ratings, [train_size, val_size])

# Create data loaders for batching
train_loader = DataLoader(UserItemDataset(ratings.iloc[train_dataset.indices]), batch_size=32, shuffle=True)
# val_loader = DataLoader(UserItemDataset(ratings.iloc[val_dataset.indices]), batch_size=32, shuffle= True)
# test_loader = DataLoader(UserItemDataset(ratings.iloc[test_dataset.indices]), batch_size=32, shuffle=False)

In [10]:
class MFModel(nn.Module):
    def __init__(self, num_users, num_items, embedding_size):
        super(MFModel, self).__init__()
        # Create embedding layers for users and items
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)

    def forward(self, user_ids, item_ids):
        # Get user and item embeddings
        user_embedding = self.user_embedding(user_ids)
        item_embedding = self.item_embedding(item_ids)
        # Compute the dot product between user and item embeddings
        dot_product = (user_embedding * item_embedding).sum(dim=1)
        return dot_product

# Initialize the model with number of users, items, and the embedding size
num_users = len(users)
num_items = len(items)
embedding_size = 50  # This is a tunable hyperparameter

In [11]:
mf_model1 = MFModel(num_users, num_items, embedding_size).to(device)
optimizer = optim.Adam(mf_model1.parameters(), lr=0.001)  # Adam optimizer
loss_fn = nn.MSELoss()  # Loss function (Mean Squared Error)

In [89]:
def train_mf_model(model, train_loader, optimizer, criterion, num_epochs=10):
    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in train_loader:
            user_ids = batch['user_idx'].to(device)
            item_ids = batch['item_idx'].to(device)
            ratings = batch['rating'].to(device)

            optimizer.zero_grad()  # Zero the gradients
            preds = model(user_ids, item_ids)  # Forward pass
            loss = criterion(preds, ratings)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Gradient descent step
            total_loss += loss.item()  # Accumulate loss

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}')

In [86]:
train_mf_model(mf_model1, train_loader, optimizer, loss_fn)

KeyboardInterrupt: 

In [14]:
with torch.no_grad():
    user_embeddings = mf_model1.user_embedding.weight.cpu().numpy()
    item_embeddings = mf_model1.item_embedding.weight.cpu().numpy()

In [15]:
def create_graph_data(ratings,num_users,user_embeddings,item_embeddings):
    user_item_edges = ratings[['user_idx', 'item_idx']].values.T  # Create edges between user-item pairs

    user_item_edges[1] += num_users

    # Create edge index (format required by torch_geometric)
    edge_index = torch.tensor(user_item_edges, dtype=torch.long)

    # Concatenate user and item embeddings to form node features
    node_features = torch.cat([torch.tensor(user_embeddings, dtype=torch.float), torch.tensor(item_embeddings, dtype=torch.float)], dim=0)

    print(node_features.shape)
    print(user_item_edges.shape)
    print(user_item_edges)

    # Create the PyTorch Geometric data object (x: node features, edge_index: graph edges)
    train_graph_data = Data(x=node_features, edge_index=edge_index)
    return train_graph_data

In [16]:
train_graph_data = create_graph_data(ratings,num_users,user_embeddings,item_embeddings)

torch.Size([10334, 50])
(2, 100836)
[[   0    0    0 ...  609  609  609]
 [ 610  611  612 ... 3731 2002 3483]]


In [17]:
class GCNModel(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GCNModel, self).__init__()
        # First graph convolutional layer
        self.conv1 = GCNConv(in_channels, hidden_channels)
        # Second graph convolutional layer
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, data):
        # Forward pass through the first graph convolutional layer
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = torch.relu(x)  # Apply ReLU non-linearity
        # Forward pass through the second graph convolutional layer
        x = self.conv2(x, edge_index)
        return x

In [18]:
model1 = GCNModel(in_channels=embedding_size, hidden_channels=64, out_channels=32).to(device)

In [19]:
gcn_optimizer = optim.Adam(model1.parameters(), lr=0.01)
gcn_loss_fn = nn.MSELoss()

In [20]:
def train_gcn_model(model, train_graph, optimizer, criterion, interaction_matrix,num_epochs=30):
    model.train()  # Set model to training mode
    user_embed=[]
    item_embed=[]
    for epoch in range(num_epochs):
        optimizer.zero_grad()  # Zero the gradients
        output = model(train_graph)  # Forward pass through the GCN


        #print('output dimension',output.shape)
        # Assuming user_idx and item_idx are indices of user-item pairs
        user_indices = ratings['user_idx'].unique()  # Indices for users
        item_indices = ratings['item_idx'].unique()  # Indices for items


        #print('user indices dimension check',user_indices.shape)
        #print('item indices dimension check',item_indices.shape)
        # Get embeddings for the relevant user-item pairs
        user_embeddings = output[user_indices]  # Shape: (N, embedding_size)
        item_embeddings = output[item_indices + num_users]  # Shift by num_users for items

        # Compute predicted ratings
        predicted_ratings = torch.matmul(user_embeddings, item_embeddings.T) # Dot product

        # Get target ratings from interaction matrix
        interaction_tensor = torch.tensor(interaction_matrix.values, dtype=torch.float32)
        target= interaction_tensor
        #target = interaction_tensor[user_indices, item_indices].view(-1)  # Flatten to match

        # Compute loss
        loss = criterion(predicted_ratings, target)
        loss.backward()  # Backpropagation
        optimizer.step()  # Gradient descent step

        user_embed= user_embeddings
        item_embed= item_embeddings

        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}')

    return user_embed, item_embed

In [21]:
user_embeddings, item_embeddings= train_gcn_model(model1, train_graph_data, gcn_optimizer, gcn_loss_fn, interaction_matrix)

Epoch 1/30, Loss: 74.7393
Epoch 2/30, Loss: 5.2746
Epoch 3/30, Loss: 4.8273
Epoch 4/30, Loss: 9.4986
Epoch 5/30, Loss: 10.6388
Epoch 6/30, Loss: 8.3612
Epoch 7/30, Loss: 5.8143
Epoch 8/30, Loss: 3.8968
Epoch 9/30, Loss: 2.6151
Epoch 10/30, Loss: 1.8156
Epoch 11/30, Loss: 1.3357
Epoch 12/30, Loss: 1.0451
Epoch 13/30, Loss: 0.8618
Epoch 14/30, Loss: 0.7418
Epoch 15/30, Loss: 0.6582
Epoch 16/30, Loss: 0.5974
Epoch 17/30, Loss: 0.5513
Epoch 18/30, Loss: 0.5150
Epoch 19/30, Loss: 0.4859
Epoch 20/30, Loss: 0.4620
Epoch 21/30, Loss: 0.4421
Epoch 22/30, Loss: 0.4251
Epoch 23/30, Loss: 0.4102
Epoch 24/30, Loss: 0.3970
Epoch 25/30, Loss: 0.3852
Epoch 26/30, Loss: 0.3745
Epoch 27/30, Loss: 0.3647
Epoch 28/30, Loss: 0.3558
Epoch 29/30, Loss: 0.3476
Epoch 30/30, Loss: 0.3402


In [22]:
print(user_embeddings)
print(user_embeddings.shape)

tensor([[ 0.2572,  0.0737, -0.0401,  ..., -0.1844, -0.3863,  0.1713],
        [ 1.1857,  0.6170, -0.8158,  ..., -1.2879, -0.5507, -0.7424],
        [-0.7403, -0.1380, -0.5928,  ..., -0.9412, -0.9063, -0.3348],
        ...,
        [-0.0207,  0.0797, -0.0270,  ...,  0.0047,  0.0094,  0.0215],
        [ 0.9587, -0.1195, -0.7350,  ...,  0.1593,  0.0339,  0.7046],
        [-0.0322,  0.0967, -0.0094,  ...,  0.0062,  0.0124,  0.0148]],
       grad_fn=<IndexBackward0>)
torch.Size([610, 32])


In [23]:
print(item_embeddings)
print(item_embeddings.shape)
old_item_embeddings= item_embeddings

tensor([[ 0.1911,  1.2820, -0.0076,  ...,  0.2630,  0.0987, -0.3907],
        [ 0.1513,  0.5433,  0.0576,  ..., -0.1745,  0.1631, -0.2930],
        [-0.1279,  0.8484,  0.0539,  ...,  0.4865,  0.1126, -0.7557],
        ...,
        [-0.0319,  0.2110, -0.0112,  ...,  0.0293,  0.0264,  0.0817],
        [ 0.0226,  0.0739, -0.0111,  ...,  0.0253,  0.1290,  0.1384],
        [-0.0733,  0.2177, -0.0644,  ...,  0.0327,  0.0119,  0.0159]],
       grad_fn=<IndexBackward0>)
torch.Size([9724, 32])


In [24]:
def evaluate_gcn_model(model, user_embeddings, item_embeddings, interaction_matrix):
    model.eval()  # Set model to evaluation mode
    with torch.no_grad():

        predicted_ratings = torch.matmul(user_embeddings, item_embeddings.T)
        interaction_tensor = torch.tensor(interaction_matrix.values, dtype=torch.float32)
        target= interaction_tensor

        rmse = np.sqrt(mean_squared_error(target, predicted_ratings))  # Compute RMSE
        print(f'RMSE: {rmse:.4f}')

In [25]:
def predict_new_user_rating(item_embeddings, masked_array, num_users=num_users):
    item_embeddings = item_embeddings.detach().numpy()
    masked_array = np.array(masked_array, dtype=np.float32)

    masked_array = masked_array.reshape(-1, 1)  # Shape: (num_items, 1)
    weighted_sum = np.sum(item_embeddings * masked_array, axis=0)
    sum_of_weights = np.sum(masked_array)
    new_user_embedding = weighted_sum / sum_of_weights
    predicted_ratings = np.dot(item_embeddings, new_user_embedding)

    return predicted_ratings

In [47]:
def prediction_test(num_users, interaction_array, item_embeddings):
    metric=0
    for i in range(len(interaction_array) - num_users, len(interaction_array)):
        normal_test = interaction_array[i]
        non_zero_indices = np.nonzero(normal_test)[0]
        num_values_to_keep = len(non_zero_indices) // 2
        selected_indices = np.random.choice(non_zero_indices, size=num_values_to_keep, replace=False)
        masked_array = np.zeros_like(normal_test)
        masked_array[selected_indices] = normal_test[selected_indices]
        prediction = predict_new_user_rating(item_embeddings, masked_array)
        prediction= np.clip(prediction, 0, 5)
        rmse = np.sqrt(np.mean((prediction - normal_test) ** 2))
        metric+= rmse
    return metric/num_users

In [27]:
evaluate_gcn_model(model1, user_embeddings, item_embeddings,  interaction_matrix)

RMSE: 0.5832


RECNORMAL

In [64]:
print(prediction_test(50, interaction_array, old_item_embeddings))

1.1436645739520044


# PHASE 2

In [100]:
# Encoder: Maps interaction matrix to latent space
class Encoder(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(Encoder, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, latent_dim),
        )

    def forward(self, x):
        return self.model(x)

In [101]:
# Generator: Generates perturbed interactions
class Generator(nn.Module):
    def __init__(self, latent_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(),
            nn.Linear(128, output_dim),
            nn.Tanh(),  # Output perturbations in range [-1, 1]
        )

    def forward(self, z):
        return self.model(z)

In [102]:
def train_encoder_generator(encoder, generator, interaction_matrix, num_epochs=20, batch_size=61, lr=0.001, lambda_reg=0.1):
    e_optimizer = torch.optim.Adam(encoder.parameters(), lr=lr)
    g_optimizer = torch.optim.Adam(generator.parameters(), lr=lr)
    mse_loss = nn.MSELoss()

    for epoch in range(num_epochs):
        for i in range(0, len(interaction_matrix), batch_size):
            # Get batch of data
            real_data = interaction_matrix[i:i + batch_size]
            batch_size = real_data.size(0)

            # Encode real data to latent space
            latent_real = encoder(real_data)

            # Generate perturbations
            perturbations = generator(latent_real)

            # Create perturbed matrix
            perturbed_data = real_data + perturbations
            perturbed_data = torch.clamp(perturbed_data, 0, 5)  # Clip to valid range [0, 5]

            # Loss: Reconstruction + Regularization
            recon_loss = mse_loss(perturbed_data, real_data)
            reg_loss = lambda_reg * torch.norm(perturbations, p=2)
            loss = recon_loss + reg_loss

            # Backward and optimization
            e_optimizer.zero_grad()
            g_optimizer.zero_grad()
            loss.backward()
            e_optimizer.step()
            g_optimizer.step()

        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item():.4f}")

    return encoder, generator

In [103]:
interaction_matrix= torch.tensor(interaction_array, dtype=torch.float32)
print(interaction_matrix)
interaction_matrix = interaction_matrix.float()

tensor([[4.0000, 4.0000, 4.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [2.5000, 2.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [3.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [5.0000, 0.0000, 5.0000,  ..., 3.0000, 3.5000, 3.5000]])


In [104]:
num_items = interaction_matrix.size(1)
latent_dim = 8

# Initialize encoder and generator
encoder = Encoder(input_dim=num_items, latent_dim=latent_dim)
generator = Generator(latent_dim=latent_dim, output_dim=num_items)

In [105]:
# Train the encoder and generator
encoder, generator = train_encoder_generator(
    encoder, generator, interaction_matrix, num_epochs=10, batch_size=61, lr=0.01, lambda_reg=1
)


Epoch 1/10, Loss: 29.6222
Epoch 2/10, Loss: 8.8403
Epoch 3/10, Loss: 4.0263
Epoch 4/10, Loss: 2.7743
Epoch 5/10, Loss: 1.3858
Epoch 6/10, Loss: 0.7618
Epoch 7/10, Loss: 0.5064
Epoch 8/10, Loss: 0.4296
Epoch 9/10, Loss: 0.3393
Epoch 10/10, Loss: 0.4449


In [106]:
# Generate perturbed interactions
with torch.no_grad():
    latent_real = encoder(interaction_matrix)
    perturbations = generator(latent_real)
    perturbed_matrix = torch.clamp(interaction_matrix + perturbations*10, 0, 5)

print("Original Interaction Matrix:")
print(interaction_matrix)

print("Perturbed Interaction Matrix:")
print(perturbed_matrix)

Original Interaction Matrix:
tensor([[4.0000, 4.0000, 4.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [2.5000, 2.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [3.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [5.0000, 0.0000, 5.0000,  ..., 3.0000, 3.5000, 3.5000]])
Perturbed Interaction Matrix:
tensor([[4.0027e+00, 4.0025e+00, 3.9960e+00,  ..., 5.5158e-03, 1.5042e-02,
         0.0000e+00],
        [2.6638e-03, 2.4732e-03, 0.0000e+00,  ..., 5.5158e-03, 1.5042e-02,
         0.0000e+00],
        [1.3525e-02, 9.0535e-03, 0.0000e+00,  ..., 1.1342e-02, 1.2437e-02,
         1.3604e-03],
        ...,
        [2.5027e+00, 2.0025e+00, 0.0000e+00,  ..., 5.5158e-03, 1.5042e-02,
         0.0000e+00],
        [3.0027e+00, 2.4732e-03, 0.0000e+00,  ..., 5.5158e-03, 1.5042e-02,
         0.0000e+00],
        [5.0000e+00, 2.4732e-03, 4.9960e+00, 

In [107]:
interaction_array_manipulated= np.array(perturbed_matrix)

In [75]:
user_indices, item_indices = np.nonzero(interaction_array_manipulated)  # Get indices of non-zero elements

# Retrieve the corresponding ratings from interaction_array
ratings = interaction_array_manipulated[user_indices, item_indices]

# Create a DataFrame similar to the original ratings DataFrame
reconstructed_ratings = pd.DataFrame({
    'user_idx': user_indices,
    'item_idx': item_indices,
    'rating': ratings
})

# Print to verify
print(reconstructed_ratings)

         user_idx  item_idx    rating
0               0         0  3.995596
1               0         1  3.983047
2               0         2  3.991979
3               0         3  4.974822
4               0         4  5.000000
...           ...       ...       ...
3052220       609      9719  2.527466
3052221       609      9720  4.521444
3052222       609      9721  3.014028
3052223       609      9722  3.470690
3052224       609      9723  3.474229

[3052225 rows x 3 columns]


In [108]:
ratings = reconstructed_ratings

In [109]:
train_size = int(len(ratings))
val_size = int(0 * len(ratings))
train_dataset, val_dataset = torch.utils.data.random_split(ratings, [train_size, val_size])

In [110]:
train_loader = DataLoader(UserItemDataset(ratings.iloc[train_dataset.indices]), batch_size=32, shuffle=True)

In [111]:
users = ratings['user_idx'].unique()
items = ratings['item_idx'].unique()

# Create mappings from user/item IDs to indices (used for embedding)
user_to_idx = {user: idx for idx, user in enumerate(users)}
item_to_idx = {item: idx for idx, item in enumerate(items)}

# Convert user and item IDs in ratings to indices
ratings['user_idx'] = ratings['user_idx'].apply(lambda x: user_to_idx[x])
ratings['item_idx'] = ratings['item_idx'].apply(lambda x: item_to_idx[x])

In [112]:
num_users = len(users)
num_items = len(items)
embedding_size = 50  # This is a tunable hyperparameter

In [113]:
mf_model2 = MFModel(num_users, num_items, embedding_size).to(device)
optimizer = optim.Adam(mf_model2.parameters(), lr=0.001)  # Adam optimizer
loss_fn = nn.MSELoss()  # Loss function (Mean Squared Error)

In [114]:
print(num_users, num_items, mf_model2)

610 9724 MFModel(
  (user_embedding): Embedding(610, 50)
  (item_embedding): Embedding(9724, 50)
)


In [115]:
train_mf_model(mf_model2, train_loader, optimizer, loss_fn)

Epoch 1/10, Loss: 6.0610
Epoch 2/10, Loss: 0.2714


KeyboardInterrupt: 

In [116]:
with torch.no_grad():
    user_embeddings = mf_model2.user_embedding.weight.cpu().numpy()
    item_embeddings = mf_model2.item_embedding.weight.cpu().numpy()

In [117]:
print(user_embeddings.shape)
print(user_embeddings)

(610, 50)
[[-0.16655071 -0.14482775 -0.201875   ...  0.16497223  0.3168738
   0.12480289]
 [-0.01345349 -0.15575585 -0.10927371 ...  0.12519373  0.05076575
  -0.01967353]
 [-0.0124421  -0.01666914  0.00300997 ...  0.03072039  0.00310987
  -0.00123911]
 ...
 [-0.129569    0.19761652 -0.24201405 ...  0.3385542   0.3853156
  -0.14142798]
 [ 0.07561697 -0.15028955 -0.08454427 ...  0.08043491  0.02444841
   0.16111022]
 [-0.49085128  0.34762907 -0.22968818 ...  0.42147285  0.3863847
  -0.16555476]]


In [118]:
train_graph_data = create_graph_data(ratings,num_users,user_embeddings,item_embeddings)

torch.Size([10334, 50])
(2, 3052225)
[[    0     0     0 ...   609   609   609]
 [  610   611   612 ...  5639 10332 10333]]


In [119]:
model2 = GCNModel(in_channels=embedding_size, hidden_channels=64, out_channels=32).to(device)
gcn_optimizer = optim.Adam(model2.parameters(), lr=0.01)
gcn_loss_fn = nn.MSELoss()

In [120]:
interaction_matrix_manipulated = ratings.pivot(index='user_idx', columns='item_idx', values='rating').fillna(0)

In [121]:
user_embeddings, item_embeddings= train_gcn_model(model2, train_graph_data, gcn_optimizer, gcn_loss_fn, interaction_matrix_manipulated)

Epoch 1/30, Loss: 10.6492
Epoch 2/30, Loss: 0.3536
Epoch 3/30, Loss: 1.5777
Epoch 4/30, Loss: 2.0957
Epoch 5/30, Loss: 1.0939
Epoch 6/30, Loss: 0.5137
Epoch 7/30, Loss: 0.3128
Epoch 8/30, Loss: 0.2505
Epoch 9/30, Loss: 0.2304
Epoch 10/30, Loss: 0.2234
Epoch 11/30, Loss: 0.2208
Epoch 12/30, Loss: 0.2198
Epoch 13/30, Loss: 0.2196
Epoch 14/30, Loss: 0.2196
Epoch 15/30, Loss: 0.2197
Epoch 16/30, Loss: 0.2198
Epoch 17/30, Loss: 0.2199
Epoch 18/30, Loss: 0.2200
Epoch 19/30, Loss: 0.2200
Epoch 20/30, Loss: 0.2200
Epoch 21/30, Loss: 0.2199
Epoch 22/30, Loss: 0.2199
Epoch 23/30, Loss: 0.2197
Epoch 24/30, Loss: 0.2196
Epoch 25/30, Loss: 0.2195
Epoch 26/30, Loss: 0.2193
Epoch 27/30, Loss: 0.2191
Epoch 28/30, Loss: 0.2190
Epoch 29/30, Loss: 0.2188
Epoch 30/30, Loss: 0.2186


In [122]:
evaluate_gcn_model(model2, user_embeddings, item_embeddings,  interaction_matrix_manipulated)

RMSE: 0.4676


RECSECURE

In [123]:
print(prediction_test(50, interaction_array_manipulated, item_embeddings))

0.7436637020111084


RECMANI

In [130]:
print(prediction_test(50, interaction_array_manipulated, old_item_embeddings))

1.125307434797287
