In [1]:
# import required modules
import random
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import model_selection, metrics, preprocessing
import copy

import torch
from torch import nn, optim, Tensor

from torch_sparse import SparseTensor, matmul

from collections import defaultdict

from torch_geometric.utils import structured_negative_sampling
from torch_geometric.data import download_url, extract_zip
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.typing import Adj
import torch.nn.functional as F

In [6]:
from torch.utils.data import Dataset, DataLoader
    # DATA: wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
    # NOTE: The whole data pipeline can be automated (e.g., use requests)
# Load the MovieLens ratings; the timestamp column is not used downstream.
df = pd.read_csv("ml-latest-small/ratings.csv").drop(columns="timestamp")

# Min-max normalize ratings to [0, 1].
rating, min_rating, max_rating = df["rating"], df["rating"].min(), df["rating"].max()
df["rating"] = (rating - min_rating) / (max_rating - min_rating)
print(f"rating is from {df['rating'].min()} to {df['rating'].max()}")

# Binarize: do not recommend (label 0) if the normalized rating is less than
# 0.5, recommend (label 1) otherwise.
# NOTE: the earlier `Series.where(cond, 0)` / `Series.where(~cond, 1)` pair
# produced the opposite labels, because `where` KEEPS values where the
# condition is True and replaces the others. It also mutated a column
# selection in place, which does not write back to `df` under pandas
# copy-on-write. Direct assignment avoids both problems.
cond = df["rating"] < 0.5
df["rating"] = (~cond).astype(float)

# Re-index movie ids to a contiguous 0..n_movies-1 range (order of first appearance).
enc_movie = {movie_id: idx for idx, movie_id in enumerate(df["movieId"].unique())}
df["movieId"] = df["movieId"].map(enc_movie)
print(f"movieId is from {df['movieId'].min()} to {df['movieId'].max()}")

# Re-index user ids to a contiguous 0..n_users-1 range (order of first appearance).
enc_user = {user_id: idx for idx, user_id in enumerate(df["userId"].unique())}
df["userId"] = df["userId"].map(enc_user)
print(f"userId is from {df['userId'].min()} to {df['userId'].max()}")

# PyTorch dataset
class MovieLensSmall(Dataset):
    """Map-style dataset exposing the rows of a ratings DataFrame.

    Args:
        df: DataFrame whose rows are the samples. Each item is the row's
            values in column order.
    """

    def __init__(self, df):
        self.df = df

    def __len__(self):
        """Return the number of rows in the wrapped DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the values of row ``idx`` (positional) as a list."""
        # Read from the frame this dataset was constructed with, not from
        # whatever module-level `df` happens to exist.
        return list(self.df.iloc[idx])

# Shuffled mini-batches of 4 rows from the ratings frame, using 8 loader workers.
train_dataloader = DataLoader(
    dataset=MovieLensSmall(df),
    batch_size=4,
    shuffle=True,
    num_workers=8,
)

rating is from 0.0 to 1.0
movieId is from 0 to 9723
userId is from 0 to 609


In [None]:
class MFModel(nn.Module):
    """Matrix-factorization scorer: dot product of user and item embeddings.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Size of each embedding vector.
    """

    def __init__(self, num_users: int, num_items: int, embedding_dim: int):
        # nn.Module must be initialized before submodules are assigned;
        # without this call the assignments below raise AttributeError.
        super().__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.item_embedding = nn.Embedding(num_items, embedding_dim)
        # Small non-negative initial weights drawn uniformly from [0, 0.05).
        nn.init.uniform_(self.user_embedding.weight, 0, 0.05)
        nn.init.uniform_(self.item_embedding.weight, 0, 0.05)

    def forward(self, user_indices, item_indices):
        """Score each (user, item) pair as the dot product of their embeddings.

        Args:
            user_indices: Integer tensor of user ids.
            item_indices: Integer tensor of item ids, same shape as
                ``user_indices``.

        Returns:
            Tensor of raw scores with the same shape as the index tensors.
        """
        user_embeddings = self.user_embedding(user_indices)
        item_embeddings = self.item_embedding(item_indices)
        # Reduce over the embedding axis (always the last one); for a 1-D
        # batch of indices this is identical to summing over dim=1.
        return (user_embeddings * item_embeddings).sum(dim=-1)
        
        
        
    
