In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F

from tqdm import tqdm_notebook

# Model Structure

In [None]:
class MF(nn.Module):
    """Matrix-factorization branch: element-wise product of user and item embeddings.

    Args:
        dim: Size of every embedding vector.
        num_users: User count; the user table is allocated with ``num_users + 1`` rows.
        num_items: Item count; the item table is allocated with ``num_items + 1`` rows.
    """

    def __init__(self, dim, num_users, num_items):
        super().__init__()
        # One extra row per table, so indices 0..num_users / 0..num_items are valid lookups.
        self.user_embeddings = nn.Embedding(num_embeddings=num_users + 1, embedding_dim=dim)
        self.item_embeddings = nn.Embedding(num_embeddings=num_items + 1, embedding_dim=dim)
        # NOTE(review): `linear` and `sigmoid` are constructed here but forward() never
        # calls them; they still contribute parameters to the module.
        self.linear = nn.Linear(in_features=dim, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user_id, item_id):
        """Look up both embeddings and multiply them element-wise.

        Args:
            user_id: Integer tensor of user indices.
            item_id: Integer tensor of item indices.

        Returns:
            The product of the two embedding tensors; for same-shaped inputs this has
            the input shape with a trailing ``dim`` axis appended.
        """
        user_vec = self.user_embeddings(user_id)
        item_vec = self.item_embeddings(item_id)
        return torch.mul(user_vec, item_vec)
    
class MLP(nn.Module):
    """MLP branch: concatenated user/item embeddings passed through Linear+ReLU layers.

    Args:
        dim: Size of each embedding vector (the first layer therefore sees ``2 * dim``).
        num_users: User count; the user table is allocated with ``num_users + 1`` rows.
        num_items: Item count; the item table is allocated with ``num_items + 1`` rows.
        layer_sizes: Output width of each successive Linear layer. May be empty, in
            which case forward() returns the raw concatenated embeddings.
    """

    def __init__(self, dim, num_users, num_items, layer_sizes):
        super(MLP, self).__init__()
        self.user_embeddings = nn.Embedding(num_users + 1, dim)
        self.item_embeddings = nn.Embedding(num_items + 1, dim)

        layers = []
        prev_size = dim * 2
        for size in layer_sizes:
            layers.append(nn.Linear(prev_size, size))
            prev_size = size
        # nn.ModuleList instead of a plain list: a plain list hides the layers from
        # parameters(), state_dict() and .to(device), so they would never be trained.
        self.linears = nn.ModuleList(layers)

        self.relu = nn.ReLU()

    def forward(self, user_id, item_id):
        """Run the MLP on the concatenated user/item embeddings.

        Args:
            user_id: Integer tensor of user indices.
            item_id: Integer tensor of item indices, same shape as ``user_id``.

        Returns:
            Tensor whose last axis has size ``layer_sizes[-1]`` (or ``2 * dim`` when
            ``layer_sizes`` is empty); leading axes follow the input id tensors.
        """
        user_embedding = self.user_embeddings(user_id)
        item_embedding = self.item_embeddings(item_id)

        # Concatenate on the last (embedding) axis so id tensors of any rank work;
        # for 2-D ids this is the same axis the previous hard-coded `2` selected.
        hidden = torch.cat((user_embedding, item_embedding), dim=-1)
        for linear in self.linears:
            hidden = self.relu(linear(hidden))
        return hidden
    
class NCF(nn.Module):
    """Neural collaborative filtering head over an MF branch and an MLP branch.

    The two branch outputs are concatenated on the last axis, projected to a single
    value by a Linear layer and squashed with a sigmoid.

    Args:
        num_users: User count, forwarded to both branches.
        num_items: Item count, forwarded to both branches.
        MF_dim: Embedding size of the MF branch.
        MLP_dim: Embedding size of the MLP branch.
        MLP_layers: Hidden layer widths of the MLP branch (may be empty).
    """

    def __init__(self, num_users, num_items, MF_dim, MLP_dim, MLP_layers):
        super(NCF, self).__init__()
        self.mf = MF(MF_dim, num_users, num_items)
        self.mlp = MLP(MLP_dim, num_users, num_items, MLP_layers)

        # With no hidden layers the MLP branch returns its concatenated embeddings
        # (width 2 * MLP_dim), so indexing MLP_layers[-1] unconditionally would raise.
        mlp_out_dim = MLP_layers[-1] if len(MLP_layers) > 0 else 2 * MLP_dim
        self.linear = nn.Linear(MF_dim + mlp_out_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user_id, item_id):
        """Score (user, item) pairs.

        Args:
            user_id: Integer tensor of user indices.
            item_id: Integer tensor of item indices, same shape as ``user_id``.

        Returns:
            Tensor of sigmoid outputs in (0, 1) with a trailing axis of size 1.
        """
        MF_features = self.mf(user_id, item_id)
        MLP_features = self.mlp(user_id, item_id)

        # Join on the last (feature) axis; equals the former hard-coded axis 2 for
        # 2-D id tensors.
        input_features = torch.cat((MF_features, MLP_features), dim=-1)
        return self.sigmoid(self.linear(input_features))

# Data

In [None]:
# Read the MovieLens-1M ratings file. Fields are separated by "::" and the column
# names are supplied here; a multi-character separator needs the python engine.
movie_1m_ratings = pd.read_csv(
    "Data/ml-1m/ratings.dat",
    sep="::",
    names=['user_id', 'item_id', 'rating', 'timestamp'],
    engine="python",
)
# Counts of distinct ids.
# NOTE(review): if these counts size embedding tables indexed by raw ids, the ids
# must not exceed the counts — verify the id range against the data.
num_users = len(movie_1m_ratings["user_id"].unique())
num_movies = len(movie_1m_ratings["item_id"].unique())

In [None]:
# Sparsity: share of (user, movie) pairs that carry no rating.
sparsity = 1.0 - movie_1m_ratings.shape[0] / (num_users * num_movies)
print(
    "number of users: {}, number of movies: {}, sparsity: {}".format(
        num_users, num_movies, sparsity
    )
)
# Show the first five rows as the cell output.
movie_1m_ratings.head()

In [None]:
# Users with more than 20 ratings.
# The ids are taken from the group-by index instead of being rebuilt as
# "position + 1", which is only correct when user ids are exactly 1..N with no gaps.
ratings_per_user = movie_1m_ratings.groupby("user_id").size()
valid_user = ratings_per_user[ratings_per_user > 20].index.tolist()

In [None]:
# Keep only the ratings whose user_id appears in `valid_user`.
valid_ratings = movie_1m_ratings.loc[movie_1m_ratings["user_id"].isin(valid_user)]
valid_ratings

In [None]:
# Preview a handful of rows. The former bare `X, y` expression raised NameError on a
# fresh kernel (neither name is defined in an earlier cell), and printing every row
# of the full ratings frame flooded the output.
# TODO: build the feature/label arrays (X, y) from `valid_ratings` once their layout
# is decided.
# `rating` stays bound to the last previewed (index, row) pair for later inspection.
for rating in valid_ratings.head(5).iterrows():
    print(rating[1])

In [None]:
# `rating` is the loop variable left over from the preceding iterrows() loop:
# an (index, row) pair. Show that row's user_id.
rating[1].loc["user_id"]