In [1]:
import pandas as pd
import torch
import torch.nn as nn
from sklearn import model_selection
import numpy as np
torch.manual_seed(42)

<torch._C.Generator at 0x18e63534a90>

In [2]:
data = pd.read_csv("ml-100k/u.data",sep="\t", header=None)
data.columns = ['user id', 'movie id', 'rating', 'timestamp']
data['movie id'].unique

<bound method Series.unique of 0         242
1         302
2         377
3          51
4         346
         ... 
99995     476
99996     204
99997    1090
99998     225
99999     203
Name: movie id, Length: 100000, dtype: int64>

In [3]:
movies = pd.read_csv("ml-100k/u.item",
                    sep="|", encoding='latin-1', header=None)
movies.columns = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL',
                 'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
                 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
                 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies.head()

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [4]:
map_id_movie = {}
for id,row in movies.iterrows():
  map_id_movie[row['movie id']] = row['movie title']
map_id_movie[1398]

'Anna (1996)'

In [5]:
num_users = data['user id'].nunique()
num_movies = data['movie id'].nunique()
print(
    (f"Number of users: {num_users}\n"
    f"Number of movies: {num_movies}")
)

Number of users: 943
Number of movies: 1682


In [6]:
class NonLinearModel(nn.Module):
    def __init__(self, user_embedding_size, movie_embedding_size, hidden_size):
        super(NonLinearModel, self).__init__()

        self.user_embedding_layer = nn.Linear(user_embedding_size, hidden_size)
        self.movie_embedding_layer = nn.Linear(movie_embedding_size, hidden_size)
        self.activation = nn.ReLU()
        self.output_layer = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, user_embedding, movie_embedding):
        user_hidden = self.activation(self.user_embedding_layer(user_embedding))
        movie_hidden = self.activation(self.movie_embedding_layer(movie_embedding))

        # Element-wise multiplication
        elementwise_mult = torch.mul(user_hidden, movie_hidden)

        # Feed through the output layer
        output = self.output_layer(elementwise_mult)

        # Apply sigmoid activation for binary classification
        output = self.sigmoid(output)

        return output*5

In [7]:
data_train,data_test = model_selection.train_test_split(data, test_size = 0.1,random_state = 42,stratify = data['rating'])
data_test

Unnamed: 0,user id,movie id,rating,timestamp
24188,299,88,3,889502902
14023,347,462,2,881654359
20170,96,185,5,884403866
87853,880,302,5,880166451
8174,177,289,2,880130534
...,...,...,...,...
18733,43,215,5,883955467
83494,860,516,3,885991040
36379,313,484,5,891016193
17294,112,286,4,884992484


In [8]:
class MF(nn.Module):
  def __init__(self,users,movie_ids,user_embed_size,movie_embed_size):
    super(MF,self).__init__()
    self.user_embed = nn.Embedding(users,user_embed_size)
    self.user_bias = nn.Embedding(users, 1)
    self.movie_embed = nn.Embedding(movie_ids,movie_embed_size)
    self.movie_bias = nn.Embedding(movie_ids,1)
    self.user_embed.weight.data.uniform_(0,0.05)
    self.movie_embed.weight.data.uniform_(0,0.05)
    self.user_bias.weight.data.uniform_(-0.01,0.01)
    self.movie_bias.weight.data.uniform_(-0.01,0.01)
    self.nonlinear = NonLinearModel(user_embed_size,movie_embed_size,64)


  def forward(self,u,v):
    U = self.user_embed(u)
    V = self.movie_embed(v)
    return self.nonlinear(U,V)


In [9]:
def train_epochs(model,lr,epochs,Nueralnet = False):
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)
  loss_fn = nn.MSELoss()
  for t in range(epochs):
    model.train()
    users = torch.LongTensor(data_train['user id'].to_numpy()) # .cuda()
    items = torch.LongTensor(data_train['movie id'].to_numpy()) #.cuda()
    if Nueralnet:
        ratings = torch.FloatTensor(data_train['rating'].to_numpy())/5
        ratings = ratings.unsqueeze(1)
    else:
        ratings = torch.FloatTensor(data_train['rating'].to_numpy()) #.cuda()
    y_pred = model(users,items)
    loss_train = loss_fn(y_pred.squeeze(),ratings)

    # Backpropagation
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        test_users = torch.LongTensor(data_test['user id'].to_numpy())
        test_items = torch.LongTensor(data_test['movie id'].to_numpy())
        if Nueralnet:
            test_ratings = torch.FloatTensor(data_test['rating'].to_numpy())/5
            test_ratings=test_ratings.unsqueeze(1)
        else:
            test_ratings = torch.FloatTensor(data_test['rating'].to_numpy())
        pred_test = model(test_users,test_items)
        loss_test = loss_fn(pred_test.squeeze(), test_ratings)

    print(f"Epoch {t+1} -- Train loss: {loss_train:>7f} Test loss: {loss_test:>7f}")


In [10]:
model = MF(num_users+1, num_movies+1,user_embed_size=200,movie_embed_size=200)
train_epochs(model,0.04,60)
print("Model trained")


Epoch 1 -- Train loss: 2.559910 Test loss: 2.201903
Epoch 2 -- Train loss: 2.201351 Test loss: 1.191207
Epoch 3 -- Train loss: 1.155521 Test loss: 1.234888
Epoch 4 -- Train loss: 1.167054 Test loss: 1.270009
Epoch 5 -- Train loss: 1.228638 Test loss: 1.123796
Epoch 6 -- Train loss: 1.070476 Test loss: 1.052099
Epoch 7 -- Train loss: 0.979383 Test loss: 1.137652
Epoch 8 -- Train loss: 1.052339 Test loss: 1.083558
Epoch 9 -- Train loss: 0.996821 Test loss: 0.970891
Epoch 10 -- Train loss: 0.886993 Test loss: 0.978370
Epoch 11 -- Train loss: 0.912830 Test loss: 1.011412
Epoch 12 -- Train loss: 0.948133 Test loss: 0.963189
Epoch 13 -- Train loss: 0.896982 Test loss: 0.923383
Epoch 14 -- Train loss: 0.837811 Test loss: 0.958558
Epoch 15 -- Train loss: 0.854956 Test loss: 0.968678
Epoch 16 -- Train loss: 0.861958 Test loss: 0.927036
Epoch 17 -- Train loss: 0.823618 Test loss: 0.904143
Epoch 18 -- Train loss: 0.810499 Test loss: 0.905083
Epoch 19 -- Train loss: 0.807296 Test loss: 0.907601
Ep

In [11]:
user_id = input("Enter the user_id for movie recommendations: ")
movie_list = data[data['user id'] == int(user_id)]['movie id'].to_list()
unseen_list = [int(x) for x in range(1,num_movies+1) if x not in movie_list]
unseen_list = np.array(unseen_list)
temp_x = torch.LongTensor(np.full(np.size(unseen_list),int(user_id)))
unseen_mov = torch.LongTensor(unseen_list)


In [12]:
model.eval()
predictions_MF = model(temp_x,unseen_mov)
print(predictions_MF)
with torch.no_grad():
  sorted_values, sorted_indices = torch.sort(predictions_MF, dim=0, descending=True)
  ordered_movies = unseen_mov[sorted_indices]
  ordered_ratings = predictions_MF.squeeze()[sorted_indices]
  print(ordered_ratings)
  i=0
  print("-------------Recommended movies by MF--------------")
  for xyz in ordered_movies[:10]:
    print(map_id_movie[xyz.item()],str(ordered_ratings[i].item())[:4])
    i=i+1

tensor([[4.9070],
        [4.6206],
        [4.5668],
        ...,
        [2.9324],
        [4.1046],
        [3.5053]], grad_fn=<MulBackward0>)
tensor([[5.0000],
        [5.0000],
        [5.0000],
        ...,
        [0.1975],
        [0.1090],
        [0.0958]])
-------------Recommended movies by MF--------------
Santa with Muscles (1996) 4.99
Perfect Candidate, A (1996) 4.99
Some Mother's Son (1996) 4.99
Lamerica (1994) 4.99
Sliding Doors (1998) 4.99
Butcher Boy, The (1998) 4.99
Butcher Boy, The (1998) 4.99
Brothers in Trouble (1995) 4.99
Spanish Prisoner, The (1997) 4.99
Bitter Sugar (Azucar Amargo) (1996) 4.99
