In [1]:
import pandas as pd
import torch
import torch.nn as nn
from sklearn import model_selection
import numpy as np
torch.manual_seed(42)

<torch._C.Generator at 0x1aa2fb94a90>

In [2]:
data = pd.read_csv("ml-100k/u.data",sep="\t", header=None)
data.columns = ['user id', 'movie id', 'rating', 'timestamp']
data['movie id'].unique

<bound method Series.unique of 0         242
1         302
2         377
3          51
4         346
         ... 
99995     476
99996     204
99997    1090
99998     225
99999     203
Name: movie id, Length: 100000, dtype: int64>

In [3]:
movies = pd.read_csv("ml-100k/u.item",
                    sep="|", encoding='latin-1', header=None)
movies.columns = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL',
                 'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
                 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
                 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies.head()

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [4]:
map_id_movie = {}
for id,row in movies.iterrows():
  map_id_movie[row['movie id']] = row['movie title']
map_id_movie[1398]

'Anna (1996)'

In [5]:
num_users = data['user id'].nunique()
num_movies = data['movie id'].nunique()
print(
    (f"Number of users: {num_users}\n"
    f"Number of movies: {num_movies}")
)

Number of users: 943
Number of movies: 1682


In [6]:
data_train,data_test = model_selection.train_test_split(data, test_size = 0.1,random_state = 42,stratify = data['rating'])
data_test

Unnamed: 0,user id,movie id,rating,timestamp
24188,299,88,3,889502902
14023,347,462,2,881654359
20170,96,185,5,884403866
87853,880,302,5,880166451
8174,177,289,2,880130534
...,...,...,...,...
18733,43,215,5,883955467
83494,860,516,3,885991040
36379,313,484,5,891016193
17294,112,286,4,884992484


In [7]:
class MF(nn.Module):
  def __init__(self,users,movie_ids,embed_size):
    super(MF,self).__init__()
    self.user_embed = nn.Embedding(users,embed_size)
    self.user_bias = nn.Embedding(users, 1)
    self.movie_embed = nn.Embedding(movie_ids,embed_size)
    self.movie_bias = nn.Embedding(movie_ids,1)
    self.user_embed.weight.data.uniform_(0,0.05)
    self.movie_embed.weight.data.uniform_(0,0.05)
    self.user_bias.weight.data.uniform_(-0.01,0.01)
    self.movie_bias.weight.data.uniform_(-0.01,0.01)


  def forward(self,u,v):
    U = self.user_embed(u)
    V = self.movie_embed(v)
    b_u = self.user_bias(u).squeeze()
    b_v = self.movie_bias(v).squeeze()
    return torch.sum(U * V + U*2 * V*2, dim=1)


In [8]:
def train_epochs(model,lr,epochs,Nueralnet = False):
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)
  loss_fn = nn.MSELoss()
  for t in range(epochs):
    model.train()
    users = torch.LongTensor(data_train['user id'].to_numpy()) # .cuda()
    items = torch.LongTensor(data_train['movie id'].to_numpy()) #.cuda()
    if Nueralnet:
        ratings = torch.FloatTensor(data_train['rating'].to_numpy())/5
        ratings = ratings.unsqueeze(1)
    else:
        ratings = torch.FloatTensor(data_train['rating'].to_numpy()) #.cuda()
    y_pred = model(users,items)
    loss_train = loss_fn(y_pred,ratings)

    # Backpropagation
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        test_users = torch.LongTensor(data_test['user id'].to_numpy())
        test_items = torch.LongTensor(data_test['movie id'].to_numpy())
        if Nueralnet:
            test_ratings = torch.FloatTensor(data_test['rating'].to_numpy())/5
            test_ratings=test_ratings.unsqueeze(1)
        else:
            test_ratings = torch.FloatTensor(data_test['rating'].to_numpy())
        pred_test = model(test_users,test_items)
        loss_test = loss_fn(pred_test, test_ratings)

    print(f"Epoch {t+1} -- Train loss: {loss_train:>7f} Test loss: {loss_test:>7f}")


In [9]:
model = MF(num_users+1, num_movies+1,embed_size=100)
train_epochs(model,0.04,60)
print("Model trained")


Epoch 1 -- Train loss: 11.633005 Test loss: 3.297906
Epoch 2 -- Train loss: 3.297945 Test loss: 4.835927
Epoch 3 -- Train loss: 4.800446 Test loss: 3.018769
Epoch 4 -- Train loss: 2.919910 Test loss: 1.011774
Epoch 5 -- Train loss: 0.917120 Test loss: 2.014729
Epoch 6 -- Train loss: 1.932554 Test loss: 3.024206
Epoch 7 -- Train loss: 2.947268 Test loss: 2.889753
Epoch 8 -- Train loss: 2.807131 Test loss: 1.980415
Epoch 9 -- Train loss: 1.880545 Test loss: 1.258407
Epoch 10 -- Train loss: 1.132760 Test loss: 1.482287
Epoch 11 -- Train loss: 1.336005 Test loss: 1.941214
Epoch 12 -- Train loss: 1.803769 Test loss: 1.628931
Epoch 13 -- Train loss: 1.531134 Test loss: 1.127962
Epoch 14 -- Train loss: 1.070038 Test loss: 1.145743
Epoch 15 -- Train loss: 1.104901 Test loss: 1.450125
Epoch 16 -- Train loss: 1.404414 Test loss: 1.561811
Epoch 17 -- Train loss: 1.499401 Test loss: 1.353370
Epoch 18 -- Train loss: 1.269827 Test loss: 1.066043
Epoch 19 -- Train loss: 0.960823 Test loss: 1.037856
E

In [10]:
user_id = input("Enter the user_id for movie recommendations: ")
movie_list = data[data['user id'] == int(user_id)]['movie id'].to_list()
unseen_list = [int(x) for x in range(1,num_movies+1) if x not in movie_list]
unseen_list = np.array(unseen_list)
temp_x = torch.LongTensor(np.full(np.size(unseen_list),int(user_id)))
unseen_mov = torch.LongTensor(unseen_list)


In [11]:
model.eval()
predictions_MF = model(temp_x,unseen_mov)
sorted_indices = np.argsort(predictions_MF.detach().numpy())[::-1]
ordered_movies = unseen_list[sorted_indices]
ordered_ratings = predictions_MF.detach().numpy()[sorted_indices]
print("-------------Recommended movies by MF--------------")
i=0
for xyz in ordered_movies[:10]:
  print(map_id_movie[xyz],ordered_ratings[i])
  i=i+1

-------------Recommended movies by MF--------------
In the Company of Men (1997) 6.2190433
Pather Panchali (1955) 6.0320153
Bridge on the River Kwai, The (1957) 5.8659463
Wallace & Gromit: The Best of Aardman Animation (1996) 5.8282504
When We Were Kings (1996) 5.7794437
Boy's Life 2 (1997) 5.7392445
Paths of Glory (1957) 5.7376103
Jean de Florette (1986) 5.699022
Roman Holiday (1953) 5.6876903
All About Eve (1950) 5.6794105
