In [1]:
import pandas as pd
import torch
import torch.nn as nn
from sklearn import model_selection
import numpy as np
torch.manual_seed(42)

<torch._C.Generator at 0x2e621764ad0>

In [2]:
data = pd.read_csv("ml-100k/u.data",sep="\t", header=None)
data.columns = ['user id', 'movie id', 'rating', 'timestamp']
data['movie id'].unique

<bound method Series.unique of 0         242
1         302
2         377
3          51
4         346
         ... 
99995     476
99996     204
99997    1090
99998     225
99999     203
Name: movie id, Length: 100000, dtype: int64>

In [3]:
movies = pd.read_csv("ml-100k/u.item",
                    sep="|", encoding='latin-1', header=None)
movies.columns = ['movie id', 'movie title' ,'release date','video release date', 'IMDb URL',
                 'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy',
                 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
                 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies.head()

Unnamed: 0,movie id,movie title,release date,video release date,IMDb URL,unknown,Action,Adventure,Animation,Children's,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Toy%20Story%2...,0,0,0,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?GoldenEye%20(...,0,1,1,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Four%20Rooms%...,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Get%20Shorty%...,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,,http://us.imdb.com/M/title-exact?Copycat%20(1995),0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [4]:
map_id_movie = {}
for id,row in movies.iterrows():
  map_id_movie[row['movie id']] = row['movie title']
map_id_movie[1398]

'Anna (1996)'

In [5]:
num_users = data['user id'].nunique()
num_movies = data['movie id'].nunique()
print(
    (f"Number of users: {num_users}\n"
    f"Number of movies: {num_movies}")
)

Number of users: 943
Number of movies: 1682


In [6]:
data_train,data_test = model_selection.train_test_split(data, test_size = 0.1,random_state = 42,stratify = data['rating'])
data_test

Unnamed: 0,user id,movie id,rating,timestamp
24188,299,88,3,889502902
14023,347,462,2,881654359
20170,96,185,5,884403866
87853,880,302,5,880166451
8174,177,289,2,880130534
...,...,...,...,...
18733,43,215,5,883955467
83494,860,516,3,885991040
36379,313,484,5,891016193
17294,112,286,4,884992484


In [7]:
def train_epochs(model,lr,epochs,Nueralnet = False):
  optimizer = torch.optim.Adam(model.parameters(), lr=lr)
  loss_fn = nn.MSELoss()
  for t in range(epochs):
    model.train()
    users = torch.LongTensor(data_train['user id'].to_numpy()) # .cuda()
    items = torch.LongTensor(data_train['movie id'].to_numpy()) #.cuda()
    ratings = torch.FloatTensor(data_train['rating'].to_numpy())
    if Nueralnet:
        ratings = ratings.unsqueeze(1)
    y_pred = model(users,items)
    loss_train = loss_fn(y_pred,ratings)

    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

    model.eval()
    with torch.no_grad():
        test_users = torch.LongTensor(data_test['user id'].to_numpy())
        test_items = torch.LongTensor(data_test['movie id'].to_numpy())
        test_ratings = torch.FloatTensor(data_test['rating'].to_numpy())
        if Nueralnet:
            test_ratings=test_ratings.unsqueeze(1)
        pred_test = model(test_users,test_items)
        loss_test = loss_fn(pred_test, test_ratings)

    print(f"Epoch {t+1} -- Train loss: {loss_train:>7f} Test loss: {loss_test:>7f}")

In [8]:
class NueralNet(nn.Module):
  def __init__(self,users,movie_ids,embed_size):
    super(NueralNet,self).__init__()
    self.user_embed = nn.Embedding(users,embed_size)
    self.movie_embed = nn.Embedding(movie_ids,embed_size)
    self.user_embed.weight.data.uniform_(0,0.05)
    self.movie_embed.weight.data.uniform_(0,0.05)
    self.network = nn.Sequential(
            nn.Linear(2*embed_size,64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8,1)
        )
    self.output = nn.Sigmoid()

  def forward(self,u,v):
    U = self.user_embed(u)
    V = self.movie_embed(v)
    vector = torch.cat([U,V], dim=-1)
    out_temp = self.network(vector)
    return self.output(out_temp)*5

In [9]:
model_2 = NueralNet(num_users+1,num_movies+1,64)
train_epochs(model_2,0.002,150,True)

Epoch 1 -- Train loss: 2.049888 Test loss: 2.032038
Epoch 2 -- Train loss: 2.032166 Test loss: 2.014605
Epoch 3 -- Train loss: 2.014728 Test loss: 1.997733
Epoch 4 -- Train loss: 1.997860 Test loss: 1.981044
Epoch 5 -- Train loss: 1.981170 Test loss: 1.964467
Epoch 6 -- Train loss: 1.964589 Test loss: 1.947726
Epoch 7 -- Train loss: 1.947842 Test loss: 1.931398
Epoch 8 -- Train loss: 1.931516 Test loss: 1.917367
Epoch 9 -- Train loss: 1.917480 Test loss: 1.903119
Epoch 10 -- Train loss: 1.903223 Test loss: 1.888515
Epoch 11 -- Train loss: 1.888605 Test loss: 1.873744
Epoch 12 -- Train loss: 1.873819 Test loss: 1.858541
Epoch 13 -- Train loss: 1.858592 Test loss: 1.843000
Epoch 14 -- Train loss: 1.843034 Test loss: 1.833616
Epoch 15 -- Train loss: 1.833733 Test loss: 1.823129
Epoch 16 -- Train loss: 1.823252 Test loss: 1.811818
Epoch 17 -- Train loss: 1.811934 Test loss: 1.799924
Epoch 18 -- Train loss: 1.800024 Test loss: 1.788385
Epoch 19 -- Train loss: 1.788475 Test loss: 1.776477
Ep

In [10]:
user_id = input("Enter the user_id for movie recommendations: ")
movie_list = data[data['user id'] == int(user_id)]['movie id'].to_list()
unseen_list = [int(x) for x in range(1,num_movies+1) if x not in movie_list]
unseen_list = np.array(unseen_list)
temp_x = torch.LongTensor(np.full(np.size(unseen_list),int(user_id)))
unseen_mov = torch.LongTensor(unseen_list)

In [11]:
model_2.eval()
predictions_NN = model_2(temp_x,unseen_mov)
predictions_NN=predictions_NN.squeeze()
sorted_indices = np.argsort(predictions_NN.detach().numpy())[::-1]
ordered_movies = unseen_list[sorted_indices]
ordered_ratings = predictions_NN.detach().numpy()[sorted_indices]
print("---------------Recommended movies by NeuralNetworks--------------")
i=0
for xyz in ordered_movies[:10]:
  print(map_id_movie[xyz],ordered_ratings[i])
  i=i+1

---------------Recommended movies by NeuralNetworks--------------
Santa with Muscles (1996) 4.8836946
Someone Else's America (1995) 4.882595
Pather Panchali (1955) 4.8797936
Safe Passage (1994) 4.879195
Great Day in Harlem, A (1994) 4.878795
Saint of Fort Washington, The (1993) 4.878649
The Deadly Cure (1996) 4.8763876
Entertaining Angels: The Dorothy Day Story (1996) 4.8713226
Bitter Sugar (Azucar Amargo) (1996) 4.8676624
Some Mother's Son (1996) 4.867448
