<a href="https://colab.research.google.com/github/Ritwik411/Movie_Recommendation_System/blob/master/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Movie Recommendation** 

In [None]:
import os
os.chdir("Path to your dataset")

In [None]:
import numpy as np
import pandas as pd

In [None]:
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

### Import training and testing datasets

In [91]:
df_train = pd.read_csv('ml-100k/u1.base',delimiter='\t')
df_test = pd.read_csv('ml-100k/u1.test',delimiter='\t')

In [92]:
df_train.columns = ['user_id','movie_id','rating','timestamp']
df_test.columns = df_train.columns

In [101]:
train = np.array(df_train,dtype='int')
test = np.array(df_test,dtype='int')

In [102]:
num_users = int(max(max(train[:,0]),max(test[:,0])))
num_movies = int(max(max(train[:,1]),max(test[:,1])))

In [103]:
print(num_users,num_movies)

943 1682


#### Map the movie rating for each user 

In [104]:
def convert(data):
  new_data = []
  for user_id in range(1,num_users+1):
    movie_id = data[:,1][data[:,0]==user_id]
    rating_id = data[:,2][data[:,0]==user_id]
    ratings = np.zeros(num_movies)
    ratings[movie_id-1] = rating_id
    new_data.append(list(ratings))
  return new_data

In [105]:
#Size of train - 943 rows(number of users), 1682 columns (rating given to each movie)
train = convert(train) 
test = convert(test)

In [106]:
train = torch.FloatTensor(train)
test = torch.FloatTensor(test)

### Auto-Encoder Architecture

In [51]:
class SAE(nn.Module):
  def __init__(self,):
    super(SAE,self).__init__()
    #No. of hidden layers in the neural network
    self.fc1 = nn.Linear(num_movies,20)
    self.fc2 = nn.Linear(20,10)
    self.fc3 = nn.Linear(10, 20)
    self.fc4 = nn.Linear(20,num_movies)
    #Activation function usign Sigmoid
    self.activation = nn.Sigmoid()
  #Encoding and decoding
  def forward(self, x):
    x = self.activation(self.fc1(x))
    x = self.activation(self.fc2(x))
    x = self.activation(self.fc3(x))
    x = self.fc4(x)
    return x
sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(),lr = 0.01, weight_decay = 0.5)

### Training

In [None]:
#Training
nb_epoch = 256
for epoch in range(1,nb_epoch+1):
  train_loss = 0
  s = 0.
  for user_id in range(num_users):
    input = Variable(train[user_id]).unsqueeze(0) # creates a batch
    target = input.clone()
    if torch.sum(target.data>0)>0:
      output = sae(input)
      target.require_grad = False
      output[target ==0] = 0
      loss = criterion(output, target)
      mean_corrector = num_movies/float(torch.sum(target.data>0)+ 1e-10)
      loss.backward()
      train_loss += np.sqrt(loss.data*mean_corrector)
      s += 1.
      optimizer.step()
  print("epoch : "+ str(epoch)+" loss :"+str(train_loss/s))

### Testing

In [141]:
test_loss = 0
s = 0.
for user_id in range(num_users):
  input = Variable(train[user_id]).unsqueeze(0) #Keep train set
  target = Variable(test[user_id])
  if torch.sum(target.data>0)>0:
    output = sae(input)
    target.require_grad = False
    output[(target ==0).unsqueeze(0)] = 0 #Non-zero ratings
    loss = criterion(output, target)
    mean_corrector = num_movies/float(torch.sum(target.data>0)+ 1e-10)
    #No back propogation using testing
    test_loss += np.sqrt(loss.data*mean_corrector)
    s += 1.
    # No optimizer.step() for testing
print("Test Loss : "+str(test_loss/s))

  return F.mse_loss(input, target, reduction=self.reduction)


Test Loss : tensor(0.9488)


# Sample Recommendation 

In [108]:
user = 74
input_var = Variable(train[user]).unsqueeze(0)
result = torch.detach(sae(input_var)).numpy()

### Import movies.csv for movie names

In [127]:
movie_names = pd.read_csv("ml-100k/movies.csv")[['movieId','title']]
movie_names.head()

Unnamed: 0,movieId,title
0,1,Toy Story (1995)
1,2,Jumanji (1995)
2,3,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995)


In [109]:
temp = []
for i in result:
  for j in i:
    temp.append(j)

In [132]:
df = pd.DataFrame(temp,columns=['Ratings'])

In [135]:
df['movieId'] = df.index+1
df = pd.merge(df,movie_names,on='movieId')

In [136]:
df.head()

Unnamed: 0,Ratings,movieId,title
0,3.683569,1,Toy Story (1995)
1,3.17225,2,Jumanji (1995)
2,2.561374,3,Grumpier Old Men (1995)
3,3.286338,4,Waiting to Exhale (1995)
4,3.060054,5,Father of the Bride Part II (1995)


## Top 10 Movie for the user

In [140]:
print(df.nlargest(10,['Ratings'])['title'])

1585        Indian Summer (a.k.a. Alive & Kicking) (1996)
1457                           Grosse Pointe Blank (1997)
1414                           Waiting for Guffman (1996)
167               Free Willy 2: The Adventure Home (1995)
404                                      8 Seconds (1994)
1547                                         Steel (1997)
1336                                101 Dalmatians (1996)
1430                                 Salut cousin! (1996)
1596                                       Gattaca (1997)
125     Silences of the Palace, The (Saimt el Qusur) (...
Name: title, dtype: object
