<a href="https://colab.research.google.com/github/Murtazali05/Recommendation-System-platform/blob/main/Autoencoders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
% cd /content/drive/MyDrive/Colab\ Notebooks/Recommendation-System

/content/drive/MyDrive/Colab Notebooks/Recommendation-System


In [9]:
import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
from pathlib import Path

In [10]:
from read_data import read_data, create_dataset,create_tabular_data

In [11]:
users = pd.read_csv('ml-1m/users.dat', sep = '::', header = None, engine = 'python', encoding = 'latin-1')

In [12]:
ratings, movies = read_data(Path('ml-1m'))

In [13]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [14]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [15]:
nb_users = len(users)
nb_movies = len(movies)

In [16]:
nb_users, nb_movies

(6040, 3883)

In [17]:
df = create_tabular_data(ratings,users,movies) 

In [18]:
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(df, test_size=0.2)

In [19]:
train_set = torch.FloatTensor(train_set.to_numpy())
test_set = torch.FloatTensor(test_set.to_numpy())

In [20]:
### Creating the architecture of the Neural Network
class SAE(nn.Module):
    def __init__(self,nb_users,nb_movies):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        # x = self.activation(self.fc4(x))
        # x = self.activation(self.fc5(x))
        x = self.fc4(x)
        return x
sae = SAE(nb_users,nb_movies)
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr = 0.05, weight_decay = 0.5)

In [21]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [22]:
sae.to(device)

SAE(
  (fc1): Linear(in_features=3883, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=20, bias=True)
  (fc4): Linear(in_features=20, out_features=3883, bias=True)
  (activation): Sigmoid()
)

In [23]:
fh = open('sae_checkpoints/sae_1000.weights','rb')
weights = torch.load(fh)

fh.close()

FileNotFoundError: ignored

In [None]:
train_set = train_set.to(device)
test_set = test_set.to(device)

In [None]:
###### Training the SAE
nb_epoch = 1000
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(train_set.shape[0]):
        input = Variable(train_set[id_user]).unsqueeze(0)
        target = input.clone()
        if torch.sum(target.data > 0) > 0:
            output = sae(input)
            target.require_grad = False
            output[target == 0] = 0
            loss = criterion(output, target)
            mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
            loss.backward()
            train_loss += np.sqrt(loss.cpu().data*mean_corrector)
            s += 1. #users that gave atleast one non-zero rating
            optimizer.step()
    print('epoch: '+str(epoch)+' loss: '+ str(train_loss/s))

    if epoch%25==0:
        torch.save(sae.state_dict(),'sae_checkpoints/sae_{}.weights'.format(epoch))

    

In [None]:
## Testing the SAE

test_loss = 0
s = 0.
for id_user in range(nb_users):
  input = Variable(training_set[id_user]).unsqueeze(0)
  target = Variable(test_set[id_user]).unsqueeze(0)
  if torch.sum(target.data > 0) > 0:
    output = sae(input)
    target.require_grad = False
    output[target == 0] = 0
    loss = criterion(output, target)
    mean_corrector = nb_movies/float(torch.sum(target.data > 0) + 1e-10)
    test_loss += np.sqrt(loss.data*mean_corrector)
    s += 1. 
print('test loss: '+str(test_loss/s))

In [None]:
import random

In [None]:
def predict_recommendations(unseen_set,model,user_id=None,N=5):
    if user_id==None:
        user_id = random.randint(0,unseen_set.shape[0])
    print("Predicting Recommendations for user_id: {}".format(user_id))
    input=Variable(unseen_set[user_id]).unsqueeze(0)
    output = model(input)

    out_df = pd.DataFrame(output.cpu().detach().numpy()[0],columns=['predicted_ratings'])
    out_df['ground_truth'] = input.cpu().detach().numpy()[0]

    # filtering unwatched movies and sorting according to the predicted rating
    rec_df = out_df[out_df['ground_truth']==0].sort_values(by='predicted_ratings',ascending=False)
    recs = [x+1 for x in rec_df[:4*N].index]
    return random.sample(recs,k=N)

In [None]:
predict_recommendations(test_set,sae)

In [None]:
(557, 787, 1420, 3245, 130) 200

In [None]:
rec = out_df[out_df['ground_truth']==0].sort_values(by='predicted_ratings',ascending=False)

In [None]:
c=0
vb=[]
for i in test_set:
    
    if i[557]!=0:
        vb.append(c)
    c+=1

In [None]:
movies[movies.movieId==558]

In [None]:
rec[:50]