# Deep Learning Based Recommender Systems

**imports**

In [1]:
import os

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Seed NumPy's global RNG so every later np.random.* call (e.g. np.random.choice)
# yields the same draws on each fresh run.
# NOTE(review): only NumPy is seeded in this cell; torch is imported but not seeded here.
np.random.seed(123)

**importing ratings data**

In [2]:
# Location of the ratings CSV. Set the RATINGS_CSV_PATH environment variable to
# run the notebook on another machine; when it is unset, the original local
# path is used, so existing behaviour is unchanged.
RATINGS_CSV_PATH = os.environ.get(
    "RATINGS_CSV_PATH",
    r"C:\Users\Anshul M\movie-ai\Deep-Learning-Based-Recommender-System\Data\rating.csv",
)

# Parse 'timestamp' as datetimes on load so ratings can be ordered
# chronologically later on.
ratings = pd.read_csv(RATINGS_CSV_PATH, parse_dates=['timestamp'])

**using a random 25% of the users (and all of their ratings) due to performance issues (30% was used in the notebook)**

In [3]:
# Fraction of distinct users whose ratings are kept.
USER_SAMPLE_FRACTION = 0.25

# Compute the distinct user IDs once and reuse them for both the population
# and the sample size.
unique_userIds = ratings['userId'].unique()

# Draw the sampled users without replacement, so no user is picked twice.
rand_userIds = np.random.choice(unique_userIds,
                                size=int(len(unique_userIds) * USER_SAMPLE_FRACTION),
                                replace=False)

# Keep every rating made by a sampled user. The explicit .copy() makes the
# result an independent frame, so adding columns to `ratings` afterwards does
# not raise SettingWithCopyWarning.
ratings = ratings.loc[ratings['userId'].isin(rand_userIds)].copy()


**splitting into train and test sets: each user's most recent rating is held out for testing, all earlier ratings are used for training**

In [4]:
# Rank each user's ratings from newest (rank 1) to oldest. method='first'
# breaks timestamp ties by order of appearance, so ranks within a user are
# distinct. assign() returns a new frame instead of writing into `ratings`
# in place, which avoids SettingWithCopyWarning when `ratings` is itself a
# filtered slice.
ratings = ratings.assign(
    rank_latest=ratings.groupby(['userId'])['timestamp']
                       .rank(method='first', ascending=False)
)

# keeping only relevant columns of data
relevant_columns = ['userId', 'movieId', 'rating']
is_latest = ratings['rank_latest'] == 1

# training on ratings of all movies except the most recent one;
# .copy() gives an independent frame that can be modified safely later
train_ratings = ratings.loc[~is_latest, relevant_columns].copy()
# latest rating is going to be the test rating
test_ratings = ratings.loc[is_latest, relevant_columns].copy()

**binarizing the data for the purpose of converting explicit feedback to implicit**

In [5]:
# Binarize the training data: every row in train_ratings is a movie the user
# has interacted with, so the whole 'rating' column is overwritten with 1.
# NOTE(review): if train_ratings is a slice of another frame rather than an
# independent copy, pandas may emit SettingWithCopyWarning on this assignment,
# and the resulting dtype of 'rating' can differ between pandas versions — confirm.
train_ratings.loc[:, 'rating'] = 1

**assigning negative samples to our training data**

In [6]:
# Pool of candidate movies: every distinct movie ID found in `ratings`
all_movieIds = ratings['movieId'].unique()

# Number of negative samples generated for each positive one (4:1 ratio)
num_negatives = 4

# Every (userId, movieId) pair that occurs in the training data
user_item_set = set(zip(train_ratings['userId'], train_ratings['movieId']))

# Parallel lists that together make up the training examples
users, items, labels = [], [], []

for user_id, movie_id in tqdm(user_item_set):
    # Positive example: an interaction that is present in the training data
    users.append(user_id)
    items.append(movie_id)
    labels.append(1)

    # Negative examples: movies this user has no training interaction with
    for _ in range(num_negatives):
        # Keep drawing random movies until one is not paired with this user
        while True:
            candidate = np.random.choice(all_movieIds)
            if (user_id, candidate) not in user_item_set:
                break
        users.append(user_id)
        items.append(candidate)
        labels.append(0)

  0%|          | 0/4974661 [00:00<?, ?it/s]