# Recommender Systems
This notebook uses popularity ranking, user-based collaborative filtering, and item-based collaborative filtering to recommend movies based on user or item inputs. The recommenders are scored with several metrics on an offline train-test split.

## Imports

In [1]:
%pip install lifelines

Collecting lifelines
  Downloading lifelines-0.27.6-py3-none-any.whl (409 kB)
     -------------------------------------- 409.4/409.4 kB 5.1 MB/s eta 0:00:00
Collecting autograd-gamma>=0.3
  Downloading autograd-gamma-0.5.0.tar.gz (4.0 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting formulaic>=0.2.2
  Downloading formulaic-0.6.0-py3-none-any.whl (82 kB)
     ---------------------------------------- 82.1/82.1 kB ? eta 0:00:00
Collecting autograd>=1.5
  Downloading autograd-1.5-py3-none-any.whl (48 kB)
     ---------------------------------------- 48.9/48.9 kB 2.4 MB/s eta 0:00:00
Collecting interface-meta>=1.2.0
  Downloading interface_meta-1.3.0-py3-none-any.whl (14 kB)
Collecting astor>=0.8
  Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Building wheels for collected packages: autograd-gamma
  Building wheel for autograd-gamma (setup.py): started
  Building wheel for autograd-gamma (setup.py): finished with st



In [1]:
# Importing Libraries

from movie_rec_utils import *

import pandas as pd
from ydata_profiling import ProfileReport
import math
import numpy as np
import matplotlib.pyplot as plt
import difflib

from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import r2_score, mean_absolute_error
from lifelines.utils import concordance_index

In [2]:
# Load the four source CSV files (links, movies, ratings, tags) into DataFrames,
# reading them in the order listed.
links_df, movies_df, ratings_df, tags_df = map(
    pd.read_csv,
    (
        '../Data/links.csv',
        '../Data/movies.csv',
        '../Data/ratings.csv',
        '../Data/tags.csv',
    ),
)

## Data Exploration

In [None]:
# Build minimal ydata-profiling reports for the movies and ratings tables and
# write each one to an HTML file in the working directory.
#
# ProfileReport.to_file() returns None, so the report object is bound first and
# written out in a separate call; this keeps movies_profile / ratings_profile
# usable afterwards (e.g. for inline display) instead of being None.
movies_profile = ProfileReport(movies_df, minimal=True)
movies_profile.to_file('movies_report.html')

ratings_profile = ProfileReport(ratings_df, minimal=True)
ratings_profile.to_file('ratings_report.html')

# Recommenders

## Popularity Ranking

In [None]:
# Add a constant helper column of ones to ratings_df so that summing it in a
# later aggregation gives the number of reviews per group.
ratings_df.loc[:, 'rating_count'] = 1

### Average Ranking, Filtered with Review Minimum

### Laplace Inspired Data Manipulation

### Cumulative Rating

## Collaborative Filtering

### Item-Based Collaborative Filtering

In [3]:
# Reshape the long ratings table into a wide matrix: one row per userId, one
# column per movieId, cells holding the rating (NaN where a user has not rated
# a movie; duplicate user/movie pairs are averaged by pivot_table's default).
pivoted_ratings = ratings_df.pivot_table(
    values='rating',
    index='userId',
    columns='movieId',
)

In [5]:
# Ask the item-based recommender for the 7 titles most similar to the query
# movie. find_similar comes from movie_rec_utils; shared_thresh / total_thresh
# are presumably minimum rating-count filters — confirm against the helper.
similar_movies = find_similar(
    title='Nausicaä of the valley of the wind',
    ratings_df=ratings_df,
    movies_df=movies_df,
    pivoted_df=pivoted_ratings,
    n=7,
    shared_thresh=5,
    total_thresh=10,
    more_data=False,
)

similar_movies

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)


0    Monty Python's And Now for Something Completel...
1                             Road to Perdition (2002)
2                                          Thor (2011)
3                      X-Men Origins: Wolverine (2009)
4                       Guardians of the Galaxy (2014)
5                            North by Northwest (1959)
6                                   Blue Velvet (1986)
Name: title, dtype: object

### User-Based Collaborative Filtering

## Evaluating Recommenders with Offline Methods

In [13]:
# Hold out 10% of the (userId, movieId, rating) rows as the test set; the fixed
# random_state makes the split reproducible across runs.
train, test = train_test_split(
    ratings_df.loc[:, ['userId', 'movieId', 'rating']],
    test_size=0.1,
    random_state=42,
)

# Build the training frame from the full ratings table and the training rows.
# create_train comes from movie_rec_utils; presumably it returns a user-item
# matrix restricted to the training ratings — confirm against the helper.
useritem_train = create_train(ratings_df, train)

In [None]:
# Pairwise cosine similarity between every pair of rows of useritem_train,
# wrapped in a DataFrame labelled on both axes by the training frame's index.
# With a single argument, cosine_similarity compares the rows against themselves.
cos_sim_df = pd.DataFrame(
    data=cosine_similarity(useritem_train),
    index=useritem_train.index,
    columns=useritem_train.index,
)

In [None]:
# Estimate a rating for every (userId, movieId) pair in the held-out test set.
# estimate_rating comes from movie_rec_utils; presumably it relies on the
# similarity matrix computed above — confirm against the helper.
#
# The two id columns are iterated directly instead of using
# DataFrame.apply(axis=1): a row-wise apply over mixed int/float columns upcasts
# each row to float64, so the ids would be passed as floats, and it builds a
# Series per row. assign() binds the result to a new frame, which also avoids
# the SettingWithCopyWarning that writing a column onto a train_test_split
# slice can emit.
test = test.assign(
    estimated_rating=[
        estimate_rating(user_id, movie_id)
        for user_id, movie_id in zip(test['userId'], test['movieId'])
    ]
)

In [None]:
# Score the estimated ratings against the held-out true ratings with the
# project helper score_est, then render the resulting metrics table.
scores_df = score_est(
    test['rating'],
    test['estimated_rating'],
)
display(scores_df)