# Making Recommendations Based on Popularity

## Notebook Preparation and Data Initial Visualization

### Folder Path Input:

In [15]:
#@title Google Drive Folder Path: { run: "auto", display-mode: "form" }

# Base folder (on the mounted Google Drive) that holds the notebook's CSV files.
# Keep the trailing slash shown in the example below so that file names can be
# appended to this string.
path= '/content/drive/MyDrive/Colab Notebooks/08-Recommendation System/' #@param {type: "raw"}
#@markdown for example: '/content/drive/MyDrive/Colab_Notebooks/'

### Data Loader:

In [18]:
import os

import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive so the dataset folder configured in `path` is reachable
# from the Colab runtime.
drive.mount('/content/drive')

# os.path.join works whether or not `path` ends with a slash, so a folder typed
# without the trailing separator no longer produces a broken file name.
links_df    = pd.read_csv(os.path.join(path, 'links.csv'))
movies_df   = pd.read_csv(os.path.join(path, 'movies.csv'))
ratings_df  = pd.read_csv(os.path.join(path, 'ratings.csv'))
tags_df     = pd.read_csv(os.path.join(path, 'tags.csv'))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Create the `requirements.txt` file:

In [11]:
# Build one "name==version" line for every notebook global that exposes both a
# __name__ and a __version__ (in practice: the imported packages).
# - list(...) snapshots the globals so the iteration cannot be disturbed.
# - the set + sorted() make the file content de-duplicated and deterministic.
requirements = '\n'.join(sorted({
    f'{m.__name__}=={m.__version__}'
    for m in list(globals().values())
    if getattr(m, '__name__', None) and getattr(m, '__version__', None)
}))

# Write into the folder configured in `path` instead of repeating the literal
# location; rstrip keeps this correct with or without a trailing slash.
with open(f"{path.rstrip('/')}/requirements.txt", 'w') as output:
    output.write(requirements + '\n')

### Initial Visualizer:

In [17]:
#@title Select the DataFrame: { run: "auto" }

df = links_df #@param ["links_df","movies_df","ratings_df","tags_df"] {type:"raw"}
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
df.info()
# Last expression of the cell: rendered as a (truncated) rich table.
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   movieId  9742 non-null   int64  
 1   imdbId   9742 non-null   int64  
 2   tmdbId   9734 non-null   float64
dtypes: float64(1), int64(2)
memory usage: 228.5 KB
None


Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
...,...,...,...
9737,193581,5476944,432131.0
9738,193583,5914996,445030.0
9739,193585,6397426,479308.0
9740,193587,8391976,483455.0


## 1) Popularity/Quality based recommender system

In [None]:
#@title Rating Minimum Value Threshold: { run: "auto" }

# Form inputs forwarded to recommendation_popularity() in this cell.
minimun_rating = 3 #@param {type:"slider", min:0, max:5, step:0.1}
minimun_reviews = 200 #@param {type:"slider", min:0, max:500,step:10}
top_number_of_movies_recommended = 37 #@param {type:"slider", min:1, max:100,step:1}

def recommendation_popularity(minimun_rating,minimun_reviews,top_number_of_movies_recommended):
  """Recommend the most-reviewed movies whose mean rating clears a threshold.

  Reads the notebook-level ``ratings_df`` (uses its ``movieId`` and ``rating``
  columns) and ``movies_df`` (uses its ``movieId`` and ``title`` columns).

  Parameters
  ----------
  minimun_rating : float
      Lowest mean rating (inclusive) a movie may have to be considered.
  minimun_reviews : int
      Lowest number of ratings (inclusive) a movie may have to be considered.
  top_number_of_movies_recommended : int
      Maximum number of titles to return.

  Returns
  -------
  pandas.DataFrame
      A frame without columns whose index, named ``Recommended_Movies``, holds
      the selected titles ordered by number of reviews (descending). Movies
      with the same number of reviews are ordered by mean rating (descending).
  """
  # A single aggregation yields both statistics per movie; this avoids grouping
  # twice and avoids assigning a new column onto a filtered slice.
  movie_stats = ratings_df.groupby('movieId')['rating'].agg(rating='mean', reviews='count')

  is_eligible = (
      (movie_stats['rating'] >= minimun_rating)
      & (movie_stats['reviews'] >= minimun_reviews)
  )

  # Sorting on both keys makes the order of equally-reviewed movies deterministic.
  top = (
      movie_stats[is_eligible]
      .sort_values(['reviews', 'rating'], ascending=False)
      .head(top_number_of_movies_recommended)
  )

  # `movieId` is the index of `top` and a column of `movies_df`; the inner
  # merge keeps the ranking order of the left frame.
  top_movies_names_df = pd.merge(top, movies_df, on='movieId', how='inner')

  # Show recommendations
  print(f"Our TOP {top_number_of_movies_recommended} movies: ")

  return (
      top_movies_names_df['title']
      .rename('Recommended_Movies')
      .to_frame()
      .set_index('Recommended_Movies')
  )
    
    
# Display the popularity ranking for the values selected in the form above.
recommendation_popularity(
    minimun_rating=minimun_rating,
    minimun_reviews=minimun_reviews,
    top_number_of_movies_recommended=top_number_of_movies_recommended,
)

  

Our TOP 37 movies: 


Forrest Gump (1994)
"Shawshank Redemption, The (1994)"
Pulp Fiction (1994)
"Silence of the Lambs, The (1991)"
"Matrix, The (1999)"
Star Wars: Episode IV - A New Hope (1977)
Jurassic Park (1993)
Braveheart (1995)
Terminator 2: Judgment Day (1991)
Schindler's List (1993)
Fight Club (1999)


## 2) Method: **`Correlation`**

NOTE: In this case, the `correlation` between the ratings of MOVIES is used to provide a recommendation.

In [None]:
#@title Making recommendations based on MOVIES analysis: { run: "auto" }


# Form inputs forwarded to recommendation_byMoviesCorrelation() in this cell.
# NOTE: minimun_rating, minimun_reviews and top_number_of_movies_recommended
# overwrite the values assigned by the popularity cell's form.
Correlation_Method = "pearson" #@param ["pearson", "kendall", "spearman"]
movieId = 2 #@param {type:"number"}
minimun_rating = 2.5 #@param {type:"slider", min:0, max:5, step:0.1}
minimun_reviews = 50 #@param {type:"slider", min:1, max:100,step:1}
top_number_of_movies_recommended = 20 #@param {type:"slider", min:1, max:100,step:1}


def recommendation_byMoviesCorrelation(movieId, Correlation_Method, minimun_reviews, minimun_rating, top_number_of_movies_recommended):
  """Recommend the movies whose user ratings correlate best with one movie.

  Reads the notebook-level ``ratings_df`` (uses ``userId``, ``movieId`` and
  ``rating``) and ``movies_df`` (uses ``movieId`` and ``title``).

  Parameters
  ----------
  movieId : int
      Movie the recommendations are based on.
  Correlation_Method : str
      Passed to ``DataFrame.corrwith`` as ``method`` (the notebook form offers
      "pearson", "kendall" and "spearman").
  minimun_reviews : int
      Lowest number of kept ratings (inclusive) a candidate movie may have.
  minimun_rating : float
      Individual ratings below this value are discarded before anything else
      is computed.
  top_number_of_movies_recommended : int
      Maximum number of titles to return.

  Returns
  -------
  pandas.DataFrame
      A frame without columns whose index, named ``Recommended_Movies``, holds
      the titles ordered by correlation (descending). It is empty when no
      candidate has a defined correlation with the selected movie.

  Raises
  ------
  KeyError
      If ``movieId`` has no rating that passes ``minimun_rating``.
  IndexError
      If ``movieId`` is absent from ``movies_df``.
  """
  corr_column = f'Correlation_{Correlation_Method}'

  movies_ratings_df = ratings_df.loc[ratings_df['rating'] >= minimun_rating]
  movies_crosstab = pd.pivot_table(data=movies_ratings_df, values='rating', index='userId', columns='movieId')

  # Same exception type the bare column lookup would raise, with a message
  # that says what is actually wrong.
  if movieId not in movies_crosstab.columns:
    raise KeyError(f"movieId {movieId} has no ratings >= {minimun_rating}")

  similar_ratings = movies_crosstab.corrwith(movies_crosstab[movieId], method=Correlation_Method)

  # Movies without a defined correlation (too little overlap, constant
  # ratings) carry NaN and are discarded.
  corr_df = similar_ratings.rename(corr_column).to_frame().dropna()

  reviews = movies_ratings_df.groupby('movieId')['rating'].count().rename('reviews')

  # Drop the selected movie itself. errors='ignore' matters: when the movie's
  # own correlation is NaN, dropna() above has already removed its row and a
  # strict drop would raise KeyError.
  movies_corr_summary = corr_df.join(reviews).drop(index=movieId, errors='ignore')

  top = (
      movies_corr_summary[movies_corr_summary['reviews'] >= minimun_reviews]
      .sort_values(corr_column, ascending=False)
      .head(top_number_of_movies_recommended)
  )

  # `movieId` is the index of `top` and a column of `movies_df`.
  top_movies_names_df = pd.merge(top, movies_df, on='movieId', how='inner')

  selected_movie_name = movies_df[movies_df['movieId'] == movieId]['title']

  print(f"""Based on your recent selection:

  {selected_movie_name.values[0]}

  this are the movies that we would like to recommend:""")

  return (
      top_movies_names_df['title']
      .rename('Recommended_Movies')
      .to_frame()
      .set_index('Recommended_Movies')
  )

# Display the correlation-based recommendations for the form values above.
recommendation_byMoviesCorrelation(
    movieId=movieId,
    Correlation_Method=Correlation_Method,
    minimun_reviews=minimun_reviews,
    minimun_rating=minimun_rating,
    top_number_of_movies_recommended=top_number_of_movies_recommended,
)

## 3) Method: **`Cosine Similarities`**

NOTE: In this case, the `cosine similarity` between USERS is used to provide a recommendation.

In [None]:
#@title Making recommendations based on USERS analysis: { run: "auto" }


# Form inputs forwarded to recommendation_byUsersSimilarity() in this cell.
user_id = 120 #@param {type:"slider", min:1, max:610,step:1}
top_number_of_movies_recommended = 10 #@param {type:"slider", min:1, max:100,step:1}



def recommendation_byUsersSimilarity (user_id,top_number_of_movies_recommended):
  """Recommend movies a user has not rated, weighted by similarity to other users.

  Reads the notebook-level ``ratings_df`` (uses ``userId``, ``movieId`` and
  ``rating``) and ``movies_df`` (uses ``movieId`` and ``title``).

  Parameters
  ----------
  user_id : int
      User the recommendations are made for.
  top_number_of_movies_recommended : int
      Maximum number of titles to return.

  Returns
  -------
  pandas.DataFrame
      A frame without columns whose index, named ``Recommended_Movies``, holds
      the titles ordered by predicted score (descending).

  Raises
  ------
  KeyError
      If ``user_id`` has no ratings in ``ratings_df``.
  ValueError
      If the similarities to all other users sum to zero, so the weights
      cannot be normalised.
  """
  # Local import: the function keeps working regardless of which other cells
  # have been run.
  from sklearn.metrics.pairwise import cosine_similarity

  movies = movies_df[['movieId', 'title']]

  # userId x movieId rating matrix. Missing ratings become 0 because the
  # cosine similarity cannot be computed with NaNs.
  users_items = pd.pivot_table(data=ratings_df,
                               values='rating',
                               index='userId',
                               columns='movieId').fillna(0)

  if user_id not in users_items.index:
    raise KeyError(f"userId {user_id} has no ratings in ratings_df")

  other_users = users_items.index != user_id

  # Only the similarities to the selected user are needed, so every user is
  # compared against that single row instead of building the full
  # user x user matrix.
  similarity_to_user = pd.Series(
      cosine_similarity(users_items, users_items.loc[[user_id]]).ravel(),
      index=users_items.index,
  )[other_users]

  total_similarity = similarity_to_user.sum()
  if total_similarity == 0:
    raise ValueError(f"similarities to userId {user_id} sum to zero; cannot normalise the weights")

  # The selected user is excluded so that they do not contribute to their own prediction.
  weights = similarity_to_user / total_similarity

  # Movies the selected user has not rated (stored as 0 after fillna).
  is_unrated = users_items.loc[user_id] == 0
  not_watched_movies = users_items.loc[other_users, is_unrated]

  # Similarity-weighted sum of the other users' ratings. Movies a user did not
  # rate count as 0 here, so widely rated movies score higher.
  weighted_averages = not_watched_movies.T.dot(weights).rename('predicted_rating').to_frame()

  # Show recommendations
  print("Specially for YOU: ")

  recommendations = weighted_averages.merge(movies, left_index=True, right_on="movieId")
  top_movies_names_df = (
      recommendations
      .sort_values("predicted_rating", ascending=False)
      .head(top_number_of_movies_recommended)
  )

  return (
      top_movies_names_df['title']
      .rename('Recommended_Movies')
      .to_frame()
      .set_index('Recommended_Movies')
  )

# Display the user-similarity recommendations for the form values above.
recommendation_byUsersSimilarity(
    user_id=user_id,
    top_number_of_movies_recommended=top_number_of_movies_recommended,
)

Specially for YOU: 


"Shawshank Redemption, The (1994)"
Pulp Fiction (1994)
Forrest Gump (1994)
"Silence of the Lambs, The (1991)"
Star Wars: Episode V - The Empire Strikes Back (1980)
"Matrix, The (1999)"
Toy Story (1995)
Terminator 2: Judgment Day (1991)
Braveheart (1995)
Jurassic Park (1993)
