# Movie Recommender System: Collaborative Filtering

## Import libraries

In [304]:
import math
import numpy as np
import pandas as pd

## Set path to rating data

In [305]:
# Set path to rating data
movieRatingPath = "../../data/ratings_small.csv"

## Set path to movie info data

In [306]:
# Set path to metadata containing movie titles
movieInfoPath = "../../data/movies_metadata.csv"

## Create DataFrames

In [307]:
# Create DataFrame containing ratings info
data = pd.read_csv(movieRatingPath)

In [308]:
# Create DataFrame containing a unique set of movie IDs and titles
metadata = pd.read_csv(movieInfoPath)
metadata["id"] = pd.to_numeric(metadata["id"], errors="coerce")
metadata = metadata.drop_duplicates(subset=["id"])

  metadata = pd.read_csv(movieInfoPath)


## Create a dictionary of the movie IDs

In [309]:
# Create a dictionary of movie IDs and titles
movieIdToTitle = dict(zip(metadata["id"], metadata["title"]))

## Display the ratings DataFrame

In [310]:
# Output the rating DataFrame
data

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


## Replace null values with zeros

In [311]:
# Format rating column
data['rating'] = data['rating'].astype(float)

# Replace null ratings with zeros
data['rating'].fillna(0, inplace=True)

# Output the rating DataFrame
data

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205
...,...,...,...,...
99999,671,6268,2.5,1065579370
100000,671,6269,4.0,1065149201
100001,671,6365,4.0,1070940363
100002,671,6385,2.5,1070979663


## Create a User-Item matrix

In [312]:
# Create a pivot table for a user-item matrix
userItemMatrix = data.pivot_table(index='userId', columns='movieId', values='rating', aggfunc='mean')

# Turn the matrix into an array
userItemArray = userItemMatrix.fillna(0).values

## Define correlation function

In [313]:
# Define a function to calculate the correlation between any two movies
def calculateCorrelation(matrix, movie1, movie2):
    return np.corrcoef(matrix[:, movie1], matrix[:, movie2])[0, 1]

## Define item-item correlation function

In [314]:
# Define a function to calculate the item-item correlation
def getItemItem(matrix, movieIndex):
    # Create a list of movie IDs, without the given ID
    movieIds = [col for col in range(matrix.shape[1]) if col != movieIndex]
    correlations = []
    
    # Iterate through the movie IDs and calculate the correlation
    for movieId in movieIds:
        correlation = calculateCorrelation(matrix, movieIndex, movieId)
        
        # If the correlation is not NaN, append the movie ID and correlation
        if not np.isnan(correlation):
            correlations.append((movieId, correlation))
    
    # Sort the correlations in descending order and output to a DataFrame
    top10Movies = pd.DataFrame(sorted(correlations, key=lambda x: x[1], reverse=True), columns=["Movie Index", "Correlation"])
    return top10Movies

## Set the movie ID to search for

In [315]:
# Set the movie ID to correlate with
testMovieIndex = 100

# Print the movie title
testMovieTitle = movieIdToTitle.get(testMovieIndex)
print(f"The movie you selected is '{testMovieTitle}'.")

The movie you selected is 'Lock, Stock and Two Smoking Barrels'.


## Generate the list of item-item recommended movies

In [316]:
# Generate the item-item correlation
top10Movies = getItemItem(userItemArray, testMovieIndex)
top10Movies["Movie Title"] = top10Movies["Movie Index"].map(movieIdToTitle)
top10Movies = top10Movies[["Movie Title", "Movie Index", "Correlation"]].dropna(subset=["Movie Title"]).head(10)
top10Movies

Unnamed: 0,Movie Title,Movie Index,Correlation
0,"Bang, Boom, Bang",344,0.458879
1,Ed Wood,522,0.454953
2,Requiem,523,0.453478
3,Mambo Italiano,321,0.447518
4,Jackie Brown,184,0.417068
5,Mon oncle,427,0.411172
6,Wings of Desire,144,0.409131
9,La Haine,406,0.389657
10,Casino,524,0.388236
11,Spirited Away,129,0.386997
