<a href="https://colab.research.google.com/github/SamAniedi/movieRecommenderSystem/blob/main/Movie_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-surprise
# !conda install -y -c conda-forge scikit-surprise # If you use conda on a non-Colab environment

from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import Reader

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# importing defaultdict and numpy
from collections import defaultdict
import numpy as np

In [None]:
import os
import csv
import sys
import re



In [None]:
def BuildAntiTestSetForUser(testSubject, trainset):
    """Build the list of (user, item, rating) triples for items the user has not rated.

    Args:
        testSubject: Raw user id; it is converted with ``str()`` before being
            looked up in ``trainset``.
        trainset: Training set object providing ``global_mean``, ``ur``,
            ``all_items()``, ``to_inner_uid()``, ``to_raw_uid()`` and
            ``to_raw_iid()`` (the notebook passes the result of
            ``data.build_full_trainset()``).

    Returns:
        A list of ``(raw_user_id, raw_item_id, trainset.global_mean)`` tuples,
        one per item in ``trainset.all_items()`` that does not appear in the
        user's entry of ``trainset.ur``.
    """
    default_rating = trainset.global_mean
    inner_uid = trainset.to_inner_uid(str(testSubject))

    # Inner ids of everything this user has already rated
    already_rated = {inner_iid for inner_iid, _rating in trainset.ur[inner_uid]}

    unrated = []
    for inner_iid in trainset.all_items():
        if inner_iid in already_rated:
            continue
        unrated.append((trainset.to_raw_uid(inner_uid),
                        trainset.to_raw_iid(inner_iid),
                        default_rating))
    return unrated

# CREATING THE MOVIELENS CLASS

In [None]:
class MovieLens:
    """Loader and lookup helpers for the MovieLens "latest-small" CSV files.

    ``loadMovieLensLatestSmall`` builds the Surprise dataset from
    ``ratingsPath`` and fills the movie ID <-> title lookup tables from
    ``moviesPath``. The remaining ``get*`` methods re-read the CSV files on
    each call and return plain Python containers.
    """

    # Lookup tables; loadMovieLensLatestSmall() replaces them with fresh
    # per-instance dicts, so these class-level dicts are never mutated.
    movieID_to_name = {}
    name_to_movieID = {}
    ratingsPath = '/content/drive/MyDrive/Machine Learning csv files/ml-latest-small/ratings.csv'
    moviesPath = '/content/drive/MyDrive/Machine Learning csv files/ml-latest-small/movies.csv'
    # The ml-latest-small files are UTF-8; decoding movies.csv as ISO-8859-1
    # garbles accented titles (e.g. "AmÃ©lie"). Override for other datasets.
    moviesEncoding = 'utf-8'
    # Location of the visual-features file read by getMiseEnScene()
    miseEnScenePath = "LLVisualFeatures13K_Log.csv"

    @staticmethod
    def _iterDataRows(path, encoding=None):
        """Yield the non-empty data rows of the CSV at ``path``, skipping the header line."""
        with open(path, newline='', encoding=encoding) as csvfile:
            rows = csv.reader(csvfile)
            next(rows, None)  # Skip header line; an empty file simply yields nothing
            for row in rows:
                if row:  # csv.reader returns [] for blank lines
                    yield row

    def loadMovieLensLatestSmall(self):
        """Load the ratings into a Surprise dataset and fill the title lookup tables.

        Returns:
            The object returned by ``Dataset.load_from_file`` for ``ratingsPath``.

        Note:
            The working directory is left untouched: both data paths are
            absolute, and changing directory to ``dirname(sys.argv[0])``
            fails when that dirname is empty and otherwise silently moves
            the whole process somewhere else.
        """
        self.movieID_to_name = {}
        self.name_to_movieID = {}

        reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
        ratingsDataset = Dataset.load_from_file(self.ratingsPath, reader=reader)

        for row in self._iterDataRows(self.moviesPath, self.moviesEncoding):
            movieID = int(row[0])
            movieName = row[1]
            self.movieID_to_name[movieID] = movieName
            self.name_to_movieID[movieName] = movieID

        return ratingsDataset

    def getUserRatings(self, user):
        """Return every ``(movieID, rating)`` pair that ``user`` has in the ratings file.

        The whole file is scanned, so the result is complete even when the
        user's rows are not stored contiguously.

        Args:
            user: Integer user id, compared against the first CSV column.

        Returns:
            A list of ``(int movieID, float rating)`` tuples in file order;
            empty if the user has no rows.
        """
        userRatings = []
        for row in self._iterDataRows(self.ratingsPath):
            if int(row[0]) == user:
                userRatings.append((int(row[1]), float(row[2])))
        return userRatings

    def getPopularityRanks(self):
        """Rank movies by how many ratings they received (1 = most rated).

        Returns:
            A ``defaultdict(int)`` mapping movieID to rank. Movies with equal
            counts keep the order in which they first appear in the file.
        """
        ratingCounts = defaultdict(int)
        for row in self._iterDataRows(self.ratingsPath):
            ratingCounts[int(row[1])] += 1

        rankings = defaultdict(int)
        mostRatedFirst = sorted(ratingCounts.items(), key=lambda x: x[1], reverse=True)
        for rank, (movieID, _count) in enumerate(mostRatedFirst, start=1):
            rankings[movieID] = rank
        return rankings

    def getGenres(self):
        """Return a 0/1 genre vector per movie.

        Genre ids are assigned in order of first appearance in the movies
        file; every vector has one slot per distinct genre seen.

        Returns:
            A ``defaultdict(list)`` mapping movieID to its list of 0/1 flags.
        """
        genreIDs = {}
        genreIDsByMovie = {}
        for row in self._iterDataRows(self.moviesPath, self.moviesEncoding):
            movieID = int(row[0])
            # setdefault hands out the next free id the first time a genre is seen
            genreIDsByMovie[movieID] = [
                genreIDs.setdefault(genre, len(genreIDs))
                for genre in row[2].split('|')
            ]

        # Convert integer-encoded genre lists to bitfields that we can treat as vectors
        genres = defaultdict(list)
        for movieID, genreIDList in genreIDsByMovie.items():
            bitfield = [0] * len(genreIDs)
            for genreID in genreIDList:
                bitfield[genreID] = 1
            genres[movieID] = bitfield

        return genres

    def getYears(self):
        """Return the release year parsed from each title's trailing ``(YYYY)``.

        Returns:
            A ``defaultdict(int)`` mapping movieID to year; movies whose title
            does not end in a four-digit year in parentheses are left out.
        """
        # The year group is optional, so search() always matches (at the end
        # of the title at the latest) and group(1) is None when no year is found.
        p = re.compile(r"(?:\((\d{4})\))?\s*$")
        years = defaultdict(int)
        for row in self._iterDataRows(self.moviesPath, self.moviesEncoding):
            movieID = int(row[0])
            year = p.search(row[1]).group(1)
            if year:
                years[movieID] = int(year)
        return years

    def getMiseEnScene(self):
        """Return the seven visual-feature values stored per movie in ``miseEnScenePath``.

        Returns:
            A ``defaultdict(list)`` mapping movieID to ``[avgShotLength,
            meanColorVariance, stddevColorVariance, meanMotion, stddevMotion,
            meanLightingKey, numShots]`` (CSV columns 1-7 as floats).
        """
        mes = defaultdict(list)
        for row in self._iterDataRows(self.miseEnScenePath):
            movieID = int(row[0])
            avgShotLength = float(row[1])
            meanColorVariance = float(row[2])
            stddevColorVariance = float(row[3])
            meanMotion = float(row[4])
            stddevMotion = float(row[5])
            meanLightingKey = float(row[6])
            numShots = float(row[7])
            mes[movieID] = [avgShotLength, meanColorVariance, stddevColorVariance,
               meanMotion, stddevMotion, meanLightingKey, numShots]
        return mes

    def getMovieName(self, movieID):
        """Return the title for ``movieID``, or ``""`` if it is unknown."""
        return self.movieID_to_name.get(movieID, "")

    def getMovieID(self, movieName):
        """Return the id for the exact title ``movieName``, or ``0`` if it is unknown."""
        return self.name_to_movieID.get(movieName, 0)



In [None]:
# Pick an arbitrary test subject: the userId whose ratings are inspected
# and for whom recommendations are generated in the cells below.
testSubject = 500

In [None]:

# Helper object used to load the ratings and to look up movie titles by ID
ml = MovieLens()

# LOADING MOVIE RATINGS

In [None]:
# Load the ratings file into a Surprise dataset; this call also fills the
# movie ID <-> title lookup tables on `ml`.
data = ml.loadMovieLensLatestSmall()

# GETTING THE VIEWER'S RATINGS

In [None]:
# Fetch the test subject's (movieID, rating) pairs, then pick out the ones
# rated above 4.0 ("loved") and the ones rated below 3.0 ("hated").
userRatings = ml.getUserRatings(testSubject)
loved = [entry for entry in userRatings if float(entry[1]) > 4.0]
hated = [entry for entry in userRatings if float(entry[1]) < 3.0]

# MOVIES THE VIEWER LOVED AND THE ONES THE VIEWER DID NOT LIKE

In [None]:
# Print the titles behind the test subject's high and low ratings
print("\nUser ", testSubject, " These Movies were rated well")
for lovedMovieID, _lovedRating in loved:
    print(ml.getMovieName(lovedMovieID))
print("\n...and didn't like these movies:")
for hatedMovieID, _hatedRating in hated:
    print(ml.getMovieName(hatedMovieID))


User  500  These Movies were rated well
Shawshank Redemption, The (1994)
Forrest Gump (1994)
Breakfast Club, The (1985)
Secret of NIMH, The (1982)
Pretty in Pink (1986)
Life Is Beautiful (La Vita Ã¨ bella) (1997)
Cruel Intentions (1999)
10 Things I Hate About You (1999)
Chocolat (2000)
Amelie (Fabuleux destin d'AmÃ©lie Poulain, Le) (2001)
Love Actually (2003)
Pride and Prejudice (1995)
Wedding Date, The (2005)
Kiss Kiss Bang Bang (2005)
Accepted (2006)
Meet the Robinsons (2007)
How the Grinch Stole Christmas! (1966)
Stardust (2007)
Juno (2007)
Penelope (2006)
Nick and Norah's Infinite Playlist (2008)
Angus, Thongs and Perfect Snogging (2008)
Boy in the Striped Pajamas, The (Boy in the Striped Pyjamas, The) (2008)

...and didn't like these movies:
Toy Story (1995)
Jumanji (1995)
Babe (1995)
Clueless (1995)
Pocahontas (1995)
Casper (1995)
Star Wars: Episode IV - A New Hope (1977)
Star Trek: Generations (1994)
Ace Ventura: Pet Detective (1994)
Lion King, The (1994)
Mask, The (1994)
Juras

# BUILDING RECOMMENDATION MODEL

In [None]:
print("\nBuilding recommendation model...")
# Build the training set from the whole loaded dataset (no hold-out split is made here)
trainSet = data.build_full_trainset()


Building recommendation model...


# MODEL FITTING

In [None]:
# Training our model: an SVD model fitted on the full training set.
# A fixed random_state makes the fit, and therefore the recommendations
# printed below, reproducible across Restart & Run All.
algo = SVD(random_state=42)
algo.fit(trainSet)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f5b6e8e8b50>

# CREATING RECOMMENDATIONS

In [None]:
print("Computing recommendations...")
# Candidates: every item in the training set that the test subject has not rated
testSet = BuildAntiTestSetForUser(testSubject, trainSet)
# Ask the fitted model for an estimated rating on each candidate
predictions = algo.test(testSet)

Computing recommendations...


In [None]:
# Holds (movieID, estimatedRating) pairs for the test subject
recommendations = []

# SYSTEM RECOMMENDATIONS

In [None]:
print("\nWe recommend:")

# Build the list fresh on every run: appending to a list created elsewhere
# would add a duplicate set of entries each time this cell is re-executed.
recommendations = [
    (int(movieID), estimatedRating)
    for userID, movieID, actualRating, estimatedRating, details in predictions
]

# Highest estimated rating first
recommendations.sort(key=lambda x: x[1], reverse=True)

# Show the titles of the ten best-scoring unseen movies
for recommendedID, _estimate in recommendations[:10]:
    print(ml.getMovieName(recommendedID))


We recommend:
Roger & Me (1989)
Indiana Jones and the Last Crusade (1989)
Raging Bull (1980)
Seven Samurai (Shichinin no samurai) (1954)
Monsters, Inc. (2001)
Dark Knight, The (2008)
12 Angry Men (1957)
Wolf of Wall Street, The (2013)
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
Happiness (1998)
