# Machine Learning: Movie Recommender System 
## Benjamin Shelton and Garrett Morris

### *Import Statements*


In [1]:
import csv
import numpy as np

### *Basic Housekeeping*
**NUM_USERS** should be obvious

**learning_rate** is how quickly our model learns the genres each user likes

**genre_key** is used to reference which index in the other lists containted in the dictionaries is which genre

**csv stuff** opens the excel files we're going to use-the encoding for the csv files has a line of garbage as the first line so that's why we pop(0)

In [2]:
NUM_USERS = 610
learning_rate = 0.3

genreKey = ['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 
'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western', 'IMAX', '(no genres listed)']

mFile = open('./ml-latest-small/movies.csv', encoding = 'utf-8')
movieFile = list(csv.reader(mFile, delimiter = ','))
movieFile.pop(0)

rFile = open('./ml-latest-small/ratings.csv')
ratingsFile = list(csv.reader(rFile, delimiter = ','))
ratingsFile.pop(0)

['userId', 'movieId', 'rating', 'timestamp']

### *Dictionaries*
**movies**: Key = the movie number and the value is the name of the movie

**genres**: Key = the movie number and each movie number has a list of the genres that describe it.

**ratings**: Key = user ID (in this case 1-610) and the value is a tuple holding the movie # and the rating the user gave the movie

**MovieR**: Key is the movie number and the value is the average rating that people gave the movie

**genreX**: Key is the movie number and the value is an array of length *genreKey* where each index is a number from 0-1 (most of them are 0s) of how much of one genre the movie is. So if the movie was Drama and Action then it would be **0.5** Drama and **0.5** Action

**theta**: Key is the user # and the value is an array of length *genreKey* that has each user's genre preference scores (if they really like action then the action value will be the highest) 

In [3]:
#movies[movie#] = name
movies = {}
#genres[movie#] = [nGenres]
genres = {}
#ratings[user#] = (movie#, rating)
ratings = {}
#movieR[movie#] = rating
movieR = {}
#genreX[movie#] = [genre%]
genreX = {}
#theta[user#] = [genrePreferences]
theta = {}

### *Filling Dictionaries*


In [4]:
for line in movieFile:
    movies[line[0]] = line[1]
    genres[line[0]] = line[2].split('|')

for x in range(NUM_USERS):
    ratings[str(x+1)] = []
    theta[str(x+1)] = np.random.uniform(size=len(genreKey))

for line in ratingsFile:
    ratings[line[0]].append((line[1], line[2]))

for movie in movies:
    ratingAvg = 0
    numRatings = 1
    for u in ratings:
        for r in ratings[u]:
            if movie == ratings[u][0]:
                ratingAvg += ratings[u][1]
                numRatings += 1
    movieR[movie] = ratingAvg / numRatings
    if int(movie) % 1000 == 0:
        print(movie + " finished.")
        
print("Done!")

for k in movies:
    genreX[k] = np.zeros(len(genreKey))

counter = 0


for k in genres:
    for genre in genres[k]:
        counter += 1
    for genre in genres[k]:
        genreX[k][genreKey.index(genre)] = 1 / counter
    counter = 0

2000 finished.
3000 finished.
4000 finished.
7000 finished.
31000 finished.
53000 finished.
86000 finished.
Done!


### *Learning Algorithm*
<img src="GradDescent.png" style="width:1325px;height:150px;">


In [5]:
for k in ratings:
    m = len(ratings[k]) - 1
    movieSum = 0

    for x in range(1, (m-1)):
        y = ratings[k][x][1]
        movieSum += ((theta[k].T * genreX[ratings[k][x][0]]) - float(y)) * genreX[ratings[k][x][0]]

    theta[k] = theta[k] - learning_rate * movieSum

### *Interface* 

In [11]:
token = 0

token = input("Please input a user number (1-" + str(NUM_USERS) + "): ")
recommend = input("How many movie recommendations (if possible) would you like?: ")

try:
    favorite = 0
    favoriteIndex = 0
    sFavoriteIndex = 0
    index = 0

    for genre in theta[str(token)]:
        if genre > favorite:
            favorite = genre
            sFavoriteIndex = favoriteIndex
            favoriteIndex = index
        index += 1

    print("User " + str(token) + "'s favorite genre is: " + genreKey[favoriteIndex])

    recommendations = []
    gRatings = []

    for k in movies:
        watched = False
        for m in ratings[str(token)]:
            if m[0] == k:
                watched = True
        
        if (genreKey[favoriteIndex] in genres[k] and genreKey[sFavoriteIndex] in genres[k]) and not watched:
            gRating = np.sum(theta[str(token)] * genreX[k])
            if len(recommendations) < int(recommend):
                recommendations.append(movies[k])
                gRatings.append(gRating)
            else:
                swap = False
                counter = 0
                while(not swap and counter < len(recommendations)):
                    if gRatings[counter] < gRating:
                        recommendations[counter] = movies[k]
                        gRatings[counter] = gRating
                        swap = True
                    counter += 1
                    
    print(recommend + " recommendations for user " + token + ":")
    for movie in recommendations:
        print(movie)

except:
    print("Something went wrong")

Please input a user number (1-610): 3
How many movie recommendations (if possible) would you like?: 10
User 3's favorite genre is: Sci-Fi
10 recommendations for user 3:
Terminator 2: Judgment Day (1991)
Barb Wire (1996)
Star Trek V: The Final Frontier (1989)
Starship Troopers (1997)
King Kong vs. Godzilla (Kingukongu tai Gojira) (1962)
Beneath the Planet of the Apes (1970)
Battle for the Planet of the Apes (1973)
Conquest of the Planet of the Apes (1972)
Escape from the Planet of the Apes (1971)
Wing Commander (1999)
