In [2]:
import pandas as pd
import numpy as np

In [68]:
# Getting the movies data
movies = pd.read_csv("dataset/movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [69]:
# Getting the movies rating by each user
ratings = pd.read_csv("dataset/ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,169,2.5,1204927694
1,1,2471,3.0,1204927438
2,1,48516,5.0,1204927435
3,2,2571,3.5,1436165433
4,2,109487,4.0,1436165496


In [70]:
# Getting the user details
#users = pd.read_csv("dataset/links.csv")
#users.head()

### Data Preprocessing

In [71]:
# Fixing the title and extracting year from it
movies['year'] = movies.title.str.extract("(\d\d\d\d)", expand=False)
movies.head()
movies['title'] = movies.title.str.replace('(\(\d\d\d\d\))','')
movies['title'] = movies.title.apply(lambda d: d.strip())
movies.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji,Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men,Comedy|Romance,1995
3,4,Waiting to Exhale,Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II,Comedy,1995


In [72]:
# Splitting the genres text into list of genres
movies['genres'] = movies.genres.str.split('|')
movies.head()

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995
2,3,Grumpier Old Men,"[Comedy, Romance]",1995
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995
4,5,Father of the Bride Part II,[Comedy],1995


In [73]:
# Creating a column for each genres in the movieWithGenreTable

movie_with_genre = movies.copy()
movie_with_genre

for index, row in movie_with_genre.iterrows():
    for genre in row['genres']:
        movie_with_genre.at[index, genre] = 1

movie_with_genre = movie_with_genre.fillna(0)
movie_with_genre.head()

Unnamed: 0,movieId,title,genres,year,Adventure,Animation,Children,Comedy,Fantasy,Romance,...,Horror,Mystery,Sci-Fi,IMAX,Documentary,War,Musical,Western,Film-Noir,(no genres listed)
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]",1995,1.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Jumanji,"[Adventure, Children, Fantasy]",1995,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,Grumpier Old Men,"[Comedy, Romance]",1995,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,Waiting to Exhale,"[Comedy, Drama, Romance]",1995,0.0,0.0,0.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,Father of the Bride Part II,[Comedy],1995,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [74]:
# Removing the timestamp column as it will not be useful in a simple recommendation system
ratings = ratings.drop("timestamp", axis=1)
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,169,2.5
1,1,2471,3.0
2,1,48516,5.0
3,2,2571,3.5
4,2,109487,4.0


In [75]:
# Lets take a user input

userInput = [
    {"title" : "Toy Story", "rating" : 10},
    {"title" : "Jumanji", 'rating' , 9},
    {'title' : "Paranormal ActivitY: The Marked Ones", 8},
    {'title' : 'Avengers, The', 9},
    {'title' : "Dark Knight Rises, The", 10},
    {'title' : "Insidious Chapter 3", 8},
    {'title' : "Avengers: Age of Ultron", 8},
    {'title' : "G.I. Joe: Retaliation", 9}
]

SyntaxError: invalid syntax (<ipython-input-75-3f3b0577ccfd>, line 5)

In [103]:
movies[movies['year'] == '2013'].head(50)

Unnamed: 0,movieId,title,genres,year
19277,95595,Bela Kiss: Prologue,"[Horror, Mystery, Thriller]",2013
20158,99007,Warm Bodies,"[Comedy, Horror, Romance]",2013
20236,99335,Addicted,[Drama],2013
20339,99721,Texas Chainsaw 3D,"[Horror, Mystery, Thriller]",2013
20342,99728,Gangster Squad,"[Action, Crime, Drama]",2013
20360,99787,"Haunted House, A","[Comedy, Horror]",2013
20369,99813,"Batman: The Dark Knight Returns, Part 2","[Action, Animation]",2013
20399,99910,"Last Stand, The","[Action, Crime, Thriller]",2013
20400,99912,Mama,[Horror],2013
20402,99917,Upstream Color,"[Romance, Sci-Fi, Thriller]",2013
