In [1]:
# Importing Potentially Used Basic Dependencies
import pandas as pd
import numpy as np
from pathlib import Path
from collections import Counter

# Import Dependencies for Algorithms
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.tree import ExtraTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
# Locations of the raw CSV inputs, relative to the notebook's working directory.
# NOTE(review): the names and the files are crossed -- `tags_file_path` points at
# ratings.csv and `ratings_file_path` points at tags.csv, so `tags_df` holds the
# rating rows and `ratings_df` holds the tag rows. The bindings are deliberately
# left as they were because later cells are written against them; confirm the
# intent before renaming anything.
tags_file_path = 'Data/ratings.csv'
ratings_file_path = 'Data/tags.csv'
movies_file_path = 'Data/movies.csv'

# Read the three files in the same order as before, one frame per path.
tags_df, ratings_df, movies_df = (
    pd.read_csv(csv_path)
    for csv_path in (tags_file_path, ratings_file_path, movies_file_path)
)

In [3]:
# Preview the first rows of tags_df.
# NOTE(review): tags_df is read from 'Data/ratings.csv' in the load cell, so
# the frame shown here carries the `rating` column, not tag text.
tags_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,307,3.5,1256677221
1,1,481,3.5,1256677456
2,1,1091,1.5,1256677471
3,1,1257,4.5,1256677460
4,1,1449,4.5,1256677264


In [4]:
# Preview the first rows of ratings_df.
# NOTE(review): ratings_df is read from 'Data/tags.csv' in the load cell, so
# the frame shown here carries the `tag` column, not numeric ratings.
ratings_df.head()

Unnamed: 0,userId,movieId,tag,timestamp
0,14,110,epic,1443148538
1,14,110,Medieval,1443148532
2,14,260,sci-fi,1442169410
3,14,260,space action,1442169421
4,14,318,imdb top 250,1442615195


In [5]:
# Key the movie metadata by movieId; the join in a later cell matches on it.
# The guard keeps this cell idempotent: once movieId has become the index it
# is no longer a column, so an unguarded second set_index call (for example
# when the cell is re-run) would raise KeyError.
if movies_df.index.name != 'movieId':
    movies_df = movies_df.set_index('movieId')
movies_df.head()

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy


In [6]:
# Drop the per-user columns and key the frame by movieId.
# NOTE(review): despite its name, tags_df is read from ratings.csv in the load
# cell, so this frame carries the `rating` column.
tags = tags_df.drop(columns=['userId', 'timestamp']).set_index('movieId')

# Work on the first 50,000 rows only.
# NOTE(review): presumably a cap to keep the later join manageable -- confirm.
tag_df = tags.iloc[:50000]

# A bare `tag_df.count()` used to sit here; its result was discarded because
# only a cell's last expression is displayed, so it has been removed.
tag_df.head()

Unnamed: 0_level_0,rating
movieId,Unnamed: 1_level_1
307,3.5
481,3.5
1091,1.5
1257,4.5
1449,4.5


In [7]:
# Drop the per-user columns and key the frame by movieId in one chain.
# NOTE(review): despite its name, ratings_df is read from tags.csv in the load
# cell, so this frame carries the `tag` column.
ratings = (
    ratings_df
    .drop(columns=['userId', 'timestamp'])
    .set_index(["movieId"])
)

# Keep only the first 50,000 rows, then show the non-null count per column.
rating_df = ratings.iloc[:50000]
rating_df.count()

tag    50000
dtype: int64

In [8]:
# Combine the two 50,000-row slices on movieId.
# how="right" keeps every row of tag_df; rows whose movieId has no match in
# rating_df come through with NaN in the columns taken from rating_df.
# NOTE(review): both frames can hold several rows per movieId, so this looks
# like a many-to-many join (one output row per pairing) -- confirm that the
# resulting row blow-up is intended.
# NOTE(review): per the load cell, rating_df holds tag text and tag_df holds
# the numeric ratings.
df = rating_df.join(tag_df, on="movieId", how="right")
df.head()

Unnamed: 0,movieId,tag,rating
307.0,307,family,3.5
307.0,307,reflective,3.5
,481,,3.5
1091.0,1091,Quirky and funny.,1.5
1091.0,1091,catastrophe,1.5


In [9]:
# Attach title/genres to every joined row, then clean up in a single chain:
#   1. join movie metadata on movieId (result sorted by the join key),
#   2. move the old index into an "index" column,
#   3. drop every row that has a NaN in any column,
#   4. discard the temporary "index" column again.
movies = (
    df.join(movies_df, on="movieId", sort=True)
    .reset_index()
    .dropna()
    .drop(columns="index")
)

Unnamed: 0,movieId,tag,rating,title,genres
1143141,189363,women,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143142,189363,action,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143143,189363,all-female,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143144,189363,all-star cast,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143145,189363,anne hathaway,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143146,189363,Cate Blanchett,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143147,189363,comedy,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143148,189363,crime,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143149,189363,franchise,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller
1143150,189363,heist,3.0,Ocean's 8 (2018),Action|Comedy|Crime|Thriller


In [10]:
# Number of distinct movieId values in the joined frame.
# dropna=False counts NaN as a value, exactly as len(unique()) does.
movies["movieId"].nunique(dropna=False)

3513

In [11]:
# Average rating per movie, returned as a flat frame with movieId and rating.
# `rating` is selected explicitly because `movies` also carries text columns
# (tag, title, genres): on pandas >= 2.0 GroupBy.mean() raises TypeError for
# those instead of silently dropping them, and late 1.x releases warn.
film_ratings = movies.groupby("movieId")["rating"].mean().reset_index()
film_ratings.head()

Unnamed: 0,movieId,rating
0,1,3.896552
1,2,3.119565
2,4,4.333333
3,6,3.764706
4,7,2.934783


In [17]:
# Group the joined rows by the full pipe-separated genre string, so each
# distinct combination (e.g. "Comedy|Romance") is its own group.
genres = movies.groupby("genres")
# GroupBy.head() returns the first 5 rows of *each* group, not 5 rows in
# total, so the frame displayed here spans the whole index range.
genres.head()

Unnamed: 0,movieId,tag,rating,title,genres
0,1,animated,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,buddy movie,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,1,Cartoon,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,1,cgi,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,1,comedy,4.0,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...,...
1143095,187595,cinematography,3.5,Solo: A Star Wars Story (2018),Action|Adventure|Children|Sci-Fi
1143096,187595,Donald Glover,3.5,Solo: A Star Wars Story (2018),Action|Adventure|Children|Sci-Fi
1143097,187595,Emilia Clarke,3.5,Solo: A Star Wars Story (2018),Action|Adventure|Children|Sci-Fi
1143098,187595,prequel,3.5,Solo: A Star Wars Story (2018),Action|Adventure|Children|Sci-Fi
