In [1]:
from bs4 import BeautifulSoup as bs
from functools import reduce
from pandas.io.json import json_normalize
import json
import matplotlib as mpl
import pandas as pd
import re
import requests
import seaborn as sns
import unicodedata as uni
import os

from IPython.display import display
# Lift the column limit so wide DataFrames are displayed in full.
pd.set_option('display.max_columns', None)

In [2]:
def read_json_to_dict(filename, path_to_file = None, encoding = 'utf-8'):
    """Read a JSON file and return its parsed content.

    Parameters
    ----------
    filename : str
        Name of the JSON file to read.
    path_to_file : str, optional
        Directory containing ``filename``. ``None`` or an empty string
        means the current working directory.
    encoding : str, optional
        Text encoding used to open the file (default ``'utf-8'``).

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file (a ``dict`` when the
        top-level JSON value is an object).

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. ``FileNotFoundError``).
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # Fall back to the working directory when no directory is given. An empty
    # string is treated like None so the path never resolves to the root.
    if not path_to_file:
        path_to_file = os.getcwd()

    # os.path.join inserts exactly one separator, whether or not the directory
    # already ends with one, and uses the platform's own separator.
    full_file_path = os.path.join(path_to_file, filename)

    with open(full_file_path, 'r', encoding = encoding) as file:
        data = json.load(file)

    return data
        

In [3]:
# Load linneburg.json from the movie_jsons directory (resolved relative to the
# working directory).
linneburg_ratings = read_json_to_dict(filename='linneburg.json', path_to_file='movie_jsons')

In [4]:
def flatten_crew_dict(movie_dict):
    """Flatten the crew section of a movie record into fixed columns.

    For each wanted role a fixed number of name slots is produced
    (``director_0``, ``director_1``, ``producer_0`` .. ``producer_2``,
    ``writer_0``, ``composer_0``). Slots without a matching crew member are
    set to ``None``, including every slot of a role that does not appear in
    the crew list at all, so all movies yield the same set of keys.

    Parameters
    ----------
    movie_dict : dict
        Movie record with a ``'crew'`` dict holding ``'crew_list'``,
        ``'total_crew'`` and ``'total_unique_crew'``. Each crew-list entry is
        read through its ``'crew_role'`` and ``'crew_attributes'`` keys, and
        each attribute through its ``'name'`` key.

    Returns
    -------
    dict
        The role slots in the order above, followed by ``'total_crew_size'``
        and ``'unique_crew_size'``.

    Raises
    ------
    KeyError
        If ``movie_dict`` lacks the crew keys listed above, or a matching
        crew entry lacks ``'crew_attributes'`` / ``'name'``.
    """
    # A missing/None crew list is treated as "no crew" rather than an error.
    crew_list = movie_dict['crew']['crew_list'] or []
    # Role name -> number of name slots to emit for that role.
    crew_wanted_dict = {'director':2, 'producer':3, 'writer':1, 'composer':1}
    flat_crew_dict = {}

    for role, nbr in crew_wanted_dict.items():
        # First crew-list entry for this role, or None when the role is absent.
        role_entry = next(
            (entry for entry in crew_list
             if isinstance(entry, dict) and entry.get('crew_role') == role),
            None,
        )
        members = role_entry['crew_attributes'] if role_entry is not None else []

        for i in range(nbr):
            # Pad with None so the key set does not depend on the movie.
            if i < len(members):
                flat_crew_dict[role + '_' + str(i)] = members[i]['name']
            else:
                flat_crew_dict[role + '_' + str(i)] = None

    flat_crew_dict['total_crew_size'] = movie_dict['crew']['total_crew']
    flat_crew_dict['unique_crew_size'] = movie_dict['crew']['total_unique_crew']

    return flat_crew_dict

In [5]:
def flatten_cast_dict(movie_dict, cast_to_retrieve = 5):
    """Flatten the cast section of a movie record into fixed columns.

    Always produces ``actor_0`` .. ``actor_{cast_to_retrieve - 1}``; slots
    beyond the available cast are ``None`` so every movie yields the same
    set of keys.

    Parameters
    ----------
    movie_dict : dict
        Movie record with a ``'cast'`` dict holding ``'cast_list'`` (entries
        read through their ``'actor_name'`` key) and ``'total_cast'``.
    cast_to_retrieve : int, optional
        Number of actor slots to emit (default 5).

    Returns
    -------
    dict
        The actor slots followed by ``'total_cast_size'``.

    Raises
    ------
    KeyError
        If ``movie_dict`` lacks ``'cast'``, ``'cast_list'`` or ``'total_cast'``.
    """
    cast_list = movie_dict['cast']['cast_list']
    total_cast = movie_dict['cast']['total_cast']
    flat_cast_dict = {}

    # Only the first `available` entries are read from the cast list.
    available = min(cast_to_retrieve, total_cast)

    for i in range(cast_to_retrieve):
        actor_name = None
        if i < available:
            try:
                actor_name = cast_list[i]['actor_name']
            except (IndexError, KeyError, TypeError):
                # Short list or malformed entry: leave the slot empty.
                actor_name = None
        flat_cast_dict['actor_'+str(i)] = actor_name

    flat_cast_dict['total_cast_size'] = total_cast
    return flat_cast_dict

In [6]:
def flatten_genre_list(movie_dict):
    """Spread a movie's genres over three fixed keys.

    Reads ``movie_dict['genres']`` and returns a dict with the keys
    ``genre_0``, ``genre_1`` and ``genre_2``. Positions past the end of the
    genre sequence are filled with ``None``; genres beyond the third are
    ignored.
    """
    genres = movie_dict['genres']

    def genre_at(position):
        # Missing positions become None instead of raising.
        try:
            return genres[position]
        except IndexError:
            return None

    return {'genre_' + str(slot): genre_at(slot) for slot in range(3)}

In [7]:
def flatten_ratings_list(movie_dict):
    """Turn a movie's rating histogram into one key per rating value.

    Each entry of ``movie_dict['rating_counts']`` contributes a key
    ``'<rating>_stars'`` whose value is that entry's ``'number_of_ratings'``.
    A final ``'total'`` key holds the sum of all stored counts.
    """
    counts_by_label = {}

    for bucket in movie_dict['rating_counts']:
        votes = bucket['number_of_ratings']
        counts_by_label['{}_stars'.format(bucket['rating'])] = votes

    # Summed before 'total' itself is inserted, so it only covers the buckets.
    counts_by_label['total'] = sum(counts_by_label.values())
    return counts_by_label

In [8]:
def main(movie_list):
    """Flatten every movie record in ``movie_list`` into a single-level dict.

    For each movie the result holds, in this order: ``title``, the genre
    keys, ``avg_rating``, the rating-count keys, ``user_rating``, the crew
    keys and the cast keys. Returns the list of flattened dicts, one per
    input movie, in input order.
    """
    flattened_movies = []

    for movie in movie_list:
        # Keys are inserted in the order they should appear as columns.
        row = {'title': movie['title']}
        row.update(flatten_genre_list(movie))
        row['avg_rating'] = movie['avg_rating']
        row.update(flatten_ratings_list(movie))
        row['user_rating'] = movie['user_rating']
        row.update(flatten_crew_dict(movie))
        row.update(flatten_cast_dict(movie))

        flattened_movies.append(row)

    return flattened_movies

In [17]:
# One flattened record (dict) per rated movie.
flat_dict = main(linneburg_ratings['movies_rated'])
# The records are already single-level, so the DataFrame constructor builds the
# table directly without the deprecated pandas.io.json.json_normalize entry
# point. reindex pins the column order to the key order of the first record.
df = pd.DataFrame(flat_dict).reindex(columns=list(flat_dict[0].keys()))
df

Unnamed: 0,title,genre_0,genre_1,genre_2,avg_rating,0.5_stars,1.0_stars,1.5_stars,2.0_stars,2.5_stars,3.0_stars,3.5_stars,4.0_stars,4.5_stars,5.0_stars,total,user_rating,director_0,director_1,producer_0,producer_1,producer_2,writer_0,composer_0,total_crew_size,unique_crew_size,actor_0,actor_1,actor_2,actor_3,actor_4,total_cast_size
0,Chernobyl,drama,,,4.5,64,66,42,215,260,1231,2336,11318,14205,26662,56399,4.5,Johan Renck,,Jane Featherstone,Carolyn Strauss,Craig Mazin,Craig Mazin,Hildur Guðnadóttir,30,29,Jared Harris,Stellan Skarsgård,Emily Watson,Paul Ritter,Jessie Buckley,18
1,The Empire Strikes Back,action,science fiction,adventure,4.4,198,365,205,1423,1573,11539,13089,48833,34684,102819,214728,5.0,Irvin Kershner,,George Lucas,Gary Kurtz,Howard G. Kazanjian,George Lucas,John Williams,29,27,Mark Hamill,Harrison Ford,Carrie Fisher,Billy Dee Williams,Anthony Daniels,75
2,The Shawshank Redemption,drama,crime,,4.4,242,363,284,1334,1453,8604,11438,41785,32994,83111,181608,5.0,Frank Darabont,,Niki Marvin,David V. Lester,Liz Glotzer,Stephen King,Thomas Newman,23,22,Tim Robbins,Morgan Freeman,Bob Gunton,Clancy Brown,Mark Rolston,44
3,The Dark Knight,crime,drama,thriller,4.4,359,756,450,2656,2568,16090,17666,73261,54899,146010,314715,5.0,Christopher Nolan,,Charles Roven,Christopher Nolan,Emma Thomas,Christopher Nolan,Hans Zimmer,36,34,Christian Bale,Michael Caine,Heath Ledger,Gary Oldman,Aaron Eckhart,135
4,The Lord of the Rings: The Return of the King,action,adventure,fantasy,4.3,283,825,373,2482,2102,12350,12124,40369,28211,77537,176656,4.5,Peter Jackson,,Barrie M. Osborne,Peter Jackson,Bob Weinstein,Peter Jackson,Howard Shore,72,69,Elijah Wood,Ian McKellen,Viggo Mortensen,Sean Astin,Liv Tyler,79
5,The Lord of the Rings: The Fellowship of the Ring,action,adventure,fantasy,4.3,405,1095,475,3123,2449,15454,15397,54257,33041,83171,208867,5.0,Peter Jackson,,Barrie M. Osborne,Peter Jackson,Bob Weinstein,Peter Jackson,Howard Shore,73,70,Elijah Wood,Ian McKellen,Viggo Mortensen,Sean Astin,Liv Tyler,80
6,Star Wars,adventure,action,science fiction,4.3,273,558,310,2205,2501,17671,20814,68227,36510,82569,231638,4.5,George Lucas,,George Lucas,Gary Kurtz,Robert Watts,George Lucas,John Williams,28,26,Mark Hamill,Harrison Ford,Carrie Fisher,Peter Cushing,Alec Guinness,107
7,The Lion King,family,animation,drama,4.3,197,471,359,1994,2253,14307,15835,48711,25117,66009,175253,4.5,Roger Allers,Rob Minkoff,Don Hahn,Sarah McArthur,Thomas Schumacher,Joe Ranft,Hans Zimmer,40,38,Rowan Atkinson,Matthew Broderick,Niketa Calame,Jim Cummings,Whoopi Goldberg,29
8,The Lord of the Rings: The Two Towers,fantasy,adventure,action,4.3,301,765,431,2588,2294,13205,13994,45462,28775,62415,170230,4.5,Peter Jackson,,Barrie M. Osborne,Peter Jackson,Bob Weinstein,Peter Jackson,Howard Shore,76,73,Elijah Wood,Ian McKellen,Viggo Mortensen,Sean Astin,Liv Tyler,79
9,Back to the Future,comedy,adventure,science fiction,4.2,139,418,297,2113,2270,16876,18771,57241,27237,69349,194711,4.5,Robert Zemeckis,,Steven Spielberg,Kathleen Kennedy,Frank Marshall,Robert Zemeckis,Alan Silvestri,27,25,Michael J. Fox,Christopher Lloyd,Lea Thompson,Crispin Glover,Thomas F. Wilson,51
