# Project 2: Data Import - Working with Web APIs and JSON (Movies Dataset)

## Importing Data from JSON files 

In [1]:
import pandas as pd
import json

In [17]:
# Open the file and parse it with json.load; the result is plain Python objects
# (for this file: a list with one dict per movie).
# JSON text is UTF-8, so pin the encoding instead of relying on the platform
# default, which is not UTF-8 everywhere (e.g. on Windows).
with open("blockbusters.json", encoding="utf-8") as f:
    data = json.load(f)

In [3]:
## view the data
data

[{'title': 'Avengers: Endgame',
  'id': 299534,
  'revenue': 2797800564,
  'genres': [{'id': 12, 'name': 'Adventure'},
   {'id': 878, 'name': 'Science Fiction'},
   {'id': 28, 'name': 'Action'}],
  'belongs_to_collection': {'id': 86311,
   'name': 'The Avengers Collection',
   'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg',
   'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'},
  'runtime': 181},
 {'title': 'Avatar',
  'id': 19995,
  'revenue': 2787965087,
  'genres': [{'id': 28, 'name': 'Action'},
   {'id': 12, 'name': 'Adventure'},
   {'id': 14, 'name': 'Fantasy'},
   {'id': 878, 'name': 'Science Fiction'}],
  'belongs_to_collection': {'id': 87096,
   'name': 'Avatar Collection',
   'poster_path': '/nslJVsO58Etqkk17oXMuVK4gNOF.jpg',
   'backdrop_path': '/8nCr9W7sKus2q9PLbYsnT7iCkuT.jpg'},
  'runtime': 162},
 {'title': 'Star Wars: The Force Awakens',
  'id': 140607,
  'revenue': 2068223624,
  'genres': [{'id': 28, 'name': 'Action'},
   {'id': 12, 'name': 'Adventure'},
   {'id': 8

In [4]:
## and the type
type(data)

list

In [5]:
# and the length
len(data)

18

In [6]:
# okay, what's the first look like
data[0]

{'title': 'Avengers: Endgame',
 'id': 299534,
 'revenue': 2797800564,
 'genres': [{'id': 12, 'name': 'Adventure'},
  {'id': 878, 'name': 'Science Fiction'},
  {'id': 28, 'name': 'Action'}],
 'belongs_to_collection': {'id': 86311,
  'name': 'The Avengers Collection',
  'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg',
  'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'},
 'runtime': 181}

In [7]:
# data is a list of dicts (one per movie), which pd.DataFrame accepts directly:
# each dict becomes a row and its keys become the columns.
# Nested values (genres, belongs_to_collection) stay as Python objects inside their cells.
df = pd.DataFrame(data)
df

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
4,Titanic,597,1845034188,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
5,Jurassic World,135397,1671713208,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 328, 'name': 'Jurassic Park Collection'...",124
6,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",,118
7,The Avengers,24428,1519557910,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
8,Furious 7,168259,1506249360,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
9,Avengers: Age of Ultron,99861,1405403694,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 86311, 'name': 'The Avengers Collection...",141


In [8]:
# read_json opens and parses the file in one step, so no separate json.load is needed.
# orient describes the file layout: "records" is the documented name for a list of
# {column: value} objects, which is what this file contains ("record" is not a
# documented orient value). A file stored in another layout (e.g. split) needs the
# matching orient instead.
# Nested fields (genres, belongs_to_collection) are still not flattened by read_json.
df = pd.read_json("blockbusters.json", orient = "records")
df

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
4,Titanic,597,1845034188,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
5,Jurassic World,135397,1671713208,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 328, 'name': 'Jurassic Park Collection'...",124
6,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",,118
7,The Avengers,24428,1519557910,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
8,Furious 7,168259,1506249360,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
9,Avengers: Age of Ultron,99861,1405403694,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 86311, 'name': 'The Avengers Collection...",141


In [9]:
# what are the columns and their non-null types
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 6 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   title                  18 non-null     object
 1   id                     18 non-null     int64 
 2   revenue                18 non-null     int64 
 3   genres                 18 non-null     object
 4   belongs_to_collection  15 non-null     object
 5   runtime                18 non-null     int64 
dtypes: int64(3), object(3)
memory usage: 992.0+ bytes


In [10]:
# looks at the genres
df.genres[0]

[{'id': 12, 'name': 'Adventure'},
 {'id': 878, 'name': 'Science Fiction'},
 {'id': 28, 'name': 'Action'}]

In [11]:
# what does a single entry of the belongs_to_collection column look like?
df.belongs_to_collection[0]

{'id': 86311,
 'name': 'The Avengers Collection',
 'poster_path': '/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg',
 'backdrop_path': '/zuW6fOiusv4X9nnW3paHGfXcSll.jpg'}

In [13]:
# json_normalize flattens nested dicts into their own columns, joining parent and child
# keys with sep (belongs_to_collection -> belongs_to_collection_id, belongs_to_collection_name, ...).
# It is applied to the list of records here; per the original note it cannot simply be
# used on column-oriented JSON.
pd.json_normalize(data = data, sep = "_")


Unnamed: 0,title,id,revenue,genres,runtime,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path,belongs_to_collection
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",181,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",162,87096.0,Avatar Collection,/nslJVsO58Etqkk17oXMuVK4gNOF.jpg,/8nCr9W7sKus2q9PLbYsnT7iCkuT.jpg,
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",136,10.0,Star Wars Collection,/iTQHKziZy9pAAY4hHEDCGPaOvFC.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg,
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",149,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
4,Titanic,597,1845034188,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",194,,,,,
5,Jurassic World,135397,1671713208,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",124,328.0,Jurassic Park Collection,/qIm2nHXLpBBdMxi8dvfrnDkBUDh.jpg,/pJjIH9QN0OkHFV9eue6XfRVnPkr.jpg,
6,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",118,,,,,
7,The Avengers,24428,1519557910,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",143,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
8,Furious 7,168259,1506249360,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",137,9485.0,The Fast and the Furious Collection,/uv63yAGg1zETAs1XQsOQpava87l.jpg,/z5A5W3WYJc3UVEWljSGwdjDgQ0j.jpg,
9,Avengers: Age of Ultron,99861,1405403694,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",141,86311.0,The Avengers Collection,/yFSIUVTCvgYrpalUktulvk3Gi5Y.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,


In [14]:
## didn't work for genres
pd.json_normalize(data = data, sep = "_").genres[0]

[{'id': 12, 'name': 'Adventure'},
 {'id': 878, 'name': 'Science Fiction'},
 {'id': 28, 'name': 'Action'}]

In [16]:
## record_path goes one level deeper: every entry of each movie's "genres" list becomes a row
## meta copies the listed parent fields ("title", "id") onto each of those rows
## both levels have an "id" key, so record_prefix renames the genre fields to genre_id / genre_name
pd.json_normalize(
    data,
    record_path = "genres",
    meta = ["title", "id"],
    record_prefix = "genre_",
)

Unnamed: 0,genre_id,genre_name,title,id
0,12,Adventure,Avengers: Endgame,299534
1,878,Science Fiction,Avengers: Endgame,299534
2,28,Action,Avengers: Endgame,299534
3,28,Action,Avatar,19995
4,12,Adventure,Avatar,19995
5,14,Fantasy,Avatar,19995
6,878,Science Fiction,Avatar,19995
7,28,Action,Star Wars: The Force Awakens,140607
8,12,Adventure,Star Wars: The Force Awakens,140607
9,878,Science Fiction,Star Wars: The Force Awakens,140607


In [None]:
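# "records", "columns" and "split" are three of the orient values (JSON layouts) pandas supports
# records: a list of row dicts          -> [{column: value, ...}, ...]
# columns: a dict keyed by column name  -> {column: {index: value, ...}, ...}
# split:   separate keys for the parts  -> {"index": [...], "columns": [...], "data": [...]}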

## Working with APIs and JSON (Part 1)

In [18]:
import os
import warnings

# The TMDB key is a secret and must not be committed with the notebook: read it from
# the TMDB_API_KEY environment variable (set it before starting the kernel).
# NOTE(review): the key that used to be hardcoded here has been published with the
# notebook and should be revoked/rotated.
_tmdb_key = os.environ.get("TMDB_API_KEY", "")
if not _tmdb_key:
    # Warn instead of raising so the notebook still loads; requests will carry an empty key.
    warnings.warn("TMDB_API_KEY is not set; requests will be sent without a valid API key.")

# Kept in ready-to-append query-string form ("api_key=<key>") because the URL-building
# cells concatenate it directly onto the endpoint.
api_key  = "api_key=" + _tmdb_key

In [None]:
#example: assume your personal api-key is "123abc"
#api_key  = "api_key=123abc"
#api_key

In [19]:
import pandas as pd
import requests
pd.options.display.max_columns = 30

In [20]:
movie_id = 140607

In [21]:
movie_api = "https://api.themoviedb.org/3/movie/{}?"
movie_api

'https://api.themoviedb.org/3/movie/{}?'

In [22]:
# Fill the movie id into the endpoint template and append the api_key query parameter.
url = movie_api.format(movie_id) + api_key
# The full URL embeds the API key and cell output is saved with the notebook,
# so display it with the key masked rather than echoing `url` itself.
url.replace(api_key, "api_key=***")

'https://api.themoviedb.org/3/movie/140607?api_key=***'

In [23]:
# requests has no default timeout: without one, an unresponsive server would leave
# this cell waiting indefinitely.
r = requests.get(url, timeout = 10)
r

<Response [200]>

In [24]:
data = r.json()

In [25]:
data

{'adult': False,
 'backdrop_path': '/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg',
 'belongs_to_collection': {'id': 10,
  'name': 'Star Wars Collection',
  'poster_path': '/r8Ph5MYXL04Qzu4QBbq2KjqwtkQ.jpg',
  'backdrop_path': '/d8duYyyC9J5T825Hg7grmaabfxQ.jpg'},
 'budget': 245000000,
 'genres': [{'id': 28, 'name': 'Action'},
  {'id': 12, 'name': 'Adventure'},
  {'id': 878, 'name': 'Science Fiction'},
  {'id': 14, 'name': 'Fantasy'}],
 'homepage': 'http://www.starwars.com/films/star-wars-episode-vii',
 'id': 140607,
 'imdb_id': 'tt2488496',
 'original_language': 'en',
 'original_title': 'Star Wars: The Force Awakens',
 'overview': 'Thirty years after defeating the Galactic Empire, Han Solo and his allies face a new threat from the evil Kylo Ren and his army of Stormtroopers.',
 'popularity': 31.055,
 'poster_path': '/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg',
 'production_companies': [{'id': 1634,
   'logo_path': None,
   'name': 'Truenorth Productions',
   'origin_country': 'IS'},
  {'id': 1,
   'logo_path

In [26]:
type(data)

dict

In [None]:
#pd.DataFrame(data)

In [27]:
pd.Series(data)

adult                                                                False
backdrop_path                             /k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg
belongs_to_collection    {'id': 10, 'name': 'Star Wars Collection', 'po...
budget                                                           245000000
genres                   [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
homepage                 http://www.starwars.com/films/star-wars-episod...
id                                                                  140607
imdb_id                                                          tt2488496
original_language                                                       en
original_title                                Star Wars: The Force Awakens
overview                 Thirty years after defeating the Galactic Empi...
popularity                                                          31.055
poster_path                               /wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg
production_companies     

In [30]:
# pd.Series(data) uses the dict keys as index labels; to_frame() turns that Series into a
# single column, and .T transposes it into one row with one column per key.
df = pd.Series(data).to_frame().T
df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,31.055,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,14641


In [31]:
# can also just pass it to normalize - still doesn't work for genre just like before
pd.json_normalize(data, sep = "_")

Unnamed: 0,adult,backdrop_path,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,belongs_to_collection_id,belongs_to_collection_name,belongs_to_collection_poster_path,belongs_to_collection_backdrop_path
0,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,31.055,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,14641,10,Star Wars Collection,/r8Ph5MYXL04Qzu4QBbq2KjqwtkQ.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg


In [32]:
pd.json_normalize(data = data, record_path = "genres", meta = "title")

Unnamed: 0,id,name,title
0,28,Action,Star Wars: The Force Awakens
1,12,Adventure,Star Wars: The Force Awakens
2,878,Science Fiction,Star Wars: The Force Awakens
3,14,Fantasy,Star Wars: The Force Awakens


In [33]:
pd.json_normalize(data = data, record_path = "production_companies", meta = "title")

Unnamed: 0,id,logo_path,name,origin_country,title
0,1634,,Truenorth Productions,IS,Star Wars: The Force Awakens
1,1,/o86DbpburjxrqAzEDhXZcyE8pDb.png,Lucasfilm Ltd.,US,Star Wars: The Force Awakens
2,11461,/p9FoEt5shEKRWRKVIlvFaEmRnun.png,Bad Robot,US,Star Wars: The Force Awakens


## Working with APIs and JSON (Part 2)

In [34]:
import pandas as pd
import requests
pd.options.display.max_columns = 30

In [35]:
discover_api = "https://api.themoviedb.org/3/discover/movie?"

In [36]:
# filters for the discover endpoint: a primary-release-date window plus the page to fetch;
# results are paginated, so change the page number to request a different page
query = "&primary_release_date.gte=2020-01-01&primary_release_date.lte=2020-02-29&page=2"

In [37]:
url = discover_api+api_key+query

In [38]:
# Bound the wait with a timeout (requests has none by default) and fail fast on a
# non-2xx answer instead of parsing an error payload as if it were a page of results.
response = requests.get(url, timeout = 10)
response.raise_for_status()
data = response.json()

In [39]:
data

{'page': 2,
 'total_results': 2838,
 'total_pages': 142,
 'results': [{'popularity': 18.799,
   'vote_count': 350,
   'video': False,
   'poster_path': '/uiMHiHp9eAjJty8rddoUnL9G5fU.jpg',
   'id': 492611,
   'adult': False,
   'backdrop_path': '/lYJDf7oa1r9JvRITN1M2Lzis01D.jpg',
   'original_language': 'en',
   'original_title': 'The Turning',
   'genre_ids': [27, 53],
   'title': 'The Turning',
   'vote_average': 5.9,
   'overview': "A young woman quits her teaching job to be a private tutor (governess) for a wealthy young heiress who witnessed her parent's tragic death. Shortly after arriving, the girl's degenerate brother is sent home from his boarding school. The tutor has some strange, unexplainable experiences in the house and begins to suspect there is more to their story.",
   'release_date': '2020-01-23'},
  {'popularity': 20.427,
   'vote_count': 167,
   'video': False,
   'poster_path': '/spTr0VYyRtl36Lkk6nCnnbFXhus.jpg',
   'id': 466622,
   'adult': False,
   'backdrop_path

In [40]:
pd.DataFrame(data)

Unnamed: 0,page,total_results,total_pages,results
0,2,2838,142,"{'popularity': 18.799, 'vote_count': 350, 'vid..."
1,2,2838,142,"{'popularity': 20.427, 'vote_count': 167, 'vid..."
2,2,2838,142,"{'popularity': 27.593, 'vote_count': 152, 'vid..."
3,2,2838,142,"{'popularity': 17.172, 'vote_count': 1345, 'vi..."
4,2,2838,142,"{'popularity': 20.519, 'vote_count': 4, 'video..."
5,2,2838,142,"{'popularity': 22.776, 'vote_count': 83, 'vide..."
6,2,2838,142,"{'popularity': 18.601, 'vote_count': 1269, 'vi..."
7,2,2838,142,"{'popularity': 18.915, 'vote_count': 570, 'vid..."
8,2,2838,142,"{'popularity': 19.412, 'vote_count': 486, 'vid..."
9,2,2838,142,"{'popularity': 18.529, 'vote_count': 41, 'vide..."


In [41]:
pd.DataFrame(data["results"])

Unnamed: 0,popularity,vote_count,video,poster_path,id,adult,backdrop_path,original_language,original_title,genre_ids,title,vote_average,overview,release_date
0,18.799,350,False,/uiMHiHp9eAjJty8rddoUnL9G5fU.jpg,492611,False,/lYJDf7oa1r9JvRITN1M2Lzis01D.jpg,en,The Turning,"[27, 53]",The Turning,5.9,A young woman quits her teaching job to be a p...,2020-01-23
1,20.427,167,False,/spTr0VYyRtl36Lkk6nCnnbFXhus.jpg,466622,False,/a7685JYpZ6rmgkbFD3UVV5vNuhx.jpg,en,The Rhythm Section,"[28, 53]",The Rhythm Section,5.6,After the death of her family in an airplane c...,2020-01-31
2,27.593,152,False,/gmt3HRRgtsio4nRpaHZVKOTwH8W.jpg,666750,False,/6mKAKhj8POVGqV1GsroS5mGIUe9.jpg,en,Dragonheart: Vengeance,[14],Dragonheart: Vengeance,6.6,"Lukas, a young farmer whose family is killed b...",2020-02-04
3,17.172,1345,False,/4SafxuMKQiw4reBiWKVZJpJn80I.jpg,342470,False,/tcrNJfyNEIqaBR8Ogkgnq5xQJnf.jpg,en,All the Bright Places,"[18, 10749]",All the Bright Places,7.6,Two teens facing personal struggles form a pow...,2020-02-28
4,20.519,4,False,/5aueUTOvA8Y6A4eteDwhgo8ReX8.jpg,653668,False,/n0uefHZH2sX0HxU5Ec1w1tF8n3k.jpg,en,Miss Juneteenth,[18],Miss Juneteenth,3.5,"Turquoise, a former beauty queen turned hardwo...",2020-01-24
5,22.776,83,False,/hPWjid7yMatyIDHvku7lCMN7zSi.jpg,526007,False,/kO651id9IGtMGz7OwT1ZThJ6NP2.jpg,en,The Night Clerk,"[80, 18, 53]",The Night Clerk,5.5,Hotel night clerk Bart Bromley is a highly int...,2020-02-19
6,18.601,1269,False,/maib5VlmEqp5xlN8lptnBSftp2o.jpg,565426,False,/9LsJP9OuIBmBUxZpmVKtUUjF0PA.jpg,en,To All the Boys: P.S. I Still Love You,"[35, 10749]",To All the Boys: P.S. I Still Love You,6.9,Lara Jean and Peter have just taken their roma...,2020-02-12
7,18.915,570,False,/vN7JHlHOT9rHNDU27tfYqhABBj5.jpg,465086,False,/46a1JNGmCSfoimlxtuxeDquuQ37.jpg,en,The Grudge,"[27, 9648]",The Grudge,5.7,After a young mother murders her family in her...,2020-01-02
8,19.412,486,False,/mBBBXseq4k4dI63k06XIrsc02j8.jpg,542224,False,/jEPEVO48hKQB0EUNFQOSv6qtKNW.jpg,en,Gretel & Hansel,"[14, 27, 53]",Gretel & Hansel,6.4,A long time ago in a distant fairy tale countr...,2020-01-30
9,18.529,41,False,/iSwTnNS7TKAS79Sz9LvyqlBxxrU.jpg,547017,False,/97UR8xPpUNqpvx5zr7eyf4YSBCE.jpg,en,Shirley,"[18, 53]",Shirley,6.5,A famous horror writer finds inspiration for h...,2020-01-25


## Importing and Saving the Movies Dataset (Best Practice)

In [42]:
import pandas as pd
import requests
import json
pd.options.display.max_columns = 30

In [43]:
# TMDB ids of the movies to download.
# NOTE(review): 0 does not resolve to a movie (the request for it further down returns 404);
# presumably it is included on purpose to exercise the skip-on-error path of the download loop.
movie_id = [0, 299534, 19995, 140607, 299536, 597, 135397,
            420818, 24428, 168259, 99861, 284054, 12445,
            181808, 330457, 351286, 109445, 321612, 260513]

In [44]:
basic_url = 'https://api.themoviedb.org/3/movie/{}?{}' 

In [45]:
# Download the details of every id in movie_id and collect the JSON payloads.
json_list = []
skipped = []  # (movie id, HTTP status) of each request that did not return 200
for movie in movie_id:
    url = basic_url.format(movie, api_key)
    # requests has no default timeout, so one stalled call would hang the whole loop
    r = requests.get(url, timeout = 10)
    if r.status_code != 200:
        # e.g. an unknown id: remember it instead of dropping it silently
        skipped.append((movie, r.status_code))
        continue
    json_list.append(r.json())
if skipped:
    # report ids and status codes only - the URL itself contains the API key
    print("skipped (id, status):", skipped)
# one row per successfully downloaded movie, one column per top-level JSON key
df = pd.DataFrame(json_list)

In [46]:
requests.get(basic_url.format(0, api_key)).status_code

404

In [47]:
df

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/7RyHsO4yDXtBv1zUU3mTpHeQ0d5.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",356000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",https://www.marvel.com/movies/avengers-endgame,299534,tt4154796,en,Avengers: Endgame,After the devastating events of Avengers: Infi...,50.881,/or06FN3Dka5tukK1e9sl16pB3iy.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2019-04-24,2797800564,181,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Part of the journey is the end.,Avengers: Endgame,False,8.3,13963
1,False,/eS8rJ1KzRNBewx9MduiSHM4kr7S.jpg,"{'id': 87096, 'name': 'Avatar Collection', 'po...",237000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.avatarmovie.com/,19995,tt0499549,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",32.161,/6EiRUJpuoeQPghrs3YNktfnqOVh.jpg,"[{'id': 444, 'logo_path': '/42UPdZl6B2cFXgNUAS...","[{'iso_3166_1': 'US', 'name': 'United States o...",2009-12-10,2787965087,162,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Enter the World of Pandora.,Avatar,False,7.4,21606
2,False,/k6EOrckWFuz7I4z4wiRwz8zsj4H.jpg,"{'id': 10, 'name': 'Star Wars Collection', 'po...",245000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.starwars.com/films/star-wars-episod...,140607,tt2488496,en,Star Wars: The Force Awakens,Thirty years after defeating the Galactic Empi...,31.055,/wqnLdwVXoBjKibFRR5U3y0aDUhs.jpg,"[{'id': 1634, 'logo_path': None, 'name': 'True...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-12-15,2068223624,136,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Every generation has a story.,Star Wars: The Force Awakens,False,7.4,14641
3,False,/lmZFxXgJE3vgrciwuDib0N8CfQo.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",300000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",https://www.marvel.com/movies/avengers-infinit...,299536,tt4154756,en,Avengers: Infinity War,As the Avengers and their allies have continue...,72.819,/7WsyChQLEftFiDOVTGkv3hFpyyt.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2018-04-25,2046239637,149,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,An entire universe. Once and for all.,Avengers: Infinity War,False,8.3,18782
4,False,/6VmFqApQRyZZzmiGOQq2C92jyvH.jpg,,200000000,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,597,tt0120338,en,Titanic,101-year-old Rose DeWitt Bukater tells the sto...,28.933,/9xjZS2rlVxm8SFx8kPC3aIGCOYQ.jpg,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'US', 'name': 'United States o...",1997-11-18,2187463944,194,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Nothing on Earth could come between them.,Titanic,False,7.8,17247
5,False,/xX0IzuFa1Fj06iU2NlOmeMPe7oS.jpg,"{'id': 328, 'name': 'Jurassic Park Collection'...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://www.jurassicworld.com/,135397,tt0369610,en,Jurassic World,Twenty-two years after the events of Jurassic ...,25.211,/2c0ajTi8nvrsYl5Oi1lVi6F0kd2.jpg,"[{'id': 56, 'logo_path': '/cEaxANEisCqeEoRvODv...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-06-06,1671713208,124,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The park is open.,Jurassic World,False,6.6,15742
6,False,/nRXO2SnOA75OsWhNhXstHB8ZmI3.jpg,,260000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",https://movies.disney.com/the-lion-king-2019,420818,tt6105098,en,The Lion King,"Simba idolizes his father, King Mufasa, and ta...",46.944,/dzBtMocZuJbjLOXvrl4zGYigDzh.jpg,"[{'id': 2, 'logo_path': '/wdrCwmRnLFJhEoH8GSfy...","[{'iso_3166_1': 'US', 'name': 'United States o...",2019-07-12,1656943394,118,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,The King has Returned.,The Lion King,False,7.2,6253
7,False,/kwUQFeFXOOpgloMgZaadhzkbTI4.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",http://marvel.com/avengers_movie/,24428,tt0848228,en,The Avengers,When an unexpected enemy emerges and threatens...,31.521,/RYMX2wcKCBAr24UyPD7xwmjaTn.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2012-04-25,1519557910,143,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Some assembly required.,The Avengers,False,7.7,22690
8,False,/ehzI1mVcnHqB58NqPyQwpMqcVoz.jpg,"{'id': 9485, 'name': 'The Fast and the Furious...",190000000,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",http://www.furious7.com/,168259,tt2820852,en,Furious 7,Deckard Shaw seeks revenge against Dominic Tor...,19.295,/d9jZ2bKZw3ptTuxAyVHA6olPAVs.jpg,"[{'id': 87857, 'logo_path': None, 'name': 'Abu...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-04-01,1515047671,137,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Vengeance Hits Home,Furious 7,False,7.3,7608
9,False,/8i6ZDk97Vyh0jHohMG4vFeFuKJh.jpg,"{'id': 86311, 'name': 'The Avengers Collection...",250000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",http://marvel.com/movies/movie/193/avengers_ag...,99861,tt2395427,en,Avengers: Age of Ultron,When Tony Stark tries to jumpstart a dormant p...,34.896,/4ssDuvEDkSArWEdyBl2X5EHvYKU.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2015-04-22,1405403694,141,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,A New Age Has Come.,Avengers: Age of Ultron,False,7.3,16078


In [48]:
## keep only the six columns of interest, then order the movies from highest to lowest revenue
wanted_columns = ["title", "id", "revenue", "genres", "belongs_to_collection", "runtime"]
df = df[wanted_columns].sort_values("revenue", ascending = False)

In [50]:
df

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
4,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
5,Jurassic World,135397,1671713208,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 328, 'name': 'Jurassic Park Collection'...",124
6,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",,118
7,The Avengers,24428,1519557910,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
8,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
13,Frozen II,330457,1450026933,"[{'id': 16, 'name': 'Animation'}, {'id': 10751...","{'id': 386382, 'name': 'Frozen Collection', 'p...",104


In [51]:
df.to_json("movies.json", orient = "records")

In [52]:
# Read the file written by df.to_json back in with the standard json module;
# data is rebound to whatever json.load parses from movies.json.
with open("movies.json") as f:
    data = json.load(f)

In [53]:
pd.json_normalize(data)

Unnamed: 0,title,id,revenue,genres,runtime,belongs_to_collection.id,belongs_to_collection.name,belongs_to_collection.poster_path,belongs_to_collection.backdrop_path,belongs_to_collection
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...",181,86311.0,The Avengers Collection,/tqXiOD5rTyHgabO73Tpw9JDbd88.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",162,87096.0,Avatar Collection,/gC3tW9a45RGOzzSh6wv91pFnmFr.jpg,/8nCr9W7sKus2q9PLbYsnT7iCkuT.jpg,
2,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",194,,,,,
3,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",136,10.0,Star Wars Collection,/r8Ph5MYXL04Qzu4QBbq2KjqwtkQ.jpg,/d8duYyyC9J5T825Hg7grmaabfxQ.jpg,
4,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",149,86311.0,The Avengers Collection,/tqXiOD5rTyHgabO73Tpw9JDbd88.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
5,Jurassic World,135397,1671713208,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",124,328.0,Jurassic Park Collection,/qIm2nHXLpBBdMxi8dvfrnDkBUDh.jpg,/njFixYzIxX8jsn6KMSEtAzi4avi.jpg,
6,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",118,,,,,
7,The Avengers,24428,1519557910,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",143,86311.0,The Avengers Collection,/tqXiOD5rTyHgabO73Tpw9JDbd88.jpg,/zuW6fOiusv4X9nnW3paHGfXcSll.jpg,
8,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...",137,9485.0,The Fast and the Furious Collection,/zQdytnqfsWKJlqazqfMBL2L7aql.jpg,/z5A5W3WYJc3UVEWljSGwdjDgQ0j.jpg,
9,Frozen II,330457,1450026933,"[{'id': 16, 'name': 'Animation'}, {'id': 10751...",104,386382.0,Frozen Collection,/13Op41T3cALJedrKqYPrlc3cIbO.jpg,/6QonAoIN0jhWZZWZGJswSxHzUnU.jpg,


In [54]:
# one row per genre entry of every movie, with the movie's title carried along as meta
pd.json_normalize(data, record_path = "genres", meta = "title")

Unnamed: 0,id,name,title
0,12,Adventure,Avengers: Endgame
1,878,Science Fiction,Avengers: Endgame
2,28,Action,Avengers: Endgame
3,28,Action,Avatar
4,12,Adventure,Avatar
5,14,Fantasy,Avatar
6,878,Science Fiction,Avatar
7,18,Drama,Titanic
8,10749,Romance,Titanic
9,28,Action,Star Wars: The Force Awakens


## Importing and Saving the Movies Dataset (Real World Scenario)

In [55]:
df

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
4,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
2,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
3,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
5,Jurassic World,135397,1671713208,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 328, 'name': 'Jurassic Park Collection'...",124
6,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",,118
7,The Avengers,24428,1519557910,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
8,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
13,Frozen II,330457,1450026933,"[{'id': 16, 'name': 'Animation'}, {'id': 10751...","{'id': 386382, 'name': 'Frozen Collection', 'p...",104


In [56]:
df.to_csv("movies_raw.csv", index = False)

In [57]:
df = pd.read_csv("movies_raw.csv") # saving as CSV is not best practice here: the nested columns come back as plain strings

In [58]:
df

Unnamed: 0,title,id,revenue,genres,belongs_to_collection,runtime
0,Avengers: Endgame,299534,2797800564,"[{'id': 12, 'name': 'Adventure'}, {'id': 878, ...","{'id': 86311, 'name': 'The Avengers Collection...",181
1,Avatar,19995,2787965087,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 87096, 'name': 'Avatar Collection', 'po...",162
2,Titanic,597,2187463944,"[{'id': 18, 'name': 'Drama'}, {'id': 10749, 'n...",,194
3,Star Wars: The Force Awakens,140607,2068223624,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 10, 'name': 'Star Wars Collection', 'po...",136
4,Avengers: Infinity War,299536,2046239637,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...","{'id': 86311, 'name': 'The Avengers Collection...",149
5,Jurassic World,135397,1671713208,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...","{'id': 328, 'name': 'Jurassic Park Collection'...",124
6,The Lion King,420818,1656943394,"[{'id': 12, 'name': 'Adventure'}, {'id': 10751...",,118
7,The Avengers,24428,1519557910,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...","{'id': 86311, 'name': 'The Avengers Collection...",143
8,Furious 7,168259,1515047671,"[{'id': 28, 'name': 'Action'}, {'id': 53, 'nam...","{'id': 9485, 'name': 'The Fast and the Furious...",137
9,Frozen II,330457,1450026933,"[{'id': 16, 'name': 'Animation'}, {'id': 10751...","{'id': 386382, 'name': 'Frozen Collection', 'p...",104


In [59]:
df.genres[0] # this is a string, not a list: CSV stores only text, so the nested genres were written as text and read_csv does not parse them back - this causes issues later on

"[{'id': 12, 'name': 'Adventure'}, {'id': 878, 'name': 'Science Fiction'}, {'id': 28, 'name': 'Action'}]"