In [82]:
#Import Dependencies
import pandas as pd
import numpy as np
import requests
import json
import time
import datetime
from config import tmdb_key
from pprint import pprint

In [79]:
#Read and Display CSV file
flix = pd.read_csv('../Rutgers_DS_Project_2/NetflixOriginals.csv')
flix.head()

Unnamed: 0,Title,Genre,Premiere,Runtime,IMDB Score,Language
0,Enter the Anime,Documentary,"August 5, 2019",58,2.5,English/Japanese
1,Dark Forces,Thriller,"August 21, 2020",81,2.6,Spanish
2,The App,Science fiction/Drama,"December 26, 2019",79,2.6,Italian
3,The Open House,Horror thriller,"January 19, 2018",94,3.2,English
4,Kaali Khuhi,Mystery,"October 30, 2020",90,3.4,Hindi


In [3]:
#Verify whether any values are missing in df
flix.isnull().values.any()

False

In [4]:
#Verify value number in df columns
flix.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 584 entries, 0 to 583
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Title       584 non-null    object 
 1   Genre       584 non-null    object 
 2   Premiere    584 non-null    object 
 3   Runtime     584 non-null    int64  
 4   IMDB Score  584 non-null    float64
 5   Language    584 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 27.5+ KB


## Perform API Calls

In [5]:
#Save config information
url = "https://api.themoviedb.org/3/search/movie?"

In [6]:
#Create Movie ID list
flix_id_list = []
release_date = []
title = []
counter = 0
sets = 1

#Movie list
movies = flix['Title']

#Begin Retrieval
print(f"Beginning Data Retrieval\n"  
f"-----------------------------\n")

#Run loop through movie list 
for movie in movies:
    #Build partial query URL
    query_url = f"{url}api_key={tmdb_key}&query={movie}"
    #Response from API requests
    response = requests.get(query_url).json()
    #Call and creation of list
    try:
        #Add data to list
        title.append(response['results'][0]['title'])
        flix_id_list.append(response['results'][0]['id'])
        release_date.append(response['results'][0]['release_date'])
        #Populate counters
        if counter>50:
            counter=0
            sets+=1
            time.sleep(1)
        else:
            counter+=1
            print(f"Processing Record {counter} of Set {sets} | {movie}")
            time.sleep(1)
    #Should the movie not be found, print exception statement
    except(KeyError, IndexError):
        print(f"Movie not found. Skipping...") 
#Close of loop
print(f"-----------------------------\n"
f"Data Retrieval Complete\n"      
f"-----------------------------\n")

Beginning Data Retrieval
-----------------------------

Processing Record 1 of Set 1 | Enter the Anime
Processing Record 2 of Set 1 | Dark Forces
Processing Record 3 of Set 1 | The App
Processing Record 4 of Set 1 | The Open House
Processing Record 5 of Set 1 | Kaali Khuhi
Processing Record 6 of Set 1 | Drive
Processing Record 7 of Set 1 | Leyla Everlasting
Processing Record 8 of Set 1 | The Last Days of American Crime
Processing Record 9 of Set 1 | Paradox
Processing Record 10 of Set 1 | Sardar Ka Grandson
Processing Record 11 of Set 1 | Searching for Sheela
Processing Record 12 of Set 1 | The Call
Processing Record 13 of Set 1 | Whipped
Processing Record 14 of Set 1 | All Because of You
Processing Record 15 of Set 1 | Mercy
Processing Record 16 of Set 1 | After the Raid
Processing Record 17 of Set 1 | Ghost Stories
Processing Record 18 of Set 1 | The Last Thing He Wanted
Processing Record 19 of Set 1 | What Happened to Mr. Cha?
Processing Record 20 of Set 1 | Death Note
Processing Re

Processing Record 14 of Set 4 | Happy Anniversary
Processing Record 15 of Set 4 | I Am All Girls
Processing Record 16 of Set 4 | Let It Snow
Processing Record 17 of Set 4 | Mascots
Processing Record 18 of Set 4 | Operation Christmas Drop
Processing Record 19 of Set 4 | Rajma Chawal
Processing Record 20 of Set 4 | Rich in Love
Processing Record 21 of Set 4 | Rising High
Processing Record 22 of Set 4 | Rodney King
Processing Record 23 of Set 4 | Sierra Burgess Is a Loser
Processing Record 24 of Set 4 | Small Crimes
Processing Record 25 of Set 4 | Special Correspondents
Processing Record 26 of Set 4 | TAU
Processing Record 27 of Set 4 | The After Party
Processing Record 28 of Set 4 | The Babysitter: Killer Queen
Processing Record 29 of Set 4 | The Claus Family
Processing Record 30 of Set 4 | The Kissing Booth 2
Processing Record 31 of Set 4 | The Perfect Date
Processing Record 32 of Set 4 | What We Wanted
Processing Record 33 of Set 4 | You've Got This
Processing Record 34 of Set 4 | 6 Ba

Processing Record 24 of Set 7 | Oxygen
Processing Record 25 of Set 7 | Set It Up
Processing Record 26 of Set 7 | The Incredible Jessica James
Processing Record 27 of Set 7 | Tigertail
Processing Record 28 of Set 7 | Tramps
Processing Record 29 of Set 7 | What Did Jack Do?
Processing Record 30 of Set 7 | Bad Trip
Processing Record 31 of Set 7 | Bird Box
Processing Record 32 of Set 7 | Bulbbul
Processing Record 33 of Set 7 | Crazy About Her
Processing Record 34 of Set 7 | Elisa & Marcela
Processing Record 35 of Set 7 | I'll Sleep When I'm Dead
Processing Record 36 of Set 7 | I'm Thinking of Ending Things
Processing Record 37 of Set 7 | It Takes a Lunatic
Processing Record 38 of Set 7 | Milestone
Processing Record 39 of Set 7 | Recovery Boys
Processing Record 40 of Set 7 | ReMastered: Who Killed Jam Master Jay?
Processing Record 41 of Set 7 | Shawn Mendes: In Wonder
Processing Record 42 of Set 7 | Space Sweepers
Processing Record 43 of Set 7 | The American Meme
Processing Record 44 of Set

Processing Record 23 of Set 10 | El Camino: A Breaking Bad Movie
Processing Record 24 of Set 10 | Extremis
Processing Record 25 of Set 10 | Father Soldier Son
Processing Record 26 of Set 10 | Get Me Roger Stone
Processing Record 27 of Set 10 | I'm No Longer Here
Processing Record 28 of Set 10 | Mucho Mucho Amor: The Legend of Walter Mercado 
Processing Record 29 of Set 10 | Octonauts & the Great Barrier Reef
Processing Record 30 of Set 10 | Okja
Processing Record 31 of Set 10 | On My Skin
Processing Record 32 of Set 10 | Raat Akeli Hai
Processing Record 33 of Set 10 | ReMastered: Massacre at the Stadium
Processing Record 34 of Set 10 | ReMastered: The Two Killings of Sam Cooke
Processing Record 35 of Set 10 | Secrets of the Saqqara Tomb
Processing Record 36 of Set 10 | Sitara: Let Girls Dream
Processing Record 37 of Set 10 | Sky Ladder: The Art of Cai Guo-Qiang
Processing Record 38 of Set 10 | Team Foxcatcher
Processing Record 39 of Set 10 | The Ballad of Buster Scruggs
Processing Reco

In [27]:
#Converting Raw data to df
pulled_flix_df = pd.DataFrame({
    "Pulled_ID": flix_id_list,
    "Pulled_Movie_Name": title,
    "Pulled_Release_Date": release_date
})

#Show df
pulled_flix_df

#Add empty fields to df
pulled_flix_df['Budget'] = ''
pulled_flix_df['Revenue'] = ''

#Show df
pulled_flix_df

Unnamed: 0,Pulled_ID,Pulled_Movie_Name,Pulled_Release_Date,Budget,Revenue
0,616904,Enter the Anime,2019-08-05,,
1,544087,Dark Forces: Shadow People,2018-05-18,,
2,653522,The App,2019-12-26,,
3,485774,The Open House,2018-01-19,,
4,744876,Kaali Khuhi,2020-10-30,,
...,...,...,...,...,...
567,568332,Taylor Swift: Reputation Stadium Tour,2018-12-31,,
568,355020,Winter on Fire: Ukraine's Fight for Freedom,2015-10-09,,
569,563708,Springsteen On Broadway,2018-12-16,,
570,765613,Emicida: AmarElo - It's All for Yesterday,2020-12-08,,


In [28]:
#Save to CSV
pulled_flix_df.to_csv('PulledTMDBData.csv')

In [54]:
url = f"https://api.themoviedb.org/3/movie/"

#Created list
Budget = []
Revenue = []
drop_index = []

#Start for loop to go through each row and retrieve movie financial data
for index, row in pulled_flix_df.iterrows():
    movie = row['Pulled_Movie_Name']
    movie_id = row['Pulled_ID']
    #Build partial query URL
    query_url = f"{url}{movie_id}?api_key={tmdb_key}"
    #Response from API requests
    response = requests.get(query_url).json()
    try:
        print(f"The budget and revenue for {movie} is {response['budget']} and {response['revenue']}, respectively.")
        if response['original_title'] != '':
            pulled_flix_df.loc[index,'Budget'] = response['budget']
            pulled_flix_df.loc[index,'Revenue'] = response['revenue']
    except (KeyError, IndexError):
        drop_index.append(index)
        print('Missing field/result...skipping.')
    print('-'*10)
    time.sleep(1)
print(f'-----End of Search-----')

The budget and revenue for Enter the Anime is 0 and 0, respectively.
----------
The budget and revenue for Dark Forces: Shadow People is 1000000 and 0, respectively.
----------
The budget and revenue for The App is 0 and 0, respectively.
----------
The budget and revenue for The Open House is 0 and 0, respectively.
----------
The budget and revenue for Kaali Khuhi is 0 and 0, respectively.
----------
The budget and revenue for Drive is 0 and 0, respectively.
----------
The budget and revenue for Leyla Everlasting is 0 and 0, respectively.
----------
The budget and revenue for The Last Days of American Crime is 0 and 0, respectively.
----------
The budget and revenue for Justice League: The Flashpoint Paradox is 3500000 and 0, respectively.
----------
The budget and revenue for Sardar Ka Grandson is 0 and 0, respectively.
----------
The budget and revenue for Searching for Sheela is 0 and 0, respectively.
----------
The budget and revenue for The Call is 13000000 and 68572378, respectiv

The budget and revenue for Game Over, Man! is 0 and 0, respectively.
----------
The budget and revenue for Guilty is 0 and 0, respectively.
----------
The budget and revenue for In the Tall Grass is 0 and 0, respectively.
----------
The budget and revenue for Madame Claude is 0 and 0, respectively.
----------
The budget and revenue for Naked is 0 and 0, respectively.
----------
The budget and revenue for Outside the Wire is 0 and 0, respectively.
----------
The budget and revenue for The Princess Switch: Switched Again is 10000000 and 0, respectively.
----------
The budget and revenue for Under the Riccione Sun is 3000000 and 0, respectively.
----------
The budget and revenue for A Very Murray Christmas is 0 and 0, respectively.
----------
The budget and revenue for Been So Long is 0 and 0, respectively.
----------
The budget and revenue for Dead Kids is 0 and 0, respectively.
----------
The budget and revenue for Get the Grift is 0 and 0, respectively.
----------
The budget and revenu

The budget and revenue for Maska is 0 and 0, respectively.
----------
The budget and revenue for The Decline of Western Civilization is 100000 and 0, respectively.
----------
The budget and revenue for The Minimalists: Less Is Now is 84919 and 0, respectively.
----------
The budget and revenue for The Polka King is 0 and 0, respectively.
----------
The budget and revenue for F*&% the Prom is 0 and 0, respectively.
----------
The budget and revenue for True Memoirs of an International Assassin is 40000000 and 0, respectively.
----------
The budget and revenue for Ultras is 0 and 0, respectively.
----------
The budget and revenue for Come Sunday is 0 and 0, respectively.
----------
The budget and revenue for Forgive Us Our Debts is 0 and 0, respectively.
----------
The budget and revenue for iBoy is 1500000 and 0, respectively.
----------
The budget and revenue for Lovefucked is 0 and 0, respectively.
----------
The budget and revenue for Juanita is 0 and 0, respectively.
----------
The 

The budget and revenue for Layla Majnun is 0 and 0, respectively.
----------
The budget and revenue for Murder to Mercy: The Cyntoia Brown Story is 0 and 0, respectively.
----------
The budget and revenue for My Own Man is 0 and 0, respectively.
----------
The budget and revenue for Nappily Ever After is 0 and 0, respectively.
----------
The budget and revenue for Over the Moon is 0 and 0, respectively.
----------
The budget and revenue for Street Flow is 0 and 0, respectively.
----------
The budget and revenue for Strong Island is 0 and 0, respectively.
----------
The budget and revenue for Sturgill Simpson Presents Sound & Fury is 0 and 0, respectively.
----------
The budget and revenue for Take Your Pills is 0 and 0, respectively.
----------
The budget and revenue for The Heartbreak Club is 0 and 0, respectively.
----------
The budget and revenue for The Mars Generation is 0 and 0, respectively.
----------
The budget and revenue for The Current Occupant is 0 and 0, respectively.
---

The budget and revenue for Heroin(e) is 0 and 0, respectively.
----------
The budget and revenue for Mercury 13 is 0 and 0, respectively.
----------
The budget and revenue for Saving Capitalism is 0 and 0, respectively.
----------
The budget and revenue for Serious Men is 0 and 0, respectively.
----------
The budget and revenue for The Boys in the Band is 0 and 0, respectively.
----------
The budget and revenue for The Boys in the Band: Something Personal is 0 and 0, respectively.
----------
The budget and revenue for The Life Ahead is 0 and 0, respectively.
----------
The budget and revenue for The Other Side of the Wind is 12000000 and 0, respectively.
----------
The budget and revenue for The Trader is 0 and 0, respectively.
----------
The budget and revenue for To the Bone is 0 and 0, respectively.
----------
The budget and revenue for Tony Parker: The Final Shot is 0 and 0, respectively.
----------
The budget and revenue for AK vs AK is 0 and 0, respectively.
----------
The budget

The budget and revenue for Zion is 0 and 0, respectively.
----------
The budget and revenue for Dolemite Is My Name is 0 and 0, respectively.
----------
The budget and revenue for El Camino: A Breaking Bad Movie is 0 and 0, respectively.
----------
The budget and revenue for Extremis is 0 and 0, respectively.
----------
The budget and revenue for Father Soldier Son is 0 and 0, respectively.
----------
The budget and revenue for Get Me Roger Stone is 0 and 0, respectively.
----------
The budget and revenue for I'm No Longer Here is 849080 and 0, respectively.
----------
The budget and revenue for Mucho Mucho Amor: The Legend of Walter Mercado is 0 and 0, respectively.
----------
The budget and revenue for Octonauts and the Caves of Sac Actun is 0 and 0, respectively.
----------
The budget and revenue for Okja is 50000000 and 0, respectively.
----------
The budget and revenue for On My Skin is 0 and 0, respectively.
----------
The budget and revenue for Raat Akeli Hai is 0 and 0, respect

In [56]:
#Create new "Profit" column and display df
clean_pulled = pulled_flix_df
clean_pulled['Profit'] = clean_pulled['Revenue']-clean_pulled['Budget']
clean_pulled

Unnamed: 0,Pulled_ID,Pulled_Movie_Name,Pulled_Release_Date,Budget,Revenue,Profit
0,616904,Enter the Anime,2019-08-05,0,0,0
1,544087,Dark Forces: Shadow People,2018-05-18,1000000,0,-1000000
2,653522,The App,2019-12-26,0,0,0
3,485774,The Open House,2018-01-19,0,0,0
4,744876,Kaali Khuhi,2020-10-30,0,0,0
...,...,...,...,...,...,...
567,568332,Taylor Swift: Reputation Stadium Tour,2018-12-31,0,0,0
568,355020,Winter on Fire: Ukraine's Fight for Freedom,2015-10-09,0,0,0
569,563708,Springsteen On Broadway,2018-12-16,0,0,0
570,765613,Emicida: AmarElo - It's All for Yesterday,2020-12-08,0,0,0


In [57]:
#Update Pulled Data CSV
clean_pulled.to_csv('PulledTMDBData.csv')

In [71]:
clean_pulled['Preformer'] = ''
clean_pulled['Gender'] = ''
clean_pulled.head()

Unnamed: 0,Pulled_ID,Pulled_Movie_Name,Pulled_Release_Date,Budget,Revenue,Profit,Performer,Gender,Preformer
0,616904,Enter the Anime,2019-08-05,0,0,0,Kouzou Morishita,,
1,544087,Dark Forces: Shadow People,2018-05-18,1000000,0,-1000000,,,
2,653522,The App,2019-12-26,0,0,0,Vincenzo Crea,,
3,485774,The Open House,2018-01-19,0,0,0,Dylan Minnette,,
4,744876,Kaali Khuhi,2020-10-30,0,0,0,Shabana Azmi,,


In [72]:
url = f"https://api.themoviedb.org/3/movie/"

#Created list
gender = []
preformer = []
drop_index = []

#Start for loop to go through each row and retrieve preformer data
for index, row in pulled_flix_df.iterrows():
    movie_id = row['Pulled_ID']
    movie = row['Pulled_Movie_Name']
    #Build partial query URL
    query_url = f"{url}{movie_id}/credits?api_key={tmdb_key}"
    #Response from API requests
    response = requests.get(query_url).json()
    try:
        print(f"The preformer in {movie}, is {response['cast'][0]['name']}. Their gender is characterized as, {response['cast'][0]['gender']}.")
        if response['cast'][0]['name'] != '':
            clean_pulled.loc[index,'Preformer'] = response['cast'][0]['name']
            clean_pulled.loc[index,'Gender'] = response['cast'][0]['gender']
    except (KeyError, IndexError):
        drop_index.append(index)
        print('Missing field/result...skipping.')
    print('-'*10)
    time.sleep(1)
print(f'-----End of Search-----')

The preformer in Enter the Anime, is Kouzou Morishita. Their gender is characterized as, 2.
----------
Missing field/result...skipping.
----------
The preformer in The App, is Vincenzo Crea. Their gender is characterized as, 2.
----------
The preformer in The Open House, is Dylan Minnette. Their gender is characterized as, 2.
----------
The preformer in Kaali Khuhi, is Shabana Azmi. Their gender is characterized as, 1.
----------
The preformer in Drive, is Sushant Singh Rajput. Their gender is characterized as, 2.
----------
The preformer in Leyla Everlasting, is Demet Akbağ. Their gender is characterized as, 1.
----------
The preformer in The Last Days of American Crime, is Edgar Ramírez. Their gender is characterized as, 2.
----------
The preformer in Justice League: The Flashpoint Paradox, is Justin Chambers. Their gender is characterized as, 2.
----------
The preformer in Sardar Ka Grandson, is Arjun Kapoor. Their gender is characterized as, 2.
----------
The preformer in Searching

The preformer in A Christmas Prince: The Royal Wedding, is Rose McIver. Their gender is characterized as, 1.
----------
The preformer in Back to School, is Rodney Dangerfield. Their gender is characterized as, 2.
----------
The preformer in Dangerous Lies, is Camila Mendes. Their gender is characterized as, 1.
----------
The preformer in Gunjan Saxena: The Kargil Girl, is Pankaj Tripathi. Their gender is characterized as, 2.
----------
The preformer in Dangerous Intuition, is Genea Charpentier. Their gender is characterized as, 1.
----------
The preformer in The Most Assassinated Woman in the World, is Anna Mouglalis. Their gender is characterized as, 1.
----------
The preformer in Things Heard & Seen, is Amanda Seyfried. Their gender is characterized as, 1.
----------
The preformer in To Each Her Own, is Kelly Ames. Their gender is characterized as, 0.
----------
The preformer in Who Would You Take to a Deserted Island?, is Pol Monen. Their gender is characterized as, 2.
----------
Th

The preformer in All Day and a Night, is Ashton Sanders. Their gender is characterized as, 2.
----------
The preformer in American Son, is Kerry Washington. Their gender is characterized as, 1.
----------
The preformer in Barry Lyndon, is Ryan O'Neal. Their gender is characterized as, 2.
----------
The preformer in Candy Jar, is Sami Gayle. Their gender is characterized as, 1.
----------
The preformer in Choked: Paisa Bolta Hai, is Saiyami Kher. Their gender is characterized as, 1.
----------
The preformer in Class of '83, is Bobby Deol. Their gender is characterized as, 2.
----------
The preformer in Extinction, is Michael Peña. Their gender is characterized as, 2.
----------
The preformer in Happy Anniversary, is Noël Wells. Their gender is characterized as, 1.
----------
The preformer in I Am All Girls, is Erica Wessels. Their gender is characterized as, 1.
----------
The preformer in Let It Snow, is Joan Cusack. Their gender is characterized as, 1.
----------
The preformer in Masco

The preformer in 100 Degrees Below Zero, is John Rhys-Davies. Their gender is characterized as, 2.
----------
The preformer in Citation, is Jimmy Jean-Louis. Their gender is characterized as, 2.
----------
The preformer in Crazy Awesome Teachers, is Gading Marten. Their gender is characterized as, 0.
----------
Missing field/result...skipping.
----------
The preformer in High Flying Bird, is André Holland. Their gender is characterized as, 2.
----------
The preformer in In the Shadow of the Moon, is Boyd Holbrook. Their gender is characterized as, 2.
----------
Missing field/result...skipping.
----------
The preformer in Octonauts and the Caves of Sac Actun, is Simon Greenall. Their gender is characterized as, 2.
----------
The preformer in Offering to the Storm, is Marta Etura. Their gender is characterized as, 1.
----------
The preformer in Roxanne, Roxanne, is Chanté Adams. Their gender is characterized as, 1.
----------
The preformer in Someone Great, is Gina Rodriguez. Their gende

The preformer in Jingle Jangle: A Christmas Journey, is Forest Whitaker. Their gender is characterized as, 2.
----------
The preformer in Life Overtakes Me, is Henry Ascher. Their gender is characterized as, 0.
----------
The preformer in Lust Stories, is Radhika Apte. Their gender is characterized as, 1.
----------
The preformer in Monster Hunter, is Milla Jovovich. Their gender is characterized as, 1.
----------
The preformer in Mowgli: Legend of the Jungle, is Rohan Chand. Their gender is characterized as, 2.
----------
The preformer in Nobody Knows I'm Here, is Jorge Garcia. Their gender is characterized as, 2.
----------
The preformer in Nobody Speak: Trials of the Free Press, is Hulk Hogan. Their gender is characterized as, 2.
----------
The preformer in Oxygen, is Maura Tierney. Their gender is characterized as, 1.
----------
The preformer in Set It Up, is Zoey Deutch. Their gender is characterized as, 1.
----------
The preformer in The Incredible Jessica James, is Jessica Willi

The preformer in I Don't Feel at Home in This World Anymore, is Melanie Lynskey. Their gender is characterized as, 1.
----------
The preformer in Laerte-se, is Laerte Coutinho. Their gender is characterized as, 1.
----------
The preformer in Mank, is Gary Oldman. Their gender is characterized as, 2.
----------
The preformer in Our Souls at Night, is Robert Redford. Their gender is characterized as, 2.
----------
The preformer in Outlaw King, is Chris Pine. Their gender is characterized as, 2.
----------
The preformer in Pagglait, is Sanya Malhotra. Their gender is characterized as, 1.
----------
The preformer in ReMastered: Who Shot the Sheriff, is Jimmy Cliff. Their gender is characterized as, 2.
----------
The preformer in Seeing Allred, is Gloria Allred. Their gender is characterized as, 1.
----------
The preformer in Spelling the Dream, is Srinivas Ayyagari. Their gender is characterized as, 0.
----------
The preformer in The Claudia Kishi Club, is Naia Cucukov. Their gender is cha

The preformer in Zion, is Zion Clark. Their gender is characterized as, 0.
----------
The preformer in Dolemite Is My Name, is Eddie Murphy. Their gender is characterized as, 2.
----------
The preformer in El Camino: A Breaking Bad Movie, is Aaron Paul. Their gender is characterized as, 2.
----------
The preformer in Extremis, is Monica Bhargava. Their gender is characterized as, 0.
----------
The preformer in Father Soldier Son, is Brian Eisch. Their gender is characterized as, 0.
----------
The preformer in Get Me Roger Stone, is Roger Stone. Their gender is characterized as, 2.
----------
The preformer in I'm No Longer Here, is Juan Daniel Garcia Treviño. Their gender is characterized as, 2.
----------
The preformer in Mucho Mucho Amor: The Legend of Walter Mercado, is Walter Mercado. Their gender is characterized as, 2.
----------
The preformer in Octonauts and the Caves of Sac Actun, is Simon Greenall. Their gender is characterized as, 2.
----------
The preformer in Okja, is Ahn S

The preformer in Winter on Fire: Ukraine's Fight for Freedom, is Cissy Jones. Their gender is characterized as, 0.
----------
The preformer in Springsteen On Broadway, is Bruce Springsteen. Their gender is characterized as, 2.
----------
The preformer in Emicida: AmarElo - It's All for Yesterday, is Emicida. Their gender is characterized as, 2.
----------
The preformer in David Attenborough: A Life on Our Planet, is David Attenborough. Their gender is characterized as, 2.
----------
-----End of Search-----


In [77]:
#Delete the 'performer' field show df
#Gender: 2 - Male, 1 - Female, 0 - Unknown
clean_pulled.head()

Unnamed: 0,Pulled_ID,Pulled_Movie_Name,Pulled_Release_Date,Budget,Revenue,Profit,Gender,Preformer
0,616904,Enter the Anime,2019-08-05,0,0,0,2.0,Kouzou Morishita
1,544087,Dark Forces: Shadow People,2018-05-18,1000000,0,-1000000,,
2,653522,The App,2019-12-26,0,0,0,2.0,Vincenzo Crea
3,485774,The Open House,2018-01-19,0,0,0,2.0,Dylan Minnette
4,744876,Kaali Khuhi,2020-10-30,0,0,0,1.0,Shabana Azmi


In [78]:
#Update Pulled Data CSV
clean_pulled.to_csv('PulledTMDBData.csv')

In [96]:
#Convert 'Premiere' column into datetime format and display updated df
full_dates = flix['Premiere']
converted = pd.to_datetime(full_dates)
flix['Premiere'] = converted
flix.head()

Unnamed: 0,Title,Genre,Premiere,Runtime,IMDB Score,Language
0,Enter the Anime,Documentary,2019-08-05,58,2.5,English/Japanese
1,Dark Forces,Thriller,2020-08-21,81,2.6,Spanish
2,The App,Science fiction/Drama,2019-12-26,79,2.6,Italian
3,The Open House,Horror thriller,2018-01-19,94,3.2,English
4,Kaali Khuhi,Mystery,2020-10-30,90,3.4,Hindi


In [117]:
new_clean_pulled = clean_pulled

# Start here guys!!!!

In [122]:
#Identify field dtypes
new_clean_pulled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 572 entries, 0 to 571
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Pulled_ID            572 non-null    int64 
 1   Pulled_Movie_Name    572 non-null    object
 2   Pulled_Release_Date  572 non-null    object
 3   Budget               572 non-null    object
 4   Revenue              572 non-null    object
 5   Profit               572 non-null    object
 6   Gender               572 non-null    object
 7   Preformer            572 non-null    object
dtypes: int64(1), object(7)
memory usage: 35.9+ KB


In [135]:
#Rename 'Pulled_Movie_Name' column in clean_pulled df
ncp_df = new_clean_pulled.rename(columns={"Pulled_Movie_Name": "Title"})

In [137]:
#Display ncp_df
ncp_df.head()

Unnamed: 0,Pulled_ID,Title,Pulled_Release_Date,Budget,Revenue,Profit,Gender,Preformer
0,616904,Enter the Anime,2019-08-05,0,0,0,2.0,Kouzou Morishita
1,544087,Dark Forces: Shadow People,2018-05-18,1000000,0,-1000000,,
2,653522,The App,2019-12-26,0,0,0,2.0,Vincenzo Crea
3,485774,The Open House,2018-01-19,0,0,0,2.0,Dylan Minnette
4,744876,Kaali Khuhi,2020-10-30,0,0,0,1.0,Shabana Azmi


In [155]:
#Merge 'flix' df and 'clean_pulled' df on movie titles, in order to verify that "movie name" 
#and "release dates" match and display
merged_df = flix.merge(ncp_df, how='inner', on='Title')
merged_df

Unnamed: 0,Title,Genre,Premiere,Runtime,IMDB Score,Language,Pulled_ID,Pulled_Release_Date,Budget,Revenue,Profit,Gender,Preformer
0,Enter the Anime,Documentary,2019-08-05,58,2.5,English/Japanese,616904,2019-08-05,0,0,0,2,Kouzou Morishita
1,The App,Science fiction/Drama,2019-12-26,79,2.6,Italian,653522,2019-12-26,0,0,0,2,Vincenzo Crea
2,The Open House,Horror thriller,2018-01-19,94,3.2,English,485774,2018-01-19,0,0,0,2,Dylan Minnette
3,Kaali Khuhi,Mystery,2020-10-30,90,3.4,Hindi,744876,2020-10-30,0,0,0,1,Shabana Azmi
4,Drive,Action,2019-11-01,147,3.5,Hindi,466550,2019-11-01,0,0,0,2,Sushant Singh Rajput
...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,Dancing with the Birds,Documentary,2019-10-23,51,8.3,English,634541,2019-10-23,0,0,0,2,Stephen Fry
495,Ben Platt: Live from Radio City Music Hall,Concert Film,2020-05-20,85,8.4,English,699210,2020-05-20,0,0,0,2,Ben Platt
496,Taylor Swift: Reputation Stadium Tour,Concert Film,2018-12-31,125,8.4,English,568332,2018-12-31,0,0,0,1,Taylor Swift
497,Winter on Fire: Ukraine's Fight for Freedom,Documentary,2015-10-09,91,8.4,English/Ukranian/Russian,355020,2015-10-09,0,0,0,0,Cissy Jones


In [153]:
#Identify field dtypes
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 499 entries, 0 to 498
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Title                499 non-null    object        
 1   Genre                499 non-null    object        
 2   Premiere             499 non-null    datetime64[ns]
 3   Runtime              499 non-null    int64         
 4   IMDB Score           499 non-null    float64       
 5   Language             499 non-null    object        
 6   Pulled_ID            499 non-null    int64         
 7   Pulled_Release_Date  499 non-null    object        
 8   Budget               499 non-null    object        
 9   Revenue              499 non-null    object        
 10  Profit               499 non-null    object        
 11  Gender               499 non-null    object        
 12  Preformer            499 non-null    object        
dtypes: datetime64[ns](1), float64(1), i

In [156]:
#df of validated API pulls
valid_df = merged_df[merged_df['Premiere'] == merged_df['Pulled_Release_Date']]
valid_df

Unnamed: 0,Title,Genre,Premiere,Runtime,IMDB Score,Language,Pulled_ID,Pulled_Release_Date,Budget,Revenue,Profit,Gender,Preformer
0,Enter the Anime,Documentary,2019-08-05,58,2.5,English/Japanese,616904,2019-08-05,0,0,0,2,Kouzou Morishita
1,The App,Science fiction/Drama,2019-12-26,79,2.6,Italian,653522,2019-12-26,0,0,0,2,Vincenzo Crea
2,The Open House,Horror thriller,2018-01-19,94,3.2,English,485774,2018-01-19,0,0,0,2,Dylan Minnette
3,Kaali Khuhi,Mystery,2020-10-30,90,3.4,Hindi,744876,2020-10-30,0,0,0,1,Shabana Azmi
4,Drive,Action,2019-11-01,147,3.5,Hindi,466550,2019-11-01,0,0,0,2,Sushant Singh Rajput
...,...,...,...,...,...,...,...,...,...,...,...,...,...
493,The Three Deaths of Marisela Escobedo,Documentary,2020-10-14,109,8.2,Spanish,753230,2020-10-14,0,0,0,0,Juan Manuel Fraire Escobedo
494,Dancing with the Birds,Documentary,2019-10-23,51,8.3,English,634541,2019-10-23,0,0,0,2,Stephen Fry
495,Ben Platt: Live from Radio City Music Hall,Concert Film,2020-05-20,85,8.4,English,699210,2020-05-20,0,0,0,2,Ben Platt
496,Taylor Swift: Reputation Stadium Tour,Concert Film,2018-12-31,125,8.4,English,568332,2018-12-31,0,0,0,1,Taylor Swift


In [157]:
#df of inconsitent API pulls
inconsist_df = merged_df[merged_df['Premiere'] != merged_df['Pulled_Release_Date']]
inconsist_df

Unnamed: 0,Title,Genre,Premiere,Runtime,IMDB Score,Language,Pulled_ID,Pulled_Release_Date,Budget,Revenue,Profit,Gender,Preformer
9,The Call,Drama,2020-11-27,112,4.1,Korean,158011,2013-03-14,13000000,68572378,55572378,1,Halle Berry
11,Mercy,Thriller,2016-11-22,90,4.2,English,180305,2014-10-07,0,0,0,1,Frances O'Connor
13,Ghost Stories,Horror anthology,2020-01-01,144,4.3,Hindi,429417,2018-01-20,0,135095,135095,2,Andy Nyman
14,The Last Thing He Wanted,Political thriller,2020-02-21,115,4.3,English,505225,2020-02-14,0,0,0,1,Anne Hathaway
18,The Girl on the Train,Thriller,2021-02-26,120,4.4,Hindi,346685,2016-10-05,45000000,173185859,128185859,1,Emily Blunt
...,...,...,...,...,...,...,...,...,...,...,...,...,...
487,Marriage Story,Drama,2019-12-06,136,7.9,English,492188,2019-09-28,18000000,2300000,-15700000,2,Adam Driver
488,The Ivory Game,Documentary,2016-11-04,112,7.9,English,411023,2016-09-02,0,0,0,2,Ofir Drori
489,My Octopus Teacher,Documentary,2020-09-07,85,8.1,English,682110,2020-09-04,0,0,0,2,Craig Foster
491,Klaus,Animation/Christmas/Comedy/Adventure,2019-11-15,97,8.2,English,508965,2019-11-08,0,0,0,2,Jason Schwartzman
