# The Movie Database: Data Extraction & Cleaning

In [1]:
#  %pip install cpi

In [2]:
# Import dependencies
import pandas as pd
import json
import requests
from pprint import pprint
import numpy as np
import cpi

from datetime import datetime

# Import config
from config import api_key, db_user, db_password, db_host, db_port, db_name

In [3]:
cpi.update()

### Testing:

In [3]:
# Single Test: Discover Most Popular Movies
page_number = 1

# Endpoint for finding most popular movies
discover_movies = "https://api.themoviedb.org/3/discover/movie"
most_popular_url = f"{discover_movies}?api_key={api_key}&page={page_number}&sort_by=popularity.desc"

# Most popular movies
tmdb_response = requests.get(most_popular_url).json()
results = tmdb_response["results"]

json_string = json.dumps(results)
df = pd.read_json(json_string)
df

Unnamed: 0,adult,backdrop_path,genre_ids,id,original_language,original_title,overview,popularity,poster_path,release_date,title,video,vote_average,vote_count
0,False,/70nxSw3mFBsGmtkvcs91PbjerwD.jpg,"[878, 28, 12]",580489,en,Venom: Let There Be Carnage,After finding a host body in investigative rep...,8633.976,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,2021-09-30,Venom: Let There Be Carnage,False,7.2,4398
1,False,/7ajHGIAYNMiIzejy1LJWdPrcAx8.jpg,"[28, 35, 80, 53]",512195,en,Red Notice,An Interpol-issued Red Notice is a global aler...,4825.133,/lAXONuqg41NwUMuzMiFvicDET9Y.jpg,2021-11-04,Red Notice,False,6.8,1970
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,"[16, 35, 10751]",585245,en,Clifford the Big Red Dog,As Emily struggles to fit in at home and at sc...,3280.16,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,2021-11-10,Clifford the Big Red Dog,False,7.5,530
3,False,/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg,"[28, 18, 36]",617653,en,The Last Duel,King Charles VI declares that Knight Jean de C...,3256.364,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,2021-10-13,The Last Duel,False,7.6,870
4,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"[28, 12, 14]",566525,en,Shang-Chi and the Legend of the Ten Rings,Shang-Chi must confront the past he thought he...,3189.442,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,2021-09-01,Shang-Chi and the Legend of the Ten Rings,False,7.8,4201
5,False,/5RuR7GhOI5fElADXZb0X2sr9w5n.jpg,"[16, 35, 10751, 14, 10402]",568124,en,Encanto,"The tale of an extraordinary family, the Madri...",2989.673,/4j0PNHkMr5ax3IA8tjtxcmPU3QT.jpg,2021-11-24,Encanto,False,7.4,307
6,False,/VlHt27nCqOuTnuX6bku8QZapzO.jpg,"[28, 12, 878]",634649,en,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,2774.306,/1g0dhYtq4irTY1GPXvft6k4YLjm.jpg,2021-12-15,Spider-Man: No Way Home,False,8.4,42
7,False,/iUeeZ5PWfZGgUtCJfwcgmCfdzoI.jpg,"[16, 878, 10751, 35]",482321,en,Ron's Gone Wrong,"In a world where walking, talking, digitally c...",2662.308,/gA9QxSravC2EVEkEKgyEmDrfL0e.jpg,2021-10-15,Ron's Gone Wrong,False,8.5,320
8,False,/xGrTm3J0FTafmuQ85vF7ZCw94x6.jpg,"[18, 36, 12]",589761,ru,Чернобыль,The aftermath of a shocking explosion at the C...,2438.508,/kfQJQWFEoWRVBH8FUKnT0HX1yRS.jpg,2021-04-15,Chernobyl: Abyss,False,6.3,235
9,False,/r2GAjd4rNOHJh6i6Y0FntmYuPQW.jpg,"[12, 28, 53]",370172,en,No Time to Die,Bond has left active service and is enjoying a...,1999.424,/iUgygt3fscRoKWCV1d0C7FbM9TP.jpg,2021-09-29,No Time to Die,False,7.6,2509


In [4]:
#Single Test: Crew
movie_id = 672582

# Endpoint & response
movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}"
movie_response = requests.get(movie_url).json()

pprint(movie_response)

{'adult': False,
 'backdrop_path': '/yL9RRZbDVbptqLwiZcK304ck4PL.jpg',
 'belongs_to_collection': None,
 'budget': 0,
 'genres': [{'id': 27, 'name': 'Horror'}],
 'homepage': '',
 'id': 672582,
 'imdb_id': 'tt11686490',
 'original_language': 'en',
 'original_title': 'The Deep House',
 'overview': 'While diving in a remote French lake, a couple of YouTubers who '
             'specialize in underwater exploration videos discover a house '
             'submerged in the deep waters. What was initially a unique '
             'finding soon turns into a nightmare when they discover that the '
             'house was the scene of atrocious crimes. Trapped, with their '
             'oxygen reserves falling dangerously, they realize the worst is '
             'yet to come: they are not alone in the house.',
 'popularity': 449.82,
 'poster_path': '/5xhAPxRr64oQPEFnUOrttuI4ZEU.jpg',
 'production_companies': [{'id': 12689,
                           'logo_path': None,
                           

# Functions: Define API calls to extract key data points

In [5]:
# Start Timer Function (check on API call performance)
def start_timer():
    start = datetime.now()
    
    return start

In [6]:
# Stop Timer Function (check on API call performance)
def stop_timer(start):
    end = datetime.now()
    elapsed_time = (end - start)

    print(f"Total Time Elapsed:  {elapsed_time.total_seconds()} seconds")

### API CALLS:

In [7]:
# Returns most popular movies
def get_most_popular_movies(api_key):
    
    movies = []
    
    # Loop through pages to get results for movies
    for x in range(1, 101):
        page_number = x

        # Endpoint for finding most popular movies
        discover_movies = "https://api.themoviedb.org/3/discover/movie"
        most_popular_url = f"{discover_movies}?api_key={api_key}&page={page_number}&sort_by=popularity.desc"
        
        # Most popular movies
        tmdb_response = requests.get(most_popular_url).json()
        results = tmdb_response["results"]

#         for y in range(len(results)):
#             movies.append(results[y]) 
        for result in results:
            movies.append(result)
            
    return movies

In [8]:
# Returns movie details as a list
def get_movie_details(api_key, movie_ids):  
    
    movie_details = []
    
    for movie_id in movie_ids:
        # Endpoint & response
        movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}"
        movie_response = requests.get(movie_url).json()

        # Append results to lists
        movie_details.append(movie_response)
        
    return movie_details

In [9]:
# Returns keywords as a list
def get_movie_keywords(api_key, movie_ids):
    keyword_details = []
    
    for movie_id in movie_ids:
        # Get keywords for each movie
        keywords_url = f"https://api.themoviedb.org/3/movie/{movie_id}/keywords?api_key={api_key}"
        keywords_response = requests.get(keywords_url).json()
        keywords_results = keywords_response["keywords"]
        
         # Append results to lists
        keyword_details.append(keywords_response)
        
    return keyword_details

In [10]:
# Returns credits as a list
def get_credits(api_key, movie_ids):
    credit_details = []
    
    for movie_id in movie_ids:
        # Generate movie credits
        movie_credits = f"/movie/{movie_id}/credits"
        credits_url = f"https://api.themoviedb.org/3/{movie_credits}?api_key={api_key}"

        # Get the json response for the credits
        credits_response = requests.get(credits_url).json()
        
        credit_details.append(credits_response)
        
    return credit_details

In [11]:
# Returns certifications as a list (rating: G, PG, etc.)
def get_certifications(api_key, movie_ids):
    certifications = []
    
    for movie_id in movie_ids:
        # Get certifications for each movie
        certification_url = f"https://api.themoviedb.org/3/movie/{movie_id}/release_dates?api_key={api_key}"
        
        certification_response = requests.get(certification_url).json()
        
        certifications.append(certification_response)
        
    return certifications

### GET TITLES & IDS:

In [12]:
# Returns single title for specified index number
def get_title(results, idx):
    movie_title = results[idx]["title"]
    
    return movie_title

In [13]:
# Returns ids list
def get_ids(results):
    movie_ids = []
    for x in results:
        movie_ids.append(x["id"])
        
    return movie_ids

In [14]:
# Returns titles list
def get_titles(results):
    movie_titles = []
    for x in results:
        movie_titles.append(x["title"])
        
    return movie_titles

### EXTRACT NEEDED DETAILS FROM COLUMNS: Cast, Crew, Gender, Production Countries etc. 

In [15]:
# Extract the certification (ratings)
def extract_certification(x):
    name = ""
    
    for i in x:
        # Get ratings for US
        if i['iso_3166_1'] == 'US':
             # Append results to lists
            name = i['release_dates'][0]['certification']
            break
    return name

In [16]:
# Function to get the director out of the crew
def get_director(x):
    names = []
    
    for i in x:
        if i['job'] == 'Director':
            name = i['name']
            names.append(name)
            
    if(names):
        return names
    
    return np.nan

In [17]:
# Function to get the director gender out of the crew
def get_director_gender(x):
    names = []
    
    for i in x:
        if i['job'] == 'Director':
            gender = i['gender']
            names.append(gender)
            
    if(names):
        return names
    
    return np.nan

In [18]:
def fm_percentage(results):
    fm_count = 0
    total_count = 0
    for x in results:
        if x['gender'] == 1:
            fm_count += 1
        if x['gender'] == 1 or x['gender'] == 2:
            total_count += 1
    
    if total_count == 0:        
        return np.nan
            
    return percentage_format(100 * fm_count/total_count)

In [19]:
# Function to get the producers out of the crew
def get_producers(x):
    names = []
    
    for i in x:
        if i['job'] == 'Producer':
            name = i['name']
            names.append(name)
        elif i['job'] == "Executive Producer":
            name = i['name']
            names.append(name)
        elif i['job'] == 'Co-Producer':
            name = i['name']
            names.append(name)
            
    if(names):
        return names
    
    return np.nan

In [20]:
# Function to get the screenplay writers out of the crew
def get_writers(x):
    names = []
    for i in x:
        if i['department'] == 'Writing':
            name = i['name']
            names.append(name)
            
    if(names):
        return names
    
    return np.nan

In [21]:
# Function to create cast list
def get_cast_list(x):
    if isinstance(x, list):
        names = [i['name'] for i in x]
        
        # Condition for cast (keep top 5)
        if len(names) > 5:
            names = names[:5]
        return names

    return []

In [22]:
# Function to create lists of each feature
def get_list(x):
    if isinstance(x, list):
        names = [i['name'] for i in x]
        return names
        
    return []

In [23]:
# Function to get production companies
def get_production_companies(x):
    names = []
    if isinstance(x, list):
        for i in x:
            name = i['name']
            names.append(name)
        return names
        
    return []

In [24]:
# Get each production company's country of origin
def get_production_company_country(x):
    names = []
    if isinstance(x, list):
        for i in x:
            country = i['origin_country']
            names.append(country)
        return names
    return []

In [25]:
# Get list of all languages available for film
def get_languages(x):
    names = []
    for i in x:
        name = i['english_name']
#         iso = i['iso_639_1']
        names.append(name)
    
    if(names):
        return names
    
    return np.nan

In [26]:
# Create binary column for foreign language films
def original_language_binary(x):
    if (x == 'en'):
        return 0
    elif (x != 'en'):
        return 1
    elif (x == "" | x == " "):
        return 0
    else:
        return 0

In [27]:
def percentage_format(percentage):
    return "{:,.2f}%".format(percentage)

### CLEAN DATA FOR SOUP & CREATE SOUP: Used for Machine Learning 

In [28]:
# Convert all strings to lower case and strip names of spaces
def clean_data(x):
    if isinstance(x, list):
        return [str.lower(i.replace(" ", "")) for i in x]
    else:
        if isinstance(x, str):
            return str.lower(x.replace(" ", ""))
        else: 
            return ''

In [29]:
# Clean the overview column (by converting to lowercase)
def clean_overview(x):
    if isinstance(x, str):
        return x.lower()
    else: 
        return ''

In [30]:
# Create soup of words
def create_soup(x):
    space = ' '
    return (space.join(x['keywords_cleaned']) + space + space.join(x['cast_cleaned']) + space 
            + space.join(x['director_cleaned']) + space + space.join(x['producers_cleaned']) + space 
            + space.join(x['writers_cleaned']) + space + space.join(x['genres_cleaned']) + space 
            + space.join(x['production_companies_cleaned']))

In [31]:
# Create soup of words - 2 keywords, 2 genres & overview
def create_soup_overview(x):
    space = ' '
    return (space.join(x['keywords_cleaned']) + space + space.join(x['keywords_cleaned']) + space 
            + space.join(x['cast_cleaned']) + space + space.join(x['director_cleaned']) + space 
            + space.join(x['producers_cleaned']) + space + space.join(x['writers_cleaned']) + space 
            + space.join(x['genres_cleaned']) + space + space.join(x['genres_cleaned']) + space 
            + space.join(x['production_companies_cleaned']) + space + x['overview_cleaned'])

# CALL API

### Get Most Popular Movies JSON Results:

In [32]:
# Start the timer
start = start_timer()

In [33]:
most_popular_movies = get_most_popular_movies(api_key)
# print(most_popular_movies)

In [34]:
# Stop the timer
stop_timer(start)

Total Time Elapsed:  29.319513 seconds


### List of Movie IDs:

In [35]:
movie_ids = get_ids(most_popular_movies)
#print(movie_ids)

In [36]:
len(movie_ids)

2000

### List of Titles:

In [37]:
titles = get_titles(most_popular_movies)
# print(titles)

In [38]:
len(titles)

2000

### Print Individual Title:

In [39]:
print(get_title(most_popular_movies, 0))

Venom: Let There Be Carnage


### Get Movie Details:

In [40]:
# Start the timer
start = start_timer()

In [41]:
details = get_movie_details(api_key, movie_ids)

In [71]:
pprint(details[0:10])

[{'adult': False,
  'backdrop_path': '/70nxSw3mFBsGmtkvcs91PbjerwD.jpg',
  'belongs_to_collection': {'backdrop_path': '/rhLspFB1B8ZCkWEHFYmc3NKagzq.jpg',
                            'id': 558216,
                            'name': 'Venom Collection',
                            'poster_path': '/670x9sf0Ru8y6ezBggmYudx61yB.jpg'},
  'budget': 110000000,
  'genres': [{'id': 878, 'name': 'Science Fiction'},
             {'id': 28, 'name': 'Action'},
             {'id': 12, 'name': 'Adventure'}],
  'homepage': 'https://www.venom.movie',
  'id': 580489,
  'imdb_id': 'tt7097896',
  'original_language': 'en',
  'original_title': 'Venom: Let There Be Carnage',
  'overview': 'After finding a host body in investigative reporter Eddie '
              'Brock, the alien symbiote must face a new enemy, Carnage, the '
              'alter ego of serial killer Cletus Kasady.',
  'popularity': 8633.976,
  'poster_path': '/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg',
  'production_companies': [{'id': 7505,
       

In [43]:
# Stop the timer
stop_timer(start)

Total Time Elapsed:  624.241883 seconds


### Get Keywords:

In [56]:
# Start the timer
start = start_timer()

In [57]:
keywords = get_movie_keywords(api_key, movie_ids)

In [58]:
# print(keywords)

In [59]:
# Stop the timer
stop_timer(start)

Total Time Elapsed:  445.224551 seconds


### Get Credits:

In [60]:
# Start the timer
start = start_timer()

In [61]:
credits = get_credits(api_key, movie_ids)

In [62]:
# print(credits)

In [63]:
# Stop the timer
stop_timer(start)

Total Time Elapsed:  468.778203 seconds


### Get Certifications:

In [64]:
# Start the timer
start = start_timer()

In [65]:
# Get the certifications for each movie (rating: G, PG, etc.)
certifications = get_certifications(api_key, movie_ids)
# certifications = get_certifications(api_key, [32657, 672582])

In [66]:
# print(certifications)

In [67]:
# Stop the timer
stop_timer(start)

Total Time Elapsed:  461.868641 seconds


## Movie Details DataFrame:

In [227]:
# Convert details to json
json_details_string = json.dumps(details)
# Convert json to dataframe
movie_details_df = pd.read_json(json_details_string)

# Export to save
# movie_details_df.to_csv("./static/data/movie_details.csv", index=False)

movie_details_df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/70nxSw3mFBsGmtkvcs91PbjerwD.jpg,"{'id': 558216, 'name': 'Venom Collection', 'po...",110000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",https://www.venom.movie,580489,tt7097896,en,Venom: Let There Be Carnage,After finding a host body in investigative rep...,8633.976,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,"[{'id': 7505, 'logo_path': '/837VMM4wOkODc1idN...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-30,482000000,97,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...",Released,,Venom: Let There Be Carnage,False,7.2,4421
1,False,/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg,,200000000,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",https://www.netflix.com/us/title/81161626,512195,tt7991608,en,Red Notice,An Interpol-issued Red Notice is a global aler...,4825.133,/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg,"[{'id': 34081, 'logo_path': None, 'name': 'Fly...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-04,0,117,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Pro and cons.,Red Notice,False,6.8,1982
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,,0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",,585245,tt2397461,en,Clifford the Big Red Dog,As Emily struggles to fit in at home and at sc...,3280.16,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",2021-11-10,51000000,97,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Adventure has never been bigger.,Clifford the Big Red Dog,False,7.6,540
3,False,/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg,,100000000,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",https://www.20thcenturystudios.com/movies/the-...,617653,tt4244994,en,The Last Duel,King Charles VI declares that Knight Jean de C...,3256.364,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,"[{'id': 127928, 'logo_path': '/h0rjX5vjW5r8yEn...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-10-13,27000000,152,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The true story of a woman who defied a nation ...,The Last Duel,False,7.6,885
4,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"{'id': 912503, 'name': 'Shang-Chi Collection',...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.marvel.com/movies/shang-chi-and-th...,566525,tt9376612,en,Shang-Chi and the Legend of the Ten Rings,Shang-Chi must confront the past he thought he...,3189.442,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-01,430238384,132,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,You can't outrun your destiny.,Shang-Chi and the Legend of the Ten Rings,False,7.8,4211


In [229]:
# Extract year from release date
movie_details_df["year"] = pd.DatetimeIndex(movie_details_df['release_date']).year
# Drop any N/A from year
movie_details_df.dropna(inplace=True, how="any", subset=['year','release_date'])
 
# Convert to int
movie_details_df["year"] = movie_details_df["year"].astype(int)
    
# Rename columns to prepare for inflation calculation
movie_details_df = movie_details_df.rename(columns = {
    "revenue": "original_revenue",
    "budget": "original_budget"
})

In [230]:
len(movie_details_df)

1991

## Adjust Budget & Revenue for Inflation

In [231]:
# Create dataframe for cpi
cpi_df = pd.DataFrame(columns=['year', 'cpi_2021', 'cpi_old'])

In [232]:
# Create list of years (1913-2021)
years = []
for i in range(1913, 2022):
    years.append(i)

In [233]:
# Define 2021 cpi
cpi_2021 = 269.489

# Loop through years to append to dataframe
for year in years:
    if year != 2021:
        cpi_old = cpi.get(year)
        cpi_df = cpi_df.append({"year": year, 'cpi_2021': cpi_2021, 'cpi_old': cpi_old}, ignore_index=True)
    elif year == 2021:
        cpi_df = cpi_df.append({"year": year, 'cpi_2021': cpi_2021, 'cpi_old': cpi_2021}, ignore_index=True)

In [234]:
# Convert year type to int
cpi_df['year'] = cpi_df['year'].astype(int)
cpi_df.head()

Unnamed: 0,year,cpi_2021,cpi_old
0,1913,269.489,9.9
1,1914,269.489,10.0
2,1915,269.489,10.1
3,1916,269.489,10.9
4,1917,269.489,12.8


In [235]:
# Merge movie details df with cpi df
movie_details_df = movie_details_df.merge(cpi_df, on="year")
movie_details_df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,original_budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,original_revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,cpi_2021,cpi_old
0,False,/70nxSw3mFBsGmtkvcs91PbjerwD.jpg,"{'id': 558216, 'name': 'Venom Collection', 'po...",110000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",https://www.venom.movie,580489,tt7097896,en,Venom: Let There Be Carnage,After finding a host body in investigative rep...,8633.976,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,"[{'id': 7505, 'logo_path': '/837VMM4wOkODc1idN...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-30,482000000,97,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...",Released,,Venom: Let There Be Carnage,False,7.2,4421,2021,269.489,269.489
1,False,/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg,,200000000,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",https://www.netflix.com/us/title/81161626,512195,tt7991608,en,Red Notice,An Interpol-issued Red Notice is a global aler...,4825.133,/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg,"[{'id': 34081, 'logo_path': None, 'name': 'Fly...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-04,0,117,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Pro and cons.,Red Notice,False,6.8,1982,2021,269.489,269.489
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,,0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",,585245,tt2397461,en,Clifford the Big Red Dog,As Emily struggles to fit in at home and at sc...,3280.16,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",2021-11-10,51000000,97,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Adventure has never been bigger.,Clifford the Big Red Dog,False,7.6,540,2021,269.489,269.489
3,False,/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg,,100000000,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",https://www.20thcenturystudios.com/movies/the-...,617653,tt4244994,en,The Last Duel,King Charles VI declares that Knight Jean de C...,3256.364,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,"[{'id': 127928, 'logo_path': '/h0rjX5vjW5r8yEn...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-10-13,27000000,152,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The true story of a woman who defied a nation ...,The Last Duel,False,7.6,885,2021,269.489,269.489
4,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"{'id': 912503, 'name': 'Shang-Chi Collection',...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.marvel.com/movies/shang-chi-and-th...,566525,tt9376612,en,Shang-Chi and the Legend of the Ten Rings,Shang-Chi must confront the past he thought he...,3189.442,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-01,430238384,132,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,You can't outrun your destiny.,Shang-Chi and the Legend of the Ten Rings,False,7.8,4211,2021,269.489,269.489


In [236]:
# Create adjusted column for each feature
features = ["revenue", "budget"]

for feature in features:
    movie_details_df[f'adjusted_{feature}'] = (movie_details_df[f'original_{feature}'] * movie_details_df['cpi_2021']) / movie_details_df['cpi_old']

movie_details_df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,original_budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,original_revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,cpi_2021,cpi_old,adjusted_revenue,adjusted_budget
0,False,/70nxSw3mFBsGmtkvcs91PbjerwD.jpg,"{'id': 558216, 'name': 'Venom Collection', 'po...",110000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",https://www.venom.movie,580489,tt7097896,en,Venom: Let There Be Carnage,After finding a host body in investigative rep...,8633.976,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,"[{'id': 7505, 'logo_path': '/837VMM4wOkODc1idN...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-30,482000000,97,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...",Released,,Venom: Let There Be Carnage,False,7.2,4421,2021,269.489,269.489,482000000.0,110000000.0
1,False,/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg,,200000000,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",https://www.netflix.com/us/title/81161626,512195,tt7991608,en,Red Notice,An Interpol-issued Red Notice is a global aler...,4825.133,/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg,"[{'id': 34081, 'logo_path': None, 'name': 'Fly...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-04,0,117,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Pro and cons.,Red Notice,False,6.8,1982,2021,269.489,269.489,0.0,200000000.0
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,,0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",,585245,tt2397461,en,Clifford the Big Red Dog,As Emily struggles to fit in at home and at sc...,3280.16,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",2021-11-10,51000000,97,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Adventure has never been bigger.,Clifford the Big Red Dog,False,7.6,540,2021,269.489,269.489,51000000.0,0.0
3,False,/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg,,100000000,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",https://www.20thcenturystudios.com/movies/the-...,617653,tt4244994,en,The Last Duel,King Charles VI declares that Knight Jean de C...,3256.364,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,"[{'id': 127928, 'logo_path': '/h0rjX5vjW5r8yEn...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-10-13,27000000,152,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The true story of a woman who defied a nation ...,The Last Duel,False,7.6,885,2021,269.489,269.489,27000000.0,100000000.0
4,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"{'id': 912503, 'name': 'Shang-Chi Collection',...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.marvel.com/movies/shang-chi-and-th...,566525,tt9376612,en,Shang-Chi and the Legend of the Ten Rings,Shang-Chi must confront the past he thought he...,3189.442,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-01,430238384,132,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,You can't outrun your destiny.,Shang-Chi and the Legend of the Ten Rings,False,7.8,4211,2021,269.489,269.489,430238384.0,150000000.0


In [237]:
# Check values
movie_details_df[['original_budget', 'adjusted_budget', 'original_revenue', 'adjusted_revenue', 'year']]

Unnamed: 0,original_budget,adjusted_budget,original_revenue,adjusted_revenue,year
0,110000000,1.100000e+08,482000000,4.820000e+08,2021
1,200000000,2.000000e+08,0,0.000000e+00,2021
2,0,0.000000e+00,51000000,5.100000e+07,2021
3,100000000,1.000000e+08,27000000,2.700000e+07,2021
4,150000000,1.500000e+08,430238384,4.302384e+08,2021
...,...,...,...,...,...
1969,0,0.000000e+00,0,0.000000e+00,1978
1970,660000,2.727956e+06,16000000,6.613227e+07,1978
1971,6000000,5.215916e+07,102272727,8.890766e+08,1964
1972,0,0.000000e+00,0,0.000000e+00,1964


## Low Budget:

In [238]:
# Create budget bins
bins = [1, 15000000, 50000000, 150000000, 380000000] 
bin_names = ["1 to 15m", "16m to 50m", "51m to 150m", "151m to 380m"]

# Append a budget bin column
movie_details_df["budget_bins"] = pd.cut(movie_details_df["adjusted_budget"], bins, labels=bin_names)
movie_details_df["budget_bins"].value_counts()

51m to 150m     382
16m to 50m      337
151m to 380m    247
1 to 15m        198
Name: budget_bins, dtype: int64

In [239]:
# Low Budget
movie_details_df.loc[movie_details_df["budget_bins"] == "1 to 15m"].head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,original_budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,original_revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,cpi_2021,cpi_old,adjusted_revenue,adjusted_budget,budget_bins
8,False,/xGrTm3J0FTafmuQ85vF7ZCw94x6.jpg,,9100000,"[{'id': 18, 'name': 'Drama'}, {'id': 36, 'name...",,589761,tt10648714,ru,Чернобыль,The aftermath of a shocking explosion at the C...,2438.508,/kfQJQWFEoWRVBH8FUKnT0HX1yRS.jpg,"[{'id': 5630, 'logo_path': '/s0mHCw53fp6EAapR7...","[{'iso_3166_1': 'RU', 'name': 'Russia'}]",2021-04-15,5370393,136,"[{'english_name': 'Russian', 'iso_639_1': 'ru'...",Released,,Chernobyl: Abyss,False,6.3,235,2021,269.489,269.489,5370393.0,9100000.0,1 to 15m
24,False,/4EJSMQOM1bZPHvzqAQe87suBxdf.jpg,,250000,"[{'id': 10752, 'name': 'War'}, {'id': 36, 'nam...",https://www.netflix.com/title/81450071,885110,tt15847872,en,Amina,"In 16th-century Zazzau, now Zaria, Nigeria, Am...",1255.799,/hMIQiwLpBfTfe3ZbRlNx4225Mgg.jpg,"[{'id': 162630, 'logo_path': None, 'name': 'Vi...","[{'iso_3166_1': 'NG', 'name': 'Nigeria'}]",2021-11-04,0,120,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,She is Amina and she fears nobody.,Amina,False,6.3,55,2021,269.489,269.489,0.0,250000.0,1 to 15m
29,False,/qXctHIfK4LAgHH7qZiJJVCPclaY.jpg,,45223,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",https://www.disneyplus.com/movies/ciao-alberto...,876716,tt15476620,en,Ciao Alberto,"With his best friend Luca away at school, Albe...",911.505,/1SyTnaY0wte69oKdqxQLvxPT3hs.jpg,"[{'id': 2, 'logo_path': '/wdrCwmRnLFJhEoH8GSfy...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-12,0,7,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,They go together like pasta and pesto.,Ciao Alberto,False,7.7,252,2021,269.489,269.489,0.0,45223.0,1 to 15m
30,False,/lV3UFPPxDIPelh46G9oySXN9Mcz.jpg,"{'id': 702624, 'name': 'After Collection', 'po...",14000000,"[{'id': 10749, 'name': 'Romance'}, {'id': 18, ...",,744275,tt13069986,en,After We Fell,"Just as Tessa's life begins to become unglued,...",843.613,/dU4HfnTEJDf9KvxGS9hgO7BVeju.jpg,"[{'id': 6626, 'logo_path': '/A1BnMoWjzjOrjzpWi...","[{'iso_3166_1': 'FR', 'name': 'France'}, {'iso...",2021-09-01,19000000,99,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,After We Fell,False,7.2,1123,2021,269.489,269.489,19000000.0,14000000.0,1 to 15m
39,False,/dIibeeq4QMay5bTJ2vjr72IFFRo.jpg,"{'id': 589755, 'name': 'Последний богатырь (Ко...",8400000,"[{'id': 14, 'name': 'Fantasy'}, {'id': 12, 'na...",,589754,tt13606158,ru,Последний богатырь: Корень зла,Peace and tranquility have set in Belogorie. T...,692.329,/5VJSIAhSn4qUsg5nOj4MhQhF5wQ.jpg,"[{'id': 42877, 'logo_path': '/15THT1W3NROsbDXP...","[{'iso_3166_1': 'RU', 'name': 'Russia'}]",2021-01-01,27658331,120,"[{'english_name': 'Russian', 'iso_639_1': 'ru'...",Released,,The Last Warrior: Root of Evil,False,7.0,67,2021,269.489,269.489,27658331.0,8400000.0,1 to 15m


## Keywords DataFrame:

In [240]:
# Convert keywords to json
json_keywords_string = json.dumps(keywords)
# Convert json to dataframe
movie_keywords_df = pd.read_json(json_keywords_string)

# Export to save
# movie_keywords_df.to_csv("./static/data/movie_keywords.csv", index=False)
pprint(json_keywords_string)

('[{"id": 580489, "keywords": [{"id": 1701, "name": "hero"}, {"id": 2095, '
 '"name": "anti hero"}, {"id": 3289, "name": "villain"}, {"id": 9663, "name": '
 '"sequel"}, {"id": 9715, "name": "superhero"}, {"id": 9717, "name": "based on '
 'comic"}, {"id": 179430, "name": "aftercreditsstinger"}, {"id": 193132, '
 '"name": "symbiote"}, {"id": 265894, "name": "marvel comics"}]}, {"id": '
 '512195, "keywords": [{"id": 1812, "name": "fbi"}, {"id": 6710, "name": '
 '"interpol"}, {"id": 10051, "name": "heist"}, {"id": 14789, "name": "art '
 'thief"}]}, {"id": 585245, "keywords": [{"id": 818, "name": "based on novel '
 'or book"}, {"id": 162628, "name": "giant dog"}]}, {"id": 617653, "keywords": '
 '[{"id": 818, "name": "based on novel or book"}, {"id": 4299, "name": '
 '"normandy, france"}, {"id": 9672, "name": "based on a true story"}, {"id": '
 '10466, "name": "knight"}, {"id": 12965, "name": "duel"}, {"id": 15126, '
 '"name": "historical"}, {"id": 175245, "name": "sword duel"}, {"id": 22284

 '"name": "antidote"}, {"id": 188950, "name": "corporation"}, {"id": 188957, '
 '"name": "virus"}, {"id": 204967, "name": "runner"}, {"id": 213987, "name": '
 '"city ruin"}, {"id": 219843, "name": "immunity"}, {"id": 246466, "name": '
 '"based on young adult novel"}]}, {"id": 283995, "keywords": [{"id": 9663, '
 '"name": "sequel"}, {"id": 9715, "name": "superhero"}, {"id": 9717, "name": '
 '"based on comic"}, {"id": 9789, "name": "misfit"}, {"id": 9882, "name": '
 '"space"}, {"id": 179430, "name": "aftercreditsstinger"}, {"id": 179431, '
 '"name": "duringcreditsstinger"}, {"id": 180547, "name": "marvel cinematic '
 'universe (mcu)"}]}, {"id": 812456, "keywords": [{"id": 10542, "name": "based '
 'on toy"}, {"id": 267848, "name": "talking animals"}]}, {"id": 257344, '
 '"keywords": [{"id": 242, "name": "new york city"}, {"id": 282, "name": '
 '"video game"}, {"id": 5801, "name": "nerd"}, {"id": 9951, "name": "alien"}, '
 '{"id": 10439, "name": "gamer"}, {"id": 173374, "name": "gaming"}, 

 '{"id": 14903, "name": "home invasion"}, {"id": 34094, "name": "extramarital '
 'affair"}, {"id": 155262, "name": "threesome"}, {"id": 156688, "name": '
 '"seductress"}, {"id": 195439, "name": "father\'s day"}, {"id": 219833, '
 '"name": "married man"}]}, {"id": 682254, "keywords": [{"id": 6054, "name": '
 '"friendship"}, {"id": 9713, "name": "friends"}, {"id": 10092, "name": '
 '"mystery"}]}, {"id": 297761, "keywords": [{"id": 849, "name": "dc comics"}, '
 '{"id": 2095, "name": "anti hero"}, {"id": 3269, "name": "secret mission"}, '
 '{"id": 3289, "name": "villain"}, {"id": 9715, "name": "superhero"}, {"id": '
 '194404, "name": "supervillain"}, {"id": 229266, "name": "dc extended '
 'universe"}]}, {"id": 68718, "keywords": [{"id": 801, "name": "bounty '
 'hunter"}, {"id": 1701, "name": "hero"}, {"id": 3136, "name": "plantation"}, '
 '{"id": 3679, "name": "society"}, {"id": 6054, "name": "friendship"}, {"id": '
 '9713, "name": "friends"}, {"id": 9748, "name": "revenge"}, {"id": 9823, 

 '7942, "name": "imaginary friend"}, {"id": 10216, "name": "outcast"}, {"id": '
 '15097, "name": "shark"}]}, {"id": 19912, "keywords": [{"id": 830, "name": '
 '"car race"}, {"id": 3737, "name": "dying and death"}, {"id": 4812, "name": '
 '"plan"}, {"id": 6315, "name": "stock car racing"}, {"id": 9844, "name": "car '
 'crash"}, {"id": 10170, "name": "premonition"}, {"id": 10292, "name": '
 '"gore"}, {"id": 10629, "name": "vision"}]}, {"id": 257346, "keywords": '
 '[{"id": 9715, "name": "superhero"}, {"id": 9717, "name": "based on '
 'comic"}]}, {"id": 881554, "keywords": []}, {"id": 622386, "keywords": '
 '[{"id": 10714, "name": "serial killer"}, {"id": 227428, "name": "female '
 'serial killer"}, {"id": 256728, "name": "chucky"}]}, {"id": 11324, '
 '"keywords": [{"id": 818, "name": "based on novel or book"}, {"id": 2041, '
 '"name": "island"}, {"id": 2215, "name": "hurricane"}, {"id": 5340, "name": '
 '"investigation"}, {"id": 10323, "name": "psychiatric hospital"}, {"id": '
 '11207, "

 'brother"}, {"id": 169213, "name": "milwaukee wisconsin"}, {"id": 169590, '
 '"name": "police raid"}, {"id": 178036, "name": "police harassment"}, {"id": '
 '179431, "name": "duringcreditsstinger"}, {"id": 195162, "name": "record '
 'company"}, {"id": 204236, "name": "music tour"}, {"id": 208289, "name": '
 '"1980s"}, {"id": 217474, "name": "intimidation by police"}, {"id": 217475, '
 '"name": "los angeles riots"}, {"id": 217482, "name": "compton, '
 'california"}]}, {"id": 348, "keywords": [{"id": 803, "name": "android"}, '
 '{"id": 1612, "name": "spacecraft"}, {"id": 1825, "name": "countdown"}, '
 '{"id": 1826, "name": "space marine"}, {"id": 1828, "name": "space suit"}, '
 '{"id": 1829, "name": "beheading"}, {"id": 3801, "name": "space travel"}, '
 '{"id": 4565, "name": "dystopia"}, {"id": 6016, "name": "biology"}, {"id": '
 '7956, "name": "cowardice"}, {"id": 9882, "name": "space"}, {"id": 9951, '
 '"name": "alien"}, {"id": 11322, "name": "female protagonist"}, {"id": 34117, '
 '"

 'relationships"}, {"id": 10360, "name": "prejudice"}, {"id": 11707, "name": '
 '"suitor"}, {"id": 15060, "name": "period drama"}, {"id": 156501, "name": '
 '"pride"}, {"id": 160279, "name": "18th century"}, {"id": 165301, "name": '
 '"opposites attract"}, {"id": 182376, "name": "gentleman"}, {"id": 210085, '
 '"name": "georgian or regency era"}, {"id": 270338, "name": "1790s"}]}, '
 '{"id": 2668, "keywords": [{"id": 1415, "name": "small town"}, {"id": 10028, '
 '"name": "steampunk"}, {"id": 207928, "name": "19th century"}, {"id": 208024, '
 '"name": "headless horseman"}]}, {"id": 24831, "keywords": [{"id": 1261, '
 '"name": "river"}, {"id": 1357, "name": "fish"}, {"id": 1706, "name": '
 '"experiment"}, {"id": 4595, "name": "us army"}, {"id": 5160, "name": "animal '
 'attack"}, {"id": 5767, "name": "summer camp"}, {"id": 12193, "name": '
 '"reporter"}, {"id": 158045, "name": "piranha"}, {"id": 158291, "name": "tent '
 'camp"}, {"id": 158292, "name": "eugenics"}, {"id": 209662, "name": 

## Credits DataFrame:

In [241]:
# Convert credits to json
json_credits_string = json.dumps(credits)
# Convert json to dataframe
movie_credits_df = pd.read_json(json_credits_string)

# Export to save
# movie_credits_df.to_csv("./static/data/movie_credits.csv", index=False)
movie_credits_df.head()

Unnamed: 0,id,cast,crew
0,580489,"[{'adult': False, 'gender': 2, 'id': 2524, 'kn...","[{'adult': False, 'gender': 2, 'id': 149, 'kno..."
1,512195,"[{'adult': False, 'gender': 2, 'id': 18918, 'k...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn..."
2,585245,"[{'adult': False, 'gender': 1, 'id': 1696017, ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn..."
3,617653,"[{'adult': False, 'gender': 2, 'id': 1892, 'kn...","[{'adult': False, 'gender': 2, 'id': 120, 'kno..."
4,566525,"[{'adult': False, 'gender': 2, 'id': 1489211, ...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn..."


In [242]:
movie_credits_df["cast_crew"] = movie_credits_df["cast"] + movie_credits_df["crew"]
movie_credits_df

Unnamed: 0,id,cast,crew,cast_crew
0,580489,"[{'adult': False, 'gender': 2, 'id': 2524, 'kn...","[{'adult': False, 'gender': 2, 'id': 149, 'kno...","[{'adult': False, 'gender': 2, 'id': 2524, 'kn..."
1,512195,"[{'adult': False, 'gender': 2, 'id': 18918, 'k...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn...","[{'adult': False, 'gender': 2, 'id': 18918, 'k..."
2,585245,"[{'adult': False, 'gender': 1, 'id': 1696017, ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn...","[{'adult': False, 'gender': 1, 'id': 1696017, ..."
3,617653,"[{'adult': False, 'gender': 2, 'id': 1892, 'kn...","[{'adult': False, 'gender': 2, 'id': 120, 'kno...","[{'adult': False, 'gender': 2, 'id': 1892, 'kn..."
4,566525,"[{'adult': False, 'gender': 2, 'id': 1489211, ...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn...","[{'adult': False, 'gender': 2, 'id': 1489211, ..."
...,...,...,...,...
1995,277355,"[{'adult': False, 'gender': 1, 'id': 3136, 'kn...","[{'adult': False, 'gender': 1, 'id': 53680, 'k...","[{'adult': False, 'gender': 1, 'id': 3136, 'kn..."
1996,271706,"[{'adult': False, 'gender': 0, 'id': 1674211, ...","[{'adult': False, 'gender': 0, 'id': 81722, 'k...","[{'adult': False, 'gender': 0, 'id': 1674211, ..."
1997,227973,"[{'adult': False, 'gender': 2, 'id': 1393177, ...","[{'adult': False, 'gender': 0, 'id': 13223, 'k...","[{'adult': False, 'gender': 2, 'id': 1393177, ..."
1998,716799,"[{'adult': False, 'gender': 2, 'id': 1402803, ...","[{'adult': False, 'gender': 2, 'id': 224409, '...","[{'adult': False, 'gender': 2, 'id': 1402803, ..."


In [243]:
movie_credits_df['percent_fm'] = movie_credits_df['cast_crew'].apply(fm_percentage)

In [246]:
# cast_crew_fm_percentage
movie_credits_df = movie_credits_df.drop(columns=['cast_crew'])
movie_credits_df

Unnamed: 0,id,cast,crew,percent_fm
0,580489,"[{'adult': False, 'gender': 2, 'id': 2524, 'kn...","[{'adult': False, 'gender': 2, 'id': 149, 'kno...",22.54%
1,512195,"[{'adult': False, 'gender': 2, 'id': 18918, 'k...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn...",14.58%
2,585245,"[{'adult': False, 'gender': 1, 'id': 1696017, ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn...",39.29%
3,617653,"[{'adult': False, 'gender': 2, 'id': 1892, 'kn...","[{'adult': False, 'gender': 2, 'id': 120, 'kno...",26.32%
4,566525,"[{'adult': False, 'gender': 2, 'id': 1489211, ...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn...",33.33%
...,...,...,...,...
1995,277355,"[{'adult': False, 'gender': 1, 'id': 3136, 'kn...","[{'adult': False, 'gender': 1, 'id': 53680, 'k...",38.89%
1996,271706,"[{'adult': False, 'gender': 0, 'id': 1674211, ...","[{'adult': False, 'gender': 0, 'id': 81722, 'k...",0.00%
1997,227973,"[{'adult': False, 'gender': 2, 'id': 1393177, ...","[{'adult': False, 'gender': 0, 'id': 13223, 'k...",25.33%
1998,716799,"[{'adult': False, 'gender': 2, 'id': 1402803, ...","[{'adult': False, 'gender': 2, 'id': 224409, '...",40.00%


## Certifications DataFrame:

In [248]:
# Convert certifications to json
json_certifications_string = json.dumps(certifications)
# Convert json to dataframe
movie_certifications_df = pd.read_json(json_certifications_string)

# Export to save
# movie_certifications_df.to_csv("./static/data/movie_certifications.csv", index=False)
movie_certifications_df.head()

Unnamed: 0,id,results
0,580489,"[{'iso_3166_1': 'BR', 'release_dates': [{'cert..."
1,512195,"[{'iso_3166_1': 'GB', 'release_dates': [{'cert..."
2,585245,"[{'iso_3166_1': 'SE', 'release_dates': [{'cert..."
3,617653,"[{'iso_3166_1': 'DE', 'release_dates': [{'cert..."
4,566525,"[{'iso_3166_1': 'CZ', 'release_dates': [{'cert..."


In [249]:
# Extract certification information and append to new column
movie_certifications_df['certification'] = movie_certifications_df['results'].apply(extract_certification)
movie_certifications_df = movie_certifications_df[['id', 'certification']]

In [250]:
movie_certifications_df.head()

Unnamed: 0,id,certification
0,580489,PG-13
1,512195,PG-13
2,585245,PG
3,617653,
4,566525,PG-13


In [251]:
# Convert ids to ints (to merge correctly)
movie_keywords_df['id'] = movie_keywords_df['id'].astype('int')
movie_details_df['id'] = movie_details_df['id'].astype('int')
movie_credits_df['id'] = movie_credits_df['id'].astype('int')
movie_certifications_df['id'] = movie_certifications_df['id'].astype('int')

In [252]:
# Merge keywords with details
movie_df = movie_details_df.merge(movie_keywords_df, on='id')
movie_df = movie_df.merge(movie_credits_df, on='id')
movie_df = movie_df.merge(movie_certifications_df, on='id')

# Export to save
# movie_df.to_csv("./static/data/movies_merged.csv", index=False)

In [253]:
movie_df.head(20)

Unnamed: 0,adult,backdrop_path,belongs_to_collection,original_budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,original_revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,cpi_2021,cpi_old,adjusted_revenue,adjusted_budget,budget_bins,keywords,cast,crew,percent_fm,certification
0,False,/70nxSw3mFBsGmtkvcs91PbjerwD.jpg,"{'id': 558216, 'name': 'Venom Collection', 'po...",110000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",https://www.venom.movie,580489,tt7097896,en,Venom: Let There Be Carnage,After finding a host body in investigative rep...,8633.976,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,"[{'id': 7505, 'logo_path': '/837VMM4wOkODc1idN...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-30,482000000,97,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...",Released,,Venom: Let There Be Carnage,False,7.2,4421,2021,269.489,269.489,482000000.0,110000000.0,51m to 150m,"[{'id': 1701, 'name': 'hero'}, {'id': 2095, 'n...","[{'adult': False, 'gender': 2, 'id': 2524, 'kn...","[{'adult': False, 'gender': 2, 'id': 149, 'kno...",22.54%,PG-13
1,False,/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg,,200000000,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",https://www.netflix.com/us/title/81161626,512195,tt7991608,en,Red Notice,An Interpol-issued Red Notice is a global aler...,4825.133,/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg,"[{'id': 34081, 'logo_path': None, 'name': 'Fly...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-04,0,117,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Pro and cons.,Red Notice,False,6.8,1982,2021,269.489,269.489,0.0,200000000.0,151m to 380m,"[{'id': 1812, 'name': 'fbi'}, {'id': 6710, 'na...","[{'adult': False, 'gender': 2, 'id': 18918, 'k...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn...",14.58%,PG-13
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,,0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",,585245,tt2397461,en,Clifford the Big Red Dog,As Emily struggles to fit in at home and at sc...,3280.16,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,"[{'id': 4, 'logo_path': '/fycMZt242LVjagMByZOL...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",2021-11-10,51000000,97,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Adventure has never been bigger.,Clifford the Big Red Dog,False,7.6,540,2021,269.489,269.489,51000000.0,0.0,,"[{'id': 818, 'name': 'based on novel or book'}...","[{'adult': False, 'gender': 1, 'id': 1696017, ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn...",39.29%,PG
3,False,/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg,,100000000,"[{'id': 28, 'name': 'Action'}, {'id': 18, 'nam...",https://www.20thcenturystudios.com/movies/the-...,617653,tt4244994,en,The Last Duel,King Charles VI declares that Knight Jean de C...,3256.364,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,"[{'id': 127928, 'logo_path': '/h0rjX5vjW5r8yEn...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-10-13,27000000,152,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The true story of a woman who defied a nation ...,The Last Duel,False,7.6,885,2021,269.489,269.489,27000000.0,100000000.0,51m to 150m,"[{'id': 818, 'name': 'based on novel or book'}...","[{'adult': False, 'gender': 2, 'id': 1892, 'kn...","[{'adult': False, 'gender': 2, 'id': 120, 'kno...",26.32%,
4,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"{'id': 912503, 'name': 'Shang-Chi Collection',...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.marvel.com/movies/shang-chi-and-th...,566525,tt9376612,en,Shang-Chi and the Legend of the Ten Rings,Shang-Chi must confront the past he thought he...,3189.442,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-01,430238384,132,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,You can't outrun your destiny.,Shang-Chi and the Legend of the Ten Rings,False,7.8,4211,2021,269.489,269.489,430238384.0,150000000.0,51m to 150m,"[{'id': 779, 'name': 'martial arts'}, {'id': 9...","[{'adult': False, 'gender': 2, 'id': 1489211, ...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn...",33.33%,PG-13
5,False,/g2djzUqA6mFplzC03gDk0WSyg99.jpg,,0,"[{'id': 12, 'name': 'Adventure'}, {'id': 16, '...",https://movies.disney.com/encanto,568124,tt2953050,en,Encanto,"The tale of an extraordinary family, the Madri...",2989.673,/4j0PNHkMr5ax3IA8tjtxcmPU3QT.jpg,"[{'id': 6125, 'logo_path': '/tVPmo07IHhBs4Huil...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-24,151000000,102,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,There's a little magic in all of us ...almost ...,Encanto,False,7.4,313,2021,269.489,269.489,151000000.0,0.0,,[],"[{'adult': False, 'gender': 1, 'id': 968367, '...","[{'adult': False, 'gender': 0, 'id': 8159, 'kn...",42.86%,
6,False,/VlHt27nCqOuTnuX6bku8QZapzO.jpg,"{'id': 531241, 'name': 'Spider-Man (Avengers) ...",0,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.spidermannowayhome.movie,634649,tt10872600,en,Spider-Man: No Way Home,Peter Parker is unmasked and no longer able to...,2774.306,/1g0dhYtq4irTY1GPXvft6k4YLjm.jpg,"[{'id': 420, 'logo_path': '/hUzeosd33nzE5MCNsZ...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-12-15,0,148,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The Multiverse unleashed.,Spider-Man: No Way Home,False,8.4,44,2021,269.489,269.489,0.0,0.0,,"[{'id': 1701, 'name': 'hero'}, {'id': 5451, 'n...","[{'adult': False, 'gender': 2, 'id': 1136406, ...","[{'adult': False, 'gender': 1, 'id': 2519, 'kn...",24.14%,PG-13
7,False,/iUeeZ5PWfZGgUtCJfwcgmCfdzoI.jpg,,0,"[{'id': 16, 'name': 'Animation'}, {'id': 878, ...",https://family.20thcenturystudios.com/movies/r...,482321,tt7504818,en,Ron's Gone Wrong,"In a world where walking, talking, digitally c...",2662.308,/gA9QxSravC2EVEkEKgyEmDrfL0e.jpg,"[{'id': 121605, 'logo_path': '/4rc0vsJWygNm9oj...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-10-15,57000000,106,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Best Friend Forever* (*battery life may vary),Ron's Gone Wrong,False,8.5,330,2021,269.489,269.489,57000000.0,0.0,,"[{'id': 18035, 'name': 'family'}, {'id': 28579...","[{'adult': False, 'gender': 2, 'id': 58225, 'k...","[{'adult': False, 'gender': 0, 'id': 5542, 'kn...",35.00%,PG
8,False,/xGrTm3J0FTafmuQ85vF7ZCw94x6.jpg,,9100000,"[{'id': 18, 'name': 'Drama'}, {'id': 36, 'name...",,589761,tt10648714,ru,Чернобыль,The aftermath of a shocking explosion at the C...,2438.508,/kfQJQWFEoWRVBH8FUKnT0HX1yRS.jpg,"[{'id': 5630, 'logo_path': '/s0mHCw53fp6EAapR7...","[{'iso_3166_1': 'RU', 'name': 'Russia'}]",2021-04-15,5370393,136,"[{'english_name': 'Russian', 'iso_639_1': 'ru'...",Released,,Chernobyl: Abyss,False,6.3,235,2021,269.489,269.489,5370393.0,9100000.0,1 to 15m,"[{'id': 210326, 'name': 'chernobyl'}]","[{'adult': False, 'gender': 2, 'id': 562730, '...","[{'adult': False, 'gender': 1, 'id': 30247, 'k...",37.14%,
9,False,/r2GAjd4rNOHJh6i6Y0FntmYuPQW.jpg,"{'id': 645, 'name': 'James Bond Collection', '...",242000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 28, '...",https://www.007.com/no-time-to-die/,370172,tt2382320,en,No Time to Die,Bond has left active service and is enjoying a...,1999.424,/iUgygt3fscRoKWCV1d0C7FbM9TP.jpg,"[{'id': 7576, 'logo_path': '/oYcUi1byZ312Z3xiz...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-09-29,758000000,163,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,The mission that changes everything begins…,No Time to Die,False,7.6,2521,2021,269.489,269.489,758000000.0,242000000.0,151m to 380m,"[{'id': 470, 'name': 'spy'}, {'id': 156095, 'n...","[{'adult': False, 'gender': 2, 'id': 8784, 'kn...","[{'adult': False, 'gender': 2, 'id': 947, 'kno...",22.06%,PG-13


## Call Functions to Extract Details:

In [254]:
# Director
movie_df['director'] = movie_df['crew'].apply(get_director)

# Director Gender
movie_df['director_gender'] = movie_df['crew'].apply(get_director_gender)

# Producers
movie_df['producers'] = movie_df['crew'].apply(get_producers)

# Screenplay writers
movie_df['writers'] = movie_df['crew'].apply(get_writers)

# Cast
movie_df['cast'] = movie_df['cast'].apply(get_cast_list)

# Production Company Country of Origin
movie_df['production_company_origin'] = movie_df['production_companies'].apply(get_production_company_country)

# Spoken Languages in Movie
movie_df['spoken_languages']  = movie_df['spoken_languages'].apply(get_languages)

# Create foreign language column for if the original language was not English
movie_df['foreign_language'] = movie_df['original_language'].apply(original_language_binary)

# Get List: Keywords, genres, & production countries
features = ['keywords', 'genres', 'production_companies']
            
for feature in features:
    movie_df[feature] = movie_df[feature].apply(get_list)

In [255]:
# Show the results
movie_df[['title', 'cast', 'director', 'director_gender', 'percent_fm', 'producers', 'writers', 'keywords', 'genres', 'production_companies', 'production_company_origin', 'spoken_languages', 'original_language', 'foreign_language']].head()

Unnamed: 0,title,cast,director,director_gender,percent_fm,producers,writers,keywords,genres,production_companies,production_company_origin,spoken_languages,original_language,foreign_language
0,Venom: Let There Be Carnage,"[Tom Hardy, Woody Harrelson, Michelle Williams...",[Andy Serkis],[2],22.54%,"[Tom Hardy, K.C. Hodenfield, Avi Arad, Jonatha...","[Tom Hardy, Todd McFarlane, Kelly Marcel, Kell...","[hero, anti hero, villain, sequel, superhero, ...","[Science Fiction, Action, Adventure]","[Marvel Entertainment, Columbia Pictures, Pasc...","[US, US, US, US, US]","[Spanish, English]",en,0
1,Red Notice,"[Dwayne Johnson, Ryan Reynolds, Gal Gadot, Rit...",[Rawson Marshall Thurber],[2],14.58%,"[Beau Flynn, Dwayne Johnson, Rawson Marshall T...",[Rawson Marshall Thurber],"[fbi, interpol, heist, art thief]","[Action, Comedy, Crime, Thriller]","[Flynn Picture Company, Seven Bucks Production...","[US, US, US, ]","[English, Italian, Russian]",en,0
2,Clifford the Big Red Dog,"[Darby Camp, Jack Whitehall, Izaac Wang, John ...",[Walt Becker],[2],39.29%,"[Brad Fischer, Deborah Forte, Deborah Forte, B...","[David Ronn, Jay Scherick, Norman Bridwell, An...","[based on novel or book, giant dog]","[Animation, Comedy, Family]","[Paramount, Entertainment One, Kerner Entertai...","[US, CA, , US, US, US]",[English],en,0
3,The Last Duel,"[Matt Damon, Adam Driver, Jodie Comer, Harriet...",[Ridley Scott],[2],26.32%,"[Ridley Scott, Ben Affleck, Matt Damon, Jennif...","[Ben Affleck, Matt Damon, Nicole Holofcener, E...","[based on novel or book, normandy, france, bas...","[Action, Drama, History]","[20th Century Studios, Scott Free Productions,...","[US, GB, US, US]","[English, French, Latin]",en,0
4,Shang-Chi and the Legend of the Ten Rings,"[Simu Liu, Tony Leung Chiu-wai, Awkwafina, Men...",[Destin Daniel Cretton],[2],33.33%,"[Kevin Feige, Charles Newirth, Louis D'Esposit...","[Dave Callaham, Dave Callaham, Destin Daniel C...","[martial arts, superhero, based on comic, mixe...","[Action, Adventure, Fantasy]",[Marvel Studios],[US],"[English, Mandarin]",en,0


In [256]:
# Drop NA from selected columns
movie_df.dropna(inplace=True, how="any", subset=['genres', 'production_companies', 'keywords', 'cast'])
movie_df.dropna(inplace=True, how="any", subset=['director', 'producers', 'writers'])

In [257]:
len(movie_df)

1688

In [258]:
# Clean Data: Convert all features to lowercase and remove spaces
features = ['cast', 'director', 'producers', 'writers', 'keywords', 'genres', 'production_companies']

for feature in features:
    movie_df[f"{feature}_cleaned"] = movie_df[feature].apply(clean_data)

In [259]:
# Call the clean overview function
movie_df['overview_cleaned'] = movie_df['overview'].apply(clean_overview)

In [260]:
# Create soup columns
movie_df['soup'] = movie_df.apply(create_soup, axis = 1)
movie_df['soup_overview'] = movie_df.apply(create_soup_overview, axis = 1)

# Export to save
# movie_df.to_csv("./static/data/movies_cleaned_soup.csv", index=False)

In [261]:
movie_df[['soup']].head(5)

Unnamed: 0,soup
0,hero antihero villain sequel superhero basedon...
1,fbi interpol heist artthief dwaynejohnson ryan...
2,basedonnovelorbook giantdog darbycamp jackwhit...
3,"basedonnovelorbook normandy,france basedonatru..."
4,martialarts superhero basedoncomic mixedmartia...


In [262]:
movie_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1688 entries, 0 to 1973
Data columns (total 52 columns):
 #   Column                        Non-Null Count  Dtype   
---  ------                        --------------  -----   
 0   adult                         1688 non-null   bool    
 1   backdrop_path                 1678 non-null   object  
 2   belongs_to_collection         707 non-null    object  
 3   original_budget               1688 non-null   int64   
 4   genres                        1688 non-null   object  
 5   homepage                      1688 non-null   object  
 6   id                            1688 non-null   int64   
 7   imdb_id                       1684 non-null   object  
 8   original_language             1688 non-null   object  
 9   original_title                1688 non-null   object  
 10  overview                      1688 non-null   object  
 11  popularity                    1688 non-null   float64 
 12  poster_path                   1688 non-null   ob

## Configuration for Posters:

In [263]:
configuration_url = f"https://api.themoviedb.org/3/configuration?api_key={api_key}"
config_response = requests.get(configuration_url).json()
# config_response

In [264]:
# Get images structure
images_url = config_response['images']
# Get Base URL
secure_base_url = images_url['secure_base_url']
# secure_base_url

In [265]:
# Get the size of poster: 2: w185px, 4: w500px, 5: 780px
# images_url['poster_sizes']
poster_size = images_url['poster_sizes'][5]
# poster_size

In [266]:
# Copy poster paths to new df
poster_df = movie_df[['poster_path']].copy()

In [267]:
# Create column with full image path for posters
poster_df['poster_url'] = secure_base_url + poster_size + poster_df['poster_path']

In [268]:
# Export to separate csv
# poster_df.to_csv("./static/data/poster_path.csv", index=False)
poster_df.head()

Unnamed: 0,poster_path,poster_url
0,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,https://image.tmdb.org/t/p/w780/rjkmN1dniUHVYA...
1,/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg,https://image.tmdb.org/t/p/w780/q2d56YvJ3s9W73...
2,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,https://image.tmdb.org/t/p/w780/ygPTrycbMSFDc5...
3,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,https://image.tmdb.org/t/p/w780/zjrJE0fpzPvX8s...
4,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,https://image.tmdb.org/t/p/w780/1BIoJGKbXjdFDA...


In [269]:
# Create column with full image path for posters (in real df this time)
movie_df['poster_url'] = secure_base_url + poster_size + movie_df['poster_path']

# TESTING

## Export CSV:

In [270]:
# Create a lowercase column for easier search
movie_df["lowercase_title"] = movie_df['title'].apply(lambda x: x.lower())

In [271]:
# Save file - used for calling for information
movie_df.to_csv("../static/data/movie_db.csv", index=False)

In [272]:
movie_df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,original_budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,original_revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,cpi_2021,cpi_old,adjusted_revenue,adjusted_budget,budget_bins,keywords,cast,crew,percent_fm,certification,director,director_gender,producers,writers,production_company_origin,foreign_language,cast_cleaned,director_cleaned,producers_cleaned,writers_cleaned,keywords_cleaned,genres_cleaned,production_companies_cleaned,overview_cleaned,soup,soup_overview,poster_url,lowercase_title
0,False,/70nxSw3mFBsGmtkvcs91PbjerwD.jpg,"{'id': 558216, 'name': 'Venom Collection', 'po...",110000000,"[Science Fiction, Action, Adventure]",https://www.venom.movie,580489,tt7097896,en,Venom: Let There Be Carnage,After finding a host body in investigative rep...,8633.976,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,"[Marvel Entertainment, Columbia Pictures, Pasc...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-30,482000000,97,"[Spanish, English]",Released,,Venom: Let There Be Carnage,False,7.2,4421,2021,269.489,269.489,482000000.0,110000000.0,51m to 150m,"[hero, anti hero, villain, sequel, superhero, ...","[Tom Hardy, Woody Harrelson, Michelle Williams...","[{'adult': False, 'gender': 2, 'id': 149, 'kno...",22.54%,PG-13,[Andy Serkis],[2],"[Tom Hardy, K.C. Hodenfield, Avi Arad, Jonatha...","[Tom Hardy, Todd McFarlane, Kelly Marcel, Kell...","[US, US, US, US, US]",0,"[tomhardy, woodyharrelson, michellewilliams, n...",[andyserkis],"[tomhardy, k.c.hodenfield, aviarad, jonathanca...","[tomhardy, toddmcfarlane, kellymarcel, kellyma...","[hero, antihero, villain, sequel, superhero, b...","[sciencefiction, action, adventure]","[marvelentertainment, columbiapictures, pascal...",after finding a host body in investigative rep...,hero antihero villain sequel superhero basedon...,hero antihero villain sequel superhero basedon...,https://image.tmdb.org/t/p/w780/rjkmN1dniUHVYA...,venom: let there be carnage
1,False,/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg,,200000000,"[Action, Comedy, Crime, Thriller]",https://www.netflix.com/us/title/81161626,512195,tt7991608,en,Red Notice,An Interpol-issued Red Notice is a global aler...,4825.133,/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg,"[Flynn Picture Company, Seven Bucks Production...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-04,0,117,"[English, Italian, Russian]",Released,Pro and cons.,Red Notice,False,6.8,1982,2021,269.489,269.489,0.0,200000000.0,151m to 380m,"[fbi, interpol, heist, art thief]","[Dwayne Johnson, Ryan Reynolds, Gal Gadot, Rit...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn...",14.58%,PG-13,[Rawson Marshall Thurber],[2],"[Beau Flynn, Dwayne Johnson, Rawson Marshall T...",[Rawson Marshall Thurber],"[US, US, US, ]",0,"[dwaynejohnson, ryanreynolds, galgadot, rituar...",[rawsonmarshallthurber],"[beauflynn, dwaynejohnson, rawsonmarshallthurb...",[rawsonmarshallthurber],"[fbi, interpol, heist, artthief]","[action, comedy, crime, thriller]","[flynnpicturecompany, sevenbucksproductions, l...",an interpol-issued red notice is a global aler...,fbi interpol heist artthief dwaynejohnson ryan...,fbi interpol heist artthief fbi interpol heist...,https://image.tmdb.org/t/p/w780/q2d56YvJ3s9W73...,red notice
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,,0,"[Animation, Comedy, Family]",,585245,tt2397461,en,Clifford the Big Red Dog,As Emily struggles to fit in at home and at sc...,3280.16,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,"[Paramount, Entertainment One, Kerner Entertai...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",2021-11-10,51000000,97,[English],Released,Adventure has never been bigger.,Clifford the Big Red Dog,False,7.6,540,2021,269.489,269.489,51000000.0,0.0,,"[based on novel or book, giant dog]","[Darby Camp, Jack Whitehall, Izaac Wang, John ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn...",39.29%,PG,[Walt Becker],[2],"[Brad Fischer, Deborah Forte, Deborah Forte, B...","[David Ronn, Jay Scherick, Norman Bridwell, An...","[US, CA, , US, US, US]",0,"[darbycamp, jackwhitehall, izaacwang, johnclee...",[waltbecker],"[bradfischer, deborahforte, deborahforte, bria...","[davidronn, jayscherick, normanbridwell, annie...","[basedonnovelorbook, giantdog]","[animation, comedy, family]","[paramount, entertainmentone, kernerentertainm...",as emily struggles to fit in at home and at sc...,basedonnovelorbook giantdog darbycamp jackwhit...,basedonnovelorbook giantdog basedonnovelorbook...,https://image.tmdb.org/t/p/w780/ygPTrycbMSFDc5...,clifford the big red dog
3,False,/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg,,100000000,"[Action, Drama, History]",https://www.20thcenturystudios.com/movies/the-...,617653,tt4244994,en,The Last Duel,King Charles VI declares that Knight Jean de C...,3256.364,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,"[20th Century Studios, Scott Free Productions,...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-10-13,27000000,152,"[English, French, Latin]",Released,The true story of a woman who defied a nation ...,The Last Duel,False,7.6,885,2021,269.489,269.489,27000000.0,100000000.0,51m to 150m,"[based on novel or book, normandy, france, bas...","[Matt Damon, Adam Driver, Jodie Comer, Harriet...","[{'adult': False, 'gender': 2, 'id': 120, 'kno...",26.32%,,[Ridley Scott],[2],"[Ridley Scott, Ben Affleck, Matt Damon, Jennif...","[Ben Affleck, Matt Damon, Nicole Holofcener, E...","[US, GB, US, US]",0,"[mattdamon, adamdriver, jodiecomer, harrietwal...",[ridleyscott],"[ridleyscott, benaffleck, mattdamon, jenniferf...","[benaffleck, mattdamon, nicoleholofcener, eric...","[basedonnovelorbook, normandy,france, basedona...","[action, drama, history]","[20thcenturystudios, scottfreeproductions, pea...",king charles vi declares that knight jean de c...,"basedonnovelorbook normandy,france basedonatru...","basedonnovelorbook normandy,france basedonatru...",https://image.tmdb.org/t/p/w780/zjrJE0fpzPvX8s...,the last duel
4,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"{'id': 912503, 'name': 'Shang-Chi Collection',...",150000000,"[Action, Adventure, Fantasy]",https://www.marvel.com/movies/shang-chi-and-th...,566525,tt9376612,en,Shang-Chi and the Legend of the Ten Rings,Shang-Chi must confront the past he thought he...,3189.442,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,[Marvel Studios],"[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-01,430238384,132,"[English, Mandarin]",Released,You can't outrun your destiny.,Shang-Chi and the Legend of the Ten Rings,False,7.8,4211,2021,269.489,269.489,430238384.0,150000000.0,51m to 150m,"[martial arts, superhero, based on comic, mixe...","[Simu Liu, Tony Leung Chiu-wai, Awkwafina, Men...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn...",33.33%,PG-13,[Destin Daniel Cretton],[2],"[Kevin Feige, Charles Newirth, Louis D'Esposit...","[Dave Callaham, Dave Callaham, Destin Daniel C...",[US],0,"[simuliu, tonyleungchiu-wai, awkwafina, meng'e...",[destindanielcretton],"[kevinfeige, charlesnewirth, louisd'esposito, ...","[davecallaham, davecallaham, destindanielcrett...","[martialarts, superhero, basedoncomic, mixedma...","[action, adventure, fantasy]",[marvelstudios],shang-chi must confront the past he thought he...,martialarts superhero basedoncomic mixedmartia...,martialarts superhero basedoncomic mixedmartia...,https://image.tmdb.org/t/p/w780/1BIoJGKbXjdFDA...,shang-chi and the legend of the ten rings


# Machine Learning Recommender:

In [273]:
from sklearn.feature_extraction.text import CountVectorizer

# Use Count Vectorizer to create counts for each word
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(movie_df['soup_overview'])

In [274]:
count_matrix.shape

(1688, 29906)

In [275]:
from sklearn.metrics.pairwise import cosine_similarity

# Calculate cosine similarity
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [276]:
# Reset the index
# movie_df = movie_df.reset_index()
# Create series with index & titles of movies
indices = pd.Series(movie_df.index, index=movie_df['title']).drop_duplicates()

In [297]:
movie_df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,original_budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,original_revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,cpi_2021,cpi_old,adjusted_revenue,adjusted_budget,budget_bins,keywords,cast,crew,percent_fm,certification,director,director_gender,producers,writers,production_company_origin,foreign_language,cast_cleaned,director_cleaned,producers_cleaned,writers_cleaned,keywords_cleaned,genres_cleaned,production_companies_cleaned,overview_cleaned,soup,soup_overview,poster_url,lowercase_title
0,False,/70nxSw3mFBsGmtkvcs91PbjerwD.jpg,"{'id': 558216, 'name': 'Venom Collection', 'po...",110000000,"[Science Fiction, Action, Adventure]",https://www.venom.movie,580489,tt7097896,en,Venom: Let There Be Carnage,After finding a host body in investigative rep...,8633.976,/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg,"[Marvel Entertainment, Columbia Pictures, Pasc...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-30,482000000,97,"[Spanish, English]",Released,,Venom: Let There Be Carnage,False,7.2,4421,2021,269.489,269.489,482000000.0,110000000.0,51m to 150m,"[hero, anti hero, villain, sequel, superhero, ...","[{""adult"": false, ""gender"": 2, ""id"": 2524, ""kn...","[{""adult"": false, ""gender"": 2, ""id"": 149, ""kno...",22.54%,PG-13,[Andy Serkis],[2],"[Tom Hardy, K.C. Hodenfield, Avi Arad, Jonatha...","[Tom Hardy, Todd McFarlane, Kelly Marcel, Kell...","[US, US, US, US, US]",0,"[tomhardy, woodyharrelson, michellewilliams, n...",[andyserkis],"[tomhardy, k.c.hodenfield, aviarad, jonathanca...","[tomhardy, toddmcfarlane, kellymarcel, kellyma...","[hero, antihero, villain, sequel, superhero, b...","[sciencefiction, action, adventure]","[marvelentertainment, columbiapictures, pascal...",after finding a host body in investigative rep...,hero antihero villain sequel superhero basedon...,hero antihero villain sequel superhero basedon...,https://image.tmdb.org/t/p/w780/rjkmN1dniUHVYA...,venom: let there be carnage
1,False,/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg,,200000000,"[Action, Comedy, Crime, Thriller]",https://www.netflix.com/us/title/81161626,512195,tt7991608,en,Red Notice,An Interpol-issued Red Notice is a global aler...,4825.133,/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg,"[Flynn Picture Company, Seven Bucks Production...","[{'iso_3166_1': 'US', 'name': 'United States o...",2021-11-04,0,117,"[English, Italian, Russian]",Released,Pro and cons.,Red Notice,False,6.8,1982,2021,269.489,269.489,0.0,200000000.0,151m to 380m,"[fbi, interpol, heist, art thief]","[{""adult"": false, ""gender"": 2, ""id"": 18918, ""k...","[{""adult"": false, ""gender"": 2, ""id"": 9543, ""kn...",14.58%,PG-13,[Rawson Marshall Thurber],[2],"[Beau Flynn, Dwayne Johnson, Rawson Marshall T...",[Rawson Marshall Thurber],"[US, US, US, ]",0,"[dwaynejohnson, ryanreynolds, galgadot, rituar...",[rawsonmarshallthurber],"[beauflynn, dwaynejohnson, rawsonmarshallthurb...",[rawsonmarshallthurber],"[fbi, interpol, heist, artthief]","[action, comedy, crime, thriller]","[flynnpicturecompany, sevenbucksproductions, l...",an interpol-issued red notice is a global aler...,fbi interpol heist artthief dwaynejohnson ryan...,fbi interpol heist artthief fbi interpol heist...,https://image.tmdb.org/t/p/w780/q2d56YvJ3s9W73...,red notice
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,,0,"[Animation, Comedy, Family]",,585245,tt2397461,en,Clifford the Big Red Dog,As Emily struggles to fit in at home and at sc...,3280.16,/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg,"[Paramount, Entertainment One, Kerner Entertai...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso...",2021-11-10,51000000,97,[English],Released,Adventure has never been bigger.,Clifford the Big Red Dog,False,7.6,540,2021,269.489,269.489,51000000.0,0.0,,"[based on novel or book, giant dog]","[{""adult"": false, ""gender"": 1, ""id"": 1696017, ...","[{""adult"": false, ""gender"": 1, ""id"": 8220, ""kn...",39.29%,PG,[Walt Becker],[2],"[Brad Fischer, Deborah Forte, Deborah Forte, B...","[David Ronn, Jay Scherick, Norman Bridwell, An...","[US, CA, , US, US, US]",0,"[darbycamp, jackwhitehall, izaacwang, johnclee...",[waltbecker],"[bradfischer, deborahforte, deborahforte, bria...","[davidronn, jayscherick, normanbridwell, annie...","[basedonnovelorbook, giantdog]","[animation, comedy, family]","[paramount, entertainmentone, kernerentertainm...",as emily struggles to fit in at home and at sc...,basedonnovelorbook giantdog darbycamp jackwhit...,basedonnovelorbook giantdog basedonnovelorbook...,https://image.tmdb.org/t/p/w780/ygPTrycbMSFDc5...,clifford the big red dog
3,False,/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg,,100000000,"[Action, Drama, History]",https://www.20thcenturystudios.com/movies/the-...,617653,tt4244994,en,The Last Duel,King Charles VI declares that Knight Jean de C...,3256.364,/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg,"[20th Century Studios, Scott Free Productions,...","[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...",2021-10-13,27000000,152,"[English, French, Latin]",Released,The true story of a woman who defied a nation ...,The Last Duel,False,7.6,885,2021,269.489,269.489,27000000.0,100000000.0,51m to 150m,"[based on novel or book, normandy, france, bas...","[{""adult"": false, ""gender"": 2, ""id"": 1892, ""kn...","[{""adult"": false, ""gender"": 2, ""id"": 120, ""kno...",26.32%,,[Ridley Scott],[2],"[Ridley Scott, Ben Affleck, Matt Damon, Jennif...","[Ben Affleck, Matt Damon, Nicole Holofcener, E...","[US, GB, US, US]",0,"[mattdamon, adamdriver, jodiecomer, harrietwal...",[ridleyscott],"[ridleyscott, benaffleck, mattdamon, jenniferf...","[benaffleck, mattdamon, nicoleholofcener, eric...","[basedonnovelorbook, normandy,france, basedona...","[action, drama, history]","[20thcenturystudios, scottfreeproductions, pea...",king charles vi declares that knight jean de c...,"basedonnovelorbook normandy,france basedonatru...","basedonnovelorbook normandy,france basedonatru...",https://image.tmdb.org/t/p/w780/zjrJE0fpzPvX8s...,the last duel
4,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"{'id': 912503, 'name': 'Shang-Chi Collection',...",150000000,"[Action, Adventure, Fantasy]",https://www.marvel.com/movies/shang-chi-and-th...,566525,tt9376612,en,Shang-Chi and the Legend of the Ten Rings,Shang-Chi must confront the past he thought he...,3189.442,/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg,[Marvel Studios],"[{'iso_3166_1': 'US', 'name': 'United States o...",2021-09-01,430238384,132,"[English, Mandarin]",Released,You can't outrun your destiny.,Shang-Chi and the Legend of the Ten Rings,False,7.8,4211,2021,269.489,269.489,430238384.0,150000000.0,51m to 150m,"[martial arts, superhero, based on comic, mixe...","[{""adult"": false, ""gender"": 2, ""id"": 1489211, ...","[{""adult"": false, ""gender"": 1, ""id"": 7232, ""kn...",33.33%,PG-13,[Destin Daniel Cretton],[2],"[Kevin Feige, Charles Newirth, Louis D'Esposit...","[Dave Callaham, Dave Callaham, Destin Daniel C...",[US],0,"[simuliu, tonyleungchiu-wai, awkwafina, meng'e...",[destindanielcretton],"[kevinfeige, charlesnewirth, louisd'esposito, ...","[davecallaham, davecallaham, destindanielcrett...","[martialarts, superhero, basedoncomic, mixedma...","[action, adventure, fantasy]",[marvelstudios],shang-chi must confront the past he thought he...,martialarts superhero basedoncomic mixedmartia...,martialarts superhero basedoncomic mixedmartia...,https://image.tmdb.org/t/p/w780/1BIoJGKbXjdFDA...,shang-chi and the legend of the ten rings


## Function to Recommend Titles:

In [278]:
def get_similarity_scores(title, cosine_sim):
    # Get the index of the movie that matches the title
    idx = indices[title]
    
    # Get the pairwise similarity scores of all movies with that movie
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort the movies based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the scores of the most similar movies
    sim_scores = sim_scores[1:11]
#     print(sim_scores)
    
    # Convert list to DataFrame
    sim_scores_df = pd.DataFrame(sim_scores, columns = ["index", "similarity_score"])
    
    # Return top 10 most similar scores
    return sim_scores_df

In [279]:
def get_recommendations(original_df, score_df):
    original_df = original_df.merge(score_df, on="index")
    original_df = original_df.sort_values("similarity_score", ascending=False)
    
    return original_df

In [280]:
# Test out function
movie_title = "Get Out"
similarity_scores_df = get_similarity_scores(movie_title, cosine_sim)

In [None]:
# Call Get Recommendations Function
recommendations = get_recommendations(movie_df, similarity_scores_df)

In [None]:
# Print out dataframe
# recommendations[['title', 'similarity_score']].to_csv("./soup_test_3.csv")
recommendations[['title', 'similarity_score', 'id']].head(10)

### Convert columns to list for SQL Database connection

In [359]:
movie_df_columns = list(movie_df.columns)
movie_df_columns

['adult',
 'backdrop_path',
 'belongs_to_collection',
 'original_budget',
 'genres',
 'homepage',
 'id',
 'imdb_id',
 'original_language',
 'original_title',
 'overview',
 'popularity',
 'poster_path',
 'production_companies',
 'production_countries',
 'release_date',
 'original_revenue',
 'runtime',
 'spoken_languages',
 'status',
 'tagline',
 'title',
 'video',
 'vote_average',
 'vote_count',
 'year',
 'cpi_2021',
 'cpi_old',
 'adjusted_revenue',
 'adjusted_budget',
 'budget_bins',
 'keywords',
 'cast',
 'crew',
 'percent_fm',
 'certification',
 'director',
 'director_gender',
 'producers',
 'writers',
 'production_company_origin',
 'foreign_language',
 'cast_cleaned',
 'director_cleaned',
 'producers_cleaned',
 'writers_cleaned',
 'keywords_cleaned',
 'genres_cleaned',
 'production_companies_cleaned',
 'overview_cleaned',
 'soup',
 'soup_overview',
 'poster_url',
 'lowercase_title']

In [360]:
for column_name in list(movie_df.columns):
    movie_df[column_name] = movie_df[column_name].apply(json.dumps)

In [362]:
movie_dict = movie_df.applymap(lambda x: isinstance(x, (dict, list))).all()
print(movie_dict)

adult                           False
backdrop_path                   False
belongs_to_collection           False
original_budget                 False
genres                          False
homepage                        False
id                              False
imdb_id                         False
original_language               False
original_title                  False
overview                        False
popularity                      False
poster_path                     False
production_companies            False
production_countries            False
release_date                    False
original_revenue                False
runtime                         False
spoken_languages                False
status                          False
tagline                         False
title                           False
video                           False
vote_average                    False
vote_count                      False
year                            False
cpi_2021    

## Database:

In [2]:
from sqlalchemy import create_engine, inspect

In [None]:
# configure the connection string
rds_connection_string = f'postgresql://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}'
        
# connect to the database
engine = create_engine(rds_connection_string)
conn = engine.connect()

In [365]:
engine.execute("DROP TABLE IF EXISTS movies")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f89f7b19eb8>

In [366]:
# Append data to table
movie_df.to_sql(name='movies', con=conn, if_exists='append', index=False)

In [None]:
# Use inspector to find table names
Inspector = inspect(engine)
Inspector.get_table_names()

In [368]:
# Check movies table
pd.read_sql_query('select * from movies', con=conn).head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,original_budget,genres,homepage,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,original_revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,cpi_2021,cpi_old,adjusted_revenue,adjusted_budget,budget_bins,keywords,cast,crew,percent_fm,certification,director,director_gender,producers,writers,production_company_origin,foreign_language,cast_cleaned,director_cleaned,producers_cleaned,writers_cleaned,keywords_cleaned,genres_cleaned,production_companies_cleaned,overview_cleaned,soup,soup_overview,poster_url,lowercase_title
0,False,"""/70nxSw3mFBsGmtkvcs91PbjerwD.jpg""","{""id"": 558216, ""name"": ""Venom Collection"", ""po...",110000000,"""\""[\\\""Science Fiction\\\"", \\\""Action\\\"", \...","""https://www.venom.movie""",580489,"""tt7097896""","""en""","""Venom: Let There Be Carnage""","""After finding a host body in investigative re...",8633.976,"""/rjkmN1dniUHVYAtwuV3Tji7FsDO.jpg""","""\""[\\\""Marvel Entertainment\\\"", \\\""Columbia...","""\""[{\\\""iso_3166_1\\\"": \\\""US\\\"", \\\""name\...","""2021-09-30""",482000000,97,"[""Spanish"", ""English""]","""Released""","""""","""Venom: Let There Be Carnage""",False,7.2,4421,2021,269.489,269.489,482000000.0,110000000.0,"""51m to 150m""","""[\""hero\"", \""anti hero\"", \""villain\"", \""sequ...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""22.54%""","""PG-13""","""[\""Andy Serkis\""]""","""[2]""","""[\""Tom Hardy\"", \""K.C. Hodenfield\"", \""Avi Ar...","""[\""Tom Hardy\"", \""Todd McFarlane\"", \""Kelly M...","""[\""US\"", \""US\"", \""US\"", \""US\"", \""US\""]""",0,"""[\""tomhardy\"", \""woodyharrelson\"", \""michelle...","""[\""andyserkis\""]""","""[\""tomhardy\"", \""k.c.hodenfield\"", \""aviarad\...","""[\""tomhardy\"", \""toddmcfarlane\"", \""kellymarc...","""[\""hero\"", \""antihero\"", \""villain\"", \""seque...","""[\""sciencefiction\"", \""action\"", \""adventure\""]""","""[\""marvelentertainment\"", \""columbiapictures\...","""after finding a host body in investigative re...","""hero antihero villain sequel superhero basedo...","""hero antihero villain sequel superhero basedo...","""https://image.tmdb.org/t/p/w780/rjkmN1dniUHVY...","""venom: let there be carnage"""
1,False,"""/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg""",,200000000,"""\""[\\\""Action\\\"", \\\""Comedy\\\"", \\\""Crime\...","""https://www.netflix.com/us/title/81161626""",512195,"""tt7991608""","""en""","""Red Notice""","""An Interpol-issued Red Notice is a global ale...",4825.133,"""/q2d56YvJ3s9W73lqrk16Nzcc7xD.jpg""","""\""[\\\""Flynn Picture Company\\\"", \\\""Seven B...","""\""[{\\\""iso_3166_1\\\"": \\\""US\\\"", \\\""name\...","""2021-11-04""",0,117,"[""English"", ""Italian"", ""Russian""]","""Released""","""Pro and cons.""","""Red Notice""",False,6.8,1982,2021,269.489,269.489,0.0,200000000.0,"""151m to 380m""","""[\""fbi\"", \""interpol\"", \""heist\"", \""art thie...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""14.58%""","""PG-13""","""[\""Rawson Marshall Thurber\""]""","""[2]""","""[\""Beau Flynn\"", \""Dwayne Johnson\"", \""Rawson...","""[\""Rawson Marshall Thurber\""]""","""[\""US\"", \""US\"", \""US\"", \""\""]""",0,"""[\""dwaynejohnson\"", \""ryanreynolds\"", \""galga...","""[\""rawsonmarshallthurber\""]""","""[\""beauflynn\"", \""dwaynejohnson\"", \""rawsonma...","""[\""rawsonmarshallthurber\""]""","""[\""fbi\"", \""interpol\"", \""heist\"", \""artthief...","""[\""action\"", \""comedy\"", \""crime\"", \""thrille...","""[\""flynnpicturecompany\"", \""sevenbucksproduct...","""an interpol-issued red notice is a global ale...","""fbi interpol heist artthief dwaynejohnson rya...","""fbi interpol heist artthief fbi interpol heis...","""https://image.tmdb.org/t/p/w780/q2d56YvJ3s9W7...","""red notice"""
2,False,"""/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg""",,0,"""\""[\\\""Animation\\\"", \\\""Comedy\\\"", \\\""Fam...","""""",585245,"""tt2397461""","""en""","""Clifford the Big Red Dog""","""As Emily struggles to fit in at home and at s...",3280.16,"""/ygPTrycbMSFDc5zUpy4K5ZZtQSC.jpg""","""\""[\\\""Paramount\\\"", \\\""Entertainment One\\...","""\""[{\\\""iso_3166_1\\\"": \\\""CA\\\"", \\\""name\...","""2021-11-10""",51000000,97,"[""English""]","""Released""","""Adventure has never been bigger.""","""Clifford the Big Red Dog""",False,7.6,540,2021,269.489,269.489,51000000.0,0.0,,"""[\""based on novel or book\"", \""giant dog\""]""","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""39.29%""","""PG""","""[\""Walt Becker\""]""","""[2]""","""[\""Brad Fischer\"", \""Deborah Forte\"", \""Debor...","""[\""David Ronn\"", \""Jay Scherick\"", \""Norman B...","""[\""US\"", \""CA\"", \""\"", \""US\"", \""US\"", \""US\""]""",0,"""[\""darbycamp\"", \""jackwhitehall\"", \""izaacwan...","""[\""waltbecker\""]""","""[\""bradfischer\"", \""deborahforte\"", \""deborah...","""[\""davidronn\"", \""jayscherick\"", \""normanbrid...","""[\""basedonnovelorbook\"", \""giantdog\""]""","""[\""animation\"", \""comedy\"", \""family\""]""","""[\""paramount\"", \""entertainmentone\"", \""kerne...","""as emily struggles to fit in at home and at s...","""basedonnovelorbook giantdog darbycamp jackwhi...","""basedonnovelorbook giantdog basedonnovelorboo...","""https://image.tmdb.org/t/p/w780/ygPTrycbMSFDc...","""clifford the big red dog"""
3,False,"""/mFbS5TwN95BcSEfiztdchLgTQ0v.jpg""",,100000000,"""\""[\\\""Action\\\"", \\\""Drama\\\"", \\\""History...","""https://www.20thcenturystudios.com/movies/the...",617653,"""tt4244994""","""en""","""The Last Duel""","""King Charles VI declares that Knight Jean de ...",3256.364,"""/zjrJE0fpzPvX8saJXj8VNfcjBoU.jpg""","""\""[\\\""20th Century Studios\\\"", \\\""Scott Fr...","""\""[{\\\""iso_3166_1\\\"": \\\""GB\\\"", \\\""name\...","""2021-10-13""",27000000,152,"[""English"", ""French"", ""Latin""]","""Released""","""The true story of a woman who defied a nation...","""The Last Duel""",False,7.6,885,2021,269.489,269.489,27000000.0,100000000.0,"""51m to 150m""","""[\""based on novel or book\"", \""normandy, fran...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""26.32%""","""""","""[\""Ridley Scott\""]""","""[2]""","""[\""Ridley Scott\"", \""Ben Affleck\"", \""Matt Da...","""[\""Ben Affleck\"", \""Matt Damon\"", \""Nicole Ho...","""[\""US\"", \""GB\"", \""US\"", \""US\""]""",0,"""[\""mattdamon\"", \""adamdriver\"", \""jodiecomer\...","""[\""ridleyscott\""]""","""[\""ridleyscott\"", \""benaffleck\"", \""mattdamon...","""[\""benaffleck\"", \""mattdamon\"", \""nicoleholof...","""[\""basedonnovelorbook\"", \""normandy,france\"",...","""[\""action\"", \""drama\"", \""history\""]""","""[\""20thcenturystudios\"", \""scottfreeproductio...","""king charles vi declares that knight jean de ...","""basedonnovelorbook normandy,france basedonatr...","""basedonnovelorbook normandy,france basedonatr...","""https://image.tmdb.org/t/p/w780/zjrJE0fpzPvX8...","""the last duel"""
4,False,"""/cinER0ESG0eJ49kXlExM0MEWGxW.jpg""","{""id"": 912503, ""name"": ""Shang-Chi Collection"",...",150000000,"""\""[\\\""Action\\\"", \\\""Adventure\\\"", \\\""Fan...","""https://www.marvel.com/movies/shang-chi-and-t...",566525,"""tt9376612""","""en""","""Shang-Chi and the Legend of the Ten Rings""","""Shang-Chi must confront the past he thought h...",3189.442,"""/1BIoJGKbXjdFDAqUEiA2VHqkK1Z.jpg""","""\""[\\\""Marvel Studios\\\""]\""""","""\""[{\\\""iso_3166_1\\\"": \\\""US\\\"", \\\""name\...","""2021-09-01""",430238384,132,"[""English"", ""Mandarin""]","""Released""","""You can't outrun your destiny.""","""Shang-Chi and the Legend of the Ten Rings""",False,7.8,4211,2021,269.489,269.489,430238384.0,150000000.0,"""51m to 150m""","""[\""martial arts\"", \""superhero\"", \""based on ...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""\""\\\""[{\\\\\\\""adult\\\\\\\"": false, \\\\\\\...","""33.33%""","""PG-13""","""[\""Destin Daniel Cretton\""]""","""[2]""","""[\""Kevin Feige\"", \""Charles Newirth\"", \""Loui...","""[\""Dave Callaham\"", \""Dave Callaham\"", \""Dest...","""[\""US\""]""",0,"""[\""simuliu\"", \""tonyleungchiu-wai\"", \""awkwaf...","""[\""destindanielcretton\""]""","""[\""kevinfeige\"", \""charlesnewirth\"", \""louisd...","""[\""davecallaham\"", \""davecallaham\"", \""destin...","""[\""martialarts\"", \""superhero\"", \""basedoncom...","""[\""action\"", \""adventure\"", \""fantasy\""]""","""[\""marvelstudios\""]""","""shang-chi must confront the past he thought h...","""martialarts superhero basedoncomic mixedmarti...","""martialarts superhero basedoncomic mixedmarti...","""https://image.tmdb.org/t/p/w780/1BIoJGKbXjdFD...","""shang-chi and the legend of the ten rings"""
