In [12]:
# 1. Import dependencies and config
# 2. Define functions to:
    # 2.1 Get most popular movies 
    # 2.2 Get movie_details: titles and IDs
    # 2.3 Get credits
# 3. Clean data for soup and create soup
# 4. Create details dataframe
# 5. Create credits dataframe with json dump (movie_id, cast, crew)
# 6. For each movie ID, divide the number of cast entries with gender 1 by the total cast (gender 1 or 2), do the same for crew, then create FM_cast and FM_crew percentage columns

In [13]:
# 1. Import dependencies and config
# Import dependencies
import json
from datetime import datetime
from io import StringIO
from pprint import pprint

import numpy as np
import pandas as pd
import requests

# Import config
from config import api_key, db_user, db_password, db_host, db_port, db_name

In [15]:
# Define function to get the most popular movies

# Returns most popular movies
def get_most_popular_movies(api_key, num_pages=100):
    """Fetch movies from TMDB's discover endpoint, sorted by popularity.

    Args:
        api_key: TMDB API key, sent as the ``api_key`` query parameter.
        num_pages: Number of result pages to request, starting at page 1.
            Defaults to 100, the page count that used to be hard-coded.

    Returns:
        A flat list containing every entry of each page's ``"results"``
        array, in page order.

    Raises:
        requests.HTTPError: If a page request is answered with an error status.
        KeyError: If a successful response has no ``"results"`` key.
    """
    # The endpoint is the same for every page, so build it once.
    discover_movies = "https://api.themoviedb.org/3/discover/movie"

    movies = []
    for page_number in range(1, num_pages + 1):
        query = {
            "api_key": api_key,
            "page": page_number,
            "sort_by": "popularity.desc",
        }
        tmdb_response = requests.get(discover_movies, params=query)
        # Report HTTP failures as such instead of letting them show up
        # later as a KeyError on "results".
        tmdb_response.raise_for_status()
        movies.extend(tmdb_response.json()["results"])

    return movies

In [16]:
# Returns movie details as a list
def get_movie_details(api_key, movie_ids):
    """Request the TMDB movie record for each ID.

    Args:
        api_key: TMDB API key appended to every request URL.
        movie_ids: Iterable of movie IDs to look up.

    Returns:
        A list with the decoded JSON response for each ID, in input order.
    """

    def fetch_details(movie_id):
        # One GET per movie; the decoded JSON body is returned as-is.
        movie_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}"
        return requests.get(movie_url).json()

    return [fetch_details(movie_id) for movie_id in movie_ids]

In [17]:
# Returns credits as a list
def get_credits(api_key, movie_ids):
    """Request the TMDB credits record for each movie ID.

    Args:
        api_key: TMDB API key appended to every request URL.
        movie_ids: Iterable of movie IDs to look up.

    Returns:
        A list with the decoded JSON credits response for each ID, in
        input order.
    """
    credit_details = []

    for movie_id in movie_ids:
        # Build the path without a leading slash: the base already ends in
        # "/3/", and joining it with "/movie/..." produced ".../3//movie/...".
        credits_url = f"https://api.themoviedb.org/3/movie/{movie_id}/credits?api_key={api_key}"

        # Get the json response for the credits
        credits_response = requests.get(credits_url).json()

        credit_details.append(credits_response)

    return credit_details

In [18]:
# Returns single title for specified index number
def get_title(results, idx):
    """Return the ``"title"`` value of the entry at position ``idx`` in ``results``."""
    return results[idx]["title"]

In [19]:
# Returns ids list
def get_ids(results):
    """Return the ``"id"`` value of every entry in ``results``, in order."""
    return [movie["id"] for movie in results]

In [20]:
# Returns titles list
def get_titles(results):
    """Return the ``"title"`` value of every entry in ``results``, in order."""
    return [movie["title"] for movie in results]

In [21]:
# Collects the gender value of every entry whose gender is 1
def get_fm_cast(results):
    """Return the ``'gender'`` values of the entries whose gender equals 1.

    The result is a list with one element per matching entry (not a
    percentage); its length is the number of such entries in ``results``.
    """
    return [member["gender"] for member in results if member['gender'] == 1]

In [85]:
def get_m_count(results):
    """Count the entries in ``results`` whose ``'gender'`` equals 2."""
    return sum(1 for member in results if member['gender'] == 2)

In [86]:
def get_fm_count(results):
    """Count the entries in ``results`` whose ``'gender'`` equals 1."""
    return sum(1 for member in results if member['gender'] == 1)

In [87]:
def get_total_count(results):
    """Count the entries in ``results`` whose ``'gender'`` is 1 or 2.

    Entries with any other gender value are left out of the total.
    """
    return sum(1 for member in results if member['gender'] in (1, 2))

In [101]:
def percentage_format(percentage):
    """Render a number with thousands separators, two decimals and a ``%`` sign."""
    template = "{:,.2f}%"
    return template.format(percentage)

In [109]:
def fm_percentage(results):
    """Share of gender-1 entries among the entries with gender 1 or 2.

    Args:
        results: Iterable of dicts that each carry a ``'gender'`` key, or
            ``None`` / NaN when no credits are available for the row.

    Returns:
        The percentage formatted by ``percentage_format`` (e.g. ``"50.00%"``),
        or ``np.nan`` when ``results`` is missing or contains no entry with
        gender 1 or 2.
    """
    # A missing cell reaches .apply() as None or float NaN, neither of which
    # can be iterated; report "no data" instead of raising TypeError.
    if results is None or (isinstance(results, float) and np.isnan(results)):
        return np.nan

    fm_count = 0
    total_count = 0
    for member in results:
        gender = member['gender']
        if gender == 1:
            fm_count += 1
            total_count += 1
        elif gender == 2:
            total_count += 1

    # Avoid dividing by zero when nobody has gender 1 or 2.
    if total_count == 0:
        return np.nan

    return percentage_format(100 * fm_count / total_count)

In [39]:
def get_total_cast(results):
    """Return the ``'gender'`` values of the entries whose gender is 1 or 2.

    Entries with any other gender value are skipped; order is preserved.
    """
    return [member['gender'] for member in results if member['gender'] in (1, 2)]

In [23]:
# Fetch the most popular movies: a list of the result entries gathered from every requested page
most_popular_movies = get_most_popular_movies(api_key)

In [24]:
# Collect the "id" of every popular movie into a list
movie_ids = get_ids(most_popular_movies)

In [25]:
# Collect the "title" of every popular movie into a list
titles = get_titles(most_popular_movies)
# print(titles)

In [26]:
# Fetch the credits response for every movie ID (one request per ID), as a list
credits = get_credits(api_key, movie_ids)

In [27]:
# Fetch the details response for every movie ID (one request per ID), as a list
details = get_movie_details(api_key, movie_ids)

In [28]:
# Movie details dataframe
# Serialize the list of detail responses to a JSON string
json_details_string = json.dumps(details)
# Parse the JSON into a dataframe. The string is wrapped in StringIO because
# passing literal JSON text to pd.read_json is deprecated (pandas >= 2.1).
movie_details_df = pd.read_json(StringIO(json_details_string))

# Export to save
# movie_details_df.to_csv("./static/data/movie_details.csv", index=False)
movie_details_df.head()

Unnamed: 0,adult,backdrop_path,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
0,False,/lNyLSOKMMeUPr1RsL4KcRuIXwHt.jpg,"{'id': 558216, 'name': 'Venom Collection', 'po...",110000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",https://www.venom.movie,580489,tt7097896,en,Venom: Let There Be Carnage,...,2021-09-30,482000000,97,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,,Venom: Let There Be Carnage,False,7.2,4123
1,False,/5uVhMGsps81CN0S4U9NF0Z4tytG.jpg,,200000000,"[{'id': 28, 'name': 'Action'}, {'id': 35, 'nam...",https://www.netflix.com/us/title/81161626,512195,tt7991608,en,Red Notice,...,2021-11-04,0,116,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Pro and cons.,Red Notice,False,6.8,1805
2,False,/zBkHCpLmHjW2uVURs5uZkaVmgKR.jpg,,0,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",,585245,tt2397461,en,Clifford the Big Red Dog,...,2021-11-10,51000000,97,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,Adventure has never been bigger.,Clifford the Big Red Dog,False,7.6,438
3,False,/cinER0ESG0eJ49kXlExM0MEWGxW.jpg,"{'id': 912503, 'name': 'Shang-Chi Collection',...",150000000,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...",https://www.marvel.com/movies/shang-chi-and-th...,566525,tt9376612,en,Shang-Chi and the Legend of the Ten Rings,...,2021-09-01,430238384,132,"[{'english_name': 'English', 'iso_639_1': 'en'...",Released,You can't outrun your destiny.,Shang-Chi and the Legend of the Ten Rings,False,7.8,4032
4,False,/xGrTm3J0FTafmuQ85vF7ZCw94x6.jpg,,9100000,"[{'id': 18, 'name': 'Drama'}, {'id': 36, 'name...",,589761,tt10648714,ru,Чернобыль,...,2021-04-15,5370393,136,"[{'english_name': 'Russian', 'iso_639_1': 'ru'...",Released,,Chernobyl: Abyss,False,6.2,228


In [113]:
# Movie credits dataframe
# Serialize the list of credits responses to a JSON string
json_credits_string = json.dumps(credits)
# Parse the JSON into a dataframe. The string is wrapped in StringIO because
# passing literal JSON text to pd.read_json is deprecated (pandas >= 2.1).
movie_credits_df = pd.read_json(StringIO(json_credits_string))

# Export to save
# movie_credits_df.to_csv("./static/data/movie_credits.csv", index=False)
movie_credits_df.head()

Unnamed: 0,id,cast,crew
0,580489,"[{'adult': False, 'gender': 2, 'id': 2524, 'kn...","[{'adult': False, 'gender': 2, 'id': 149, 'kno..."
1,512195,"[{'adult': False, 'gender': 2, 'id': 18918, 'k...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn..."
2,585245,"[{'adult': False, 'gender': 1, 'id': 1696017, ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn..."
3,566525,"[{'adult': False, 'gender': 2, 'id': 1489211, ...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn..."
4,589761,"[{'adult': False, 'gender': 2, 'id': 562730, '...","[{'adult': False, 'gender': 1, 'id': 30247, 'k..."


In [117]:
# Concatenate each movie's cast list and crew list into a single cast_crew list
cast_lists = movie_credits_df["cast"]
crew_lists = movie_credits_df["crew"]
movie_credits_df["cast_crew"] = cast_lists + crew_lists
movie_credits_df

Unnamed: 0,id,cast,crew,cast_crew,title
0,580489,"[{'adult': False, 'gender': 2, 'id': 2524, 'kn...","[{'adult': False, 'gender': 2, 'id': 149, 'kno...","[{'adult': False, 'gender': 2, 'id': 2524, 'kn...",Venom: Let There Be Carnage
1,512195,"[{'adult': False, 'gender': 2, 'id': 18918, 'k...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn...","[{'adult': False, 'gender': 2, 'id': 18918, 'k...",Red Notice
2,585245,"[{'adult': False, 'gender': 1, 'id': 1696017, ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn...","[{'adult': False, 'gender': 1, 'id': 1696017, ...",Clifford the Big Red Dog
3,566525,"[{'adult': False, 'gender': 2, 'id': 1489211, ...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn...","[{'adult': False, 'gender': 2, 'id': 1489211, ...",Shang-Chi and the Legend of the Ten Rings
4,589761,"[{'adult': False, 'gender': 2, 'id': 562730, '...","[{'adult': False, 'gender': 1, 'id': 30247, 'k...","[{'adult': False, 'gender': 2, 'id': 562730, '...",Чернобыль
...,...,...,...,...,...
1995,228967,"[{'adult': False, 'gender': 2, 'id': 17051, 'k...","[{'adult': False, 'gender': 1, 'id': 6410, 'kn...","[{'adult': False, 'gender': 2, 'id': 17051, 'k...",The Interview
1996,329996,"[{'adult': False, 'gender': 2, 'id': 72466, 'k...","[{'adult': False, 'gender': 2, 'id': 1226, 'kn...","[{'adult': False, 'gender': 2, 'id': 72466, 'k...",Dumbo
1997,350312,"[{'adult': False, 'gender': 2, 'id': 237045, '...","[{'adult': False, 'gender': 2, 'id': 92508, 'k...","[{'adult': False, 'gender': 2, 'id': 237045, '...",బాహుబలి 2: ది కన్ క్లూజన్
1998,9480,"[{'adult': False, 'gender': 2, 'id': 880, 'kno...","[{'adult': False, 'gender': 2, 'id': 376, 'kno...","[{'adult': False, 'gender': 2, 'id': 880, 'kno...",Daredevil


In [118]:
# Add derived columns to the credits dataframe

# title: taken from the details dataframe's original_title column.
# NOTE(review): the assignment aligns on row index, not on movie id, so both
# frames must list the movies in the same order.
original_titles = movie_details_df['original_title']
movie_credits_df['title'] = original_titles

# cast_crew_fm_percentage: fm_percentage applied to each combined cast+crew list
combined_fm_share = movie_credits_df['cast_crew'].apply(fm_percentage)
movie_credits_df['cast_crew_fm_percentage'] = combined_fm_share

# # fm_cast_percentage
# movie_credits_df['fm_cast_percentage'] = movie_credits_df['cast'].apply(fm_percentage)

# # fm_crew_percentage
# movie_credits_df['fm_crew_percentage'] = movie_credits_df['crew'].apply(fm_percentage)

In [119]:
# Display the credits dataframe to inspect the derived columns
movie_credits_df

Unnamed: 0,id,cast,crew,cast_crew,title,cast_crew_fm_percentage
0,580489,"[{'adult': False, 'gender': 2, 'id': 2524, 'kn...","[{'adult': False, 'gender': 2, 'id': 149, 'kno...","[{'adult': False, 'gender': 2, 'id': 2524, 'kn...",Venom: Let There Be Carnage,22.54%
1,512195,"[{'adult': False, 'gender': 2, 'id': 18918, 'k...","[{'adult': False, 'gender': 2, 'id': 9543, 'kn...","[{'adult': False, 'gender': 2, 'id': 18918, 'k...",Red Notice,14.58%
2,585245,"[{'adult': False, 'gender': 1, 'id': 1696017, ...","[{'adult': False, 'gender': 1, 'id': 8220, 'kn...","[{'adult': False, 'gender': 1, 'id': 1696017, ...",Clifford the Big Red Dog,39.62%
3,566525,"[{'adult': False, 'gender': 2, 'id': 1489211, ...","[{'adult': False, 'gender': 1, 'id': 7232, 'kn...","[{'adult': False, 'gender': 2, 'id': 1489211, ...",Shang-Chi and the Legend of the Ten Rings,33.33%
4,589761,"[{'adult': False, 'gender': 2, 'id': 562730, '...","[{'adult': False, 'gender': 1, 'id': 30247, 'k...","[{'adult': False, 'gender': 2, 'id': 562730, '...",Чернобыль,37.14%
...,...,...,...,...,...,...
1995,228967,"[{'adult': False, 'gender': 2, 'id': 17051, 'k...","[{'adult': False, 'gender': 1, 'id': 6410, 'kn...","[{'adult': False, 'gender': 2, 'id': 17051, 'k...",The Interview,15.38%
1996,329996,"[{'adult': False, 'gender': 2, 'id': 72466, 'k...","[{'adult': False, 'gender': 2, 'id': 1226, 'kn...","[{'adult': False, 'gender': 2, 'id': 72466, 'k...",Dumbo,27.14%
1997,350312,"[{'adult': False, 'gender': 2, 'id': 237045, '...","[{'adult': False, 'gender': 2, 'id': 92508, 'k...","[{'adult': False, 'gender': 2, 'id': 237045, '...",బాహుబలి 2: ది కన్ క్లూజన్,19.44%
1998,9480,"[{'adult': False, 'gender': 2, 'id': 880, 'kno...","[{'adult': False, 'gender': 2, 'id': 376, 'kno...","[{'adult': False, 'gender': 2, 'id': 880, 'kno...",Daredevil,20.13%


0          [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
1                                  [1, 1]
2                [1, 1, 1, 1, 1, 1, 1, 1]
3       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
4             [1, 1, 1, 1, 1, 1, 1, 1, 1]
                      ...                
1995                [1, 1, 1, 1, 1, 1, 1]
1996    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
1997                   [1, 1, 1, 1, 1, 1]
1998                [1, 1, 1, 1, 1, 1, 1]
1999             [1, 1, 1, 1, 1, 1, 1, 1]
Name: fm_cast, Length: 2000, dtype: object
0       [2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, ...
1       [2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
2       [1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, ...
3       [2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, ...
4       [2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, ...
                              ...                        
1995    [2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, ...
1996    [2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, ...
1997           [2, 2, 2, 1, 2, 