# Bechdel Test Exploration

In [1]:
# standard library imports
import time

# third party imports
import pandas as pd
import numpy as np
import requests
import requests_cache
import altair as alt
from tqdm import tqdm

In [2]:
# enable correct rendering
# (the trailing semicolons presumably suppress the notebook's echo of the
# returned object -- keep them)
alt.renderers.enable("default");

# uses intermediate json files to speed things up
alt.data_transformers.enable("json");

# Set up a cache named "movie_cache" whose entries expire after an hour
# (expire_after is given in seconds: 3600 s = 1 h)
requests_cache.install_cache("movie_cache", expire_after=3600)

## Request IMDb and Bechdel Test Data

In [3]:
def make_request(url, params=None, timeout=30):
    """Send a GET request to ``url`` and return the decoded JSON body.

    Every failure mode is reported with ``print`` and mapped to ``None`` so
    that a long-running crawl can skip a bad response instead of aborting.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block forever.

    Returns:
        The parsed JSON payload, or ``None`` if the request could not be
        completed, the status code was not 200, or the body was not JSON.
    """
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, ...
        print(f"Request to {url} failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Request to {url} failed with status code {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError:
        # The JSON decode errors raised by requests derive from ValueError.
        print(f"Request to {url} returned a body that is not valid JSON")
        return None


def update_movie_data(movies, rating_url, delay=0.1):
    """Enrich every movie dict in place with the data served by ``rating_url``.

    For each movie, ``rating_url`` is requested with the movie's ``"imdbid"``
    as the ``imdbid`` query parameter, and the returned fields are merged
    into the movie dict. Movies whose lookup fails are left unchanged.

    Args:
        movies: Iterable of dicts, each expected to carry an ``"imdbid"`` key.
        rating_url: Endpoint queried once per movie.
        delay: Seconds to pause after each request (default 100 ms).

    Returns:
        None; ``movies`` is modified in place.
    """
    for movie in tqdm(movies, desc="Processing movies", unit="movie"):
        imdb_id = movie.get("imdbid")
        if not imdb_id:
            # No id to look up: skip rather than raise KeyError or spend a
            # request on an empty query.
            continue

        movie_data = make_request(rating_url, params={"imdbid": imdb_id})
        if movie_data is not None:
            movie.update(movie_data)

        time.sleep(delay)  # pause between consecutive requests

# get all movie IMDb ids
movies_url = "http://bechdeltest.com/api/v1/getAllMovies"
movies = make_request(movies_url)

# use the IMDb ids to get the Bechdel ratings, collect the results in a DataFrame
# NOTE(review): movies_df is only assigned when the request above succeeded;
# if it failed, any later use of movies_df raises a NameError.
if movies is not None:
    rating_url = "http://bechdeltest.com/api/v1/getMovieByImdbId"
    update_movie_data(movies, rating_url)
    movies_df = pd.DataFrame(movies)

Processing movies:  20%|█▉        | 2040/10289 [37:54<2:42:12,  1.18s/movie]

In [None]:
# save the results to movies.csv (without the DataFrame's row index)
# NOTE: movies_df is only defined if the getAllMovies request succeeded
movies_df.to_csv("movies.csv", index=False)

- https://bechdeltest.com/api/v1/doc
- https://www.kaggle.com/datasets/treelunar/bechdel-test-movies-as-of-feb-28-2023
- https://www.kaggle.com/datasets/alisonyao/movie-bechdel-test-scores/data