### Import Required Libraries and Set Up Environment Variables

In [2]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json

In [17]:
# Set environment variables from the .env in the local environment
load_dotenv("example.env")

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [18]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
query_url = (
    f"{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}"
    + f'&fq={filter_query}&sort={sort}&fl={field_list}')


In [19]:
# Create an empty list to store the reviews
reviews = []

# loop through pages 0-19
for page in range(20):
    
    # create query with a page number
    # API results show 10 articles at a time
    query_url = (
    f"{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}"
    + f'&fq={filter_query}&sort={sort}&fl={field_list}&page={page}')
    
    # Make a "GET" request and retrieve the JSON
    response = requests.get(query_url)
    response_json = response.json()

    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
    
    # Try and save the reviews to the reviews_list
    try:
        for doc in response_json["response"]["docs"]:
            reviews.append(doc)
        # loop through the reviews["response"]["docs"] and append each review to the list
        
        # Print the page that was just retrieved
        print(f"{page} found")

        # Print the page number that had no results then break from the loop
    except KeyError:
        print(f"{page} no results found")
        break

print(f"Total reviews found: {len(reviews)}")

0 found
1 found
2 found
3 found
4 found
5 found
6 found
7 found
8 found
9 found
10 found
11 found
12 found
13 found
14 found
15 found
16 found
17 found
18 found
19 found
Total reviews found: 200


In [21]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = pd.json_normalize(reviews)

In [30]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
titles = []
for headline in reviews_df['headline.main']:
    start = headline.find('\u2018')
    end = headline.find(' Review\u2019')
    if start != -1 and end != -1:
        titles.append(headline[start+1:end+7])
    else:
        end = headline.find('\u2019')
        if start != -1 and end != -1:
            title = headline[start+1:end]
            if ' Review' not in title:
                title += ' Review'
            titles.append(title)
        else:
            titles.append(None)

reviews_df['title'] = titles

reviews_df.head()


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People Review


In [32]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_df['keywords'] = reviews_df['keywords'].apply(extract_keywords)
reviews_df.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People Review


In [33]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
title_list = reviews_df['title'].to_list()
title_list

['The Attachment Diaries Review',
 'What Review',
 'You Can Live Forever Review',
 'A Tourist Review',
 'Other People Review',
 'One True Loves Review',
 'The Lost Weekend: A Love Story Review',
 'A Thousand and One Review',
 'Your Place or Mine Review',
 'Love in the Time of Fentanyl Review',
 'Pamela, a Love Story Review',
 'In From the Side Review',
 'After Love Review',
 'Alcarràs Review',
 'Nelly & Nadine Review',
 'Lady Chatterley Review',
 'The Sound of Christmas Review',
 'The Inspection Review',
 'Bones and All Review',
 'My Policeman Review',
 'About Fate Review',
 'Waiting for Bojangles Review',
 'I Love My Dad Review',
 'A Love Song Review',
 'Alone Together Review',
 'Art of Love Review',
 'The Wheel Review',
 'Thor: Love and Thunder Review',
 'Both Sides of the Blade Review',
 'Fire of Love Review',
 'Love & Gelato Review',
 'Stay Prayed Up Review',
 'Benediction Review',
 'Dinner in America Review',
 'In a New York Minute Review',
 'Anaïs in Love Review',
 'I Love Americ

### Access The Movie Database API

In [34]:
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [38]:
# Create an empty list to store the results
results = []

# Create a request counter to sleep the requests after a multiple
# of 50 requests
request_counter = 0

# Loop through the titles
for title in titles:
    if request_counter > 0 and request_counter % 50 == 0:
        time.sleep(12)
    # Check if we need to sleep before making a request


    # Add 1 to the request counter
    request_counter += 1
    
    # Perform a "GET" request for The Movie Database
    search_url = f"{url}{title}&api_key={tmdb_api_key}"
    response = requests.get(search_url)

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    try:
        search_results = response.json()
        if search_results['results']:
            movie_id = search_results['results'][0]['id']
        # Get movie id


        # Make a request for a the full movie details
            details_url = f"{details_url_base}{movie_id}?api_key={tmdb_api_key}"

        # Execute "GET" request with url
            details_response = requests.get(details_url)
            movie_details = details_response.json()
        
        # Extract the genre names into a list
            genres = [genre["name"] for genre in movie_details.get("genres", [])]

        # Extract the spoken_languages' English name into a list
            spoken_languages = [language["english_name"] for language in movie_details.get("spoken_languages", [])]

        # Extract the production_countries' name into a list
            production_countries = [country["name"] for country in movie_details.get("production_countries", [])]

        # Add the relevant data to a dictionary and
        # append it to the tmdb_movies_list list
            movie_data = {
                "title": movie_details.get("title"),
                "release_date": movie_details.get("release_date"),
                "genres": genres,
                "spoken_languages": spoken_languages,
                "production_countries": production_countries
            }
            results.append(movie_data)
        
        # Print out the title that was found
            print(f"Found movie: {movie_details.get('title')}")
        else:
            print(f"Movie found: {title}")
    except Exception as e:
        print(f"Error occurred for movie: {title}. Error: {str(e)}")


Movie not found: The Attachment Diaries Review
Movie not found: What Review
Movie not found: You Can Live Forever Review
Movie not found: A Tourist Review
Movie not found: Other People Review
Movie not found: One True Loves Review
Movie not found: The Lost Weekend: A Love Story Review
Movie not found: A Thousand and One Review
Movie not found: Your Place or Mine Review
Movie not found: Love in the Time of Fentanyl Review
Movie not found: Pamela, a Love Story Review
Movie not found: In From the Side Review
Movie not found: After Love Review
Movie not found: Alcarràs Review
Error occurred for movie: Nelly & Nadine Review. Error: name 'details_url_base' is not defined
Movie not found: Lady Chatterley Review
Movie not found: The Sound of Christmas Review
Movie not found: The Inspection Review
Movie not found: Bones and All Review
Movie not found: My Policeman Review
Movie not found: About Fate Review
Movie not found: Waiting for Bojangles Review
Movie not found: I Love My Dad Review
Movie 

KeyboardInterrupt: 

In [39]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(results[:5], indent=4))

[]


In [40]:
# Convert the results to a DataFrame
results_df = pd.DataFrame(results)
results_df

### Merge and Clean the Data for Export

In [46]:
# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(results_df, reviews_df, on='title')
merged_df

KeyError: 'title'

In [43]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ['genres', 'spoken_languages', 'production_countries']

# Create a list of characters to remove
chars_to_remove = ["[", "]", "'", '"']

# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[column] = merged_df[column].astype(str)

    # Loop through characters to remove
    for char in chars_to_remove:
        merged_df[column] = merged_df[column].str.replace(char, "")


# Display the fixed DataFrame
merged_df.head()

NameError: name 'merged_df' is not defined

In [16]:
# Drop "byline.person" column
merged_df = merged_df.drop(columns=['byline.person'])

In [47]:
# Delete duplicate rows and reset index
merged_df = merged_df.drop_duplicates()

merged_df = merged_df.reset_index(drop=True)

NameError: name 'merged_df' is not defined

In [18]:
# Export data to CSV without the index
export = merged_df.to_csv('merged_data.csv', index=False)