### Import Required Libraries and Set Up Environment Variables

In [74]:
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json
import re

In [75]:
#nyt_api_key = "mLhpBahhMngtBaKbNsTBG2wV1jsAqcHN"
#TMDB_API_KEY = "10541b9c262c1414aed0265c397891bf"

In [76]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")


### Access the New York Times API

In [77]:
# Set the base URL
url = "https://api.nytimes.com/svc/search/v2/articlesearch.json"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:("Movies") AND type_of_material:("Review") AND headline:("love")'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# API key (make sure to securely store your API key in practice)
nyt_api_key = "mLhpBahhMngtBaKbNsTBG2wV1jsAqcHN"

# Build URL 
query_url = (
    f"{url}?"
    f"api-key={nyt_api_key}&"
    f"fq={filter_query}&"
    f"sort={sort}&"
    f"fl={field_list}&"
    f"begin_date={begin_date}&"
    f"end_date={end_date}"
)

print("Constructed URL:", query_url)

Constructed URL: https://api.nytimes.com/svc/search/v2/articlesearch.json?api-key=mLhpBahhMngtBaKbNsTBG2wV1jsAqcHN&fq=section_name:("Movies") AND type_of_material:("Review") AND headline:("love")&sort=newest&fl=headline,web_url,snippet,source,keywords,pub_date,byline,word_count&begin_date=20130101&end_date=20230531


In [78]:
# Create an empty list to store the reviews
reviews_list = []
# loop through pages 0-19
for page in range(0, 19):
    # create query with a page number
    # API results show 10 articles at a time
    pages_query = f"{query_url}&page={page}"
    
    # Make a "GET" request and retrieve the JSON
    reviews = requests.get(pages_query).json()
    
    # Add a twelve second interval between queries to stay within API query limits
    time.sleep(12)
    
    # Try and save the reviews to the reviews_list
    
        # loop through the reviews["response"]["docs"] and append each review to the list
    try:
        for review in reviews["response"]["docs"]:
            reviews_list.append(review)
        # Print the page that was just retrieved
        print(f"Checked page {page}")
    except:
        # Print the page number that had no results then break from the loop
        print(f"No results. {page}.")
        break


Checked page 0
Checked page 1
Checked page 2
Checked page 3
Checked page 4
Checked page 5
Checked page 6
Checked page 7
Checked page 8
Checked page 9
Checked page 10
Checked page 11
Checked page 12
Checked page 13
Checked page 14
Checked page 15
Checked page 16
Checked page 17
Checked page 18


In [79]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(reviews_list[:5], indent=4))


[
    {
        "web_url": "https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html",
        "snippet": "A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.",
        "source": "The New York Times",
        "headline": {
            "main": "\u2018The Attachment Diaries\u2019 Review: Love, Sick",
            "kicker": null,
            "content_kicker": null,
            "print_headline": "The Attachment Diaries",
            "name": null,
            "seo": null,
            "sub": null
        },
        "keywords": [
            {
                "name": "subject",
                "value": "Movies",
                "rank": 1,
                "major": "N"
            },
            {
                "name": "creative_works",
                "value": "The Attachment Diaries (Movie)",
                "rank": 2,
                "major": "N"
            },
            {
                "name": "persons",
 

In [80]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = pd.json_normalize(reviews_list)
reviews_df


Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,https://www.nytimes.com/2017/06/22/movies/my-j...,"In “My Journey Through French Cinema,” Mr. Tav...",The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2017-06-22T21:14:56+0000,773,"Review: Those Movies, Himself — Bertrand Taver...",,,Reflecting on a Lifelong Love,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
186,https://www.nytimes.com/2017/06/22/movies/the-...,Kumail Nanjiani stars opposite Zoe Kazan in th...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-06-22T09:00:31+0000,1073,"Review: In ‘The Big Sick,’ Comedy Is Hard, Lov...",,,Love and (Almost) Death,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",
187,https://www.nytimes.com/2017/06/15/movies/lost...,The filmmakers Fiona Gordon and Dominique Abel...,The New York Times,"[{'name': 'creative_works', 'value': 'Lost in ...",2017-06-15T19:37:09+0000,254,Review: Finding Love (and Slapstick) While ‘Lo...,,,Lost in Paris,,,,By Ben Kenigsberg,"[{'firstname': 'Ben', 'middlename': None, 'las...",
188,https://www.nytimes.com/2017/06/01/movies/vinc...,This combination crime drama and romance finds...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-06-01T19:52:51+0000,287,"Review: In ‘Vincent N Roxxy,’ Love Is in the A...",,,Vincent N Roxxy,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",


In [81]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
def extract_title(headline):
    match = re.search(r'\u2018(.+?)\u2019', headline)
    if match:
        return match.group(1) + " Review"
    else:
        return None  
reviews_df['title'] = reviews_df["headline.main"].apply(extract_title)

reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People Review
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,https://www.nytimes.com/2017/06/22/movies/my-j...,"In “My Journey Through French Cinema,” Mr. Tav...",The New York Times,"[{'name': 'subject', 'value': 'Documentary Fil...",2017-06-22T21:14:56+0000,773,"Review: Those Movies, Himself — Bertrand Taver...",,,Reflecting on a Lifelong Love,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,
186,https://www.nytimes.com/2017/06/22/movies/the-...,Kumail Nanjiani stars opposite Zoe Kazan in th...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-06-22T09:00:31+0000,1073,"Review: In ‘The Big Sick,’ Comedy Is Hard, Lov...",,,Love and (Almost) Death,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,"The Big Sick, Review"
187,https://www.nytimes.com/2017/06/15/movies/lost...,The filmmakers Fiona Gordon and Dominique Abel...,The New York Times,"[{'name': 'creative_works', 'value': 'Lost in ...",2017-06-15T19:37:09+0000,254,Review: Finding Love (and Slapstick) While ‘Lo...,,,Lost in Paris,,,,By Ben Kenigsberg,"[{'firstname': 'Ben', 'middlename': None, 'las...",,Lost in Paris Review
188,https://www.nytimes.com/2017/06/01/movies/vinc...,This combination crime drama and romance finds...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2017-06-01T19:52:51+0000,287,"Review: In ‘Vincent N Roxxy,’ Love Is in the A...",,,Vincent N Roxxy,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,"Vincent N Roxxy, Review"


In [82]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_df["keywords"] = reviews_df["keywords"].apply(extract_keywords)
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People Review
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,https://www.nytimes.com/2017/06/22/movies/my-j...,"In “My Journey Through French Cinema,” Mr. Tav...",The New York Times,subject: Documentary Films and Programs;person...,2017-06-22T21:14:56+0000,773,"Review: Those Movies, Himself — Bertrand Taver...",,,Reflecting on a Lifelong Love,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,
186,https://www.nytimes.com/2017/06/22/movies/the-...,Kumail Nanjiani stars opposite Zoe Kazan in th...,The New York Times,subject: Movies;creative_works: The Big Sick (...,2017-06-22T09:00:31+0000,1073,"Review: In ‘The Big Sick,’ Comedy Is Hard, Lov...",,,Love and (Almost) Death,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,"The Big Sick, Review"
187,https://www.nytimes.com/2017/06/15/movies/lost...,The filmmakers Fiona Gordon and Dominique Abel...,The New York Times,creative_works: Lost in Paris (Movie);subject:...,2017-06-15T19:37:09+0000,254,Review: Finding Love (and Slapstick) While ‘Lo...,,,Lost in Paris,,,,By Ben Kenigsberg,"[{'firstname': 'Ben', 'middlename': None, 'las...",,Lost in Paris Review
188,https://www.nytimes.com/2017/06/01/movies/vinc...,This combination crime drama and romance finds...,The New York Times,subject: Movies;creative_works: Vincent N Roxx...,2017-06-01T19:52:51+0000,287,"Review: In ‘Vincent N Roxxy,’ Love Is in the A...",,,Vincent N Roxxy,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,"Vincent N Roxxy, Review"


In [83]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
titles = reviews_df["title"].to_list()
titles

['The Attachment Diaries Review',
 'What Review',
 'You Can Live Forever Review',
 'A Tourist Review',
 'Other People Review',
 'One True Loves Review',
 'The Lost Weekend: A Love Story Review',
 'A Thousand and One Review',
 'Your Place or Mine Review',
 'Love in the Time of Fentanyl Review',
 'Pamela, a Love Story Review',
 'In From the Side Review',
 'After Love Review',
 'Alcarràs Review',
 'Nelly & Nadine Review',
 'Lady Chatterley Review',
 'The Sound of Christmas Review',
 'The Inspection Review',
 'Bones and All Review',
 'My Policeman Review',
 'About Fate Review',
 'Waiting for Bojangles Review',
 'I Love My Dad Review',
 'A Love Song Review',
 'Alone Together Review',
 'Art of Love Review',
 'The Wheel Review',
 'Thor: Love and Thunder Review',
 'Both Sides of the Blade Review',
 'Fire of Love Review',
 'Love & Gelato Review',
 'Stay Prayed Up Review',
 'Benediction Review',
 'Dinner in America Review',
 'In a New York Minute Review',
 'Anaïs in Love Review',
 'I Love Americ

### Access The Movie Database API

In [84]:
# Define your TMDB API key
tmdb_api_key = "your_tmdb_api_key_here"  # Replace with your actual API key

# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

# Example query for a specific movie title
movie_title = "Inception"  # Replace with the desired movie title
query_url = url + movie_title + tmdb_key_string

print("Constructed URL:", query_url)

Constructed URL: https://api.themoviedb.org/3/search/movie?query=Inception&api_key=your_tmdb_api_key_here


In [85]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to sleep the requests after a multiple of 50 requests
request_counter = 1

# Example list of titles to search for
titles = ["Inception", "The Matrix", "Avatar", "Nonexistent Movie"]  # Replace with your actual list of titles

# Base URL for TMDB search
search_url = "https://api.themoviedb.org/3/search/movie?query="

# Loop through the titles
for title in titles:
    if title is None:
        print("Skipping None title")
        continue
    
    # Check if we need to sleep before making a request
    if request_counter % 50 == 0:
        time.sleep(1)
        print(f"Sleeping at {request_counter} requests")

    # Add 1 to the request counter
    request_counter += 1
    
    # Perform a "GET" request for The Movie Database
    response = requests.get(search_url + title + "&api_key=" + tmdb_api_key)
    data = response.json()

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie is not found.
    try:
        # Get movie id
        movie_id = data["results"][0]["id"]

        # Make a request for the full movie details
        query_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}"

        # Execute "GET" request with url
        data = requests.get(query_url).json()
        
        # Extract the genre names into a list
        genres = [genre["name"] for genre in data['genres']]

        # Extract the spoken_languages' English name into a list
        spoken_languages = [language["english_name"] for language in data['spoken_languages']]

        # Extract the production_countries' name into a list
        production_countries = [country["name"] for country in data['production_countries']]

        # Add the relevant data to a dictionary and append it to the tmdb_movies_list list
        tmdb_movies_list.append({
            "title": data['title'],
            "original_title": data['original_title'],
            "budget": data['budget'],
            "genre": genres,
            "language": data['original_language'],
            "spoken_languages": spoken_languages,
            "homepage": data['homepage'],
            "overview": data['overview'],
            "popularity": data['popularity'],
            "runtime": data['runtime'],
            "revenue": data['revenue'],
            "release_date": data['release_date'],
            "vote_average": data['vote_average'],
            "vote_count": data['vote_count'],
            "production_countries": production_countries
        })
        
        # Print out the title that was found
        print(f"Found {title}")
    except (IndexError, KeyError):
        print(f"{title} not found.")

# Convert the list of dictionaries to a DataFrame
tmdb_movies_df = pd.DataFrame(tmdb_movies_list)

# Display the DataFrame
tmdb_movies_df = [tmdb_movies_df.dropna() for tmdb_movies_df in tmdb_movies_df if tmdb_movies_df is not None]


Inception not found.
The Matrix not found.
Avatar not found.
Nonexistent Movie not found.


In [90]:
# Rename the 'title' column to 'tytle'
#tmdb_movies_df = tmdb_movies_df.rename(columns={"title": "title"})

# Display the DataFrame to confirm the change
reviews_df

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries Review
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What Review
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever Review
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist Review
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People Review
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185,https://www.nytimes.com/2017/06/22/movies/my-j...,"In “My Journey Through French Cinema,” Mr. Tav...",The New York Times,subject: Documentary Films and Programs;person...,2017-06-22T21:14:56+0000,773,"Review: Those Movies, Himself — Bertrand Taver...",,,Reflecting on a Lifelong Love,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,
186,https://www.nytimes.com/2017/06/22/movies/the-...,Kumail Nanjiani stars opposite Zoe Kazan in th...,The New York Times,subject: Movies;creative_works: The Big Sick (...,2017-06-22T09:00:31+0000,1073,"Review: In ‘The Big Sick,’ Comedy Is Hard, Lov...",,,Love and (Almost) Death,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,"The Big Sick, Review"
187,https://www.nytimes.com/2017/06/15/movies/lost...,The filmmakers Fiona Gordon and Dominique Abel...,The New York Times,creative_works: Lost in Paris (Movie);subject:...,2017-06-15T19:37:09+0000,254,Review: Finding Love (and Slapstick) While ‘Lo...,,,Lost in Paris,,,,By Ben Kenigsberg,"[{'firstname': 'Ben', 'middlename': None, 'las...",,Lost in Paris Review
188,https://www.nytimes.com/2017/06/01/movies/vinc...,This combination crime drama and romance finds...,The New York Times,subject: Movies;creative_works: Vincent N Roxx...,2017-06-01T19:52:51+0000,287,"Review: In ‘Vincent N Roxxy,’ Love Is in the A...",,,Vincent N Roxxy,,,,By Ken Jaworowski,"[{'firstname': 'Ken', 'middlename': None, 'las...",,"Vincent N Roxxy, Review"


In [91]:
reviews_df.columns

Index(['web_url', 'snippet', 'source', 'keywords', 'pub_date', 'word_count',
       'headline.main', 'headline.kicker', 'headline.content_kicker',
       'headline.print_headline', 'headline.name', 'headline.seo',
       'headline.sub', 'byline.original', 'byline.person',
       'byline.organization', 'title'],
      dtype='object')

In [92]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
print(json.dumps(tmdb_movies_list[:5], indent=4))

[]


In [93]:
# Convert the results to a DataFrame
tmdb_df = pd.DataFrame(tmdb_movies_list)
tmdb_df

### Merge and Clean the Data for Export

In [104]:
# Ensure the title columns are in the same format
tmdb_df['title'] = tmdb_df['title'].str.strip().str.lower()
reviews_df['title'] = reviews_df['title'].str.strip().str.lower()

# Merge the New York Times reviews and TMDB DataFrames on title
merged_df = pd.merge(tmdb_df, reviews_df, on="title")

# Display the merged DataFrame
print(merged_df)

# If you need to merge on different column names, specify the columns explicitly
# Uncomment the line below and provide the correct column names if they differ
#merged_df = pd.merge(tmdb_df, reviews_df, left_on="tmdb_title_column", right_on="nyt_title_column")

# Display the merged DataFrame again if using different column names
#print(merged_df)


KeyError: 'title'

In [98]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ["genre", "spoken_languages", "production_countries"]

# Create a list of characters to remove
characters_to_remove = ["[", "]", "'"]
    
# Loop through the list of columns to fix
for column in columns_to_fix:
    # Convert the column to type 'str'
    merged_df[column] = merged_df[column].astype("str")

    # Loop through characters to remove
    for character in characters_to_remove:
        merged_df[column] = merged_df[column].str.replace(character, "", regex=False)

# Display the fixed DataFrame
merged_df.head()

NameError: name 'merged_df' is not defined

In [99]:
# Drop "byline.person" column
merged_df = merged_df.drop(columns="byline.person")

NameError: name 'merged_df' is not defined

In [None]:
# Delete duplicate rows and reset index
cleaned_df = merged_df.drop_duplicates().reset_index(drop=True)
cleaned_df.head()

In [None]:
# Export data to CSV without the index
cleaned_df.to_csv("output/collected_data.csv", index=False)