### Import Required Libraries and Set Up Environment Variables

In [5]:
# Import libraries and Set up environment
# Dependencies
import requests
import time
from dotenv import load_dotenv
import os
import pandas as pd
import json


In [6]:
# Set environment variables from the .env in the local environment
load_dotenv()

nyt_api_key = os.getenv("NYT_API_KEY")
tmdb_api_key = os.getenv("TMDB_API_KEY")

### Access the New York Times API

In [7]:
# Access the API key environment variable
# Set the base URL

url = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"

# Filter for movie reviews with "love" in the headline
# section_name should be "Movies"
# type_of_material should be "Review"
filter_query = 'section_name:"Movies" AND type_of_material:"Review" AND headline:"love"'

# Use a sort filter, sort by newest
sort = "newest"

# Select the following fields to return:
# headline, web_url, snippet, source, keywords, pub_date, byline, word_count
field_list = "headline,web_url,snippet,source,keywords,pub_date,byline,word_count"

# Search for reviews published between a begin and end date
begin_date = "20130101"
end_date = "20230531"

# Build URL
query_url = (
    f"{url}api-key={nyt_api_key}&begin_date={begin_date}&end_date={end_date}"
    + f'&fq={filter_query}&sort={sort}&fl={field_list}'
)
print(query_url)

https://api.nytimes.com/svc/search/v2/articlesearch.json?api-key=sqVG6wgAGcAXT54v97qbnqBrN3jjd6SC&begin_date=20130101&end_date=20230531&fq=section_name:"Movies" AND type_of_material:"Review" AND headline:"love"&sort=newest&fl=headline,web_url,snippet,source,keywords,pub_date,byline,word_count


In [8]:
# Create an empty list to store the reviews
reviews_list = []

# loop through pages 0-19
for page in range(0,20):
    
    # create query with a page number
    # API results show 10 articles at a time
    page_query_url = f"{query_url}&page={page}" 

    # Make a "GET" request and retrieve the JSON
    response = requests.get(page_query_url).json()
    
    # add a twelve second interval between queries to avoid hitting the API too quickly
    time.sleep(12)

    # Try and save the reviews to the reviews_list
    try:
        reviews = response
        for review in reviews["response"]["docs"]:
            reviews_list.append(review)
        print(f"Checked page {page}") 

        # loop through the reviews["response"]["docs"] and append each review to the list

        
        # Print the page that was just retrieved
    except:     
        print(f"Page {page} not retrieved")
        break
        # Print the page number that had no results then break from the loop
    print(f"Total reviews retrieved: {len(reviews_list)}")
         


Checked page 0
Total reviews retrieved: 10
Checked page 1
Total reviews retrieved: 20
Checked page 2
Total reviews retrieved: 30
Checked page 3
Total reviews retrieved: 40
Checked page 4
Total reviews retrieved: 50
Checked page 5
Total reviews retrieved: 60
Checked page 6
Total reviews retrieved: 70
Checked page 7
Total reviews retrieved: 80
Checked page 8
Total reviews retrieved: 90
Checked page 9
Total reviews retrieved: 100
Checked page 10
Total reviews retrieved: 110
Checked page 11
Total reviews retrieved: 120
Checked page 12
Total reviews retrieved: 130
Checked page 13
Total reviews retrieved: 140
Checked page 14
Total reviews retrieved: 150
Checked page 15
Total reviews retrieved: 160
Checked page 16
Total reviews retrieved: 170
Checked page 17
Total reviews retrieved: 180
Checked page 18
Total reviews retrieved: 190
Checked page 19
Total reviews retrieved: 200


In [9]:
# Preview the first 5 results in JSON format
num_reviews_to_preview = min(5, len(reviews_list))
for review in reviews_list[:num_reviews_to_preview]:
    print(json.dumps(review, indent=4))
# Use json.dumps with argument indent=4 to format data

print(json.dumps(reviews_list, indent=4))


{
    "web_url": "https://www.nytimes.com/2023/05/25/movies/the-attachment-diaries-review.html",
    "snippet": "A gynecologist and her patient form a horrifyingly twisted connection in this batty, bloody Argentine melodrama.",
    "source": "The New York Times",
    "headline": {
        "main": "\u2018The Attachment Diaries\u2019 Review: Love, Sick",
        "kicker": null,
        "content_kicker": null,
        "print_headline": "The Attachment Diaries",
        "name": null,
        "seo": null,
        "sub": null
    },
    "keywords": [
        {
            "name": "subject",
            "value": "Movies",
            "rank": 1,
            "major": "N"
        },
        {
            "name": "creative_works",
            "value": "The Attachment Diaries (Movie)",
            "rank": 2,
            "major": "N"
        },
        {
            "name": "persons",
            "value": "Diment, Valentin Javier",
            "rank": 3,
            "major": "N"
        }
    ],
  

In [10]:
# Convert reviews_list to a Pandas DataFrame using json_normalize()
reviews_df = pd.json_normalize(reviews_list)
reviews_df.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",


In [11]:
# Extract the title from the "headline.main" column and
# save it to a new column "title"
# Title is between unicode characters \u2018 and \u2019. 
# End string should include " Review" to avoid cutting title early
reviews_df["title"] = reviews_df["headline.main"].apply(lambda st: st[st.find("\u2018")+1:st.find("\u2019 Review")])

reviews_df.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What’s Love Got to Do With It?’ Probably a Lo
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist’s Guide to Love
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"[{'name': 'subject', 'value': 'Movies', 'rank'...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People’s Children


In [12]:
# Extract 'name' and 'value' from items in "keywords" column
def extract_keywords(keyword_list):
    extracted_keywords = ""
    for item in keyword_list:
        # Extract 'name' and 'value'
        keyword = f"{item['name']}: {item['value']};" 
        # Append the keyword item to the extracted_keywords list
        extracted_keywords += keyword
    return extracted_keywords

# Fix the "keywords" column by converting cells from a list to a string
reviews_df["keywords"] = reviews_df["keywords"].apply(extract_keywords)
reviews_df.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,headline.name,headline.seo,headline.sub,byline.original,byline.person,byline.organization,title
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,The Attachment Diaries
1,https://www.nytimes.com/2023/05/04/movies/what...,Two childhood friends navigate cultural differ...,The New York Times,"subject: Movies;persons: Kapur, Shekhar;person...",2023-05-04T17:16:45+0000,287,Review: ‘What’s Love Got to Do With It?’ Proba...,,,What’s Love Got to Do With It?,,,,By Jeannette Catsoulis,"[{'firstname': 'Jeannette', 'middlename': None...",,What’s Love Got to Do With It?’ Probably a Lo
2,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,You Can Live Forever
3,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,,,,By Elisabeth Vincentelli,"[{'firstname': 'Elisabeth', 'middlename': None...",,A Tourist’s Guide to Love
4,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,,,,By Manohla Dargis,"[{'firstname': 'Manohla', 'middlename': None, ...",,Other People’s Children


In [13]:
reviews_df.columns

Index(['web_url', 'snippet', 'source', 'keywords', 'pub_date', 'word_count',
       'headline.main', 'headline.kicker', 'headline.content_kicker',
       'headline.print_headline', 'headline.name', 'headline.seo',
       'headline.sub', 'byline.original', 'byline.person',
       'byline.organization', 'title'],
      dtype='object')

In [14]:
# Create a list from the "title" column using to_list()
# These titles will be used in the query for The Movie Database
for title in reviews_df["title"].to_list():
    print(title)

The Attachment Diaries
What’s Love Got to Do With It?’ Probably a Lo
You Can Live Forever
A Tourist’s Guide to Love
Other People’s Children
One True Loves
The Lost Weekend: A Love Story
A Thousand and One
Your Place or Mine
Love in the Time of Fentanyl
Pamela, a Love Story
In From the Side
After Love
Alcarràs
Nelly & Nadine
Lady Chatterley’s Lover
The Sound of Christmas
The Inspection
Bones and All
My Policeman
About Fate
Waiting for Bojangles
I Love My Dad
A Love Song
Alone Together
Art of Love
The Wheel
Thor: Love and Thunder
Both Sides of the Blade
Fire of Love
Love & Gelato
Stay Prayed Up
Benediction
Dinner in America
In a New York Minute
Anaïs in Love
I Love America
See You Then
La Mami
Love After Love
Deep Water
Lucy and Desi
Cyrano
The In Between
Book of Love
Lingui, the Sacred Bonds
The Pink Cloud
A Journal for Jordan
West Side Story
Aulcie
Love Is Love Is Love
Love Hard
Bergman Island
Hard Luck Love Song
South of Heaven
Wife of a Spy
Happier Than Ever
Together
Annette
Resort t

### Access The Movie Database API

In [15]:
### Access The Movie Database API
# Prepare The Movie Database query
url = "https://api.themoviedb.org/3/search/movie?query="
tmdb_key_string = "&api_key=" + tmdb_api_key

In [16]:
# Create an empty list to store the results
tmdb_movies_list = []

# Create a request counter to manage the rate limit
request_counter = 0

# Loop through the titles
for title in reviews_df["title"].to_list():
    # Check if we need to sleep before making a request
    if request_counter % 50 == 0 and request_counter != 0:
        time.sleep(10)
    # Add 1 to the request counter
    request_counter += 1

    # Perform a "GET" request for The Movie Database
    response = requests.get(url, params={'api_key': tmdb_api_key, 'query': title})

    # Include a try clause to search for the full movie details.
    # Use the except clause to print out a statement if a movie
    # is not found.
    if response.status_code != 200:
        print(f"Error fetching data for {title}: {response.status_code} - {response.text}")
        continue

    try:
        movie_data = response.json()
        if movie_data['results']:
            # Get movie ID
            movie_id = movie_data['results'][0]['id']
            # Make a request for the movie details
            movie_detail_url = f"https://api.themoviedb.org/3/movie/{movie_id}?api_key={tmdb_api_key}"
            details_response = requests.get(movie_detail_url)
            if details_response.status_code == 200:
                # Extract the details
                movie_details = details_response.json()

                # Extract genre names, spoken languages, and production countries
                genre_names = [genre['name'] for genre in movie_details.get('genres', [])]
                spoken_languages = [language['name'] for language in movie_details.get('spoken_languages', [])]
                production_countries = [country['name'] for country in movie_details.get('production_countries', [])]

                # Prepare the movie info dictionary
                movie_info = {
                    'title': title,
                    'genres': genre_names,
                    'languages': spoken_languages,
                    'countries': production_countries,  
                    'release_date': movie_details['release_date'],
                    'runtime': movie_details['runtime'],
                    'vote_average': movie_details['vote_average'],
                    'budget': movie_details['budget'],
                    'revenue': movie_details['revenue'],
                    'overview': movie_details['overview'],
                    'tagline': movie_details['tagline'],
                    'imdb_id': movie_details['imdb_id'],
                    'poster_path': movie_details['poster_path'],
                    'backdrop_path': movie_details['backdrop_path'],
                    'original_language': movie_details['original_language'],
                    'original_title': movie_details['original_title'],
                    'popularity': movie_details['popularity'],
                    'vote_count': movie_details['vote_count'],
                    'homepage': movie_details['homepage'],
                    'spoken_languages': movie_details['spoken_languages'],
                    'production_countries': movie_details['production_countries'],
                        
                }       

                # Append the movie info to the list
                tmdb_movies_list.append(movie_info)

                print(f"Found {title}")
            else:
                print(f"Details not found for {title}")
        else:
            print(f"No results for {title}")
    except Exception as e:
        print(f"Exception for {title}: {e}")

# Print the tmdb_movies_list
print(tmdb_movies_list)



Found The Attachment Diaries
No results for What’s Love Got to Do With It?’ Probably a Lo
Found You Can Live Forever
Found A Tourist’s Guide to Love
Found Other People’s Children
Found One True Loves
Found The Lost Weekend: A Love Story
Found A Thousand and One
Found Your Place or Mine
Found Love in the Time of Fentanyl
Found Pamela, a Love Story
Found In From the Side
Found After Love
Found Alcarràs
Found Nelly & Nadine
Found Lady Chatterley’s Lover
Found The Sound of Christmas
Found The Inspection
Found Bones and All
Found My Policeman
Found About Fate
Found Waiting for Bojangles
Found I Love My Dad
Found A Love Song
Found Alone Together
Found Art of Love
Found The Wheel
Found Thor: Love and Thunder
Found Both Sides of the Blade
Found Fire of Love
Found Love & Gelato
Found Stay Prayed Up
Found Benediction
Found Dinner in America
Found In a New York Minute
Found Anaïs in Love
Found I Love America
Found See You Then
Found La Mami
Found Love After Love
Found Deep Water
Found Lucy and De

In [17]:
# Preview the first 5 results in JSON format
# Use json.dumps with argument indent=4 to format data
import json
num_movies_to_preview = min(5, len(tmdb_movies_list))
for movie in tmdb_movies_list[:num_movies_to_preview]:
    print(json.dumps(movie, indent=4))
    print()

{
    "title": "The Attachment Diaries",
    "genres": [
        "Drama",
        "Mystery",
        "Thriller",
        "Horror"
    ],
    "languages": [
        "Espa\u00f1ol"
    ],
    "countries": [
        "Argentina"
    ],
    "release_date": "2021-10-07",
    "runtime": 102,
    "vote_average": 4.0,
    "budget": 0,
    "revenue": 0,
    "overview": "Argentina, 1970s. A desperate young woman goes to a clinic to have a clandestine abortion. As her pregnancy is already through the fourth month, the doctor refuses. Instead, she proposes to sell the baby to one of her clients and offers to provide shelter in her house until the child is born. Their disturbed personalities will become intertwined in a strange and dangerous relationship.",
    "tagline": "",
    "imdb_id": "tt11359158",
    "poster_path": "/hKZHk6I1p58ZeXbwjQok7DSWfZ.jpg",
    "backdrop_path": null,
    "original_language": "es",
    "original_title": "El apego",
    "popularity": 0.997,
    "vote_count": 2,
    "h

In [18]:
# Convert the results to a DataFrame
tmdb_movies_df = pd.DataFrame(tmdb_movies_list) 
tmdb_movies_df.head()


Unnamed: 0,title,genres,languages,countries,release_date,runtime,vote_average,budget,revenue,overview,...,imdb_id,poster_path,backdrop_path,original_language,original_title,popularity,vote_count,homepage,spoken_languages,production_countries
0,The Attachment Diaries,"[Drama, Mystery, Thriller, Horror]",[Español],[Argentina],2021-10-07,102,4.0,0,0,"Argentina, 1970s. A desperate young woman goes...",...,tt11359158,/hKZHk6I1p58ZeXbwjQok7DSWfZ.jpg,,es,El apego,0.997,2,,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...","[{'iso_3166_1': 'AR', 'name': 'Argentina'}]"
1,You Can Live Forever,"[Drama, Romance]","[English, Français]","[Canada, United States of America]",2023-03-24,96,6.8,0,15055,"When Jaime, a gay teenager, is sent to live in...",...,tt15782570,/10MJURrsPfDQmS2AP3fueOHJ0ch.jpg,/sClvKM8C6a1DSPzMwkKjk8pNFDx.jpg,en,You Can Live Forever,5.922,28,https://gooddeedentertainment.com/you-can-live...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso..."
2,A Tourist’s Guide to Love,"[Romance, Comedy]","[English, Tiếng Việt]",[United States of America],2023-04-21,96,6.365,0,0,"After an unexpected break up, a travel executi...",...,tt20115096,/uWkpjbBe4gRZilXRXbYfsMUZMhz.jpg,/z3Qc5a0grcCREAQKqI195P05dVf.jpg,en,A Tourist's Guide to Love,23.381,144,https://www.netflix.com/title/81424906,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'US', 'name': 'United States o..."
3,Other People’s Children,"[Drama, Comedy]","[English, Français]",[France],2022-09-21,104,6.9,0,84178,"Rachel loves her life, her students, her frien...",...,tt15403712,/4vsQ4894pzd7nbvP5FoSOHaS1mI.jpg,/33UGFiijBDX18NHecj5xz5GkkHt.jpg,fr,Les Enfants des autres,13.638,169,https://www.wildbunch.biz/movie/other-peoples-...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'FR', 'name': 'France'}]"
4,One True Loves,"[Romance, Drama, Comedy]",[English],"[Germany, United States of America]",2023-04-07,100,6.669,0,37820,Emma and Jesse are living the perfect life tog...,...,tt14770620,/6MF3JvIZkluUijw9rvDKNqPT8kC.jpg,/omkxkjj0IEhzOc9CBOwK2lrmHSk.jpg,en,One True Loves,17.637,59,,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is..."


### Merge and Clean the Data for Export

In [19]:
# Merge the New York Times reviews and TMDB DataFrames on title
# Use an inner join to keep only the movies that are in both DataFrames
merged_df = pd.merge(reviews_df, tmdb_movies_df, on="title", how="inner")
merged_df.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,imdb_id,poster_path,backdrop_path,original_language,original_title,popularity,vote_count,homepage,spoken_languages,production_countries
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,...,tt11359158,/hKZHk6I1p58ZeXbwjQok7DSWfZ.jpg,,es,El apego,0.997,2,,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...","[{'iso_3166_1': 'AR', 'name': 'Argentina'}]"
1,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,...,tt15782570,/10MJURrsPfDQmS2AP3fueOHJ0ch.jpg,/sClvKM8C6a1DSPzMwkKjk8pNFDx.jpg,en,You Can Live Forever,5.922,28,https://gooddeedentertainment.com/you-can-live...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso..."
2,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,...,tt20115096,/uWkpjbBe4gRZilXRXbYfsMUZMhz.jpg,/z3Qc5a0grcCREAQKqI195P05dVf.jpg,en,A Tourist's Guide to Love,23.381,144,https://www.netflix.com/title/81424906,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'US', 'name': 'United States o..."
3,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,...,tt15403712,/4vsQ4894pzd7nbvP5FoSOHaS1mI.jpg,/33UGFiijBDX18NHecj5xz5GkkHt.jpg,fr,Les Enfants des autres,13.638,169,https://www.wildbunch.biz/movie/other-peoples-...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'FR', 'name': 'France'}]"
4,https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"subject: Movies;persons: Bracey, Luke (1989- )...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,...,tt14770620,/6MF3JvIZkluUijw9rvDKNqPT8kC.jpg,/omkxkjj0IEhzOc9CBOwK2lrmHSk.jpg,en,One True Loves,17.637,59,,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is..."


In [20]:
# Remove list brackets and quotation marks on the columns containing lists
# Create a list of the columns that need fixing
columns_to_fix = ["genres", "languages", "countries", "production_companies", "production_countries", "spoken_languages", "keywords", "cast", "crew",   "belongs_to_collection", "production_companies", "production_countries", "spoken_languages", "keywords", "cast", "crew", "belongs_to_collection"]

# Create a list of characters to remove
characters_to_remove = ["[", "]", "'", '"', "{", "}", "name", ":", "id","'"]
                        

# Loop through the list of columns to fix
for column in columns_to_fix:
    if column in tmdb_movies_df.columns:
    # Loop through the list of characters to remove for tmdb_movies_df
        for character in characters_to_remove:
        # Remove the character from tmdb_movies_df
            tmdb_movies_df[column] = tmdb_movies_df[column].astype(str).str.replace(character, "")

    # Loop through the list of characters to remove for reviews_df
    if column in reviews_df.columns:
        for character in characters_to_remove:
        # Remove the character from reviews_df
            reviews_df[column] = reviews_df[column].astype(str).str.replace(character, "")

# Display the fixed DataFrame
tmdb_movies_df.head()


Unnamed: 0,title,genres,languages,countries,release_date,runtime,vote_average,budget,revenue,overview,...,imdb_id,poster_path,backdrop_path,original_language,original_title,popularity,vote_count,homepage,spoken_languages,production_countries
0,The Attachment Diaries,"Drama, Mystery, Thriller, Horror",Español,Argentina,2021-10-07,102,4.0,0,0,"Argentina, 1970s. A desperate young woman goes...",...,tt11359158,/hKZHk6I1p58ZeXbwjQok7DSWfZ.jpg,,es,El apego,0.997,2,,"english_ Spanish, iso_639_1 es, Español","iso_3166_1 AR, Argentina"
1,You Can Live Forever,"Drama, Romance","English, Français","Canada, United States of America",2023-03-24,96,6.8,0,15055,"When Jaime, a gay teenager, is sent to live in...",...,tt15782570,/10MJURrsPfDQmS2AP3fueOHJ0ch.jpg,/sClvKM8C6a1DSPzMwkKjk8pNFDx.jpg,en,You Can Live Forever,5.922,28,https://gooddeedentertainment.com/you-can-live...,"english_ English, iso_639_1 en, English, engl...","iso_3166_1 CA, Canada, iso_3166_1 US, United..."
2,A Tourist’s Guide to Love,"Romance, Comedy","English, Tiếng Việt",United States of America,2023-04-21,96,6.365,0,0,"After an unexpected break up, a travel executi...",...,tt20115096,/uWkpjbBe4gRZilXRXbYfsMUZMhz.jpg,/z3Qc5a0grcCREAQKqI195P05dVf.jpg,en,A Tourist's Guide to Love,23.381,144,https://www.netflix.com/title/81424906,"english_ English, iso_639_1 en, English, engl...","iso_3166_1 US, United States of America"
3,Other People’s Children,"Drama, Comedy","English, Français",France,2022-09-21,104,6.9,0,84178,"Rachel loves her life, her students, her frien...",...,tt15403712,/4vsQ4894pzd7nbvP5FoSOHaS1mI.jpg,/33UGFiijBDX18NHecj5xz5GkkHt.jpg,fr,Les Enfants des autres,13.638,169,https://www.wildbunch.biz/movie/other-peoples-...,"english_ English, iso_639_1 en, English, engl...","iso_3166_1 FR, France"
4,One True Loves,"Romance, Drama, Comedy",English,"Germany, United States of America",2023-04-07,100,6.669,0,37820,Emma and Jesse are living the perfect life tog...,...,tt14770620,/6MF3JvIZkluUijw9rvDKNqPT8kC.jpg,/omkxkjj0IEhzOc9CBOwK2lrmHSk.jpg,en,One True Loves,17.637,59,,"english_ English, iso_639_1 en, English","iso_3166_1 DE, Germany, iso_3166_1 US, Unite..."


In [21]:
# Drop "byline.person" column
merged_df = merged_df.drop(columns=["byline.person"])
merged_df.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,imdb_id,poster_path,backdrop_path,original_language,original_title,popularity,vote_count,homepage,spoken_languages,production_countries
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,...,tt11359158,/hKZHk6I1p58ZeXbwjQok7DSWfZ.jpg,,es,El apego,0.997,2,,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...","[{'iso_3166_1': 'AR', 'name': 'Argentina'}]"
1,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,...,tt15782570,/10MJURrsPfDQmS2AP3fueOHJ0ch.jpg,/sClvKM8C6a1DSPzMwkKjk8pNFDx.jpg,en,You Can Live Forever,5.922,28,https://gooddeedentertainment.com/you-can-live...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso..."
2,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,...,tt20115096,/uWkpjbBe4gRZilXRXbYfsMUZMhz.jpg,/z3Qc5a0grcCREAQKqI195P05dVf.jpg,en,A Tourist's Guide to Love,23.381,144,https://www.netflix.com/title/81424906,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'US', 'name': 'United States o..."
3,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,...,tt15403712,/4vsQ4894pzd7nbvP5FoSOHaS1mI.jpg,/33UGFiijBDX18NHecj5xz5GkkHt.jpg,fr,Les Enfants des autres,13.638,169,https://www.wildbunch.biz/movie/other-peoples-...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'FR', 'name': 'France'}]"
4,https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"subject: Movies;persons: Bracey, Luke (1989- )...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,...,tt14770620,/6MF3JvIZkluUijw9rvDKNqPT8kC.jpg,/omkxkjj0IEhzOc9CBOwK2lrmHSk.jpg,en,One True Loves,17.637,59,,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is..."


In [22]:
# Delete duplicate rows and reset index
merged_df = merged_df.drop_duplicates(subset=["title"])
merged_df = merged_df.reset_index(drop=True)
merged_df.head()

Unnamed: 0,web_url,snippet,source,keywords,pub_date,word_count,headline.main,headline.kicker,headline.content_kicker,headline.print_headline,...,imdb_id,poster_path,backdrop_path,original_language,original_title,popularity,vote_count,homepage,spoken_languages,production_countries
0,https://www.nytimes.com/2023/05/25/movies/the-...,A gynecologist and her patient form a horrifyi...,The New York Times,subject: Movies;creative_works: The Attachment...,2023-05-25T11:00:03+0000,295,"‘The Attachment Diaries’ Review: Love, Sick",,,The Attachment Diaries,...,tt11359158,/hKZHk6I1p58ZeXbwjQok7DSWfZ.jpg,,es,El apego,0.997,2,,"[{'english_name': 'Spanish', 'iso_639_1': 'es'...","[{'iso_3166_1': 'AR', 'name': 'Argentina'}]"
1,https://www.nytimes.com/2023/05/04/movies/you-...,Religion comes between two girls falling in lo...,The New York Times,subject: Movies;creative_works: You Can Live F...,2023-05-04T11:00:08+0000,294,‘You Can Live Forever’ Review: Do You Love Me ...,,,You Can Live Forever,...,tt15782570,/10MJURrsPfDQmS2AP3fueOHJ0ch.jpg,/sClvKM8C6a1DSPzMwkKjk8pNFDx.jpg,en,You Can Live Forever,5.922,28,https://gooddeedentertainment.com/you-can-live...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'CA', 'name': 'Canada'}, {'iso..."
2,https://www.nytimes.com/2023/04/21/movies/a-to...,Rachael Leigh Cook stars in this bland rom-com...,The New York Times,subject: Movies;creative_works: A Tourist's Gu...,2023-04-21T07:03:25+0000,276,‘A Tourist’s Guide to Love’ Review: A Wearying...,,,A Tourist’s Guide to Love,...,tt20115096,/uWkpjbBe4gRZilXRXbYfsMUZMhz.jpg,/z3Qc5a0grcCREAQKqI195P05dVf.jpg,en,A Tourist's Guide to Love,23.381,144,https://www.netflix.com/title/81424906,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'US', 'name': 'United States o..."
3,https://www.nytimes.com/2023/04/20/movies/othe...,A radiant Virginie Efira stars as a Parisian t...,The New York Times,"subject: Movies;persons: Zlotowski, Rebecca;cr...",2023-04-20T15:35:13+0000,801,‘Other People’s Children’ Review: True Romance,Critic’s pick,,Intoxicating Love With a Sobering Turn,...,tt15403712,/4vsQ4894pzd7nbvP5FoSOHaS1mI.jpg,/33UGFiijBDX18NHecj5xz5GkkHt.jpg,fr,Les Enfants des autres,13.638,169,https://www.wildbunch.biz/movie/other-peoples-...,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'FR', 'name': 'France'}]"
4,https://www.nytimes.com/2023/04/13/movies/one-...,A film adaptation of Taylor Jenkins Reid’s nov...,The New York Times,"subject: Movies;persons: Bracey, Luke (1989- )...",2023-04-13T11:00:06+0000,320,‘One True Loves’ Review: A Romance Lost at Sea,,,One True Loves,...,tt14770620,/6MF3JvIZkluUijw9rvDKNqPT8kC.jpg,/omkxkjj0IEhzOc9CBOwK2lrmHSk.jpg,en,One True Loves,17.637,59,,"[{'english_name': 'English', 'iso_639_1': 'en'...","[{'iso_3166_1': 'DE', 'name': 'Germany'}, {'is..."


In [23]:
# Export data to CSV without the index
merged_df.to_csv('merged_df.csv', index=False)
print("Exported merged_df.csv")
           


Exported merged_df.csv
