In [17]:
import os
import re
import time
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [5]:
# Request headers sent with every online-movie-database (RapidAPI) call below.
# The key is taken from the RAPIDAPI_KEY environment variable when it is set,
# so it can be rotated without editing the notebook.
# FIXME(security): the literal fallback is a credential committed in source;
# rotate it and delete the fallback once RAPIDAPI_KEY is configured.
HEADERS = {
    "x-rapidapi-key": os.environ.get(
        "RAPIDAPI_KEY",
        "d4f11cda99mshaf0c893d26831e1p1e08ecjsnf73b7a997619",
    ),
    "x-rapidapi-host": "online-movie-database.p.rapidapi.com",
}

In [101]:
def scrape_reviews(html_content):
    """Extract the text of every ``drawer-more`` element from an HTML page.

    Args:
        html_content: HTML markup to parse.

    Returns:
        A list with one string per ``drawer-more`` element, in document
        order. Each string is stripped and has every run of whitespace
        collapsed to a single space.
    """
    parsed_page = BeautifulSoup(html_content, 'html.parser')
    return [
        # strip first, then squeeze internal spaces/newlines into one space
        re.sub(r'\s+', ' ', node.text.strip())
        for node in parsed_page.find_all('drawer-more')
    ]

In [103]:
def perform_clicks(driver, num_clicks=5, wait_timeout=10, pause=2):
    """Click the "Load More" button on the driver's current page repeatedly.

    Args:
        driver: Selenium WebDriver whose current page holds the button.
        num_clicks: Maximum number of clicks to attempt. Defaults to 5, the
            value that used to be hard-coded.
        wait_timeout: Seconds to wait for the button to become clickable on
            each attempt. Defaults to 10.
        pause: Seconds to sleep after each click so newly requested reviews
            can load. Defaults to 2.

    Returns:
        None. The loop stops early, after printing the error, the first time
        waiting for or clicking the button raises.
    """
    for _ in range(num_clicks):
        try:
            # Wait until the "Load More" button is clickable and then click it
            load_more_button = WebDriverWait(driver, wait_timeout).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, 'rt-button[data-qa="load-more-btn"]'))
            )
            load_more_button.click()
            time.sleep(pause)  # give the page time to append the new reviews
        except Exception as e:
            print("Error clicking Load More button:", e)
            break

In [105]:
def get_link(html_source):
    """Return the href of the first ``<a data-qa="info-name">`` in the HTML.

    Args:
        html_source: HTML markup to search.

    Returns:
        The anchor's ``href`` attribute value, or None when no such anchor
        exists in the document.
    """
    soup = BeautifulSoup(html_source, "html.parser")
    # The earlier lookup of the "search-page-media-row" element was removed:
    # its result was overwritten on the next line and never used, so the
    # anchor has always been searched for in the whole document.
    anchor = soup.find("a", {"data-qa": "info-name"})
    return anchor.get("href") if anchor else None

In [107]:
def get_movie_site(movie_name, driver):
    """Search Rotten Tomatoes for a title and return the first result link.

    Args:
        movie_name: Title to search for; it is lower-cased before searching.
        driver: Selenium WebDriver used to load the search page.

    Returns:
        Whatever ``get_link`` extracts from the loaded search page: the href
        of the first result anchor, or None when there is none.
    """
    # quote() percent-encodes every reserved character. Spaces still become
    # %20 as before, but titles containing &, ?, # or non-ASCII letters no
    # longer break or truncate the query string.
    q = quote(movie_name.lower(), safe="")
    url = f"https://www.rottentomatoes.com/search?search={q}"
    driver.get(url)
    return get_link(driver.page_source)

In [109]:
def get_movie_reviews(movie_name):
    """Scrape user reviews for a title from its Rotten Tomatoes reviews page.

    Opens a Chrome WebDriver, resolves the title's page through
    ``get_movie_site``, loads ``<page>/reviews?type=user``, expands the list
    with ``perform_clicks`` and parses the result with ``scrape_reviews``.

    Args:
        movie_name: Title passed to the site search.

    Returns:
        The list produced by ``scrape_reviews``, or an empty list when the
        search yields no link.
    """
    driver = webdriver.Chrome()
    try:
        movie_url = get_movie_site(movie_name, driver)
        if not movie_url:
            # No search hit. This used to raise TypeError (None + str) and
            # leave the browser process running.
            return []
        driver.get(movie_url + "/reviews?type=user")
        perform_clicks(driver)
        page_html = driver.page_source
    finally:
        # Always release the browser, including when a step above raises.
        driver.quit()
    return scrape_reviews(page_html)

In [111]:
# NOTE(review): `movie_name` is not assigned in any cell visible above, so
# this call appears to depend on leftover kernel state. Assign it explicitly
# before this line so the cell survives Restart & Run All. TODO confirm.
get_movie_reviews(movie_name)

['Direção: Bom Atuação: Ok Roteiro: Ok Caracterização: Ok Soundtrack: Bom Show Less Show More',
 'the worst bad idea for ben parkers death and not that good of a vilean Show Less Show More',
 'Highly underrated and badly reviewed on this site.Too many haters looking for to be McGuire.This movie goes off the beaten path alittle and delivers magic. Show Less Show More',
 "To be fair even though I was not a fan of Sony rebooting the Sam Raimi Spider-Man franchise I was willing to give this a chance. I didn't like this film. The odd casting choices and The way the story dragged before it got to the death of Uncle Ben. I should also add they lied to the audience and said they was not going to do another origin story but ended up doing one. The way they interpret Peter Parker in this film was not comic accurate at all. Andrew Garfield acting felt phoned in and the costume design I have no idea what the studio was thinking. Overall I was disappointed and didn't even bother to stick around for

In [115]:
def get_movie_id_critic(movie_name):
    """Find a title through the online-movie-database auto-complete endpoint.

    Args:
        movie_name: Title to look up; also sent as the ``q`` query parameter.

    Returns:
        The first entry of the response's ``'d'`` list whose title (``'l'``)
        occurs, ignoring case, inside ``movie_name``. Returns ``""`` when no
        entry matches or the response has no ``'d'`` list.
    """
    autocomplete_url = "https://online-movie-database.p.rapidapi.com/auto-complete"
    response = requests.get(autocomplete_url, headers=HEADERS, params={"q": movie_name})
    payload = response.json()
    # Lower-case the query too: the title was already lower-cased, so a
    # mixed-case movie_name such as "Pulp Fiction" could never match before.
    wanted = movie_name.lower()
    for entry in payload.get('d') or []:
        title = entry.get('l', '').lower()
        # Skip untitled entries: an empty string is "in" every query and
        # would otherwise be returned as a false match.
        if title and title in wanted:
            return entry
    return ""

In [117]:
def get_movie_plot(movie_id):
    """Fetch the plain-text plot for a title from the get-plot endpoint.

    Args:
        movie_id: Title identifier, sent as the ``tconst`` query parameter.

    Returns:
        The string at ``data.title.plot.plotText.plainText`` in the JSON
        response, or ``""`` when any level of that path is missing or is not
        subscriptable (for example ``null``).
    """
    plot_url = "https://online-movie-database.p.rapidapi.com/title/v2/get-plot"
    response = requests.get(plot_url, headers=HEADERS, params={"tconst": movie_id})
    payload = response.json()
    try:
        return payload['data']['title']['plot']['plotText']['plainText']
    except (KeyError, TypeError):
        # Only "path not present" maps to the empty result. A bare except
        # here also swallowed KeyboardInterrupt and genuine bugs.
        return ""

In [119]:
def parse_ratings_res(response):
    """Pull release and rating fields out of a get-ratings JSON payload.

    Args:
        response: Decoded JSON payload; the fields are read from
            ``response['data']['title']``.

    Returns:
        A dict with keys ``'country'``, ``'release_date'`` (formatted as
        ``day/month/year``), ``'ratings'`` and ``'total_votes'``. Returns
        ``""`` when any required field is missing or a level of the payload
        is not subscriptable.
    """
    try:
        title = response['data']['title']
        released = title['releaseDate']
        summary = title['ratingsSummary']
        return {
            'country': released['country']['id'],
            'release_date': f"{released['day']}/{released['month']}/{released['year']}",
            'ratings': summary['aggregateRating'],
            'total_votes': summary['voteCount'],
        }
    except (KeyError, TypeError):
        # Incomplete payloads keep yielding "" (callers iterate the result,
        # and iterating "" adds nothing). Unrelated errors are no longer
        # swallowed by a bare except.
        return ""

In [121]:
def get_movie_ratings(movie_id):
    """Request the get-ratings endpoint for a title and parse the reply.

    Args:
        movie_id: Title identifier, sent as the ``tconst`` query parameter.

    Returns:
        The value of ``parse_ratings_res`` applied to the decoded JSON body.
    """
    api_reply = requests.get(
        "https://online-movie-database.p.rapidapi.com/title/v2/get-ratings",
        headers=HEADERS,
        params={"tconst": movie_id},
    )
    return parse_ratings_res(api_reply.json())

In [123]:
def get_revs_res(response):
    """Collect the ``'reviewText'`` of every item in ``response['reviews']``.

    Args:
        response: Mapping with a ``'reviews'`` sequence of mappings.

    Returns:
        A list of the review texts, in the same order as the input.
    """
    return [item['reviewText'] for item in response['reviews']]

In [125]:
def get_movie_reviews_critic(movie_id):
    """Request the get-user-reviews endpoint and extract the review texts.

    Args:
        movie_id: Title identifier, sent as the ``tconst`` query parameter.

    Returns:
        The value of ``get_revs_res`` applied to the decoded JSON body.
    """
    api_reply = requests.get(
        "https://online-movie-database.p.rapidapi.com/title/get-user-reviews",
        headers=HEADERS,
        params={"tconst": movie_id},
    )
    return get_revs_res(api_reply.json())

In [127]:
def get_movie_details(movie_name):
    """Assemble the API-sourced details for one title into a single dict.

    Looks the title up with ``get_movie_id_critic``, then fetches its plot,
    ratings and reviews by id.

    Args:
        movie_name: Title to look up.

    Returns:
        A dict with keys ``'id'``, ``'plot'``, ``'reviews'``, ``'image'`` and
        ``'name'``, followed by every key of the ratings result.
    """
    match = get_movie_id_critic(movie_name)
    # Read all three fields before making any further API calls.
    poster_url, title, movie_id = match['i']['imageUrl'], match['l'], match['id']

    plot = get_movie_plot(movie_id)
    ratings = get_movie_ratings(movie_id)
    reviews = get_movie_reviews_critic(movie_id)

    movie_details = {
        'id': movie_id,
        'plot': plot,
        'reviews': reviews,
        'image': poster_url,
        'name': title,
    }
    # The ratings result may be the empty string; merging that adds nothing.
    movie_details.update(ratings)
    return movie_details

In [129]:
def get_movie_details_overall(movie_name):
    """Return the API details for a title with scraped reviews appended.

    Args:
        movie_name: Title passed to ``get_movie_details``.

    Returns:
        The dict from ``get_movie_details``; its ``'reviews'`` list is
        extended in place with the result of ``get_movie_reviews``.
    """
    details = get_movie_details(movie_name)
    # The scraper is given the resolved title, lower-cased with underscores.
    scrape_name = details['name'].lower().replace(" ", "_")
    scraped = get_movie_reviews(scrape_name)
    details['reviews'] += scraped
    return details

In [131]:
# Fetch the API-based details (id, plot, reviews, image, name, plus any
# ratings fields) for one example title.
movie_details = get_movie_details("pulp fiction")

In [133]:
# Bare expression as the cell's last line so the notebook displays the dict.
movie_details

{'id': 'tt0110912',
 'plot': 'The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.',
 'reviews': ['One of the early scenes in "Pulp Fiction" features two hit-men discussing what a Big Mac is called in other countries. Their dialogue is witty and entertaining, and it\'s also disarming, because it makes these two thugs seem all too normal. If you didn\'t know better, you might assume these were regular guys having chit-chat on their way to work. Other than the comic payoff at the end of the scene, in which they use parts of this conversation to taunt their victims, their talk has no relevance to anything in the film, or to anything else, for that matter. Yet without such scenes, "Pulp Fiction" wouldn\'t be "Pulp Fiction." I get the sense that Tarantino put into the film whatever struck his fancy, and somehow the final product is not only coherent but wonderfully textured.\n\nIt\'s no wonder that fa