In [1]:
import requests
import re
import json
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
import time

In [2]:
# Root of every movie page url built in this notebook; the movie slug is appended to it.
base_url = 'https://www.rottentomatoes.com/m/'


def get_user_url(movie_name):
    '''
    returns the url of the user reviews page for the given movie title
    '''
    # Lower-case the title and glue its whitespace-separated words with '_'.
    words = movie_name.lower().split()
    slug = '_'.join(words)
    return ''.join([base_url, slug, '/reviews?type=user'])

def fetch_reviews(endCursor, url, timeout=10):
    '''
    returns one page of reviews (the decoded json body) from a paginated endpoint

    endCursor : cursor taken from the previous page ('' for the first page)
    url       : endpoint to query
    timeout   : seconds to wait for the server before giving up

    raises requests' HTTPError when the server answers with an error status
    '''
    params = {'directions': 'next',
              'endCursor': endCursor,
              'startCursor': ''}

    # Without a timeout requests waits indefinitely on a stalled connection.
    resp = requests.get(url, params=params, timeout=timeout)
    # Surface the HTTP status instead of an unrelated json decoding error.
    resp.raise_for_status()
    return resp.json()

In [3]:
def get_user_reviews(movie_name, pages=6):
    '''
    returns list of the latest user reviews, reading at most `pages` pages
    (6 pages, about 60 reviews, by default)

    movie_name : movie title, turned into a page url by get_user_url
    pages      : maximum number of review pages to fetch

    raises ValueError when the movie page does not contain the embedded
    movieReview data the movie id is read from
    '''
    url = get_user_url(movie_name)
    resp = requests.get(url, timeout=10)

    # The movie id is read from the `movieReview = {...};` assignment in the
    # page source. Raw string: '\s' is an invalid escape in a plain literal.
    # NOTE(review): `(.*)` is greedy, so it runs to the last ';' on that line.
    match = re.search(r'movieReview\s=\s(.*);', resp.text)
    if match is None:
        raise ValueError('movieReview data not found at {}'.format(url))
    data = json.loads(match.group(1))
    movieID = data['movieId']

    review_url = 'https://www.rottentomatoes.com/napi/movie/{}/reviews/user'.format(movieID)
    reviews = []
    cursor = ''  # the first page is requested with an empty cursor
    for _ in range(pages):
        result = fetch_reviews(endCursor=cursor, url=review_url)
        reviews.extend(t['review'] for t in result['reviews'])
        # Stop early when no further cursor is handed back, instead of
        # failing with a KeyError on movies with fewer pages of reviews.
        cursor = (result.get('pageInfo') or {}).get('endCursor')
        if not cursor:
            break

    return reviews
    

In [4]:
# Root of every movie page url built in this notebook; the movie slug is appended to it.
base_url = 'https://www.rottentomatoes.com/m/'


def get_critic_url(movie_name):
    '''
    returns the url of the reviews page for the given movie title
    '''
    # Title -> slug: lower-case, whitespace runs replaced by single underscores.
    lowered = movie_name.lower()
    slug = '_'.join(lowered.split())
    return base_url + slug + '/reviews'

In [5]:
def get_critic_reviews(movie_name, pages=3):
    '''
    returns list of the latest critic reviews, read from the first `pages`
    pages of the reviews listing (3 pages, about 60 reviews, by default)

    movie_name : movie title, turned into a page url by get_critic_url
    pages      : number of listing pages to read, starting at page 1
    '''
    url = get_critic_url(movie_name)
    lst = []
    for page in range(1, pages + 1):
        params = {
            'type': '',
            'sort': '',
            'page': str(page)
        }
        # A timeout keeps a stalled connection from hanging the notebook.
        resp = requests.get(url, params=params, timeout=10)
        bs = BeautifulSoup(resp.text, 'lxml')
        # Separate loop name: the inner loop used to reuse the page counter `i`.
        for review in bs.find_all('div', {'class': 'the_review'}):
            lst.append(review.get_text().strip())

    return lst

In [6]:
def get_image(movie_name):
    '''
    returns the image url read from the movie's rotten tomatoes page
    '''
    slug = '_'.join(movie_name.lower().split())
    page = requests.get(base_url + slug)
    soup = BeautifulSoup(page.text, 'lxml')
    # The url is taken from the 'data-src' attribute of the matching <img> tag.
    poster = soup.find('img', {'class': 'posterImage js-lazyLoad'})
    return poster['data-src']

In [7]:
# Baseline timing: run the three scrapers one after another.
# perf_counter is a monotonic clock, so the elapsed time cannot be skewed by
# wall-clock adjustments the way a difference of time.time() values can.
tic = time.perf_counter()
t1 = get_user_reviews('rambo last blood')
t2 = get_critic_reviews('rambo last blood')
t3 = get_image('rambo last blood')
print(t1, t2, t3, sep='\n')

toc = time.perf_counter()

# Elapsed seconds; kept as the last expression so the notebook displays it.
toc - tic

["If gore was magic, Stallone was a mixture of David Copperfield, Penn and Teller.\n\nWhile the movie has a boring people-story in it's first act like every Rambo movie,\nthe second act already compensates that while the third act is a beautiful painting of exploding heads, cut limbs and ripped out organs.\n\nThis is a men's movie at it's finest, with no excuses or compromises.\nPG-13 action can get arrowed to a barn wall after you've seen this.\n\nBut let's hope this was the last time Stallone performed this role.\nWhile he managed to pull it off once more (by the skin of his teeth) there just can't be another one.", "Best sequel yet! it was an amazing film especially to see Rambo's last mission and to see him go all out against all these people and go on a impossible super mission! Super bloody though I don't reccomend it if you get freaked out by blood and gore easily", 'very bad movie and it was terible', 'Eleven years after the events in Burma, Vietnam War veteran John J. Rambo (S

24.665148735046387

In [8]:


# Same three scrapers, submitted to a thread pool so they run concurrently.
# perf_counter is a monotonic clock, better suited to elapsed-time measurement
# than time.time().
tic = time.perf_counter()
# The with-block shuts the pool down on exit; previously the executor was
# created but never shut down, leaving its worker threads alive.
with ThreadPoolExecutor() as executor:
    f1 = executor.submit(get_user_reviews, 'rambo last blood')
    f2 = executor.submit(get_critic_reviews, 'rambo last blood')
    f3 = executor.submit(get_image, 'rambo last blood')
    # result() blocks until the corresponding task has finished.
    print(f1.result())
    print(f2.result())
    print(f3.result())

toc = time.perf_counter()
print(toc - tic)


["If gore was magic, Stallone was a mixture of David Copperfield, Penn and Teller.\n\nWhile the movie has a boring people-story in it's first act like every Rambo movie,\nthe second act already compensates that while the third act is a beautiful painting of exploding heads, cut limbs and ripped out organs.\n\nThis is a men's movie at it's finest, with no excuses or compromises.\nPG-13 action can get arrowed to a barn wall after you've seen this.\n\nBut let's hope this was the last time Stallone performed this role.\nWhile he managed to pull it off once more (by the skin of his teeth) there just can't be another one.", "Best sequel yet! it was an amazing film especially to see Rambo's last mission and to see him go all out against all these people and go on a impossible super mission! Super bloody though I don't reccomend it if you get freaked out by blood and gore easily", 'very bad movie and it was terible', 'Eleven years after the events in Burma, Vietnam War veteran John J. Rambo (S

In [9]:
def get_rel_date_runtime(movie_name):
    '''
    returns release date and runtime of the movie

    the result is a dict with the keys 'release_date' and 'runtime', both read
    from the <time> tags of the movie page; raises IndexError when the page
    has fewer than two <time> tags

    NOTE: a later cell of this notebook defines get_rel_date_runtime again;
    after a full run that later definition is the one in effect.
    '''
    url = base_url+'_'.join(movie_name.lower().split())
    resp = requests.get(url, timeout=10)
    bs = BeautifulSoup(resp.text,'lxml')
    # Scan the document once instead of once per lookup.
    times = bs.find_all('time')
    rel_date = times[0].get_text().strip()
    # Runtime is the third <time> tag when there is one, otherwise the second.
    # Choosing by length replaces a bare `except:` that hid every other error.
    runtime_index = 2 if len(times) > 2 else 1
    runtime = times[runtime_index].get_text().strip()
    return {'release_date':rel_date,'runtime':runtime}

In [10]:
def get_description(movie_name):
    '''
    returns the description text read from the movie's rotten tomatoes page
    '''
    slug = '_'.join(movie_name.lower().split())
    page = requests.get(base_url + slug)
    soup = BeautifulSoup(page.text, 'lxml')
    synopsis = soup.find('div', {'class': 'movie_synopsis clamp clamp-6 js-clamp'})
    # Drop the whitespace surrounding the text inside the tag.
    text = synopsis.get_text()
    return text.strip()

In [11]:
# Sample call: fetch the description for 'Deadpool 2' (issues a live HTTP request).
get_description('Deadpool 2')

"Wisecracking mercenary Deadpool meets Russell, an angry teenage mutant who lives at an orphanage. When Russell becomes the target of Cable -- a genetically enhanced soldier from the future -- Deadpool realizes that he'll need some help saving the boy from such a superior enemy. He soon joins forces with Bedlam, Shatterstar, Domino and other powerful mutants to protect young Russell from Cable and his advanced weaponry."

In [12]:
# Sample call: release date and runtime for 'the dark knight' (issues a live HTTP request).
get_rel_date_runtime('the dark knight')

{'release_date': 'Jul 18, 2008', 'runtime': '2h 32m'}

In [13]:
def get_rel_date_runtime(movie_name):
    '''
    returns release date and runtime of the movie, or None when movie_name
    is empty or None

    the result is a dict with the keys 'release_date' and 'runtime', both read
    from the <time> tags of the movie page; raises IndexError when the page
    has fewer than two <time> tags

    NOTE: this replaces the get_rel_date_runtime defined in an earlier cell of
    this notebook; the only difference is the guard for an empty name.
    '''
    # Nothing to look up without a title; made explicit instead of falling
    # off the end of the function.
    if not movie_name:
        return None

    url = base_url+'_'.join(movie_name.lower().split())
    resp = requests.get(url, timeout=10)
    bs = BeautifulSoup(resp.text,'lxml')
    # Scan the document once instead of once per lookup.
    times = bs.find_all('time')
    rel_date = times[0].get_text().strip()

    # Runtime is the third <time> tag when there is one, otherwise the second.
    # Choosing by length replaces a bare `except:` that hid every other error.
    runtime_index = 2 if len(times) > 2 else 1
    runtime = times[runtime_index].get_text().strip()

    return {'release_date':rel_date,'runtime':runtime}

In [14]:
# Sample call against the redefined function above (issues a live HTTP request).
get_rel_date_runtime('Deadpool')

{'release_date': 'Feb 12, 2016', 'runtime': '1h 48m'}