In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import os
import requests
import glob
import wptools
from PIL import Image
from io import BytesIO

In [95]:
# Read tsv file
df= pd.read_csv('bestofrt.tsv', sep='\t')
df= df.sort_values('title').reset_index(drop=True)
df.head()

Unnamed: 0,ranking,critic_score,title,number_of_critic_ratings
0,53,100,12 Angry Men (Twelve Angry Men) (1957),49
1,29,96,12 Years a Slave (2013),316
2,22,98,A Hard Day's Night (1964),104
3,60,98,A Streetcar Named Desire (1951),54
4,48,97,Alien (1979),104


In [9]:
# Store information from Rotten Tomatoes to dataframe
# List of dictionaries to build file by file and later convert to a DataFrame
df_list = []
folder = 'rt_html'
for movie_html in os.listdir(folder):
    with open(os.path.join(folder, movie_html)) as file:
        soup = BeautifulSoup(file, 'lxml')
        title = soup.find('title').contents[0][:-len(' - Rotten Tomatoes')]
        audience_score = soup.find('div', class_='audience-score meter').find('span').contents[0][:-1]
        # class is a keyword but class_ is not
        average_rating_audience = soup.find('div', class_='audience-info hidden-xs superPageFontColor').contents[1].get_text().replace('Average Rating:', '').replace(" ","").strip()[:-2]
        number_of_votes_audience = soup.find('div', class_='audience-info hidden-xs superPageFontColor')
        number_of_votes_audience = number_of_votes_audience.find_all('div')[1].contents[2].strip().replace(",","")
        average_rating_critic = soup.find('div', class_='tomato-left')
        average_rating_critic = average_rating_critic.find_all('div')[1].contents[1].get_text().replace('Average Rating:', '').strip()[:-3]
        
        df_list.append({'title': title,
                        'audience_score': int(audience_score),
                        'average_rating_audience': float(average_rating_audience),
                        'number_of_votes_audience': int(number_of_votes_audience),
                        'average_rating_critic': float(average_rating_critic)})
df1 = pd.DataFrame(df_list , columns = ['title','audience_score','average_rating_audience','number_of_votes_audience','average_rating_critic'])

In [10]:
df1 = df1.sort_values('title').reset_index(drop=True)
df1

Unnamed: 0,title,audience_score,average_rating_audience,number_of_votes_audience,average_rating_critic
0,12 Angry Men (Twelve Angry Men) (1957),97,4.2,103672,9.0
1,12 Years a Slave (2013),90,4.3,138789,8.9
2,A Hard Day's Night (1964),89,3.9,50067,8.5
3,A Streetcar Named Desire (1951),90,4.0,54761,8.7
4,Alien (1979),94,3.9,457186,9.0
5,All About Eve (1950),94,4.3,44564,9.1
6,All Quiet on the Western Front (1930),89,3.9,17768,9.0
7,Apocalypse Now (1979),94,4.1,284606,8.9
8,Argo (2012),90,4.2,207373,8.4
9,Army of Shadows (L'ArmÃ©e des ombres) (1969),94,4.4,7011,8.6


In [11]:
# Store ebert reviews to dataframe

# Make directory if it doesn't already exist
folder_name = 'ebert_reviews'
if not os.path.exists(folder_name):
    os.makedirs(folder_name)

In [12]:
# URL of reviews
ebert_review_urls = ['https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9900_1-the-wizard-of-oz-1939-film/1-the-wizard-of-oz-1939-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9901_2-citizen-kane/2-citizen-kane.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9901_3-the-third-man/3-the-third-man.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9902_4-get-out-film/4-get-out-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9902_5-mad-max-fury-road/5-mad-max-fury-road.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9902_6-the-cabinet-of-dr.-caligari/6-the-cabinet-of-dr.-caligari.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9903_7-all-about-eve/7-all-about-eve.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9903_8-inside-out-2015-film/8-inside-out-2015-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9903_9-the-godfather/9-the-godfather.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9904_10-metropolis-1927-film/10-metropolis-1927-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9904_11-e.t.-the-extra-terrestrial/11-e.t.-the-extra-terrestrial.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9904_12-modern-times-film/12-modern-times-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9904_14-singin-in-the-rain/14-singin-in-the-rain.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9905_15-boyhood-film/15-boyhood-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9905_16-casablanca-film/16-casablanca-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9905_17-moonlight-2016-film/17-moonlight-2016-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9906_18-psycho-1960-film/18-psycho-1960-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9906_19-laura-1944-film/19-laura-1944-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9906_20-nosferatu/20-nosferatu.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9907_21-snow-white-and-the-seven-dwarfs-1937-film/21-snow-white-and-the-seven-dwarfs-1937-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9907_22-a-hard-day27s-night-film/22-a-hard-day27s-night-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9907_23-la-grande-illusion/23-la-grande-illusion.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9908_25-the-battle-of-algiers/25-the-battle-of-algiers.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9908_26-dunkirk-2017-film/26-dunkirk-2017-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9908_27-the-maltese-falcon-1941-film/27-the-maltese-falcon-1941-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9909_29-12-years-a-slave-film/29-12-years-a-slave-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9909_30-gravity-2013-film/30-gravity-2013-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9909_31-sunset-boulevard-film/31-sunset-boulevard-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990a_32-king-kong-1933-film/32-king-kong-1933-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990a_33-spotlight-film/33-spotlight-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990a_34-the-adventures-of-robin-hood/34-the-adventures-of-robin-hood.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990b_35-rashomon/35-rashomon.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990b_36-rear-window/36-rear-window.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990b_37-selma-film/37-selma-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990c_38-taxi-driver/38-taxi-driver.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990c_39-toy-story-3/39-toy-story-3.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990c_40-argo-2012-film/40-argo-2012-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990d_41-toy-story-2/41-toy-story-2.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990d_42-the-big-sick/42-the-big-sick.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990d_43-bride-of-frankenstein/43-bride-of-frankenstein.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990d_44-zootopia/44-zootopia.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990e_45-m-1931-film/45-m-1931-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990e_46-wonder-woman-2017-film/46-wonder-woman-2017-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990e_48-alien-film/48-alien-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990f_49-bicycle-thieves/49-bicycle-thieves.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990f_50-seven-samurai/50-seven-samurai.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad990f_51-the-treasure-of-the-sierra-madre-film/51-the-treasure-of-the-sierra-madre-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9910_52-up-2009-film/52-up-2009-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9910_53-12-angry-men-1957-film/53-12-angry-men-1957-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9910_54-the-400-blows/54-the-400-blows.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9911_55-logan-film/55-logan-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9911_57-army-of-shadows/57-army-of-shadows.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9912_58-arrival-film/58-arrival-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9912_59-baby-driver/59-baby-driver.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9913_60-a-streetcar-named-desire-1951-film/60-a-streetcar-named-desire-1951-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9913_61-the-night-of-the-hunter-film/61-the-night-of-the-hunter-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9913_62-star-wars-the-force-awakens/62-star-wars-the-force-awakens.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9913_63-manchester-by-the-sea-film/63-manchester-by-the-sea-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9914_64-dr.-strangelove/64-dr.-strangelove.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9914_66-vertigo-film/66-vertigo-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9914_67-the-dark-knight-film/67-the-dark-knight-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9915_68-touch-of-evil/68-touch-of-evil.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9915_69-the-babadook/69-the-babadook.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9915_72-rosemary27s-baby-film/72-rosemary27s-baby-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9916_73-finding-nemo/73-finding-nemo.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9916_74-brooklyn-film/74-brooklyn-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9917_75-the-wrestler-2008-film/75-the-wrestler-2008-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9917_77-l.a.-confidential-film/77-l.a.-confidential-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9918_78-gone-with-the-wind-film/78-gone-with-the-wind-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9918_79-the-good-the-bad-and-the-ugly/79-the-good-the-bad-and-the-ugly.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9918_80-skyfall/80-skyfall.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9919_82-tokyo-story/82-tokyo-story.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9919_83-hell-or-high-water-film/83-hell-or-high-water-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9919_84-pinocchio-1940-film/84-pinocchio-1940-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad9919_85-the-jungle-book-2016-film/85-the-jungle-book-2016-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991a_86-la-la-land-film/86-la-la-land-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991b_87-star-trek-film/87-star-trek-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991b_89-apocalypse-now/89-apocalypse-now.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991c_90-on-the-waterfront/90-on-the-waterfront.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991c_91-the-wages-of-fear/91-the-wages-of-fear.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991c_92-the-last-picture-show/92-the-last-picture-show.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991d_93-harry-potter-and-the-deathly-hallows-part-2/93-harry-potter-and-the-deathly-hallows-part-2.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991d_94-the-grapes-of-wrath-film/94-the-grapes-of-wrath-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991d_96-man-on-wire/96-man-on-wire.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991e_97-jaws-film/97-jaws-film.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991e_98-toy-story/98-toy-story.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991e_99-the-godfather-part-ii/99-the-godfather-part-ii.txt',
                     'https://d17h27t6h515a5.cloudfront.net/topher/2017/September/59ad991e_100-battleship-potemkin/100-battleship-potemkin.txt']

In [13]:
# just to make sure
folder_name = 'ebert_reviews'
if not os.path.exists(folder_name):
    os.makedirs(folder_name)
    
# Save reviwe files from url and stor in folder
for url in ebert_review_urls:
    response = requests.get(url)
    with open(os.path.join(folder_name, url.split('/')[-1]), mode='wb') as file:
        file.write(response.content)

In [14]:
len(os.listdir(folder_name))

88

In [15]:
# List of dictionaries to build file by file and later convert to a DataFrame
df_list = []
for ebert_review in glob.glob('ebert_reviews/*.txt'):
    with open(ebert_review, encoding='utf-8') as file:
        title = file.readline()[:-1]
        review_url = file.readline()[:-1]
        review_text = file.read()
        
        df_list.append({'title': title,
                        'review_url': review_url,
                        'review_text': review_text})
df2 = pd.DataFrame(df_list, columns = ['title', 'review_url', 'review_text'])
        

In [16]:
df2 = df2.sort_values('title').reset_index(drop=True)
df2

Unnamed: 0,title,review_url,review_text
0,12 Angry Men (Twelve Angry Men) (1957),http://www.rogerebert.com/reviews/great-movie-...,"In form, ""12 Angry Men"" is a courtroom drama. ..."
1,12 Years a Slave (2013),http://www.rogerebert.com/reviews/12-years-a-s...,"After ""Django Unchained"" and Lee Daniels' ""The..."
2,A Hard Day's Night (1964),http://www.rogerebert.com/reviews/great-movie-...,"When it opened in September, 1964, ""A Hard Day..."
3,A Streetcar Named Desire (1951),http://www.rogerebert.com/reviews/a-streetcar-...,Marlon Brando didn't win the Academy Award in ...
4,Alien (1979),http://www.rogerebert.com/reviews/great-movie-...,"At its most fundamental level, ""Alien"" is a mo..."
5,All About Eve (1950),http://www.rogerebert.com/reviews/great-movie-...,Growing older was a smart career move for Bett...
6,Apocalypse Now (1979),http://www.rogerebert.com/reviews/great-movie-...,"Francis Ford Coppola's film ""Apocalypse Now"" w..."
7,Argo (2012),http://www.rogerebert.com/reviews/argo-2012,It's the same the world over. A Hollywood prod...
8,Army of Shadows (L'Armée des ombres) (1969),http://www.rogerebert.com/reviews/great-movie-...,"Jean-Pierre Melville's ""Army of Shadows"" is ab..."
9,Arrival (2016),http://www.rogerebert.com/reviews/arrival-2016,Much has been written about the recent surge o...


In [17]:
# Take the posters from wikipedia

title_list = [
 'The_Wizard_of_Oz_(1939_film)',
 'Citizen_Kane',
 'The_Third_Man',
 'Get_Out_(film)',
 'Mad_Max:_Fury_Road',
 'The_Cabinet_of_Dr._Caligari',
 'All_About_Eve',
 'Inside_Out_(2015_film)',
 'The_Godfather',
 'Metropolis_(1927_film)',
 'E.T._the_Extra-Terrestrial',
 'Modern_Times_(film)',
 'It_Happened_One_Night',
 "Singin'_in_the_Rain",
 'Boyhood_(film)',
 'Casablanca_(film)',
 'Moonlight_(2016_film)',
 'Psycho_(1960_film)',
 'Laura_(1944_film)',
 'Nosferatu',
 'Snow_White_and_the_Seven_Dwarfs_(1937_film)',
 'A_Hard_Day%27s_Night_(film)',
 'La_Grande_Illusion',
 'North_by_Northwest',
 'The_Battle_of_Algiers',
 'Dunkirk_(2017_film)',
 'The_Maltese_Falcon_(1941_film)',
 'Repulsion_(film)',
 '12_Years_a_Slave_(film)',
 'Gravity_(2013_film)',
 'Sunset_Boulevard_(film)',
 'King_Kong_(1933_film)',
 'Spotlight_(film)',
 'The_Adventures_of_Robin_Hood',
 'Rashomon',
 'Rear_Window',
 'Selma_(film)',
 'Taxi_Driver',
 'Toy_Story_3',
 'Argo_(2012_film)',
 'Toy_Story_2',
 'The_Big_Sick',
 'Bride_of_Frankenstein',
 'Zootopia',
 'M_(1931_film)',
 'Wonder_Woman_(2017_film)',
 'The_Philadelphia_Story_(film)',
 'Alien_(film)',
 'Bicycle_Thieves',
 'Seven_Samurai',
 'The_Treasure_of_the_Sierra_Madre_(film)',
 'Up_(2009_film)',
 '12_Angry_Men_(1957_film)',
 'The_400_Blows',
 'Logan_(film)',
 'All_Quiet_on_the_Western_Front_(1930_film)',
 'Army_of_Shadows',
 'Arrival_(film)',
 'Baby_Driver',
 'A_Streetcar_Named_Desire_(1951_film)',
 'The_Night_of_the_Hunter_(film)',
 'Star_Wars:_The_Force_Awakens',
 'Manchester_by_the_Sea_(film)',
 'Dr._Strangelove',
 'Frankenstein_(1931_film)',
 'Vertigo_(film)',
 'The_Dark_Knight_(film)',
 'Touch_of_Evil',
 'The_Babadook',
 'The_Conformist_(film)',
 'Rebecca_(1940_film)',
 'Rosemary%27s_Baby_(film)',
 'Finding_Nemo',
 'Brooklyn_(film)',
 'The_Wrestler_(2008_film)',
 'The_39_Steps_(1935_film)',
 'L.A._Confidential_(film)',
 'Gone_with_the_Wind_(film)',
 'The_Good,_the_Bad_and_the_Ugly',
 'Skyfall',
 'Rome,_Open_City',
 'Tokyo_Story',
 'Hell_or_High_Water_(film)',
 'Pinocchio_(1940_film)',
 'The_Jungle_Book_(2016_film)',
 'La_La_Land_(film)',
 'Star_Trek_(film)',
 'High_Noon',
 'Apocalypse_Now',
 'On_the_Waterfront',
 'The_Wages_of_Fear',
 'The_Last_Picture_Show',
 'Harry_Potter_and_the_Deathly_Hallows_–_Part_2',
 'The_Grapes_of_Wrath_(film)',
 'Roman_Holiday',
 'Man_on_Wire',
 'Jaws_(film)',
 'Toy_Story',
 'The_Godfather_Part_II',
 'Battleship_Potemkin'
]


In [18]:
folder_name = 'bestofrt_posters'
# Make directory if it doesn't already exist
if not os.path.exists(folder_name):
    os.makedirs(folder_name)

In [19]:
# List of dictionaries to build and convert to a DataFrame later
df_list = []
image_errors = {}
for title in title_list:
    try:
        # This cell is slow so print ranking to gauge time remaining
        ranking = title_list.index(title) + 1
        print(ranking)
        page = wptools.page(title, silent=True)
        # Your code here (three lines)
        images = page.get().data['image']
        # First image is usually the poster
        first_image_url = images[0]['url']
        r = requests.get(first_image_url)
        # Download movie poster image
        i = Image.open(BytesIO(r.content))
        image_file_format = first_image_url.split('.')[-1]
        i.save(folder_name + "/" + str(ranking) + "_" + title + '.' + image_file_format)
        # Append to list of dictionaries
        df_list.append({'ranking': int(ranking),
                        'title': title,
                        'poster_url': first_image_url})
    
    # Not best practice to catch all exceptions but fine for this short script
    except Exception as e:
        print(str(ranking) + "_" + title + ": " + str(e))
        image_errors[str(ranking) + "_" + title] = images

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22


API error: {'code': 'invalidtitle', 'info': 'Bad title "A_Hard_Day%27s_Night_(film)".', 'docref': 'See https://en.wikipedia.org/w/api.php for API usage. Subscribe to the mediawiki-api-announce mailing list at &lt;https://lists.wikimedia.org/mailman/listinfo/mediawiki-api-announce&gt; for notice of API deprecations and breaking changes.'}


22_A_Hard_Day%27s_Night_(film): https://en.wikipedia.org/w/api.php?action=parse&formatversion=2&contentmodel=text&disableeditsection=&disablelimitreport=&disabletoc=&prop=text|iwlinks|parsetree|wikitext|displaytitle|properties&redirects&page=A_Hard_Day%2527s_Night_%28film%29
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
52_Up_(2009_film): 'image'
53
54
55
56
57
58
59
60
61
62
63
64
64_Dr._Strangelove: cannot identify image file <_io.BytesIO object at 0x000001D3AC90B570>
65
66
67
68
69
70
71
72


API error: {'code': 'invalidtitle', 'info': 'Bad title "Rosemary%27s_Baby_(film)".', 'docref': 'See https://en.wikipedia.org/w/api.php for API usage. Subscribe to the mediawiki-api-announce mailing list at &lt;https://lists.wikimedia.org/mailman/listinfo/mediawiki-api-announce&gt; for notice of API deprecations and breaking changes.'}


72_Rosemary%27s_Baby_(film): https://en.wikipedia.org/w/api.php?action=parse&formatversion=2&contentmodel=text&disableeditsection=&disablelimitreport=&disabletoc=&prop=text|iwlinks|parsetree|wikitext|displaytitle|properties&redirects&page=Rosemary%2527s_Baby_%28film%29
73
74
75
76
77
78
79
80
81
82
83
83_Hell_or_High_Water_(film): 'image'
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100


In [22]:
for key in image_errors.keys():
    print(key)

22_A_Hard_Day%27s_Night_(film)
52_Up_(2009_film)
64_Dr._Strangelove
72_Rosemary%27s_Baby_(film)
83_Hell_or_High_Water_(film)


In [24]:
# Inspect unidentifiable images and download them individually
for rank_title, images in image_errors.items():
    if rank_title == '22_A_Hard_Day%27s_Night_(film)':
        url = 'https://upload.wikimedia.org/wikipedia/en/4/47/A_Hard_Days_night_movieposter.jpg'
    if rank_title == '52_Up_(2009_film)':
        url = 'https://upload.wikimedia.org/wikipedia/ar/1/1e/Up_poster_araby.png'
    if rank_title == '64_Dr._Strangelove':
        url = 'https://upload.wikimedia.org/wikipedia/en/e/e6/Dr._Strangelove_poster.jpg'
    if rank_title == '72_Rosemary%27s_Baby_(film)':
        url = 'https://upload.wikimedia.org/wikipedia/en/e/ef/Rosemarys_baby_poster.jpg'
    if rank_title == '83_Hell_or_High_Water_(film)':
        url = 'https://upload.wikimedia.org/wikipedia/en/8/8f/Hell_or_High_Water_film_poster.png'
    title = rank_title[3:]
    df_list.append({'ranking': int(title_list.index(title) + 1),
                    'title': title,
                    'poster_url': url})
    r = requests.get(url)
    # Download movie poster image
    i = Image.open(BytesIO(r.content))
    image_file_format = url.split('.')[-1]
    i.save(folder_name + "/" + rank_title + '.' + image_file_format)

In [25]:
# Create DataFrame from list of dictionaries
df3 = pd.DataFrame(df_list, columns = ['ranking', 'title', 'poster_url'])
df3 = df3.sort_values('title').reset_index(drop=True)
df3

Unnamed: 0,ranking,title,poster_url
0,53,12_Angry_Men_(1957_film),https://upload.wikimedia.org/wikipedia/en/9/91...
1,29,12_Years_a_Slave_(film),https://upload.wikimedia.org/wikipedia/en/5/5c...
2,22,A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
3,22,A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
4,22,A_Hard_Day%27s_Night_(film),https://d17h27t6h515a5.cloudfront.net/topher/2...
5,60,A_Streetcar_Named_Desire_(1951_film),https://upload.wikimedia.org/wikipedia/commons...
6,48,Alien_(film),https://upload.wikimedia.org/wikipedia/en/c/c3...
7,7,All_About_Eve,https://upload.wikimedia.org/wikipedia/en/2/22...
8,56,All_Quiet_on_the_Western_Front_(1930_film),https://upload.wikimedia.org/wikipedia/commons...
9,89,Apocalypse_Now,https://upload.wikimedia.org/wikipedia/en/c/c2...


In [26]:
df1

Unnamed: 0,title,audience_score,average_rating_audience,number_of_votes_audience,average_rating_critic
0,12 Angry Men (Twelve Angry Men) (1957),97,4.2,103672,9.0
1,12 Years a Slave (2013),90,4.3,138789,8.9
2,A Hard Day's Night (1964),89,3.9,50067,8.5
3,A Streetcar Named Desire (1951),90,4.0,54761,8.7
4,Alien (1979),94,3.9,457186,9.0
5,All About Eve (1950),94,4.3,44564,9.1
6,All Quiet on the Western Front (1930),89,3.9,17768,9.0
7,Apocalypse Now (1979),94,4.1,284606,8.9
8,Argo (2012),90,4.2,207373,8.4
9,Army of Shadows (L'ArmÃ©e des ombres) (1969),94,4.4,7011,8.6


In [27]:
df2

Unnamed: 0,title,review_url,review_text
0,12 Angry Men (Twelve Angry Men) (1957),http://www.rogerebert.com/reviews/great-movie-...,"In form, ""12 Angry Men"" is a courtroom drama. ..."
1,12 Years a Slave (2013),http://www.rogerebert.com/reviews/12-years-a-s...,"After ""Django Unchained"" and Lee Daniels' ""The..."
2,A Hard Day's Night (1964),http://www.rogerebert.com/reviews/great-movie-...,"When it opened in September, 1964, ""A Hard Day..."
3,A Streetcar Named Desire (1951),http://www.rogerebert.com/reviews/a-streetcar-...,Marlon Brando didn't win the Academy Award in ...
4,Alien (1979),http://www.rogerebert.com/reviews/great-movie-...,"At its most fundamental level, ""Alien"" is a mo..."
5,All About Eve (1950),http://www.rogerebert.com/reviews/great-movie-...,Growing older was a smart career move for Bett...
6,Apocalypse Now (1979),http://www.rogerebert.com/reviews/great-movie-...,"Francis Ford Coppola's film ""Apocalypse Now"" w..."
7,Argo (2012),http://www.rogerebert.com/reviews/argo-2012,It's the same the world over. A Hollywood prod...
8,Army of Shadows (L'Armée des ombres) (1969),http://www.rogerebert.com/reviews/great-movie-...,"Jean-Pierre Melville's ""Army of Shadows"" is ab..."
9,Arrival (2016),http://www.rogerebert.com/reviews/arrival-2016,Much has been written about the recent surge o...


In [28]:
df3

Unnamed: 0,ranking,title,poster_url
0,53,12_Angry_Men_(1957_film),https://upload.wikimedia.org/wikipedia/en/9/91...
1,29,12_Years_a_Slave_(film),https://upload.wikimedia.org/wikipedia/en/5/5c...
2,22,A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
3,22,A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
4,22,A_Hard_Day%27s_Night_(film),https://d17h27t6h515a5.cloudfront.net/topher/2...
5,60,A_Streetcar_Named_Desire_(1951_film),https://upload.wikimedia.org/wikipedia/commons...
6,48,Alien_(film),https://upload.wikimedia.org/wikipedia/en/c/c3...
7,7,All_About_Eve,https://upload.wikimedia.org/wikipedia/en/2/22...
8,56,All_Quiet_on_the_Western_Front_(1930_film),https://upload.wikimedia.org/wikipedia/commons...
9,89,Apocalypse_Now,https://upload.wikimedia.org/wikipedia/en/c/c2...


In [None]:
  #              new= pd.concat([df, df2], axis=1, sort=False)
  #              new

In [99]:
dfinal = pd.concat([df,df1], join='inner',axis=1 ,keys='title')
dfinal

Unnamed: 0_level_0,t,t,t,t,i,i,i,i,i
Unnamed: 0_level_1,ranking,critic_score,title,number_of_critic_ratings,title,audience_score,average_rating_audience,number_of_votes_audience,average_rating_critic
0,53,100,12 Angry Men (Twelve Angry Men) (1957),49,12 Angry Men (Twelve Angry Men) (1957),97,4.2,103672,9.0
1,29,96,12 Years a Slave (2013),316,12 Years a Slave (2013),90,4.3,138789,8.9
2,22,98,A Hard Day's Night (1964),104,A Hard Day's Night (1964),89,3.9,50067,8.5
3,60,98,A Streetcar Named Desire (1951),54,A Streetcar Named Desire (1951),90,4.0,54761,8.7
4,48,97,Alien (1979),104,Alien (1979),94,3.9,457186,9.0
5,7,100,All About Eve (1950),64,All About Eve (1950),94,4.3,44564,9.1
6,56,100,All Quiet on the Western Front (1930),40,All Quiet on the Western Front (1930),89,3.9,17768,9.0
7,89,98,Apocalypse Now (1979),80,Apocalypse Now (1979),94,4.1,284606,8.9
8,40,96,Argo (2012),313,Argo (2012),90,4.2,207373,8.4
9,57,97,Army of Shadows (L'Armée des ombres) (1969),73,Army of Shadows (L'ArmÃ©e des ombres) (1969),94,4.4,7011,8.6


In [109]:
dfinal2 = pd.concat([df2,df3], join='inner',axis=1 ,keys='title')
dfinal2 

Unnamed: 0_level_0,t,t,t,i,i,i
Unnamed: 0_level_1,title,review_url,review_text,ranking,title,poster_url
0,12 Angry Men (Twelve Angry Men) (1957),http://www.rogerebert.com/reviews/great-movie-...,"In form, ""12 Angry Men"" is a courtroom drama. ...",53,12_Angry_Men_(1957_film),https://upload.wikimedia.org/wikipedia/en/9/91...
1,12 Years a Slave (2013),http://www.rogerebert.com/reviews/12-years-a-s...,"After ""Django Unchained"" and Lee Daniels' ""The...",29,12_Years_a_Slave_(film),https://upload.wikimedia.org/wikipedia/en/5/5c...
2,A Hard Day's Night (1964),http://www.rogerebert.com/reviews/great-movie-...,"When it opened in September, 1964, ""A Hard Day...",22,A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
3,A Streetcar Named Desire (1951),http://www.rogerebert.com/reviews/a-streetcar-...,Marlon Brando didn't win the Academy Award in ...,22,A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
4,Alien (1979),http://www.rogerebert.com/reviews/great-movie-...,"At its most fundamental level, ""Alien"" is a mo...",22,A_Hard_Day%27s_Night_(film),https://d17h27t6h515a5.cloudfront.net/topher/2...
5,All About Eve (1950),http://www.rogerebert.com/reviews/great-movie-...,Growing older was a smart career move for Bett...,60,A_Streetcar_Named_Desire_(1951_film),https://upload.wikimedia.org/wikipedia/commons...
6,Apocalypse Now (1979),http://www.rogerebert.com/reviews/great-movie-...,"Francis Ford Coppola's film ""Apocalypse Now"" w...",48,Alien_(film),https://upload.wikimedia.org/wikipedia/en/c/c3...
7,Argo (2012),http://www.rogerebert.com/reviews/argo-2012,It's the same the world over. A Hollywood prod...,7,All_About_Eve,https://upload.wikimedia.org/wikipedia/en/2/22...
8,Army of Shadows (L'Armée des ombres) (1969),http://www.rogerebert.com/reviews/great-movie-...,"Jean-Pierre Melville's ""Army of Shadows"" is ab...",56,All_Quiet_on_the_Western_Front_(1930_film),https://upload.wikimedia.org/wikipedia/commons...
9,Arrival (2016),http://www.rogerebert.com/reviews/arrival-2016,Much has been written about the recent surge o...,89,Apocalypse_Now,https://upload.wikimedia.org/wikipedia/en/c/c2...


In [114]:
r1 = pd.merge(df, df1, left_index=True, right_index=True ,on='title' )
r1

Unnamed: 0,ranking,critic_score,title,number_of_critic_ratings,audience_score,average_rating_audience,number_of_votes_audience,average_rating_critic
0,53,100,12 Angry Men (Twelve Angry Men) (1957),49,97,4.2,103672,9.0
1,29,96,12 Years a Slave (2013),316,90,4.3,138789,8.9
2,22,98,A Hard Day's Night (1964),104,89,3.9,50067,8.5
3,60,98,A Streetcar Named Desire (1951),54,90,4.0,54761,8.7
4,48,97,Alien (1979),104,94,3.9,457186,9.0
5,7,100,All About Eve (1950),64,94,4.3,44564,9.1
6,56,100,All Quiet on the Western Front (1930),40,89,3.9,17768,9.0
7,89,98,Apocalypse Now (1979),80,94,4.1,284606,8.9
8,40,96,Argo (2012),313,90,4.2,207373,8.4
9,57,97,Army of Shadows (L'Armée des ombres) (1969),73,94,4.4,7011,8.6


In [115]:
r2 = pd.merge(r1, df2, left_index=True, right_index=True ,on='title' )
r2

Unnamed: 0,ranking,critic_score,title,number_of_critic_ratings,audience_score,average_rating_audience,number_of_votes_audience,average_rating_critic,review_url,review_text
0,53,100,12 Angry Men (Twelve Angry Men) (1957),49,97,4.2,103672,9.0,http://www.rogerebert.com/reviews/great-movie-...,"In form, ""12 Angry Men"" is a courtroom drama. ..."
1,29,96,12 Years a Slave (2013),316,90,4.3,138789,8.9,http://www.rogerebert.com/reviews/12-years-a-s...,"After ""Django Unchained"" and Lee Daniels' ""The..."
2,22,98,A Hard Day's Night (1964),104,89,3.9,50067,8.5,http://www.rogerebert.com/reviews/great-movie-...,"When it opened in September, 1964, ""A Hard Day..."
3,60,98,A Streetcar Named Desire (1951),54,90,4.0,54761,8.7,http://www.rogerebert.com/reviews/a-streetcar-...,Marlon Brando didn't win the Academy Award in ...
4,48,97,Alien (1979),104,94,3.9,457186,9.0,http://www.rogerebert.com/reviews/great-movie-...,"At its most fundamental level, ""Alien"" is a mo..."
5,7,100,All About Eve (1950),64,94,4.3,44564,9.1,http://www.rogerebert.com/reviews/great-movie-...,Growing older was a smart career move for Bett...
6,56,100,All Quiet on the Western Front (1930),40,89,3.9,17768,9.0,http://www.rogerebert.com/reviews/great-movie-...,"Francis Ford Coppola's film ""Apocalypse Now"" w..."
7,89,98,Apocalypse Now (1979),80,94,4.1,284606,8.9,http://www.rogerebert.com/reviews/argo-2012,It's the same the world over. A Hollywood prod...
8,40,96,Argo (2012),313,90,4.2,207373,8.4,http://www.rogerebert.com/reviews/great-movie-...,"Jean-Pierre Melville's ""Army of Shadows"" is ab..."
9,57,97,Army of Shadows (L'Armée des ombres) (1969),73,94,4.4,7011,8.6,http://www.rogerebert.com/reviews/arrival-2016,Much has been written about the recent surge o...


In [117]:
r3 = pd.merge(r2, df3, left_index=True, right_index=True ,on='ranking' )
r3 

Unnamed: 0,ranking,critic_score,title_x,number_of_critic_ratings,audience_score,average_rating_audience,number_of_votes_audience,average_rating_critic,review_url,review_text,title_y,poster_url
0,53,100,12 Angry Men (Twelve Angry Men) (1957),49,97,4.2,103672,9.0,http://www.rogerebert.com/reviews/great-movie-...,"In form, ""12 Angry Men"" is a courtroom drama. ...",12_Angry_Men_(1957_film),https://upload.wikimedia.org/wikipedia/en/9/91...
1,29,96,12 Years a Slave (2013),316,90,4.3,138789,8.9,http://www.rogerebert.com/reviews/12-years-a-s...,"After ""Django Unchained"" and Lee Daniels' ""The...",12_Years_a_Slave_(film),https://upload.wikimedia.org/wikipedia/en/5/5c...
2,22,98,A Hard Day's Night (1964),104,89,3.9,50067,8.5,http://www.rogerebert.com/reviews/great-movie-...,"When it opened in September, 1964, ""A Hard Day...",A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
3,60,98,A Streetcar Named Desire (1951),54,90,4.0,54761,8.7,http://www.rogerebert.com/reviews/a-streetcar-...,Marlon Brando didn't win the Academy Award in ...,A_Hard_Day%27s_Night_(film),https://upload.wikimedia.org/wikipedia/en/4/47...
4,48,97,Alien (1979),104,94,3.9,457186,9.0,http://www.rogerebert.com/reviews/great-movie-...,"At its most fundamental level, ""Alien"" is a mo...",A_Hard_Day%27s_Night_(film),https://d17h27t6h515a5.cloudfront.net/topher/2...
5,7,100,All About Eve (1950),64,94,4.3,44564,9.1,http://www.rogerebert.com/reviews/great-movie-...,Growing older was a smart career move for Bett...,A_Streetcar_Named_Desire_(1951_film),https://upload.wikimedia.org/wikipedia/commons...
6,56,100,All Quiet on the Western Front (1930),40,89,3.9,17768,9.0,http://www.rogerebert.com/reviews/great-movie-...,"Francis Ford Coppola's film ""Apocalypse Now"" w...",Alien_(film),https://upload.wikimedia.org/wikipedia/en/c/c3...
7,89,98,Apocalypse Now (1979),80,94,4.1,284606,8.9,http://www.rogerebert.com/reviews/argo-2012,It's the same the world over. A Hollywood prod...,All_About_Eve,https://upload.wikimedia.org/wikipedia/en/2/22...
8,40,96,Argo (2012),313,90,4.2,207373,8.4,http://www.rogerebert.com/reviews/great-movie-...,"Jean-Pierre Melville's ""Army of Shadows"" is ab...",All_Quiet_on_the_Western_Front_(1930_film),https://upload.wikimedia.org/wikipedia/commons...
9,57,97,Army of Shadows (L'Armée des ombres) (1969),73,94,4.4,7011,8.6,http://www.rogerebert.com/reviews/arrival-2016,Much has been written about the recent surge o...,Apocalypse_Now,https://upload.wikimedia.org/wikipedia/en/c/c2...
