In [79]:
# Purpose: Web scraping Metacritic with Python

# urllib3 is a powerful, user-friendly HTTP client for Python.
import urllib3

# certifi is a Python package that provides Mozilla's carefully curated collection of Root Certificates 
# for validating the trustworthiness of SSL certificates while verifying the identity of TLS hosts.
import certifi

# Scrape parameters: release year to browse and the results page to fetch.
year, page = 2014, 1

In [80]:
# Connection pool used for every request; certificates are checked against
# the CA bundle that certifi ships.
http = urllib3.PoolManager(ca_certs=certifi.where())

# URL of the Metacritic browse page for the chosen year and results page.
link = f"https://www.metacritic.com/browse/movie/all/all/{year}/metascore/?page={page}"

In [81]:
# Request the page. The returned HTTPResponse exposes `data` (the raw body
# bytes), `status` and `headers`. The timeout keeps a stalled connection from
# hanging the kernel indefinitely.
response = http.request(
    'GET',
    link,
    headers={'User-Agent': 'Mozilla/5.0'},
    timeout=30.0,
)

# Decode the body bytes into text for the regex cells that follow.
datastring = response.data.decode("utf-8")

# print the characters fetched and the status code
print(f"Fetched {len(datastring)} characters from {link}: {response.status}")

# Stop here on an HTTP error: the body of an error page would otherwise be
# parsed by the later cells and silently produce empty or misleading matches.
if response.status >= 400:
    raise RuntimeError(f"Request for {link} failed with HTTP status {response.status}")

Fetched 369325 characters from https://www.metacritic.com/browse/movie/all/all/2014/metascore/?page=1: 200


In [82]:
# re is a module that provides regular expression matching operations
import re

In [83]:
# Movie titles: the value of the `data-title` attribute on a <div>; the lazy
# group stops at the first closing double quote.
movie_title = re.compile(r'<div data-title="(.*?)\"')
# Walk the non-overlapping matches in page order and keep the captured title.
movie_title_matches = [hit.group(1) for hit in movie_title.finditer(datastring)]

In [84]:
# Release dates: the text inside <span class="u-text-uppercase">, which is
# padded by whitespace on both sides in the HTML.
release_date = re.compile(r'<span class="u-text-uppercase">\s+(.*)\s+<\/span>')
# One captured date string per non-overlapping match, in page order.
dates = [found[1] for found in release_date.finditer(datastring)]


In [85]:
# Metascores: whatever follows "Metascore " in a <div> title attribute, up to
# the closing double quote.
metascore = re.compile(r'<div title="Metascore\s(.*?)"')
metascore_matches = [score.group(1) for score in metascore.finditer(datastring)]

In [86]:
# Thumbnails: the `src` URL of every <img> tag whose next attribute is `height`.
thumbnail = re.compile(r'<img src="(.*?)" height')

# Decode the "&amp;" entity in each captured URL back to a plain "&".
# A comprehension is used instead of a `for link in ...` loop so that the
# loop variable stays local: the notebook-level `link` (the page URL used by
# the request cell) is no longer overwritten with a thumbnail URL.
thumbnail_matches = [
    src.replace("&amp;", "&") for src in thumbnail.findall(datastring)
]

In [87]:
# Descriptions: the text of the <span> directly inside the
# `c-finderProductCard_description` <div>, up to the next "<".
description = re.compile(r'<div class="c-finderProductCard_description"><span>(.*?)<')
description_matches = [blurb[1] for blurb in description.finditer(datastring)]

In [88]:
# Report how many movie titles the title pattern matched on this page.
# Only the title list is counted here; the other scraped lists are not checked.
print(f"Found {len(movie_title_matches)} matches")

Found 24 matches


In [89]:
# pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
# built on top of the Python programming language.
import pandas as pd

# openpyxl is a Python library to read/write Excel 2010 xlsx/xlsm/xltx/xltm files.
import openpyxl

# IPython is a growing project, with increasingly language-agnostic components. IPython 3.x was the last monolithic release of IPython, containing the notebook server, qtconsole, etc. As of IPython 4.0, the language-agnostic parts of the project: the notebook format, message protocol, qtconsole, notebook web application, etc. have moved to new projects under the name Jupyter. IPython itself is focused on interactive Python, part of which is providing a Python kernel for Jupyter.
# IPython.display is a module that contains functions for displaying objects in different formats.
# HTML is a class that is used to display HTML objects in IPython.
# display() is a function that is used to display the specified object.
from IPython.display import HTML

In [90]:
# Collect the scraped lists under their column labels. Insertion order of the
# dict is the column order of the resulting table.
scraped_columns = {
    'Movie Title': movie_title_matches,
    'Release Date': dates,
    'Metascore': metascore_matches,
    'Thumbnail': thumbnail_matches,
    'Description': description_matches,
}

# Every list comes from a separate regex pass over the page, so the rows only
# line up when each pattern matched the same number of times. Check that up
# front and report the per-column counts, which is far easier to debug than
# the generic length error pandas would raise.
column_lengths = {label: len(values) for label, values in scraped_columns.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError(f"Scraped columns have different lengths: {column_lengths}")

# A DataFrame is a two-dimensional labelled table, like a spreadsheet or an
# SQL table; here it is built in one step from the dict of equal-length lists.
df = pd.DataFrame(scraped_columns)

In [91]:
# Show the table in the notebook as HTML.
#   render_links=True  -> URL cells become clickable links
#   escape=False       -> "<", ">" and "&" in cell text are left unescaped,
#                         so scraped text is inserted into the page as-is
# NOTE(review): with escape=False any markup in the scraped text is rendered
# verbatim; confirm that is acceptable for this data source.
display(
    HTML(
        df.to_html(render_links=True, escape=False)
    )
)

# Save the same table as an Excel workbook in the working directory.
df.to_excel("movieoutput.xlsx")

Unnamed: 0,Movie Title,Release Date,Metascore,Thumbnail,Description
0,Boyhood,"Jul 11, 2014",100 out of 100,https://www.metacritic.com/a/img/resize/5e2347af172196e6c7076494b7e8ce26fd832f72/catalog/provider/2/2/2-e25db87c9adea62102834d1cc06d3705.jpg?auto=webp&fit=cover&height=132&width=88,"Filmed over 12 years with the same cast, Richard Linklater's Boyhood is a groundbreaking story of growing up as seen through the eyes of a child named Mason (Ellar Coltrane), who literally grows up on screen before our eyes. Starring Ethan Hawke and Patricia Arquette as Mason's parents and newcomer Lorelei Linklater as his sister Samantha, Boyhood charts the rocky terrain of childhood like no other film has before and is both a nostalgic time capsule of the recent past and an ode to growing up and parenting. [IFC Films]"
1,Virunga,"Nov 7, 2014",95 out of 100,https://www.metacritic.com/a/img/resize/7a288f130add9fce9cbf082014411d72e84bf0c3/catalog/provider/2/2/2-bb956027e7bf3858ad9502a0fdb34fc8.jpg?auto=webp&fit=cover&height=132&width=88,"In the forested depths of eastern Congo lies Virunga National Park, one of the most bio-diverse places in the world and home to the last of the mountain gorillas. In this wild, but enchanted environment, a small and embattled team of park rangers - including an ex-child soldier turned ranger, a carer of orphan gorillas and a Belgian conservationist - protect this UNESCO world heritage site from armed militia, poachers and the dark forces struggling to control Congo's rich natural resources. When the newly formed M23 rebel group declares war in May 2012, a new conflict threatens the lives and stability of everyone and everything they've worked so hard to protect."
2,Mr. Turner,"Dec 19, 2014",94 out of 100,https://www.metacritic.com/a/img/resize/ab3c4c74e14b05ba2c1e63a1de83e059fe423921/catalog/provider/2/2/2-693ad75855a16a331e6c7d9c779c5005.jpg?auto=webp&fit=cover&height=132&width=88,"Mr. Turner explores the last quarter century of the life of the great if eccentric British painter J.M.W. Turner (Timothy Spall). Profoundly affected by the death of his father, loved by a housekeeper he takes for granted and occasionally exploits sexually, he forms a close relationship with a seaside landlady with whom he eventually lives incognito in Chelsea, where he dies. Throughout this, he travels, paints, stays with the country aristocracy, visits brothels, is a popular if anarchic member of the Royal Academy of Arts, has himself strapped to the mast of a ship so that he can paint a snowstorm, and is both celebrated and reviled by the public and by royalty."
3,Timbuktu,"Jan 28, 2015",92 out of 100,https://www.metacritic.com/a/img/resize/014a64850de9a9727bb1f33d8dca7fa28359288b/catalog/provider/2/2/2-1c50226452b9cf9bb2cf4a0d8a102a5a.jpg?auto=webp&fit=cover&height=132&width=88,"Not far from Timbuktu, now ruled by the religious fundamentalists, Kidane lives peacefully in the dunes with his wife Satima, his daughter Toya, and Issan, their twelve-year-old shepherd. In town, the people suffer, powerless, from the regime of terror imposed by the Jihadists determined to control their faith. Music, laughter, cigarettes, even soccer have been banned. The women have become shadows but resist with dignity. Every day, the new improvised courts issue tragic and absurd sentences. Kidane and his family are being spared the chaos that prevails in Timbuktu. But their destiny changes when Kidane accidentally kills Amadou, the fisherman who slaughtered his beloved cow. He now has to face the new laws of the foreign occupants."
4,The Look of Silence,"Jul 17, 2015",92 out of 100,https://www.metacritic.com/a/img/resize/3308ba5f681a756dce221ecb579291ef405e4eff/catalog/provider/2/2/2-593bf9be386313fae75ec9373a967662.jpg?auto=webp&fit=cover&height=132&width=88,"The Look of Silence is Joshua Oppenheimer's powerful companion piece to the Oscar®-nominated The Act of Killing. Through Oppenheimer's footage of perpetrators of the 1965 Indonesian genocide, a family of survivors discovers how their son was murdered, as well as the identities of the killers. The documentary focuses on the youngest son, an optometrist named Adi, who decides to break the suffocating spell of submission and terror by doing something unimaginable in a society where the murderers remain in power: he confronts the men who killed his brother and, while testing their eyesight, asks them to accept responsibility for their actions."
5,Leviathan,"Dec 25, 2014",92 out of 100,https://www.metacritic.com/a/img/resize/e4cb569096f438d542a20dfc8f9d768933d5db33/catalog/provider/2/2/2-e1ae4ec86d558cfc4566d79c0d5cc8e9.jpg?auto=webp&fit=cover&height=132&width=88,"Kolia lives in a small town near the Barents Sea in North Russia. He has his own auto-repair shop. His shop stands right next to the house where he lives with his young wife Lilya and his son from a previous marriage. Vadim Shelevyat, the Mayor of the town, wants to take away his business, his house and his land. First he tries buying off Kolia, but Kolia cannot stand losing everything he has, not only the land, but also all the beauty that has surrounded him from the day of his birth. So Vadim Shelevyat starts being more aggressive."
6,Gett: The Trial of Viviane Amsalem,"Feb 13, 2015",90 out of 100,https://www.metacritic.com/a/img/resize/4e654b58a2d6825a098521919adb6f18cc073699/catalog/provider/2/2/2-7630f09f4223a4b01c2a8670d680acc8.jpg?auto=webp&fit=cover&height=132&width=88,"In Israel there is neither civil marriage nor civil divorce. Only rabbis can legitimate a marriage or its dissolution. But this dissolution is only possible with full consent from the husband, who in the end has more power than the judges. Viviane Amsalem (Ronit Elkabetz) has been applying for divorce for three years. But her husband Elisha (Simon Abkarian) will not agree. His cold intransigence, Viviane's determination to fight for her freedom, and the ambiguous role of the judges shape a procedure in which tragedy vies with absurdity, and everything is brought out for judgment, apart from the initial request."
7,The Overnighters,"Oct 10, 2014",89 out of 100,https://www.metacritic.com/a/img/resize/9206388eaa503708b594d2cea409fcaa56c8f932/catalog/provider/2/2/2-8ac48e7b0ce9c1dea1c4ea65a80bc6e8.jpg?auto=webp&fit=cover&height=132&width=88,"In the tiny town of Williston, North Dakota, tens of thousands of unemployed hopefuls show up with dreams of honest work and a big paycheck under the lure of the oil boom. However, busloads of newcomers chasing a broken American Dream step into the stark reality of slim work prospects and nowhere to sleep. The town lacks the infrastructure to house the overflow of migrants, even for those who do find gainful employment. Over at Concordia Lutheran Church, Pastor Jay Reinke is driven to deliver the migrants some dignity. Night after night, he converts his church into a makeshift dorm and counseling center, opening the church’s doors to allow the “Overnighters” (as he calls them) to stay for a night, a week or longer. [Drafthouse Films]"
8,The Winding Stream,"Dec 16, 2015",89 out of 100,https://www.metacritic.com/a/img/resize/6818e47df1d8038477e0579ae655b4c76fd0bac1/catalog/provider/2/2/2-8e96893c99181e1c9a8464b50c56d4f2.jpg?auto=webp&fit=cover&height=132&width=88,"The Winding Stream tells the story of the American roots music dynasty, the Carters and the Cashes. Starting with the Original Carter Family (A.P., Sara, Maybelle), the film traces the ebb and flow of their influence, the transformation of that act into the Carter Sisters, the marital alliance with legend Johnny Cash and the efforts of present-day family to keep this legacy alive."
9,"Two Days, One Night","Dec 24, 2014",89 out of 100,https://www.metacritic.com/a/img/resize/8df86d786439700461e3c0532004bb1956f92438/catalog/provider/2/2/2-bee59c823ad3af71a644ba8938fe90b9.jpg?auto=webp&fit=cover&height=132&width=88,"Sandra (Marion Cotillard) has just returned to work after recovering from a serious bout with depression. Realizing that the company can operate with one fewer employee, management tells Sandra she is to be let go. After learning that her co-workers will vote to decide her fate on Monday morning, Sandra races against time over the course of the weekend, often with the help of her husband, to convince each of her fellow employees to sacrifice their much-needed bonuses so she can keep her job. With each encounter, Sandra is brought into a different world with unexpected results. [IFC Films]"
