# Metacritic:
In this notebook we crawl the https://www.metacritic.com/ website.
We use a combination of Selenium and BeautifulSoup.

In [1]:
import pandas as pd
import requests
import json
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Crawling preparations:
* meta_critic_list_years   -  list of URLs, one for each year (2009-2021).
* meta_critic_detail_list  -  dictionary to save the values of each game of each year (key=year, value=game details)

In [2]:
# Template URL for one year's score-sorted game listing. The literal token
# "year_str" is a placeholder that year_url() replaces with the actual year.
base_url = 'https://www.metacritic.com/browse/games/score/metascore/year/all/filtered?year_selected=year_str&distribution=&sort=desc&view=detailed'

# Inclusive range of years to crawl; the single source of truth for both
# the URL list and the result dictionary below.
FIRST_YEAR = 2009
LAST_YEAR = 2021


def year_url(x):
    """Return the listing URL for year ``x`` by filling in ``base_url``'s placeholder."""
    return base_url.replace("year_str", str(x))


# One listing URL per year, in ascending year order.
meta_critic_list_years = [year_url(year) for year in range(FIRST_YEAR, LAST_YEAR + 1)]

# key = year as a string, value = list the crawl cell appends scraped results to.
# Keys are inserted in the same ascending order as the URL list, because the
# crawl cell pairs the two by position with zip(); each year gets its own list.
meta_critic_detail_list = {str(year): [] for year in range(FIRST_YEAR, LAST_YEAR + 1)}

# Crawling execution:
Crawling https://www.metacritic.com/ using BeautifulSoup AND selenium.  
The crawler iterates through the URLs of each year and finds out how many pages that year has (outer loop).  
The crawler then iterates through all the pages of that year and scrapes the information needed (inner loop).

In [3]:
#Setting up selenium

#install service for selenium access
s = Service(ChromeDriverManager().install())

#toggle needed options
options = Options()
# Open the browser unseen. The command-line flag is used instead of the
# `options.headless = True` attribute, which newer Selenium 4 releases
# deprecated and then removed; the flag works on old and new releases alike.
options.add_argument("--headless")

#create driver
driver = webdriver.Chrome(service=s, options=options)
#---------------------------------------------------------------------------

# try/finally guarantees the Chrome process is shut down even when a request
# or a parse step raises part-way through the crawl.
try:
    # zip() pairs each year's URL with the dictionary key at the same position;
    # iterating a dict yields its keys in insertion order.
    for mc_years, mc_details in zip(meta_critic_list_years, meta_critic_detail_list):
        # First request only discovers how many result pages this year has.
        driver.get(mc_years)
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        last_page_item = soup.find("li", class_="page last_page")
        last_page_link = last_page_item.find("a") if last_page_item is not None else None
        if last_page_link is not None:
            page_numbers = int(last_page_link.string)
        else:
            # No "last page" pagination element found. Presumably the year fits
            # on a single page, so fetch just that one -- TODO confirm against
            # the live markup.
            page_numbers = 1

        # The `page` query parameter is requested starting from 0.
        for i in range(page_numbers):
            driver.get(mc_years + '&page=' + "{0}".format(i))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            # Store the page's whole result set; one list entry per page.
            meta_critic_detail_list[mc_details].append(soup.find_all(class_="clamp-summary-wrap"))
finally:
    driver.quit()



Current google-chrome version is 96.0.4664
Get LATEST chromedriver version for 96.0.4664 google-chrome
Trying to download new driver from https://chromedriver.storage.googleapis.com/96.0.4664.45/chromedriver_win32.zip
Driver has been saved in cache [C:\Users\Daniel\.wdm\drivers\chromedriver\win32\96.0.4664.45]


In [4]:
mc_name = []
mc_score = []

# Class strings of the score badge, tried in this order; the first one that
# matches supplies the score for the entry.
score_classes = (
    "metascore_w large game positive",
    "metascore_w large game negative",
    "metascore_w large game mixed",
)

# meta_critic_detail_list maps year -> list of per-page result sets, so the
# three loops walk years, then pages, then the entries found on each page.
for year_pages in meta_critic_detail_list.values():
    for page_entries in year_pages:
        for entry in page_entries:
            mc_name.append(entry.find("a", class_="title").string)
            for score_class in score_classes:
                score_tag = entry.find("div", class_=score_class)
                if score_tag is not None:
                    mc_score.append(score_tag.string)
                    break
            else:
                # None of the badge variants is present: keep the two lists
                # aligned by recording a missing score.
                mc_score.append(None)

In [5]:
# Combine the two lists into a two-column table, one row per scraped entry.
# The score values are whatever `.string` produced during extraction (text,
# or None when no score badge was found); they are not converted to numbers here.
metacritic_df = pd.DataFrame({'name':mc_name, 'score':mc_score})

In [7]:
# Show the assembled table as this cell's output.
metacritic_df

Unnamed: 0,name,score
0,Uncharted 2: Among Thieves,96
1,Call of Duty: Modern Warfare 2,94
2,Street Fighter IV,94
3,Call of Duty: Modern Warfare 2,94
4,Grand Theft Auto: Chinatown Wars,93
...,...,...
12090,Taxi Chaos,42
12091,Werewolf: The Apocalypse - Earthblood,42
12092,Balan Wonderworld,38
12093,Balan Wonderworld,36


In [8]:
# Write the table to metacritic.csv, a path relative to the current working
# directory. With to_csv's defaults the row index is written as well, as a
# first column with an empty header.
metacritic_df.to_csv("metacritic.csv")