In [1]:
import requests, csv, pandas as pd, pprint, time
from bs4 import BeautifulSoup
import lxml, html5lib
import re
from IPython.display import display, HTML

# Module-level accumulator filled by scrapper(): one list per output column.
# The lists are later turned into a DataFrame, so they are expected to end up
# the same length.
data_dict = {
    column: []
    for column in (
        "name",
        "date",
        "platform",
        "score",
        "url",
        "ratings",
        "userscore",
    )
}


def webpage(pageNum, system):
    """Fetch one Metacritic 90-day browse page (search results page).

    Args:
        pageNum: Page index placed in the ``page`` query parameter.
        system: Platform path segment placed in the URL (e.g. "all").

    Returns:
        The ``requests`` response for the GET request.
    """
    base = "https://www.metacritic.com/browse/games/score/metascore/90day/"
    target = f"{base}{system}/filtered?view=condensed&page={pageNum}"
    # A browser-like User-agent header is sent with the request.
    return requests.get(target, headers={"User-agent": "Mozilla/5.0"})


def numberPages(response):
    """Return the number shown on the "last page" pagination link.

    Used to decide how many result pages to scrape.

    Args:
        response: Object whose ``text`` attribute holds the page HTML.

    Returns:
        The text of the ``a.page_num`` link inside the first
        ``li.page.last_page`` element, or "1" when the page has no such
        element or link.
    """
    soup = BeautifulSoup(response.text, "html.parser")
    last_page_items = soup.find_all("li", {"class": "page last_page"})
    # Explicit checks instead of a bare ``except:`` so that unrelated errors
    # (and KeyboardInterrupt) are no longer silently turned into "1".
    if not last_page_items:
        return "1"
    page_link = last_page_items[0].find("a", {"class": "page_num"})
    if page_link is None:
        return "1"
    return page_link.text


def _fetch_rating_count(href):
    """Return the user-rating count shown on a game's user-reviews page.

    Requests ``https://www.metacritic.com<href>/user-reviews`` and looks for a
    ``<strong>`` element whose text matches "Ratings".

    Args:
        href: Link value taken from the game's title anchor.

    Returns:
        The element's stripped text with " Ratings" removed, or "0" when no
        such element is found.
    """
    page = requests.get(
        "https://www.metacritic.com" + str(href) + "/user-reviews",
        headers={"User-agent": "Mozilla/5.0"},
    )
    game_soup = BeautifulSoup(page.text, "html.parser")
    label = game_soup.find("strong", text=re.compile("Ratings"))
    if not label:
        return "0"
    return label.get_text().strip().replace(" Ratings", "")


def scrapper(num_loops, content):
    """Extract game data from result tables into the module-level data_dict.

    For each of the first ``num_loops`` tables, every row with at least two
    cells contributes: the metascore (first cell), and the name, release
    date, platform, link, rating count and user score (second cell).

    Args:
        num_loops: Number of tables from ``content`` to process.
        content: Sequence of parsed ``<table>`` elements.

    Returns:
        None. Results are appended to the lists in ``data_dict``.

    Note:
        One extra HTTP request is made per game link to read its rating
        count, with no delay between those requests.
    """
    # NOTE(review): each field is appended independently, so a row that lacks
    # one of the elements would leave the columns with unequal lengths -
    # confirm the page markup always provides one of each per game row.
    for tblnum in range(num_loops):
        # Rows are parsed once per table instead of once per field.
        for tr in content[tblnum].find_all("tr"):
            td = tr.find_all("td")
            if len(td) < 2:
                # Rows without both cells used to raise IndexError on td[1].
                continue
            score_cell, info_cell = td[0], td[1]

            # Game name
            for a in info_cell.find_all("a", {"class": "title"}):
                data_dict["name"].append(a.find("h3").text)

            # Release date
            for date in info_cell.find_all("span", {"class": ""}):
                data_dict["date"].append(date.text)

            # Platform
            for platform in info_cell.find_all("span", {"class": "data"}):
                data_dict["platform"].append(platform.text.strip())

            # Metascore
            for user in score_cell.find_all("div", {"class": "metascore_w"}):
                data_dict["score"].append(user.text.strip())

            # Game link and the rating count from its user-reviews page
            for a in info_cell.find_all("a", {"class": "title"}, href=True):
                data_dict["url"].append(a["href"])
                data_dict["ratings"].append(_fetch_rating_count(a["href"]))

            # User score
            for score in info_cell.find_all("div", {"class": "metascore_w"}):
                data_dict["userscore"].append(score.text)


def pages(lastPageNum, system):
    """Download each result page for a platform and run the scraper on it.

    Pages are requested with indices ``0`` to ``lastPageNum - 1``.

    Args:
        lastPageNum: Number of result pages to fetch (converted with int()).
        system: Platform path segment passed through to ``webpage``.

    Returns:
        None. Scraped values are accumulated by ``scrapper``.
    """
    for currentPage in range(int(lastPageNum)):
        # Reuse webpage() rather than rebuilding the same URL and headers.
        response = webpage(currentPage, system)
        soup = BeautifulSoup(response.text, "html.parser")
        content = soup.find_all("table")

        scrapper(len(content), content)
        # Pause after each results page before requesting the next one.
        time.sleep(6)


def main():
    """Scrape every configured platform and write the results to a CSV file.

    For each platform, the first results page is fetched to read the page
    count, then all pages are scraped. The accumulated ``data_dict`` is
    written to ``mc_90_days.csv``.
    """
    # Alternative platform list: ["switch", "ps4", "ps5"]
    for system in ["all"]:
        first_page = webpage(0, system)
        page_count = int(numberPages(first_page))
        pages(page_count, system)
        time.sleep(5)

    scraped = pd.DataFrame.from_dict(data_dict)
    scraped.to_csv("mc_90_days.csv")


# Run the scrape: fills data_dict and writes mc_90_days.csv.
main()

video_games = pd.DataFrame.from_dict(data_dict)

# Drop rows whose value in any of these columns is the literal "tbd".
for column in ("userscore", "score", "ratings"):
    video_games = video_games[~video_games[column].isin(["tbd"])]

# The scraped values are strings; convert the numeric columns to float.
for column in ("ratings", "score", "userscore"):
    video_games[column] = video_games[column].astype(float)

# Collapse the per-platform rows into one row per game: earliest date string,
# list of platforms, mean scores and summed rating counts.
d = {
    column: column
    for column in ("date", "platform", "score", "ratings", "userscore")
}
per_game = video_games.groupby("name").agg(
    {
        "date": "min",
        "platform": list,
        "score": "mean",
        "ratings": "sum",
        "userscore": "mean",
    }
)
video_games = per_game.rename(columns=d).round(1)

# Keep only games that meet every minimum threshold.
for column, minimum in (("userscore", 7.5), ("score", 75), ("ratings", 150)):
    video_games = video_games[video_games[column] >= minimum]

report_html = video_games.sort_values(["name"]).to_html()
display(HTML(report_html))


Unnamed: 0_level_0,date,platform,score,ratings,userscore
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bravely Default II,"February 26, 2021",[Switch],76.0,233.0,8.1
Disco Elysium: The Final Cut,"March 30, 2021","[PC, PlayStation 5, PlayStation 4]",90.0,227.0,7.7
It Takes Two,"March 26, 2021","[PlayStation 4, Xbox Series X, PlayStation 5, PC]",88.5,771.0,8.9
Little Nightmares II,"February 11, 2021","[PC, Switch, PlayStation 4, Xbox One]",81.5,475.0,8.1
Monster Hunter Rise,"March 26, 2021",[Switch],88.0,510.0,9.1
NieR Replicant ver.1.22474487139...,"April 23, 2021","[PlayStation 4, PC]",81.5,187.0,8.0
Super Mario 3D World + Bowser's Fury,"February 12, 2021",[Switch],89.0,370.0,8.5
