In [1]:
"""
Script to extract recent game review from IGN
"""

'\nScript to extract recent game review from IGN\n'

In [2]:
import os
os.getcwd()
os.chdir("/Users/natepruitt/Projects/robocritic/")

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Imports
import requests
from settings import settings
from bs4 import BeautifulSoup
from helper_functions.db_connection import connection
from helper_functions.query_functions import get_publisher_by_id, get_game_by_title, get_review_by_game_id_and_publisher_id
from helper_functions.dynamodb_services import GameReleaseDate, Reviews
from helper_functions.igdb_service import IGDBService
from helper_functions.scraping_utils import get_latest_reviews_from_ign, get_game_review_soup_from_ign, get_game_release_date_from_metacritic
from helper_functions.write_functions import create_game, create_review, write_review_pros_to_db, write_review_cons_to_db
from helper_functions.openai_service import OpenAIService
from datetime import datetime
import logging

logger = logging.getLogger()
logger.setLevel(logging.INFO)

The error '2003 (HY000): Can't connect to MySQL server on 'localhost:3306' (61)' occurred


In [5]:
# retrieve the game release date from igdb API
igdb_service = IGDBService(connection)

# Instantiate OpenAI service
openai_service = OpenAIService()

TypeError: __init__() takes 1 positional argument but 2 were given

In [5]:
IGDBService().get_first_release_date_by_title("Mario vs. Donkey Kong")

datetime.date(2024, 2, 16)

In [6]:
# Pull latest games from IGN
latest_reviews = get_latest_reviews_from_ign()

In [8]:
soup = get_game_review_soup_from_ign("https://www.ign.com/articles/helldivers-2-review")
soup.find("a", class_="article-object-link").get("href")

'/games/helldivers-ii'

In [9]:
response = requests.get("https://www.ign.com/games/helldivers-ii", headers={"User-Agent": "Mac Firefox"})
soup = BeautifulSoup(response.content, "html.parser")

In [14]:
soup.find(class_="progressive-image")

<img alt="Helldivers II" class="jsx-2920405963 progressive-image object-image jsx-405688819 expand" decoding="async" src="https://assets-prd.ignimgs.com/2023/05/25/helldivers2-1684972836077.jpg?width=300&amp;crop=1%3A1%2Csmart&amp;auto=webp" srcset="https://assets-prd.ignimgs.com/2023/05/25/helldivers2-1684972836077.jpg?width=300&amp;crop=1%3A1%2Csmart&amp;auto=webp 1x, https://assets-prd.ignimgs.com/2023/05/25/helldivers2-1684972836077.jpg?width=300&amp;crop=1%3A1%2Csmart&amp;auto=webp&amp;dpr=2 2x"/>

In [7]:
game_title = "Prince of Persia: The Lost Crown"
review_publisher = "IGN"
review = Reviews().get_review_by_game_title_and_reviewer_name("Supernormal", review_publisher)
review

{'Pros': {'L': [{'S': 'Beautifully done scares, especially when played alone in a dark room with headphones'},
   {'S': 'Tense atmosphere in the exploration of the creepy house'},
   {'S': 'Randomized events and spooky interactions add to the unsettling experience'},
   {'S': 'Voice recognition system adds an intriguing feature'}]},
 'RoboScore': {'N': '7'},
 'ReviewPublisherName': {'S': 'IGN'},
 'Cons': {'L': [{'S': 'Underwhelming first half with strange choices and dull opening cinematic'},
   {'S': 'Awful dialogue and unconvincing delivery by the character'},
   {'S': 'Clumsy and predictable story with lack of replayability'},
   {'S': 'Voice recognition system not functioning properly'}]},
 'GameTitle': {'S': 'Supernormal'},
 'GameReleaseDate': {'S': '2024-01-08'}}

In [9]:
latest_reviews

[{'title': 'Another Code: Recollection',
  'href': '/articles/another-code-recollection-review'},
 {'title': 'Turnip Boy Robs a Bank',
  'href': '/articles/turnip-boy-robs-a-bank-review'},
 {'title': 'Prince of Persia: The Lost Crown',
  'href': '/articles/prince-of-persia-the-lost-crown-review'},
 {'title': 'Supernormal', 'href': '/articles/supernormal-review'},
 {'title': 'PlayStation 5 Slim', 'href': '/articles/playstation-5-review'}]

In [14]:
# For each review, create a game entry if does not exist
for review in latest_reviews:
    game_title = review['title']
    logger.info(game_title)
    game_release_date = None
    try:
        game_release_date = igdb_service.get_first_release_date_by_title(game_title).strftime("%Y-%m-%d")
    except requests.exceptions.HTTPError as e:
        logger.error(e)
    except ValueError as e:
        logger.error(e)
    if not game_release_date:
        # if pulling release_date from igdb service failed, try metacritic
        try:
            logger.info("Querying metacritic for game release date")
            game_release_date = get_game_release_date_from_metacritic(game_title)
            game_release_date = game_release_date.strftime("%Y-%m-%d")
        except requests.exceptions.HTTPError as e:
            logger.error(e)
        except ValueError as e:
            logger.error(e)

    if not game_release_date:
        # If fail to pull release date from meta critic and API, mark as no release date
        game_release_date = 'NoRelease'
    
    logger.info(game_release_date)
    logger.info(game_release_date + '_' + game_title)

    games_response = GameReleaseDate().write_item(game_release_date + '_' + game_title)
    logger.info(f"Writing review to db for {review['title']}")
    # Scrape review from site. 
    # Would be nice if review already exists NOT to call openAI api. 
    # Check if review exists for review. 
    review_in_db = Reviews().get_review_by_game_title_and_reviewer_name(game_title=game_title, review_publisher_name="IGN")
    if review_in_db:
        # if review does not exist, continue to next in loop.
        logger.info(f"Review found for game {game_title} published by IGN")
        continue

    review_url = "https://www.ign.com" + review['href']
    logger.info(f"Scrape review from {review_url}")
    game_review_content = None
    try:
        game_review_content = get_game_review_content_from_ign(review_url)
    except requests.exceptions.HTTPError as e:
        logger.error(e)
    
    if game_review_content:    
        # Get roboscore from open ai
        score_json = openai_service.assign_score_to_content(game_review_content)
        score = score_json["score"]
            
        # Send content to OpenAI API. 
        pros_cons_json = openai_service.extract_review_pros_and_cons(game_review_content)
    
        # Parse pros cons list
        # Create a list of tuples with review_id, text
        pros = pros_cons_json["pros"]
        cons = pros_cons_json["cons"]
    
        reviews_response = Reviews().write_item(game_title=game_title,
                             review_publisher_name="IGN",
                             game_release_date=game_release_date,
                             list_of_pros=pros,
                             list_of_cons=cons,
                             roboscore=score)
        logger.info(reviews_response)



Item already exists 2024-01-19_Another Code: Recollection
Item already exists 2024-01-18_Turnip Boy Robs a Bank
Item already exists 2024-01-18_Prince of Persia: The Lost Crown
Item already exists 2024-01-08_Supernormal


No game found with title 'PlayStation 5 Slim'
Request failed with url: https://www.metacritic.com/browse/games/playstation-5-slim


404 Client Error: Not Found for url: https://www.metacritic.com/game/playstation-5-slim/
Item already exists NoRelease_PlayStation 5 Slim


In [None]:
game = get_game_by_title(connection, 'The Finals')
print(game)

In [None]:
the_finals_release_date = igdb_service.get_game_release_date_by_title('The Finals')

In [None]:
create_game(connection, {"title": "The Finals", "release_date": the_finals_release_date})

In [None]:
get_latest_reviews_from_ign()

In [None]:
igdb_service.get_game_release_date_by_title("Resident Evil 4 Remake VR")

In [None]:
game = []
game[0]

In [None]:
r = requests.get('http://www.google.com/nothere')
r.status_code

In [None]:
ign_latest_reviews_url = "https://www.ign.com/reviews/games"
header_mapping = {"User-Agent": "Mac Firefox"}

ign_latest_reviews_html = requests.get(
        ign_latest_reviews_url, headers=header_mapping
    )
ign_latest_reivews_soup = BeautifulSoup(
        ign_latest_reviews_html.content, "html.parser"
    )
latest_reviews_a_tag_list = ign_latest_reivews_soup.find(
        class_="main-content"
    ).find_all("a")

filter_latest_reviews_a_tag = filter(
        lambda x: bool(x.get("aria-label")), latest_reviews_a_tag_list
    )

filtered_a_tags = list(filter_latest_reviews_a_tag)


In [None]:
filtered_a_tags[0].find(class_="item-publish-date").text

In [None]:
from datetime import datetime

In [None]:
datetime.strptime("5d ago", "%b %d, %Y").date().strftime("%Y-%m-%d")

In [None]:
# If error thrown trying to convert datetime, parse as '5d ago'
"11d ago".split("d")

In [None]:
datetime.

In [None]:
GameReleaseDate().get_recent_games()