In [1]:
"""
Script to extract recent game review from IGN
"""

'\nScript to extract recent game review from IGN\n'

In [2]:
import os
os.getcwd()
os.chdir("/Users/natepruitt/Projects/robocritic/")

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Imports
import requests
from settings import settings
from bs4 import BeautifulSoup
from helper_functions.db_connection import connection
from helper_functions.query_functions import get_publisher_by_id, get_game_by_title, get_review_by_game_id_and_publisher_id, GameReleaseDate, Reviews
from helper_functions.igdb_service import IGDBService
from helper_functions.scraping_utils import get_latest_reviews_from_ign, get_game_review_content_from_ign
from helper_functions.write_functions import create_game, create_review, write_review_pros_to_db, write_review_cons_to_db
from helper_functions.openai_service import OpenAIService
from datetime import datetime

Connection to MySQL DB successful


In [5]:
# retrieve the game release date from igdb API
igdb_service = IGDBService(connection)

# Instantiate OpenAI service
openai_service = OpenAIService()

In [6]:
# Pull latest games from IGN
publisher = get_publisher_by_id(connection, settings.ign_publisher_id)
latest_reviews = get_latest_reviews_from_ign()

In [None]:
game_title = "Prince of Persia: The Lost Crown"
review_publisher = "IGN"
review = Reviews().get_review_by_game_title_and_reviewer_name(game_title, review_publisher)
review

In [8]:
latest_reviews[0:2]

[{'title': 'Prince of Persia: The Lost Crown',
  'href': '/articles/prince-of-persia-the-lost-crown-review'},
 {'title': 'Supernormal', 'href': '/articles/supernormal-review'}]

In [12]:
# For each review, create a game entry if does not exist
for review in latest_reviews[0:2]:
    game_title = review['title']
    print(game_title)
    # Problem! Everytime I call this section of code, and there is NO game_release_date, it will result in a NEW entry into the 'Games'
    # table, since its creating a new date which, in combination with the game title, will create a new parition key.
    # Can I write a query function that searches the sort key?
    # Could check if it begins with previous date + game. But that no good. 
    # If a review exists, could assume it is in games. That is probably fair. Do not execute at all if a review exists for the specific
    # review + publisher. I might be able to check with 'CONTAINS' operator in query.
    try:
        game_release_date = igdb_service.get_first_release_date_by_title(game_title)
    except ValueError as e:
        print(e)
        game_release_date = datetime.today().date().strftime("%Y-%m-%d")
    
    print(game_release_date)
    print(game_release_date + '_' + game_title)

    games_response = GameReleaseDate().write_item(game_release_date + '_' + game_title)
    print(games_response)
    print(f"Writing review to db for {review['title']}")
    # Scrape review from site. 
    # Would be nice if review already exists NOT to call openAI api. 
    # Check if review exists for review. 
    review = Reviews().get_review_by_game_title_and_reviewer_name(game_title=game_title, review_publisher_name="IGN")
    
    if review:
        # if review does not exist, continue to next in loop.
        print(f"Review found for game {game_title} published by IGN")
        continue

    review_url = ign_url + review['href']
    print(f"Scrape review from {review_url}")
    game_review_content = get_game_review_content_from_ign(review_url)

    
    # Get roboscore from open ai
    score_json = openai_service.assign_score_to_content(game_review_content)
    score = score_json["score"]
        
    # Send content to OpenAI API. 
    pros_cons_json = openai_service.extract_review_pros_and_cons(game_review_content)

    # Parse pros cons list
    # Create a list of tuples with review_id, text
    pros = pros_cons_json["pros"]
    cons = pros_cons_json["cons"]

    reviews_response = Reviews().write_item(game_title=game_title,
                         review_publisher_name="IGN",
                         game_release_date=game_release_date,
                         list_of_pros=pros,
                         list_of_cons=cons,
                         roboscore=score)
    print(reviews_response)
    # Create review in DB
    # db_review = {"game_id": game_id, "url": review_url, "publisher_id": settings.ign_publisher_id, "robo_score": score}
    # print(f"Create review in db out of review object {db_review}")
    # review_id = create_review(connection, db_review)



Prince of Persia: The Lost Crown
2024-01-16
2024-01-16_Prince of Persia: The Lost Crown
Item already exists 2024-01-16_Prince of Persia: The Lost Crown
{'Error': {'Message': 'The conditional request failed', 'Code': 'ConditionalCheckFailedException'}, 'ResponseMetadata': {'RequestId': '66I4OOGITT6C2PS5UM5598O82JVV4KQNSO5AEMVJF66Q9ASUAAJG', 'HTTPStatusCode': 400, 'HTTPHeaders': {'server': 'Server', 'date': 'Tue, 16 Jan 2024 14:09:44 GMT', 'content-type': 'application/x-amz-json-1.0', 'content-length': '120', 'connection': 'keep-alive', 'x-amzn-requestid': '66I4OOGITT6C2PS5UM5598O82JVV4KQNSO5AEMVJF66Q9ASUAAJG', 'x-amz-crc32': '396270901'}, 'RetryAttempts': 0}, 'message': 'The conditional request failed'}
Writing review to db for Prince of Persia: The Lost Crown
Review found for game Prince of Persia: The Lost Crown published by IGN
Supernormal


KeyError: 'first_release_date'

In [None]:
game = get_game_by_title(connection, 'The Finals')
print(game)

In [None]:
the_finals_release_date = igdb_service.get_game_release_date_by_title('The Finals')

In [None]:
create_game(connection, {"title": "The Finals", "release_date": the_finals_release_date})

In [None]:
get_latest_reviews_from_ign()

In [None]:
igdb_service.get_game_release_date_by_title("Resident Evil 4 Remake VR")

In [None]:
game = []
game[0]

In [None]:
r = requests.get('http://www.google.com/nothere')
r.status_code