In [1]:
"""
Script to extract recent game review from IGN
"""

'\nScript to extract recent game review from IGN\n'

In [2]:
import os

# Switch the working directory to the project checkout before anything else runs
# (presumably so the `settings` / `helper_functions` imports and the .env file
# resolve relative to it -- confirm).
# Set the ROBOCRITIC_ROOT environment variable to point at your own checkout; the
# original developer's path is kept as the fallback so existing setups are unaffected.
PROJECT_ROOT = os.environ.get("ROBOCRITIC_ROOT", "/Users/natepruitt/Projects/robocritic/")
os.chdir(PROJECT_ROOT)

In [3]:
# Load environment variables from a .env file (python-dotenv) before the project
# modules are imported in the next cell.
# NOTE(review): presumably the DB / API credentials are read from the environment
# by those modules -- confirm.
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
# Imports
import requests
from settings import settings
from bs4 import BeautifulSoup
from helper_functions.db_connection import connection
from helper_functions.query_functions import get_publisher_by_id, get_game_by_title, get_review_by_game_id_and_publisher_id
from helper_functions.igdb_service import IGDBService
from helper_functions.scraping_utils import get_latest_reviews_from_ign, get_game_review_content_from_ign
from helper_functions.write_functions import create_game, create_review, write_review_pros_to_db, write_review_cons_to_db
from helper_functions.openai_service import OpenAIService
from datetime import datetime

Connection to MySQL DB successful


In [5]:
# Instantiate the IGDB service, handing it the shared DB connection. It is used
# further down to look up game release dates.
igdb_service = IGDBService(connection)

# Instantiate the OpenAI service. It is used further down to score review text and
# to extract pros/cons from it.
openai_service = OpenAIService()

In [6]:
# Pull the latest reviews from IGN.
# `publisher` is the DB row for the IGN publisher id configured in settings; it is
# not referenced again in the cells visible here.
# `latest_reviews` is a list of dicts with 'title' and 'href' keys (the href is a
# site-relative path such as '/articles/...').
publisher = get_publisher_by_id(connection, settings.ign_publisher_id)
latest_reviews = get_latest_reviews_from_ign()

In [9]:
# Peek at the one-element slice (index 1 only) that the processing loop iterates over.
latest_reviews[1:2]

[{'title': 'Fortnite Rocket Racing',
  'href': '/articles/fortnite-rocket-racing-review'}]

In [11]:
# Base URL of the IGN site; review hrefs are site-relative paths appended to it.
ign_url = "https://www.ign.com"
# Absolute URL of the Fortnite Rocket Racing review, built for a quick manual check.
fortnite = f"{ign_url}/articles/fortnite-rocket-racing-review"
fortnite

'https://www.ign.com/articles/fortnite-rocket-racing-review'

In [14]:
# For each scraped review: make sure the game has a row in the DB, then -- unless
# IGN's review of that game is already stored -- scrape the article, score it with
# OpenAI, and persist the review together with its pros and cons.
# NOTE(review): the [1:2] slice restricts this run to the second scraped review only;
# it looks like a debugging leftover -- widen it to process the whole list.
for review in latest_reviews[1:2]:
    game_title = review['title']
    game = get_game_by_title(connection, game_title)
    if game:
        # The id is the first column of the row returned for an existing game.
        game_id = game[0]
    else:
        # Unknown game: look up its release date, then insert it.
        # NOTE(review): the helper is named get_first_release_date_id_by_title --
        # confirm it returns a date and not a release-date id.
        try:
            game_release_date = igdb_service.get_first_release_date_id_by_title(game_title)
        except ValueError as e:
            # Best-effort fallback: record today's date when the lookup fails.
            print(e)
            game_release_date = datetime.today().date()
        game_id = create_game(connection, {"title": game_title, "release_date": game_release_date})

    # Skip games whose IGN review is already stored.
    db_review = get_review_by_game_id_and_publisher_id(
        connection, game_id=game_id, publisher_id=settings.ign_publisher_id
    )
    if db_review:
        print(f"Review in db: {db_review}")
        continue

    print(f"Writing review to db for {game_title}")
    # Scrape the review text from the site.
    review_url = ign_url + review['href']
    print(f"Scrape review from {review_url}")
    game_review_content = get_game_review_content_from_ign(review_url)

    # Make BOTH OpenAI calls (and pull the fields we need out of their results) before
    # any review row is written. If the review were inserted first and the pros/cons
    # call then failed, the row would exist without pros/cons and the existence check
    # above would skip it on every later run.
    score_json = openai_service.assign_score_to_content(game_review_content)
    pros_cons_json = openai_service.extract_review_pros_and_cons(game_review_content)
    score = score_json["score"]
    pros_texts = pros_cons_json["pros"]
    cons_texts = pros_cons_json["cons"]

    # Create the review row; create_review's return value is used as the review id.
    review_payload = {
        "game_id": game_id,
        "url": review_url,
        "publisher_id": settings.ign_publisher_id,
        "robo_score": score,
    }
    print(f"Create review in db out of review object {review_payload}")
    review_id = create_review(connection, review_payload)

    # Pair every pro/con text with the review id: a list of (review_id, text) tuples.
    pros = [(review_id, text) for text in pros_texts]
    cons = [(review_id, text) for text in cons_texts]

    write_review_pros_to_db(connection, pros)
    write_review_cons_to_db(connection, cons)

Review in db: (5, 'https://www.ign.com/articles/fortnite-rocket-racing-review', Decimal('5.0'), None, 31, 1, None, datetime.datetime(2024, 1, 4, 13, 13, 7))


In [15]:
# Spot-check: fetch the stored row for 'The Finals'. In the recorded output the row
# is a tuple whose first element is the game id, followed by the title and a date.
# NOTE: this rebinds the notebook-level name `game`, which the processing loop also uses.
game = get_game_by_title(connection, 'The Finals')
print(game)

(5, 'The Finals', datetime.date(2023, 12, 8), datetime.datetime(2023, 12, 19, 12, 46, 51))


In [8]:
# Look up the release date of 'The Finals' through the IGDB service.
# NOTE(review): this calls get_game_release_date_by_title, whereas the processing
# loop calls get_first_release_date_id_by_title -- confirm which one is intended.
the_finals_release_date = igdb_service.get_game_release_date_by_title('The Finals')

In [9]:
# Insert 'The Finals' with the release date fetched above; the value echoed by the
# cell (5 in the recorded output) is what create_game returns, used elsewhere as the game id.
create_game(connection, {"title": "The Finals", "release_date": the_finals_release_date})

5

In [9]:
# Re-run the scraper just to inspect its raw result: a list of
# {'title': ..., 'href': ...} dicts, one per review.
get_latest_reviews_from_ign()

[{'title': 'The Finals', 'href': '/articles/the-finals-review'},
 {'title': 'Warhammer 40,000: Rogue Trader',
  'href': '/articles/warhammer-40000-rogue-trader-review'},
 {'title': 'Resident Evil 4 Remake VR',
  'href': '/articles/resident-evil-4-remake-vr-review'},
 {'title': 'Granblue Fantasy Versus Rising',
  'href': '/articles/granblue-fantasy-versus-rising-review'},
 {'title': 'Cookie Cutter', 'href': '/articles/cookie-cutter-review'},
 {'title': 'House Flipper 2', 'href': '/articles/house-flipper-2-review'},
 {'title': 'Fortnite Festival', 'href': '/articles/fortnite-festival-review'},
 {'title': 'Asgard’s Wrath 2', 'href': '/articles/asgards-wrath-2-review'},
 {'title': 'Lego Fortnite', 'href': '/articles/lego-fortnite-review'},
 {'title': 'The Day Before', 'href': '/articles/the-day-before-review'}]

In [6]:
# Probe the IGDB lookup with a title from the scraped list. The recorded output shows
# it failing: a 404 on https://api.igdb.com/v4/game, followed by a 400 on
# /v4/release_dates queried with `id = None`.
# NOTE(review): the helper seems to carry on with a None id after the first request
# fails, and the 404 suggests the endpoint path may be wrong (IGDB's games endpoint
# is presumably /v4/games) -- verify in helper_functions.igdb_service.
igdb_service.get_game_release_date_by_title("Resident Evil 4 Remake VR")

404 Client Error: Not Found for url: https://api.igdb.com/v4/game
Request failed with data: fields name, genres, platforms, release_dates; where name = "Resident Evil 4 Remake VR";
400 Client Error: Bad Request for url: https://api.igdb.com/v4/release_dates
Request failed with data: fields date; where id = None;


In [7]:
# Scratch demonstration: indexing an empty list raises IndexError (see the recorded
# output), so an empty lookup result must be checked before reading element 0.
# NOTE: this cell raises on purpose and also rebinds the notebook-level name `game`.
game = []
game[0]

IndexError: list index out of range

In [17]:
# Scratch check: requests returns a normal response object for a missing page
# (status 404 in the recorded output) instead of raising, so callers must inspect
# status_code (or call raise_for_status) themselves.
# A timeout is passed because requests otherwise waits indefinitely on a stalled
# connection, which would hang the kernel.
r = requests.get('http://www.google.com/nothere', timeout=10)
r.status_code

404