In [1]:
2+2

4

In [2]:
# https://partner.steamgames.com/doc/store/getreviews

In [3]:
import requests
import json
import csv
import sys
import os
import time  # Import time for the epoch timestamp
import re  # Import re for regex


# Construct the path to the scripts directory
script_path = os.path.abspath('../reviews-assistant/scripts')

# Add the path to sys.path
if script_path not in sys.path:
    sys.path.append(script_path)

import minsearch


class SteamReviewFetcher:
    def __init__(self, appids_with_titles, filter="all", language="english", day_range=30, review_type="all", purchase_type="all"):
        """
        Initializes the SteamReviewFetcher with the required parameters.

        :param appids_with_titles: List of tuples containing Steam application IDs and their corresponding titles.
        :param filter: Type of review filter.
        :param language: Language of the reviews.
        :param day_range: Number of days to consider for reviews.
        :param review_type: Type of review (all or specific).
        :param purchase_type: Type of purchase.
        """
        self.base_url = "https://store.steampowered.com/appreviews/"
        self.appids_with_titles = appids_with_titles if isinstance(appids_with_titles, list) else [appids_with_titles]
        self.filter = filter
        self.language = language
        self.day_range = day_range
        self.review_type = review_type
        self.purchase_type = purchase_type
        self.data_dir = os.path.abspath('../reviews-assistant/data/reviews')

        # Ensure the data directory exists
        os.makedirs(self.data_dir, exist_ok=True)

    def _construct_url(self, appid):
        return f"{self.base_url}{appid}?json=1"

    def _fetch_reviews(self, appid, num_reviews):
        url = self._construct_url(appid)
        params = {
            "filter": self.filter,
            "language": self.language,
            "day_range": self.day_range,
            "review_type": self.review_type,
            "purchase_type": self.purchase_type,
            "num_per_page": num_reviews
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        return response.json()

    def get_reviews(self, num_reviews=20, print_reviews=True):
        all_reviews = {}
        for appid, title in self.appids_with_titles:
            review_data = self._fetch_reviews(appid, num_reviews)
            reviews = review_data.get("reviews", [])
            all_reviews[appid] = (title, reviews)  # Store title along with reviews
            
            if print_reviews:
                self.print_first_last_reviews(appid, title, reviews)

        return all_reviews

    def print_first_last_reviews(self, appid, title, reviews):
        total_reviews = len(reviews)
        if total_reviews == 0:
            print(f"No reviews found for App ID {appid} ({title}).")
            return

        print(f"\nFirst 5 Reviews for App ID {appid} ({title}):")
        for review in reviews[:5]:
            self._print_review(review)

        print(f"\nLast 5 Reviews for App ID {appid} ({title}):")
        for review in reviews[-5:]:
            self._print_review(review)

    def _print_review(self, review):
        print(f"Author: {review['author']['steamid']}")
        print(f"Review: {review.get('review', 'No text')}")
        print(f"Rating: {'Positive' if review['voted_up'] else 'Negative'}")
        print(f"Timestamp: {review['timestamp_created']}")
        print("-" * 79)

    def _extract_columns_to_save(self, reviews, appid, title):
        extracted_reviews = []
        current_time = int(time.time())  # Get current epoch time
        for review in reviews:
            review_dict = {
                "appid": appid,  # Move appid to the first field
                "timestamp_query": current_time,  # Move timestamp_query to the second field
                "title": title,
            }
            for column in ["recommendationid", "author.steamid", "author.playtimeforever",
                           "author.playtime_last_two_weeks", "author.playtime_at_review",
                           "author.last_played", "language", "review", "timestamp_created", "timestamp_updated"]:
                if column.split('.')[0] == 'author':
                    nested_column = column.split('.')[1]
                    review_dict[column] = review['author'].get(nested_column)
                else:
                    review_dict[column] = review.get(column)
            extracted_reviews.append(review_dict)
        return extracted_reviews

    def save_reviews(self, all_reviews, filename_prefix, format):
        for appid, (title, reviews) in all_reviews.items():
            if not reviews:
                print(f"No reviews to save for App ID {appid} ({title}).")
                continue

            safe_title = self._sanitize_title(title)  # Sanitize title for filename
            lower_safe_title = safe_title.lower()  # Convert to lowercase for the filename
            if format == "csv":
                self._save_reviews_as_csv(appid, lower_safe_title, reviews, filename_prefix, title)
            elif format == "json":
                self._save_reviews_as_json(appid, lower_safe_title, reviews, filename_prefix, title)
            else:
                print("Invalid format. Please specify 'csv' or 'json'.")

    def _sanitize_title(self, title):
        """Remove special characters from the title for safe filename."""
        return re.sub(r'[<>:"/\\|?*]', '', title).replace("'", "").replace(" ", "_")

    def _save_reviews_as_csv(self, appid, lower_safe_title, reviews, filename_prefix, title):
        keys = ['appid', 'timestamp_query', 'title', 'steamid', 'review', 'voted_up', 'timestamp_created']  # Update the keys
        filename = os.path.join(self.data_dir, f"{filename_prefix}_{lower_safe_title}_{appid}_reviews.csv")
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(keys)  # Write the header
            for review in reviews:
                row = [
                    appid,  # App ID as the first field
                    int(time.time()),  # Current epoch timestamp as the second field
                    title,  # Keep the original title
                    review['author']['steamid'],
                    review.get('review', 'No text'),
                    'Positive' if review['voted_up'] else 'Negative',
                    review['timestamp_created'],
                ]
                writer.writerow(row)
        print(f"Reviews for App ID {appid} ({title}) saved to {filename}")

    def _save_reviews_as_json(self, appid, lower_safe_title, reviews, filename_prefix, title):
        filename = os.path.join(self.data_dir, f"reviews_{lower_safe_title}.json")
        extracted_reviews = self._extract_columns_to_save(reviews, appid, title)  # Use original title here
        with open(filename, 'w', encoding='utf-8') as jsonfile:
            json.dump(extracted_reviews, jsonfile, indent=4)
        print(f"Reviews for App ID {appid} ({title}) saved to {filename}")


# Example usage
if __name__ == "__main__":
    appids_with_titles = [
        ("2322010", "God of War: Ragnarok"),
        ("1086940", "Baldur's Gate 3"),
        ("1680880", "Forspoken"),
        ("1496790", "Gotham Knights"),
        ("315210", "Suicide Squad: Kill the Justice League"),
        ("2443720", "Concord"),
        ("2208920", "Assassin's Creed Valhalla"),
        ("1817070", "Marvel’s Spider-Man Remastered"),
        ("1832040", "Flintlock: The Siege of Dawn"),
    ]  # List of tuples containing app IDs and their titles
    review_fetcher = SteamReviewFetcher(appids_with_titles)

    num_reviews = 2000  # Specify the number of reviews to fetch
    print_reviews_flag = False  # Set to False to turn off printing reviews

    all_reviews = review_fetcher.get_reviews(num_reviews, print_reviews=print_reviews_flag)

    # Save reviews to file
    review_fetcher.save_reviews(all_reviews, "reviews", format="json")


Reviews for App ID 2322010 (God of War: Ragnarok) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_god_of_war_ragnarok.json
Reviews for App ID 1086940 (Baldur's Gate 3) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_baldurs_gate_3.json
Reviews for App ID 1680880 (Forspoken) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_forspoken.json
Reviews for App ID 1496790 (Gotham Knights) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_gotham_knights.json
Reviews for App ID 315210 (Suicide Squad: Kill the Justice League) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_suicide_squad_kill_the_justice_league.json
Reviews for App ID 2443720 (Concord) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_concord.json
Reviews for 

# Ingestion

In [4]:
# Directory containing the data files
data_dir = os.path.abspath('../reviews-assistant/data/reviews')

# Initialize an empty list to hold all reviews
reviews = []

# List objects in the directory
objects_in_directory = os.listdir(data_dir)

# Iterate over the files in the directory
for obj in objects_in_directory:
    if obj.endswith('.json'):  # Check if the file is a JSON file
        file_path = os.path.join(data_dir, obj)
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            # Load the reviews from the JSON file
            file_reviews = json.load(jsonfile)
            reviews.extend(file_reviews)  # Append reviews to the main list
# Print the first i reviews
i = 2  # Change this to print more reviews if needed
for review in reviews[:i]:
    print(f"Author ID: {review['author.steamid']}")
    print(f"Review: {review.get('review', 'No text')}")
    print(f"Timestamp Created: {review['timestamp_created']}")
    print("-" * 79)

Author ID: 76561199211892974
Review: ⠀⠀⠀⠀⠀⠀⠀⠀⠀
[h2] Pros & Cons [/h2]
[hr]
[/hr]
✅ Pros:
[list]
  [*]Beautifully crafted world with great visuals.
  [*]Entertaining quests.
  [*]Good amount of personalization.
  [*]Variety of things to do.
  [*]Great setting.
[/list]

❌ Cons:
[list]
  [*]Yet another Ubisoft game with fillers above fillers. Quantity > Quality.
  [*]The game overall is too longs. With such an amount of things to do, it is easy to get distracted. It is a personal view as someone might find this as a positive, but after so many hours I just lost interest to move forward.
  [*]The combats is getting repetitive after a while. The skill tree even if it is big, lacks skills that will impact the combat.
  [*]The base building although interesting in the beginning, does not bring much value to your characters. It is an add-on that after a while you might forget it even exist.
[/list]

[h2] Verdict[/h2]
[hr]
[/hr]
Even though I recommend this game, it is easy to notice the amount

In [5]:
len(reviews)

773

In [6]:
reviews[0]

{'appid': '2208920',
 'timestamp_query': 1727039096,
 'title': "Assassin's Creed Valhalla",
 'recommendationid': '173898407',
 'author.steamid': '76561199211892974',
 'author.playtimeforever': None,
 'author.playtime_last_two_weeks': 0,
 'author.playtime_at_review': 2441,
 'author.last_played': 1718652672,
 'language': 'english',
 'review': '⠀⠀⠀⠀⠀⠀⠀⠀⠀\n[h2] Pros & Cons [/h2]\n[hr]\n[/hr]\n✅ Pros:\n[list]\n  [*]Beautifully crafted world with great visuals.\n  [*]Entertaining quests.\n  [*]Good amount of personalization.\n  [*]Variety of things to do.\n  [*]Great setting.\n[/list]\n\n❌ Cons:\n[list]\n  [*]Yet another Ubisoft game with fillers above fillers. Quantity > Quality.\n  [*]The game overall is too longs. With such an amount of things to do, it is easy to get distracted. It is a personal view as someone might find this as a positive, but after so many hours I just lost interest to move forward.\n  [*]The combats is getting repetitive after a while. The skill tree even if it is bi

In [7]:
reviews[-1]

{'appid': '315210',
 'timestamp_query': 1727039096,
 'title': 'Suicide Squad: Kill the Justice League',
 'recommendationid': '173467679',
 'author.steamid': '76561198069417131',
 'author.playtimeforever': None,
 'author.playtime_last_two_weeks': 2711,
 'author.playtime_at_review': 1891,
 'author.last_played': 1726956745,
 'language': 'english',
 'review': 'When you make it less shit grind and more power fantasy maybe we can talk. Change the guns for something more interesting',
 'timestamp_created': 1724697884,
 'timestamp_updated': 1724697884}

In [8]:
index = minsearch.Index(
    # text_fields=["author.steamid", "author.playtimeforever", "author.playtime_last_two_weeks", "author.playtime_at_review", "author.last_played", "language", "review", "timestamp_created", "timestamp_updated"],
    text_fields=["title", "language", "review"],
    keyword_fields=["appid", "recommendationid"]
)

In [9]:
index.fit(reviews)

<minsearch.Index at 0x7f1ed430ddb0>

# RAG flow

In [10]:
# print(os.environ['OPENAI_API_KEY'])

In [11]:
from openai import OpenAI

client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # this is also the default, it can be omitted
)


In [12]:
def search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [13]:
prompt_template = """
You're a conservative father of young children who is not aware of how the modern gaming industry works, who is not up to date with the titles released on a daily basis.
A father who needs to keep his child from not suitable games, including DEI games.
DEI ideology, which enforces Diversity, equity, and inclusion (DEI) hurts creativity, and uses corporate propaganda to improve sales.
Answer the QUESTION based on the CONTEXT from our exercises database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
title: {title}
language: {language}
review: {review}
""".strip()

def build_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [14]:
def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [15]:
def rag(query, model='gpt-4o-mini'):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    #print(prompt)
    answer = llm(prompt, model=model)
    return answer

In [16]:
question = "Is Baldur's Gate 3 a game for kids?"
answer = rag(question)
print(answer)

Baldur's Gate 3 is not considered suitable for kids. The game features complex themes, including mature content such as romance and interactions with darker elements like demons, cults, and moral dilemmas. Players often engage in quests that involve serious decision-making, with some scenarios being intense or adult-oriented in nature. Given these factors, it may not be appropriate for young children, particularly if you're concerned about exposure to games that have elements of adult content or themes not suitable for a child's developmental stage.


In [17]:
question = "Is Baldur's Gate 3 a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Baldur's Gate 3 appears to incorporate aspects of diversity in its gameplay, allowing players to create characters with a range of backgrounds, races, and abilities that can affect interactions within the game. The writing showcases a variety of character personalities and motivations, which can enhance the overall narrative experience. However, the provided context does not explicitly indicate that the game is designed to enforce or promote a DEI agenda as criticized by some. Instead, it emphasizes the richness of the game's world, storytelling, and character development, all of which can foster creativity and personal expression in a way that does not seem rooted in a corporate propaganda narrative.

Overall, while Baldur's Gate 3 features diverse character creation and interaction mechanics, the reviews highlight its focus on quality storytelling and immersive gameplay rather than a direct adherence to DEI concepts.


In [18]:
question = "Is God of War Ragnarök a game for kids?"
answer = rag(question)
print(answer)

God of War Ragnarök is not suitable for kids. The game has been described as having a strong narrative with complex themes, which may not be appropriate for younger audiences. Additionally, it has been criticized for incorporating "woke" elements and includes forced character decisions and storylines that some may find inappropriate or distracting. The game also features intense combat and violence, which, along with its mature themes, makes it better suited for older teens and adults rather than young children.


In [19]:
question = "Is God of War Ragnarök a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the context provided, it appears that "God of War Ragnarök" incorporates elements that some reviewers associate with "woke" ideology, suggesting influences tied to diversity, equity, and inclusion (DEI). For instance, it's mentioned that the developers collaborated with a company known for inclusive storytelling, which some critics felt affected the authenticity of the plot and character decisions, leading to a narrative that felt forced or nonsensical at times. This influence has been noted as a drawback by certain players who believe it detracts from the overall experience and depth of the game's story.

Thus, if you're seeking to keep your children away from games that you believe promote DEI concepts, "God of War Ragnarök" may be a title to consider, as it has elements that some perceive as aligned with that ideology.
