In [1]:
2+2

4

In [2]:
# !pip list

In [3]:
# https://partner.steamgames.com/doc/store/getreviews

In [4]:
import requests
import json
import csv
import sys
import os
import time  # Import time for the epoch timestamp
import re  # Import re for regex


# Construct the path to the scripts directory
script_path = os.path.abspath('../reviews-assistant/scripts')

# Add the path to sys.path
if script_path not in sys.path:
    sys.path.append(script_path)

import minsearch


class SteamReviewFetcher:
    def __init__(self, appids_with_titles, filter="all", language="english", day_range=30, review_type="all", purchase_type="all"):
        """
        Initializes the SteamReviewFetcher with the required parameters.

        :param appids_with_titles: List of tuples containing Steam application IDs and their corresponding titles.
        :param filter: Type of review filter.
        :param language: Language of the reviews.
        :param day_range: Number of days to consider for reviews.
        :param review_type: Type of review (all or specific).
        :param purchase_type: Type of purchase.
        """
        self.base_url = "https://store.steampowered.com/appreviews/"
        self.appids_with_titles = appids_with_titles if isinstance(appids_with_titles, list) else [appids_with_titles]
        self.filter = filter
        self.language = language
        self.day_range = day_range
        self.review_type = review_type
        self.purchase_type = purchase_type
        self.data_dir = os.path.abspath('../reviews-assistant/data/reviews')

        # Ensure the data directory exists
        os.makedirs(self.data_dir, exist_ok=True)

    def _construct_url(self, appid):
        return f"{self.base_url}{appid}?json=1"

    def _fetch_reviews(self, appid, num_reviews):
        url = self._construct_url(appid)
        params = {
            "filter": self.filter,
            "language": self.language,
            "day_range": self.day_range,
            "review_type": self.review_type,
            "purchase_type": self.purchase_type,
            "num_per_page": num_reviews
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        return response.json()

    def get_reviews(self, num_reviews=20, print_reviews=True):
        all_reviews = {}
        for appid, title in self.appids_with_titles:
            review_data = self._fetch_reviews(appid, num_reviews)
            reviews = review_data.get("reviews", [])
            all_reviews[appid] = (title, reviews)  # Store title along with reviews
            
            if print_reviews:
                self.print_first_last_reviews(appid, title, reviews)

        return all_reviews

    def print_first_last_reviews(self, appid, title, reviews):
        total_reviews = len(reviews)
        if total_reviews == 0:
            print(f"No reviews found for App ID {appid} ({title}).")
            return

        print(f"\nFirst 5 Reviews for App ID {appid} ({title}):")
        for review in reviews[:5]:
            self._print_review(review)

        print(f"\nLast 5 Reviews for App ID {appid} ({title}):")
        for review in reviews[-5:]:
            self._print_review(review)

    def _print_review(self, review):
        print(f"Author: {review['author']['steamid']}")
        print(f"Review: {review.get('review', 'No text')}")
        print(f"Rating: {'Positive' if review['voted_up'] else 'Negative'}")
        print(f"Timestamp: {review['timestamp_created']}")
        print("-" * 79)

    def _extract_columns_to_save(self, reviews, appid, title):
        extracted_reviews = []
        current_time = int(time.time())  # Get current epoch time
        for review in reviews:
            review_dict = {
                "appid": appid,  # Move appid to the first field
                "timestamp_query": current_time,  # Move timestamp_query to the second field
                "title": title,
            }
            for column in ["recommendationid", "author.steamid", "author.playtimeforever",
                           "author.playtime_last_two_weeks", "author.playtime_at_review",
                           "author.last_played", "language", "review", "timestamp_created", "timestamp_updated"]:
                if column.split('.')[0] == 'author':
                    nested_column = column.split('.')[1]
                    review_dict[column] = review['author'].get(nested_column)
                else:
                    review_dict[column] = review.get(column)
            extracted_reviews.append(review_dict)
        return extracted_reviews

    def save_reviews(self, all_reviews, filename_prefix, format):
        for appid, (title, reviews) in all_reviews.items():
            if not reviews:
                print(f"No reviews to save for App ID {appid} ({title}).")
                continue

            safe_title = self._sanitize_title(title)  # Sanitize title for filename
            lower_safe_title = safe_title.lower()  # Convert to lowercase for the filename
            if format == "csv":
                self._save_reviews_as_csv(appid, lower_safe_title, reviews, filename_prefix, title)
            elif format == "json":
                self._save_reviews_as_json(appid, lower_safe_title, reviews, filename_prefix, title)
            else:
                print("Invalid format. Please specify 'csv' or 'json'.")

    def _sanitize_title(self, title):
        """Remove special characters from the title for safe filename."""
        return re.sub(r'[<>:"/\\|?*]', '', title).replace("'", "").replace(" ", "_")

    def _save_reviews_as_csv(self, appid, lower_safe_title, reviews, filename_prefix, title):
        keys = ['appid', 'timestamp_query', 'title', 'steamid', 'review', 'voted_up', 'timestamp_created']  # Update the keys
        filename = os.path.join(self.data_dir, f"{filename_prefix}_{lower_safe_title}_{appid}_reviews.csv")
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(keys)  # Write the header
            for review in reviews:
                row = [
                    appid,  # App ID as the first field
                    int(time.time()),  # Current epoch timestamp as the second field
                    title,  # Keep the original title
                    review['author']['steamid'],
                    review.get('review', 'No text'),
                    'Positive' if review['voted_up'] else 'Negative',
                    review['timestamp_created'],
                ]
                writer.writerow(row)
        print(f"Reviews for App ID {appid} ({title}) saved to {filename}")

    def _save_reviews_as_json(self, appid, lower_safe_title, reviews, filename_prefix, title):
        filename = os.path.join(self.data_dir, f"reviews_{lower_safe_title}.json")
        extracted_reviews = self._extract_columns_to_save(reviews, appid, title)  # Use original title here
        with open(filename, 'w', encoding='utf-8') as jsonfile:
            json.dump(extracted_reviews, jsonfile, indent=4)
        print(f"Reviews for App ID {appid} ({title}) saved to {filename}")


# Example usage
if __name__ == "__main__":
    appids_with_titles = [
        ("2322010", "God of War: Ragnarok"),
        ("1086940", "Baldur's Gate 3"),
        ("1680880", "Forspoken"),
        ("1496790", "Gotham Knights"),
        ("315210", "Suicide Squad: Kill the Justice League"),
        ("2443720", "Concord"),
        ("2208920", "Assassin's Creed Valhalla"),
        ("1817070", "Marvel’s Spider-Man Remastered"),
        ("1832040", "Flintlock: The Siege of Dawn"),
        ("2698940", "The Crew Motorfest"),
        ("2702430", "Usual June"),
        ("1545560", "Shadow Gambit: The Cursed Crew"),
        ("794540", "Neo Cab"),
        ("721180", "Dustborn"),
        ("1477940", "Unknown 9: Awakening"),
        ("2239550", "Watch Dogs: Legion"),
        ("447040", "Watch_Dogs 2"),
        ("243470", "Watch_Dogs"),
        ("582160", "Assassin's Creed Origin"),
        ("812140", "Assassin's Creed Odyssey"),
        ("552520", "Far Cry 5"),
        ("2369390", "Far Cry 6"),
        ("304390", "FOR HONOR"),
        ("2842040", "Star Wars Outlaws"),
        
    ]  # List of tuples containing app IDs and their titles
    review_fetcher = SteamReviewFetcher(appids_with_titles)

    num_reviews = 3000  # Specify the number of reviews to fetch
    print_reviews_flag = False  # Set to False to turn off printing reviews

    all_reviews = review_fetcher.get_reviews(num_reviews, print_reviews=print_reviews_flag)

    # Save reviews to file
    review_fetcher.save_reviews(all_reviews, "reviews", format="json")


Reviews for App ID 2322010 (God of War: Ragnarok) saved to /home/jovyan/reviews-assistant/data/reviews/reviews_god_of_war_ragnarok.json
Reviews for App ID 1086940 (Baldur's Gate 3) saved to /home/jovyan/reviews-assistant/data/reviews/reviews_baldurs_gate_3.json
Reviews for App ID 1680880 (Forspoken) saved to /home/jovyan/reviews-assistant/data/reviews/reviews_forspoken.json
Reviews for App ID 1496790 (Gotham Knights) saved to /home/jovyan/reviews-assistant/data/reviews/reviews_gotham_knights.json
Reviews for App ID 315210 (Suicide Squad: Kill the Justice League) saved to /home/jovyan/reviews-assistant/data/reviews/reviews_suicide_squad_kill_the_justice_league.json
Reviews for App ID 2443720 (Concord) saved to /home/jovyan/reviews-assistant/data/reviews/reviews_concord.json
Reviews for App ID 2208920 (Assassin's Creed Valhalla) saved to /home/jovyan/reviews-assistant/data/reviews/reviews_assassins_creed_valhalla.json
Reviews for App ID 1817070 (Marvel’s Spider-Man Remastered) saved to /

# Ingestion

In [5]:
# Directory containing the data files
data_dir = os.path.abspath('../reviews-assistant/data/reviews')

# Initialize an empty list to hold all reviews
reviews = []

# List objects in the directory
objects_in_directory = os.listdir(data_dir)

# Iterate over the files in the directory
for obj in objects_in_directory:
    if obj.endswith('.json'):  # Check if the file is a JSON file
        file_path = os.path.join(data_dir, obj)
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            # Load the reviews from the JSON file
            file_reviews = json.load(jsonfile)
            reviews.extend(file_reviews)  # Append reviews to the main list
# Print the first i reviews
i = 2  # Change this to print more reviews if needed
for review in reviews[:i]:
    print(f"Author ID: {review['author.steamid']}")
    print(f"Review: {review.get('review', 'No text')}")
    print(f"Timestamp Created: {review['timestamp_created']}")
    print("-" * 79)

Author ID: 76561198420943538
Review: ---{ Graphics }---
✅ You forget what reality is
☐ Beautiful
☐ Good
☐ Decent
☐ Bad
☐ You will get eye cancer
☐ Get a pepper spray for your eye instead

---{ Gameplay }---
☐ Won’t ever touch any other game anymore
✅ Very good
☐ Good
☐ It's just gameplay
☐ Mehh
☐ Watch paint dry instead
☐ Tic Tac toe is better

---{ Audio }---
☐ Eargasm
✅ Very good
☐ Good
☐ Not too bad
☐ Bad
☐ I'm now deaf

---{ Audience }---
☐ Kids
✅Teens
✅ Adults
☐ Grandma

---{ PC Requirements }---
☐ Check if you can run paint
☐ Potato
☐ Decent
✅ Fast
☐ Rich boi
☐ Ask NASA if they have a spare computer
☐ Search the galaxy for dark matter fuel to run

---{ Difficulty }---
☐ Just press 'W'
☐ Easy
✅ Easy to learn / Hard to master
☐ Significant brain usage
☐ Difficult
☐ Dark Souls

---{ Grind }---
☐ Nothing to grind
☐ Only if u care about leaderboards/ranks
✅ Isn't necessary to progress
☐ Average grind level
☐ Too much grind
☐ You'll need a second life for grinding

---{ Story }---
☐ No

In [6]:
len(reviews)

1785

In [7]:
reviews[0]

{'appid': '812140',
 'timestamp_query': 1727557020,
 'title': "Assassin's Creed Odyssey",
 'recommendationid': '174160338',
 'author.steamid': '76561198420943538',
 'author.playtimeforever': None,
 'author.playtime_last_two_weeks': 0,
 'author.playtime_at_review': 5599,
 'author.last_played': 1696845057,
 'language': 'english',
 'review': "---{ Graphics }---\n✅ You forget what reality is\n☐ Beautiful\n☐ Good\n☐ Decent\n☐ Bad\n☐ You will get eye cancer\n☐ Get a pepper spray for your eye instead\n\n---{ Gameplay }---\n☐ Won’t ever touch any other game anymore\n✅ Very good\n☐ Good\n☐ It's just gameplay\n☐ Mehh\n☐ Watch paint dry instead\n☐ Tic Tac toe is better\n\n---{ Audio }---\n☐ Eargasm\n✅ Very good\n☐ Good\n☐ Not too bad\n☐ Bad\n☐ I'm now deaf\n\n---{ Audience }---\n☐ Kids\n✅Teens\n✅ Adults\n☐ Grandma\n\n---{ PC Requirements }---\n☐ Check if you can run paint\n☐ Potato\n☐ Decent\n✅ Fast\n☐ Rich boi\n☐ Ask NASA if they have a spare computer\n☐ Search the galaxy for dark matter fuel to

In [8]:
reviews[-1]

{'appid': '2239550',
 'timestamp_query': 1727557020,
 'title': 'Watch Dogs: Legion',
 'recommendationid': '174862159',
 'author.steamid': '76561199371555316',
 'author.playtimeforever': None,
 'author.playtime_last_two_weeks': 1,
 'author.playtime_at_review': 109,
 'author.last_played': 1726993288,
 'language': 'english',
 'review': 'meh',
 'timestamp_created': 1726233314,
 'timestamp_updated': 1726233314}

In [9]:
index = minsearch.Index(
    # text_fields=["author.steamid", "author.playtimeforever", "author.playtime_last_two_weeks", "author.playtime_at_review", "author.last_played", "language", "review", "timestamp_created", "timestamp_updated"],
    text_fields=["title", "language", "review"],
    keyword_fields=["appid", "recommendationid"]
)

In [10]:
index.fit(reviews)

<minsearch.Index at 0x7fa296214ed0>

# RAG flow

In [11]:
from openai import OpenAI

client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # this is also the default, it can be omitted
)

In [12]:
def search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [13]:
prompt_template = """
You're a conservative father of young children who is not aware of how the modern gaming industry works, who is not up to date with the titles released on a daily basis.
A father who needs to keep his child from not suitable games, including DEI games.
DEI ideology, which enforces Diversity, equity, and inclusion (DEI) hurts creativity, and uses corporate propaganda to improve sales.
Answer the QUESTION based on the CONTEXT from our exercises database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

entry_template = """
title: {title}
language: {language}
review: {review}
""".strip()

def build_prompt(query, search_results):
    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [14]:
def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [15]:
def rag(query, model='gpt-4o-mini'):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    #print(prompt)
    answer = llm(prompt, model=model)
    return answer

In [16]:
question = "Is Baldur's Gate 3 a game for kids?"
answer = rag(question)
print(answer)

Baldur's Gate 3 is not considered suitable for children. The game features complex themes, engaging narratives, and mature content, including elements characteristic of role-playing games (RPGs) that might not be appropriate for younger audiences. It’s designed for players who can appreciate intricate storytelling, character development, and strategic gameplay typical of Western CRPGs, which may not align with the interests or maturity levels of children. Therefore, as a parent, you might want to look for games that are specifically designed for younger players that offer age-appropriate content.


In [17]:
question = "Is Baldur's Gate 3 a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Baldur's Gate 3 does incorporate elements that could be associated with diversity, equity, and inclusion (DEI) concepts, particularly through its character diversity and rich storytelling. However, the primary focus of the game appears to be on delivering a high-quality RPG experience that emphasizes player choice, exploration, and immersive storytelling.

While some reviews highlight the variety of characters and the engaging narrative, there isn't a clear indication that the game is built around DEI ideology as a central theme or that it enforces corporate propaganda tied to these concepts. Instead, it seems to prioritize creativity and player engagement through a well-crafted blend of structured storytelling and sandbox gameplay.

If your concern is mainly about keeping your child away from games that heavily focus on DEI themes, Baldur's Gate 3 seems more about traditional RPG mechanics and storytelling rather than being an overt representation of DEI ideology. Thus, it may not ali

In [18]:
question = "Is God of War Ragnarök a game for kids?"
answer = rag(question)
print(answer)

No, God of War Ragnarök is not suitable for kids. The game contains mature themes, a complex narrative, and elements that lean into 'woke' storytelling. The review comments suggest that certain plot lines and character decisions may feel forced and detract from the overall experience, which may not be appropriate for younger audiences. Overall, the game is designed for an older audience and involves violence, intense themes, and storytelling that may not align with the interests or maturity levels of children.


In [19]:
question = "Is God of War Ragnarök a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the provided context, "God of War Ragnarök" does appear to incorporate elements associated with DEI concepts. The review mentions that the game has a "woke tone" and highlights that the developers collaborated with a company known for inclusive storytelling. Some players felt that certain plotlines and character decisions seemed forced, which detracted from the overall experience. This suggests that there are indeed aspects of the game that align with DEI ideology, potentially compromising the creative storytelling that is typically expected in such narrative-driven games. Therefore, it can be concluded that "God of War Ragnarök" follows DEI concepts to some degree.


In [20]:
question = "Is Far Cry 6 a game for kids?"
answer = rag(question)
print(answer)

Far Cry 6 is not suitable for kids. The game is part of the Far Cry franchise, known for its action-oriented gameplay, which includes shooting and looting mechanics. While the gameplay may be engaging, it typically involves violence, making it more appropriate for older players. Additionally, the reviews suggest that the storytelling and character interactions, which can be mature in nature, may not be designed with a younger audience in mind. Therefore, it would be wise to keep this game away from children.


In [21]:
question = "Is Far Cry 6 a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the provided reviews of *Far Cry 6*, there is no direct mention of the game embracing DEI (Diversity, Equity, and Inclusion) concepts. The reviews primarily focus on gameplay mechanics, story immersion, and comparisons with previous titles in the *Far Cry* series. Many comments express that the game retains the classic *Far Cry* gameplay with some experimentation and improvements, but they do not highlight any specific adherence to DEI ideologies or practices.

Overall, the consensus seems to indicate that *Far Cry 6* is more about continuing the established gameplay style and less about introducing new narratives or ideologies, including those related to DEI. As such, there is no clear indication that this game would be categorized as promoting DEI concepts.


In [22]:
question = "Is Watch Dogs: Legion a game for kids?"
answer = rag(question)
print(answer)

Based on the available reviews for Watch Dogs: Legion, it does not appear to be a suitable game for children. The reviews suggest that the game tackles themes and gameplay that are more mature in nature, with comments about its complexity and lack of appeal compared to earlier titles in the series. Additionally, there is mention of the game's potential shortcomings, which may not align with a child's gaming experience.

As a conservative father, it's essential to consider the overall themes and gameplay mechanics of a title like Watch Dogs: Legion, which may not provide the wholesome entertainment you might desire for your young children. It may be wiser to look for games that are specifically designed for younger audiences, ensuring age-appropriate content and values.


In [23]:
question = "Is Watch Dogs: Legion a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the context provided, "Watch Dogs: Legion" features a unique gameplay mechanic called "Play as Anyone," which allows players to recruit and control any non-playable character (NPC) in the game, each with their own abilities and backstory. This mechanic introduces a variety of characters, encouraging players to explore diverse skill sets and strategies throughout their missions.

While the game does showcase a range of character backgrounds—thereby promoting a sense of diversity—the reviews primarily focus on gameplay mechanics, creativity, and user experience rather than explicitly addressing diversity, equity, and inclusion (DEI) principles as corporate ideology. Thus, while it may reflect elements of diversity through its character options, whether it aligns with DEI concepts as ideological frameworks or propaganda isn't clearly laid out in the reviews.

Therefore, it's reasonable to conclude that "Watch Dogs: Legion" incorporates diversity through its gameplay features but 

In [24]:
question = "Is Forspoken a game for kids?"
answer = rag(question)
print(answer)

Based on the provided context, *Forspoken* may not be suitable for kids. While specific content ratings and triggers are not mentioned, reviews indicate that the game has elements like a "reluctant hero" whose character attributes may not resonate well with younger audiences. Additionally, the dialogue is described as "campy" and "cringy," which might not align with the values you wish to promote for your children.

Furthermore, while the reviews praise the gameplay mechanics, such as fun traversal and combat, they also suggest that the story may have darker themes or elements that could be better suited for older players. Since you are concerned about DEI ideologies and the impact of gaming content, it might be wise to explore other titles that have clearer family-friendly values and themes.


In [25]:
question = "Is Forspoken a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the provided context, there is no clear indication that "Forspoken" specifically follows DEI concepts, such as diversity, equity, and inclusion, or that it was created with a strong DEI ideology in mind. 

While the game features a mostly female cast and promotes a unique gameplay experience, the reviews primarily focus on aspects like gameplay mechanics, graphics, and personal enjoyment. The storytelling and character development are noted as areas of improvement or personal opinion. The reviews do not explicitly link the game to DEI principles, nor do they articulate that it is part of any corporate propaganda to enforce such ideologies.

If you're concerned about games that promote DEI concepts, you might find it crucial to research titles based on their narratives and reviews to determine their suitability for your children. It's always good to consider the content and themes in any game before allowing your kids to play.
