In [1]:
2+2

4

In [2]:
# Steam documentation for the appreviews (getreviews) endpoint queried below:
# https://partner.steamgames.com/doc/store/getreviews

In [26]:
import requests
import json
import csv
import sys
import os
import time  # Import time for the epoch timestamp
import re  # Import re for regex


# Absolute location of the project's scripts directory, resolved against the
# current working directory (presumably where the minsearch module imported
# right after this lives - confirm).
script_path = os.path.abspath('../reviews-assistant/scripts')

# Register the directory for imports only once, so re-running the cell does
# not pile up duplicate sys.path entries.
if sys.path.count(script_path) == 0:
    sys.path.append(script_path)

import minsearch


class SteamReviewFetcher:
    """Download Steam user reviews for a list of apps and save them as CSV or JSON.

    Reviews are read from the store's ``appreviews`` endpoint, documented at
    https://partner.steamgames.com/doc/store/getreviews.
    """

    # Per the Steam documentation a single request returns at most 100 reviews,
    # so larger requests are split into pages chained through ``cursor``.
    MAX_REVIEWS_PER_PAGE = 100

    # Seconds to wait for the store to answer before a request is abandoned.
    REQUEST_TIMEOUT = 30

    # Saved column suffix -> field name inside the API's "author" object, for
    # the columns whose saved name differs from the API name.
    _AUTHOR_FIELD_ALIASES = {"playtimeforever": "playtime_forever"}

    def __init__(self, appids_with_titles, filter="all", language="english", day_range=30, review_type="all", purchase_type="all"):
        """
        Initializes the SteamReviewFetcher with the required parameters.

        Also creates the output directory ``../reviews-assistant/data/reviews``
        (resolved against the current working directory) when it is missing.

        :param appids_with_titles: List of tuples containing Steam application IDs and their corresponding titles.
                                   A single non-list value is wrapped in a list.
        :param filter: Type of review filter.
        :param language: Language of the reviews.
        :param day_range: Number of days to consider for reviews.
        :param review_type: Type of review (all or specific).
        :param purchase_type: Type of purchase.
        """
        self.base_url = "https://store.steampowered.com/appreviews/"
        self.appids_with_titles = appids_with_titles if isinstance(appids_with_titles, list) else [appids_with_titles]
        self.filter = filter
        self.language = language
        self.day_range = day_range
        self.review_type = review_type
        self.purchase_type = purchase_type
        self.data_dir = os.path.abspath('../reviews-assistant/data/reviews')

        # Ensure the data directory exists
        os.makedirs(self.data_dir, exist_ok=True)

    def _construct_url(self, appid):
        """Return the JSON review endpoint URL for ``appid``."""
        return f"{self.base_url}{appid}?json=1"

    def _fetch_reviews(self, appid, num_reviews, cursor="*"):
        """
        Fetch one page of reviews for ``appid``.

        :param appid: Steam application ID.
        :param num_reviews: Value sent as ``num_per_page``.
        :param cursor: Pagination cursor; ``"*"`` requests the first page, later
                       pages use the ``cursor`` value returned by the previous response.
        :return: The decoded JSON response.
        :raises requests.HTTPError: If the store answers with an HTTP error status.
        """
        url = self._construct_url(appid)
        params = {
            "filter": self.filter,
            "language": self.language,
            "day_range": self.day_range,
            "review_type": self.review_type,
            "purchase_type": self.purchase_type,
            "num_per_page": num_reviews,
            "cursor": cursor,
        }
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.json()

    def _collect_reviews(self, appid, num_reviews):
        """Page through the endpoint until ``num_reviews`` reviews are collected or none are left."""
        collected = []
        seen_ids = set()
        seen_cursors = set()
        cursor = "*"

        while len(collected) < num_reviews:
            seen_cursors.add(cursor)
            page_size = min(self.MAX_REVIEWS_PER_PAGE, num_reviews - len(collected))
            review_data = self._fetch_reviews(appid, page_size, cursor)
            batch = review_data.get("reviews", [])
            if not batch:
                break

            added = 0
            for review in batch:
                review_id = review.get("recommendationid")
                if review_id is not None:
                    if review_id in seen_ids:
                        continue
                    seen_ids.add(review_id)
                collected.append(review)
                added += 1

            # A page that brings nothing new means the listing is exhausted
            # (or repeating), so stop instead of looping on it.
            if added == 0:
                break

            cursor = review_data.get("cursor")
            if not cursor or cursor in seen_cursors:
                break

        return collected[:num_reviews]

    def get_reviews(self, num_reviews=20, print_reviews=True):
        """
        Fetch up to ``num_reviews`` reviews for every configured app.

        :param num_reviews: Maximum number of reviews to collect per app; values above
                            the per-request limit are fetched over several pages.
        :param print_reviews: When true, print the first and last reviews of each app.
        :return: Dict mapping app ID to a ``(title, reviews)`` tuple.
        """
        all_reviews = {}
        for appid, title in self.appids_with_titles:
            reviews = self._collect_reviews(appid, num_reviews)
            all_reviews[appid] = (title, reviews)  # Store title along with reviews

            if print_reviews:
                self.print_first_last_reviews(appid, title, reviews)

        return all_reviews

    def print_first_last_reviews(self, appid, title, reviews):
        """Print the first five and the last five reviews (they overlap when fewer than ten exist)."""
        total_reviews = len(reviews)
        if total_reviews == 0:
            print(f"No reviews found for App ID {appid} ({title}).")
            return

        print(f"\nFirst 5 Reviews for App ID {appid} ({title}):")
        for review in reviews[:5]:
            self._print_review(review)

        print(f"\nLast 5 Reviews for App ID {appid} ({title}):")
        for review in reviews[-5:]:
            self._print_review(review)

    def _print_review(self, review):
        """Print author, text, rating and creation timestamp of one raw API review."""
        print(f"Author: {review['author']['steamid']}")
        print(f"Review: {review.get('review', 'No text')}")
        print(f"Rating: {'Positive' if review['voted_up'] else 'Negative'}")
        print(f"Timestamp: {review['timestamp_created']}")
        print("-" * 79)

    def _extract_columns_to_save(self, reviews, appid, title):
        """
        Flatten raw API reviews into the records written to JSON.

        Every record starts with ``appid``, ``timestamp_query`` (one epoch timestamp
        shared by the whole batch) and ``title``, followed by the selected review
        columns. ``author.*`` columns are read from the nested ``author`` object;
        missing values are stored as ``None``.

        :return: List of flat dicts, one per review.
        """
        extracted_reviews = []
        current_time = int(time.time())  # Get current epoch time
        for review in reviews:
            # Tolerate reviews without an author object instead of raising KeyError.
            author = review.get('author') or {}
            review_dict = {
                "appid": appid,
                "timestamp_query": current_time,
                "title": title,
            }
            for column in ["recommendationid", "author.steamid", "author.playtimeforever",
                           "author.playtime_last_two_weeks", "author.playtime_at_review",
                           "author.last_played", "language", "review", "timestamp_created", "timestamp_updated"]:
                prefix, _, nested_column = column.partition('.')
                if prefix == 'author' and nested_column:
                    # The saved column keeps its historical name, but the value is read
                    # from the field name the API actually uses (playtime_forever).
                    api_field = self._AUTHOR_FIELD_ALIASES.get(nested_column, nested_column)
                    review_dict[column] = author.get(api_field, author.get(nested_column))
                else:
                    review_dict[column] = review.get(column)
            extracted_reviews.append(review_dict)
        return extracted_reviews

    def save_reviews(self, all_reviews, filename_prefix, format):
        """
        Save the reviews of every app to its own file in the data directory.

        :param all_reviews: Dict as returned by :meth:`get_reviews`.
        :param filename_prefix: Leading part of each output file name.
        :param format: ``"csv"`` or ``"json"``; anything else prints a message and saves nothing.
        """
        for appid, (title, reviews) in all_reviews.items():
            if not reviews:
                print(f"No reviews to save for App ID {appid} ({title}).")
                continue

            safe_title = self._sanitize_title(title)  # Sanitize title for filename
            lower_safe_title = safe_title.lower()  # Convert to lowercase for the filename
            if format == "csv":
                self._save_reviews_as_csv(appid, lower_safe_title, reviews, filename_prefix, title)
            elif format == "json":
                self._save_reviews_as_json(appid, lower_safe_title, reviews, filename_prefix, title)
            else:
                print("Invalid format. Please specify 'csv' or 'json'.")

    def _sanitize_title(self, title):
        """Remove special characters from the title for safe filename."""
        return re.sub(r'[<>:"/\\|?*]', '', title).replace("'", "").replace(" ", "_")

    def _save_reviews_as_csv(self, appid, lower_safe_title, reviews, filename_prefix, title):
        """Write the reviews of one app to ``<prefix>_<title>_<appid>_reviews.csv``."""
        keys = ['appid', 'timestamp_query', 'title', 'steamid', 'review', 'voted_up', 'timestamp_created']
        filename = os.path.join(self.data_dir, f"{filename_prefix}_{lower_safe_title}_{appid}_reviews.csv")
        # One query timestamp for the whole file, matching the JSON output.
        current_time = int(time.time())
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(keys)  # Write the header
            for review in reviews:
                row = [
                    appid,
                    current_time,
                    title,  # Keep the original title
                    review['author']['steamid'],
                    review.get('review', 'No text'),
                    'Positive' if review['voted_up'] else 'Negative',
                    review['timestamp_created'],
                ]
                writer.writerow(row)
        print(f"Reviews for App ID {appid} ({title}) saved to {filename}")

    def _save_reviews_as_json(self, appid, lower_safe_title, reviews, filename_prefix, title):
        """Write the flattened reviews of one app to ``<prefix>_<title>.json``."""
        # Honour the caller's prefix; with the prefix "reviews" the file name is
        # the same as the previously hard-coded "reviews_<title>.json".
        filename = os.path.join(self.data_dir, f"{filename_prefix}_{lower_safe_title}.json")
        extracted_reviews = self._extract_columns_to_save(reviews, appid, title)  # Use original title here
        with open(filename, 'w', encoding='utf-8') as jsonfile:
            json.dump(extracted_reviews, jsonfile, indent=4)
        print(f"Reviews for App ID {appid} ({title}) saved to {filename}")


# Example usage: download reviews for two games and store them as JSON files
if __name__ == "__main__":
    # (Steam app ID, display title) pairs to download
    appids_with_titles = [
        ("2322010", "God of War: Ragnarok"),
        ("1086940", "Baldur's Gate 3"),
    ]
    num_reviews = 2000  # How many reviews to request per game
    print_reviews_flag = False  # True prints a preview of the fetched reviews

    review_fetcher = SteamReviewFetcher(appids_with_titles)
    all_reviews = review_fetcher.get_reviews(num_reviews=num_reviews, print_reviews=print_reviews_flag)

    # Persist everything that was fetched, one file per game
    review_fetcher.save_reviews(all_reviews, filename_prefix="reviews", format="json")


Reviews for App ID 2322010 (God of War: Ragnarok) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_god_of_war_ragnarok.json
Reviews for App ID 1086940 (Baldur's Gate 3) saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/reviews/reviews_baldurs_gate_3.json


# Ingestion

In [32]:
# Directory containing the data files
data_dir = os.path.abspath('../reviews-assistant/data/reviews')

# Collects the review records from every JSON file in data_dir
reviews = []

# os.listdir() returns names in arbitrary order, so sort them: the order of
# `reviews` (and of the preview printed below) is then the same on every run.
objects_in_directory = sorted(os.listdir(data_dir))

# Iterate over the files in the directory
for obj in objects_in_directory:
    if obj.endswith('.json'):  # Skip anything that is not a JSON file
        file_path = os.path.join(data_dir, obj)
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            # Each file is expected to hold a JSON list of review records
            file_reviews = json.load(jsonfile)
            reviews.extend(file_reviews)  # Append reviews to the main list

# Print the first i reviews as a quick sanity check
i = 2  # Change this to print more reviews if needed
for review in reviews[:i]:
    print(f"Author ID: {review['author.steamid']}")
    print(f"Review: {review.get('review', 'No text')}")
    print(f"Timestamp Created: {review['timestamp_created']}")
    print("-" * 79)

Author ID: 76561197996873645
Review: Awesome game, but I held off playing it because of the top-down/isometric camera - didn't like the camera in DOS2 either.  Then I found the "Native Camera Tweaks" mod on Nexus Mods.

This totally changes the game, allowing you to use standard 3rd person camera controls, and actually see what's ahead when moving up-hill.

It turns out all outdoor areas have sky boxes (including sun and moon), and almost all interior areas have ceilings.  Trust me, you'll hardly notice the few times a ceiling is missing or "pops in", unless you're looking for it.  And there's no performance hit that I've noticed.

Makes me wander why Larian didn't give an unlocked camera by default...

Also you have full control over zoom/pitch/FoV/invert axis/dead zones etc through the mod's configuration file.

[b][i]EDIT:  The mod has been updated (8 SEP 24) to work with patch 7.  Many thanks to Ersh for updating it so promptly!
Timestamp Created: 1724579126
-----------------------

In [34]:
# Only the human-readable columns are passed as text fields; the author.* and
# timestamp_* columns of the saved records are left out of the index.
index = minsearch.Index(
    keyword_fields=["appid", "recommendationid"],
    text_fields=["title", "language", "review"],
)

In [35]:
# Fit the index on the loaded review records; as the cell output shows,
# fit() hands back the Index object itself.
index.fit(reviews)

<minsearch.Index at 0x7f078c67f6d0>

# RAG flow

In [36]:
# print(os.environ['OPENAI_API_KEY'])

In [37]:
from openai import OpenAI

# The key is looked up in the environment explicitly, so a missing
# OPENAI_API_KEY raises KeyError right here in this cell.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])


In [38]:
def search(query, num_results=10, filter_dict=None, boost_dict=None):
    """
    Search the review index for records matching ``query``.

    :param query: Free-text search query.
    :param num_results: Maximum number of results to request from the index.
    :param filter_dict: Optional filter passed to the index as ``filter_dict``,
                        e.g. ``{"appid": "1086940"}``; no filter when omitted.
    :param boost_dict: Optional boosts passed to the index as ``boost_dict``;
                       no boosts when omitted.
    :return: Whatever ``index.search`` returns for these arguments.
    """
    results = index.search(
        query=query,
        # Fresh dicts per call, so no mutable default is shared between calls.
        filter_dict=filter_dict if filter_dict is not None else {},
        boost_dict=boost_dict if boost_dict is not None else {},
        num_results=num_results,
    )

    return results

In [39]:
# Prompt sent to the LLM: persona, instructions, the user's question and the
# retrieved reviews as context.
prompt_template = """
You're a conservative father of young children who is not aware of how the modern gaming industry works, who is not up to date with the titles released on a daily basis.
Answer the QUESTION based on the CONTEXT from our game reviews database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# Rendering of a single retrieved review inside the CONTEXT section.
entry_template = """
title: {title}
language: {language}
review: {review}
""".strip()

def build_prompt(query, search_results):
    """
    Build the LLM prompt for ``query`` from the retrieved review records.

    :param query: The user's question.
    :param search_results: Iterable of dicts that each provide at least the
                           ``title``, ``language`` and ``review`` keys.
    :return: The filled-in prompt with surrounding whitespace stripped.
    :raises KeyError: If a record lacks one of the keys used by the entry template.
    """
    # Entries are separated by a blank line; the final strip() makes the result
    # identical to appending "\n\n" after every entry.
    context = "\n\n".join(entry_template.format(**doc) for doc in search_results)

    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [40]:
def llm(prompt, model='gpt-4o-mini'):
    """Send ``prompt`` as a single user message to ``model`` and return the reply text.

    Uses the module-level ``client``; the text is taken from the first choice
    of the chat completion.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(model=model, messages=[user_message])
    first_choice = completion.choices[0]
    return first_choice.message.content

In [41]:
def rag(query, model='gpt-4o-mini'):
    """Answer ``query`` from retrieved reviews.

    Retrieves matching records with ``search``, turns them into a prompt with
    ``build_prompt`` and returns the reply ``llm`` produces with ``model``.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved), model=model)

In [42]:
# Probe: age-appropriateness of Baldur's Gate 3. rag() retrieves matching
# reviews and lets the LLM answer from them.
question = "Is Baldur's Gate 3 a game for kids?"
answer = rag(question)
print(answer)

Baldur's Gate 3 is likely not suitable for young children. The game is described as an RPG with complex storytelling, character interactions, and themes that may not be appropriate for kids. Reviews mention encounters with mature content, including dark themes like murder cults and demons, as well as romance elements that reference adult relationships. The writing is noted for its depth and intricacy, which may be challenging for younger players. Additionally, the game is rooted in Dungeons & Dragons mechanics, which often handle adult themes and moral complexities. Therefore, as a conservative father, you might want to consider other games that are specifically designed for children.


In [43]:
# Probe: what the retrieved Baldur's Gate 3 reviews say about diversity,
# equity and inclusion.
question = "Is Baldur's Gate 3 a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the provided context, there isn't any direct mention of Baldur's Gate 3 adhering specifically to Diversity, Equity, and Inclusion (DEI) concepts. However, the game is noted for its inclusivity in character creation, with a wide range of classes and races, allowing for in-depth customization. Players can choose various character backgrounds, which impacts the game world and how NPCs react, suggesting a level of diversity in gameplay and character representation. Ultimately, while the game may promote diverse character options and narratives, there is no explicit indication that it is designed around DEI principles as a central theme.


In [44]:
# Probe: age-appropriateness of God of War Ragnarök.
# NOTE(review): the title saved with the reviews is spelled "God of War: Ragnarok"
# (no umlaut); whether the index matches "Ragnarök" against it depends on how
# minsearch tokenizes text - confirm.
question = "Is God of War Ragnarök a game for kids?"
answer = rag(question)
print(answer)

God of War Ragnarök is not typically considered a game for kids. It is known for its complex story, mature themes, and intense combat, which might not be suitable for younger audiences. The game includes elements that some may describe as "woke," and it has been noted that it leans into more serious narrative issues that may be challenging for children to understand. Furthermore, the game's design appeals more to adult gamers and older teens who are familiar with the previous titles in the series. Overall, it is advisable for parents to review the game's content and themes before deciding if it is appropriate for their children.


In [45]:
# Probe: what the retrieved God of War Ragnarök reviews say about diversity,
# equity and inclusion.
# NOTE(review): the title saved with the reviews is spelled "God of War: Ragnarok"
# (no umlaut); whether the index matches "Ragnarök" against it depends on how
# minsearch tokenizes text - confirm.
question = "Is God of War Ragnarök a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the context provided, it appears that "God of War Ragnarök" does incorporate elements associated with DEI (diversity, equity, and inclusion) concepts. The review mentions that the developers collaborated with a company known for inclusive storytelling, which indicates a focus on inclusivity in the game's narrative and character development. However, there are criticisms from some players about how these elements were implemented, with claims that certain plotlines and character decisions felt forced or detracted from the overall narrative experience.

Overall, while there are influences of DEI concepts in "God of War Ragnarök," the reception seems mixed, with some players appreciating the effort and others finding it overdone or not well integrated.


# Retrieval evaluation

In [None]:
# df_question = pd.read_csv('../data/ground-truth-retrieval.csv')