In [1]:
# Scratch cell: presumably a quick check that the kernel responds -- safe to delete.
2+2

4

In [58]:
# Steam user-reviews API reference (endpoint used below): https://partner.steamgames.com/doc/store/getreviews

In [2]:
import sys
import os

# Absolute location of the sibling project's scripts folder
# (resolved relative to the notebook's working directory).
script_path = os.path.abspath('../reviews-assistant/scripts')

# Register the folder only once, so re-running this cell does not
# keep appending duplicates to sys.path.
if not (script_path in sys.path):
    sys.path.append(script_path)

# minsearch.py is expected to sit in that folder; the import works from here on.
import minsearch

In [3]:
import requests
import json
import csv
from datetime import datetime

class SteamReviewFetcher:
    """
    Client for the Steam Store ``appreviews`` endpoint.

    Downloads user reviews for a single application and can print a preview
    of them or persist them as CSV / JSON. Reviews are handled as the plain
    dictionaries found under the ``"reviews"`` key of the endpoint's response.
    """

    # The getreviews documentation gives 100 as the largest num_per_page the
    # endpoint honours; bigger totals are collected page by page via the cursor.
    MAX_REVIEWS_PER_PAGE = 100

    # Seconds to wait on the HTTP request before giving up (requests has no
    # default timeout and would otherwise block indefinitely).
    REQUEST_TIMEOUT = 30

    def __init__(self, appid, filter="all", language="english", day_range=30, review_type="all", purchase_type="all"):
        """
        Initializes the SteamReviewFetcher with the required parameters.

        :param appid: Steam application ID for the game.
        :param filter: Type of review filter.
        :param language: Language of the reviews.
        :param day_range: Number of days to consider for reviews.
        :param review_type: Type of review (all or specific).
        :param purchase_type: Type of purchase.
        """
        self.base_url = "https://store.steampowered.com/appreviews/"
        self.appid = appid
        self.filter = filter
        self.language = language
        self.day_range = day_range
        self.review_type = review_type
        self.purchase_type = purchase_type

    def _construct_url(self):
        """
        Constructs the URL for the API request.

        :return: Full URL for the API request.
        """
        return f"{self.base_url}{self.appid}?json=1"

    def _fetch_reviews(self, num_reviews, cursor="*"):
        """
        Fetches one page of reviews from the Steam Store API.

        :param num_reviews: Number of reviews to ask for in this request
            (sent as ``num_per_page``).
        :param cursor: Paging cursor; ``"*"`` requests the first page, later
            pages use the ``cursor`` value of the previous response.
        :return: JSON response containing the reviews.
        :raises: HTTPError if the API request fails; a requests timeout
            exception if the server does not answer in time.
        """
        url = self._construct_url()
        params = {
            "filter": self.filter,
            "language": self.language,
            "day_range": self.day_range,
            "review_type": self.review_type,
            "purchase_type": self.purchase_type,
            "num_per_page": num_reviews,
            "cursor": cursor,
        }
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        return response.json()

    def get_reviews(self, num_reviews=20, print_reviews=True):
        """
        Retrieves up to ``num_reviews`` reviews, paging through the API as needed.

        Requests are issued in pages of at most ``MAX_REVIEWS_PER_PAGE``.
        Paging stops when enough reviews were collected, a page is empty,
        a page brings no review that was not already seen, or the cursor is
        missing or repeats.

        :param num_reviews: Total number of reviews to fetch.
        :param print_reviews: Whether to print the reviews (default is True).
        :return: List of reviews (at most ``num_reviews`` long).
        """
        reviews = []
        seen_ids = set()
        visited_cursors = set()
        cursor = "*"

        while len(reviews) < num_reviews and cursor and cursor not in visited_cursors:
            visited_cursors.add(cursor)
            page_size = min(num_reviews - len(reviews), self.MAX_REVIEWS_PER_PAGE)
            review_data = self._fetch_reviews(page_size, cursor=cursor)
            page = review_data.get("reviews") or []
            if not page:
                break

            added = 0
            for review in page:
                review_id = review.get("recommendationid")
                if review_id is not None:
                    # Pages may overlap; keep each recommendation only once.
                    if review_id in seen_ids:
                        continue
                    seen_ids.add(review_id)
                reviews.append(review)
                added += 1
                if len(reviews) >= num_reviews:
                    break

            if added == 0:
                # Nothing new on this page: further paging would only loop.
                break
            cursor = review_data.get("cursor")

        if print_reviews:
            self.print_first_last_reviews(reviews)

        return reviews

    @staticmethod
    def _print_review(review):
        """
        Prints author, text, rating and creation timestamp of one raw review,
        followed by a separator line.

        :param review: Review dictionary as returned by the API.
        """
        print(f"Author: {review['author']['steamid']}")
        print(f"Review: {review.get('review', 'No text')}")
        print(f"Rating: {'Positive' if review['voted_up'] else 'Negative'}")
        print(f"Timestamp: {review['timestamp_created']}")
        print("-" * 79)

    def print_first_last_reviews(self, reviews):
        """
        Prints the first and last five reviews.

        With fewer than ten reviews the two groups overlap, so some reviews
        are printed twice.

        :param reviews: List of reviews to print.
        """
        if len(reviews) == 0:
            print("No reviews found.")
            return

        print("\nFirst 5 Reviews:")
        for review in reviews[:5]:
            self._print_review(review)

        print("\nLast 5 Reviews:")
        for review in reviews[-5:]:
            self._print_review(review)

    def _extract_columns_to_save(self, reviews, columns):
        """
        Extracts specific columns from the reviews for saving.

        A column written as ``"author.<key>"`` is read from the nested
        ``author`` dictionary; every other column is read from the review
        itself. Missing values are stored as ``None``.

        :param reviews: List of reviews to extract from.
        :param columns: List of columns to extract, or ``None`` to keep every
            field of every review unchanged.
        :return: List of dictionaries containing the extracted data.
        """
        if columns is None:
            # No selection requested: keep the full review (shallow copy so the
            # caller's list of dictionaries is not aliased).
            return [dict(review) for review in reviews]

        extracted_reviews = []
        for review in reviews:
            review_dict = {}
            for column in columns:
                parts = column.split('.')
                if parts[0] == 'author' and len(parts) > 1:
                    author = review.get('author') or {}
                    review_dict[column] = author.get(parts[1])
                else:
                    review_dict[column] = review.get(column)
            extracted_reviews.append(review_dict)
        return extracted_reviews

    def save_reviews(self, reviews, filename, format, columns=None):
        """
        Saves the reviews to a file in the specified format.

        :param reviews: List of reviews to save.
        :param filename: Name of the file to save the reviews to.
        :param format: Format to save the reviews ('csv' or 'json').
        :param columns: Optional list of columns to save (used in JSON only;
            ``None`` saves every field). The CSV layout is fixed.
        """
        if not reviews:
            print("No reviews to save.")
            return

        if format == "csv":
            keys = ['steamid', 'review', 'voted_up', 'timestamp_created']
            with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(keys)  # Write the header
                for review in reviews:
                    writer.writerow([
                        review['author']['steamid'],
                        review.get('review', 'No text'),
                        'Positive' if review['voted_up'] else 'Negative',
                        review['timestamp_created'],
                    ])
            print(f"Reviews saved to {filename}")
        elif format == "json":
            extracted_reviews = self._extract_columns_to_save(reviews, columns)
            with open(filename, 'w', encoding='utf-8') as jsonfile:
                json.dump(extracted_reviews, jsonfile, indent=4)
            print(f"Reviews saved to {filename}")
        else:
            print("Invalid format. Please specify 'csv' or 'json'.")

    def save_reviews_as_json_with_columns(self, reviews, columns=None, filename_prefix="baldurs_gate_reviews"):
        """
        Saves the reviews as a timestamped JSON file in the working directory.

        The file is named ``<filename_prefix>_<YYYYmmdd_HHMMSS>.json``.

        :param reviews: List of reviews to save.
        :param columns: Optional list of columns to extract and save
            (``None`` saves every field).
        :param filename_prefix: Leading part of the generated file name.
        """
        if not reviews:
            print("No reviews to save.")
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{filename_prefix}_{timestamp}.json"

        # Reuse the JSON branch of save_reviews instead of duplicating it.
        self.save_reviews(reviews, filename, "json", columns)


# Example usage
if __name__ == "__main__":
    appid = "1086940"  # Baldur's Gate 3
    review_fetcher = SteamReviewFetcher(appid)

    num_reviews = 50  # Total number of reviews requested from the fetcher
    print_reviews_flag = False  # True prints the first/last five reviews after fetching

    # Fields written to the JSON dump; "author.<key>" selects a key of the
    # nested author object. Steam names the total-playtime field
    # "playtime_forever" (with an underscore): the earlier spelling
    # "author.playtimeforever" matched nothing and was always saved as null.
    columns = [
        "recommendationid",
        "author.steamid",
        "author.playtime_forever",
        "author.playtime_last_two_weeks",
        "author.playtime_at_review",
        "author.last_played",
        "language",
        "review",
        "timestamp_created",
        "timestamp_updated",
    ]

    reviews = review_fetcher.get_reviews(num_reviews, print_reviews=print_reviews_flag)

    # Save reviews to file
    review_fetcher.save_reviews_as_json_with_columns(reviews, columns)


Reviews saved to baldurs_gate_reviews_20240922_183856.json


In [42]:
import requests
import json
import csv
from datetime import datetime

class SteamReviewFetcher:
    """
    Client for the Steam Store ``appreviews`` endpoint.

    Downloads user reviews for a single application and can print a preview
    of them or persist them as CSV / JSON. Reviews are handled as the plain
    dictionaries found under the ``"reviews"`` key of the endpoint's response.

    NOTE(review): this cell redefines a class of the same name declared in an
    earlier cell; the definition executed last is the one in effect.
    """

    # The getreviews documentation gives 100 as the largest num_per_page the
    # endpoint honours; bigger totals are collected page by page via the cursor.
    MAX_REVIEWS_PER_PAGE = 100

    # Seconds to wait on the HTTP request before giving up (requests has no
    # default timeout and would otherwise block indefinitely).
    REQUEST_TIMEOUT = 30

    def __init__(self, appid, filter="all", language="english", day_range=30, review_type="all", purchase_type="all"):
        """
        Initializes the SteamReviewFetcher with the required parameters.

        :param appid: Steam application ID for the game.
        :param filter: Type of review filter.
        :param language: Language of the reviews.
        :param day_range: Number of days to consider for reviews.
        :param review_type: Type of review (all or specific).
        :param purchase_type: Type of purchase.
        """
        self.base_url = "https://store.steampowered.com/appreviews/"
        self.appid = appid
        self.filter = filter
        self.language = language
        self.day_range = day_range
        self.review_type = review_type
        self.purchase_type = purchase_type

    def _construct_url(self):
        """
        Constructs the URL for the API request.

        :return: Full URL for the API request.
        """
        return f"{self.base_url}{self.appid}?json=1"

    def _fetch_reviews(self, num_reviews, cursor="*"):
        """
        Fetches one page of reviews from the Steam Store API.

        :param num_reviews: Number of reviews to ask for in this request
            (sent as ``num_per_page``).
        :param cursor: Paging cursor; ``"*"`` requests the first page, later
            pages use the ``cursor`` value of the previous response.
        :return: JSON response containing the reviews.
        :raises: HTTPError if the API request fails; a requests timeout
            exception if the server does not answer in time.
        """
        url = self._construct_url()
        params = {
            "filter": self.filter,
            "language": self.language,
            "day_range": self.day_range,
            "review_type": self.review_type,
            "purchase_type": self.purchase_type,
            "num_per_page": num_reviews,
            "cursor": cursor,
        }
        response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()  # Raises an HTTPError for bad responses
        return response.json()

    def get_reviews(self, num_reviews=20, print_reviews=True):
        """
        Retrieves up to ``num_reviews`` reviews, paging through the API as needed.

        Requests are issued in pages of at most ``MAX_REVIEWS_PER_PAGE``.
        Paging stops when enough reviews were collected, a page is empty,
        a page brings no review that was not already seen, or the cursor is
        missing or repeats.

        :param num_reviews: Total number of reviews to fetch.
        :param print_reviews: Whether to print the reviews (default is True).
        :return: List of reviews (at most ``num_reviews`` long).
        """
        reviews = []
        seen_ids = set()
        visited_cursors = set()
        cursor = "*"

        while len(reviews) < num_reviews and cursor and cursor not in visited_cursors:
            visited_cursors.add(cursor)
            page_size = min(num_reviews - len(reviews), self.MAX_REVIEWS_PER_PAGE)
            review_data = self._fetch_reviews(page_size, cursor=cursor)
            page = review_data.get("reviews") or []
            if not page:
                break

            added = 0
            for review in page:
                review_id = review.get("recommendationid")
                if review_id is not None:
                    # Pages may overlap; keep each recommendation only once.
                    if review_id in seen_ids:
                        continue
                    seen_ids.add(review_id)
                reviews.append(review)
                added += 1
                if len(reviews) >= num_reviews:
                    break

            if added == 0:
                # Nothing new on this page: further paging would only loop.
                break
            cursor = review_data.get("cursor")

        if print_reviews:
            self.print_first_last_reviews(reviews)

        return reviews

    @staticmethod
    def _print_review(review):
        """
        Prints author, text, rating and creation timestamp of one raw review,
        followed by a separator line.

        :param review: Review dictionary as returned by the API.
        """
        print(f"Author: {review['author']['steamid']}")
        print(f"Review: {review.get('review', 'No text')}")
        print(f"Rating: {'Positive' if review['voted_up'] else 'Negative'}")
        print(f"Timestamp: {review['timestamp_created']}")
        print("-" * 79)

    def print_first_last_reviews(self, reviews):
        """
        Prints the first and last five reviews.

        With fewer than ten reviews the two groups overlap, so some reviews
        are printed twice.

        :param reviews: List of reviews to print.
        """
        if len(reviews) == 0:
            print("No reviews found.")
            return

        print("\nFirst 5 Reviews:")
        for review in reviews[:5]:
            self._print_review(review)

        print("\nLast 5 Reviews:")
        for review in reviews[-5:]:
            self._print_review(review)

    def _extract_columns_to_save(self, reviews, columns):
        """
        Extracts specific columns from the reviews for saving.

        A column written as ``"author.<key>"`` is read from the nested
        ``author`` dictionary; every other column is read from the review
        itself. Missing values are stored as ``None``.

        :param reviews: List of reviews to extract from.
        :param columns: List of columns to extract, or ``None`` to keep every
            field of every review unchanged.
        :return: List of dictionaries containing the extracted data.
        """
        if columns is None:
            # No selection requested: keep the full review (shallow copy so the
            # caller's list of dictionaries is not aliased).
            return [dict(review) for review in reviews]

        extracted_reviews = []
        for review in reviews:
            review_dict = {}
            for column in columns:
                parts = column.split('.')
                if parts[0] == 'author' and len(parts) > 1:
                    author = review.get('author') or {}
                    review_dict[column] = author.get(parts[1])
                else:
                    review_dict[column] = review.get(column)
            extracted_reviews.append(review_dict)
        return extracted_reviews

    def save_reviews(self, reviews, filename, format, columns=None):
        """
        Saves the reviews to a file in the specified format.

        :param reviews: List of reviews to save.
        :param filename: Name of the file to save the reviews to.
        :param format: Format to save the reviews ('csv' or 'json').
        :param columns: Optional list of columns to save (used in JSON only;
            ``None`` saves every field). The CSV layout is fixed.
        """
        if not reviews:
            print("No reviews to save.")
            return

        if format == "csv":
            keys = ['steamid', 'review', 'voted_up', 'timestamp_created']
            with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.writer(csvfile)
                writer.writerow(keys)  # Write the header
                for review in reviews:
                    writer.writerow([
                        review['author']['steamid'],
                        review.get('review', 'No text'),
                        'Positive' if review['voted_up'] else 'Negative',
                        review['timestamp_created'],
                    ])
            print(f"Reviews saved to {filename}")
        elif format == "json":
            extracted_reviews = self._extract_columns_to_save(reviews, columns)
            with open(filename, 'w', encoding='utf-8') as jsonfile:
                json.dump(extracted_reviews, jsonfile, indent=4)
            print(f"Reviews saved to {filename}")
        else:
            print("Invalid format. Please specify 'csv' or 'json'.")

    def save_reviews_as_json_with_columns(self, reviews, columns=None, filename_prefix="god_of_war_ragnarok_reviews"):
        """
        Saves the reviews as a timestamped JSON file in the working directory.

        The file is named ``<filename_prefix>_<YYYYmmdd_HHMMSS>.json``.

        :param reviews: List of reviews to save.
        :param columns: Optional list of columns to extract and save
            (``None`` saves every field).
        :param filename_prefix: Leading part of the generated file name.
        """
        if not reviews:
            print("No reviews to save.")
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{filename_prefix}_{timestamp}.json"

        # Reuse the JSON branch of save_reviews instead of duplicating it.
        self.save_reviews(reviews, filename, "json", columns)


# Example usage
if __name__ == "__main__":
    appid = "2322010"  # God of War Ragnarök
    review_fetcher = SteamReviewFetcher(appid)

    # Total number of reviews requested. Steam serves at most 100 reviews per
    # request, so totals above that can only be reached by paging with the cursor.
    num_reviews = 2000
    print_reviews_flag = False  # True prints the first/last five reviews after fetching

    # Fields written to the JSON dump; "author.<key>" selects a key of the
    # nested author object. Steam names the total-playtime field
    # "playtime_forever" (with an underscore): the earlier spelling
    # "author.playtimeforever" matched nothing and was always saved as null.
    columns = [
        "recommendationid",
        "author.steamid",
        "author.playtime_forever",
        "author.playtime_last_two_weeks",
        "author.playtime_at_review",
        "author.last_played",
        "language",
        "review",
        "timestamp_created",
        "timestamp_updated",
    ]

    reviews = review_fetcher.get_reviews(num_reviews, print_reviews=print_reviews_flag)

    # Save reviews to file
    review_fetcher.save_reviews_as_json_with_columns(reviews, columns)


Reviews saved to god_of_war_ragnarok_reviews_20240922_190601.json


# Ingestion

In [43]:
!ls

001_test.ipynb
baldurs_gate_reviews.json
baldurs_gate_reviews_20240922_181423.json
baldurs_gate_reviews_20240922_182037.json
baldurs_gate_reviews_20240922_183445.json
baldurs_gate_reviews_20240922_183856.json
god_of_war_ragnarok_reviews_20240922_190345.json
god_of_war_ragnarok_reviews_20240922_190601.json


In [44]:
# Review dumps written by the fetcher cells earlier in this notebook
filename = "baldurs_gate_reviews_20240922_182037.json"
filename2 = "god_of_war_ragnarok_reviews_20240922_190601.json"

# Only the God of War Ragnarök dump (filename2) is loaded into `reviews`;
# open `filename` instead to work with the Baldur's Gate 3 dump.
with open(filename2, mode='r', encoding='utf-8') as jsonfile:
    reviews = json.load(jsonfile)

In [45]:
# Preview the first `i` loaded reviews, one field per line, each
# followed by a 79-character separator.
i = 2
for review in reviews[:i]:
    print(
        f"Author ID: {review['author.steamid']}",
        f"Review: {review.get('review', 'No text')}",
        f"Timestamp Created: {review['timestamp_created']}",
        "-" * 79,
        sep="\n",
    )

Author ID: 76561198354466742
Review: to God of War: Ragnarok Dev. Team,

I’m writing to bring to your attention an issue regarding the current PC port of God of War: Ragnarok that affects users with 4GB VRAM graphics cards. When attempting to launch the game on such systems, an error message is displayed, indicating that there is not enough VRAM, and the game closes without allowing users to proceed. It is my humble request to kindly reconsider or remove this VRAM limitation. From personal experience and community feedback, I can confirm that the game runs well on certain integrated graphics processors (such as the Ryzen 3 3200G, Ryzen 5 3400G, and Ryzen 7 5700G), which are generally weaker than many 4GB VRAM GPUs available today. God of War: Ragnarok performs on the base PlayStation 4, which further suggests that the game should be capable of running on 4GB VRAM graphics cards, and the latest Steam Hardware Survey shows that one of the most widely used graphics cards among gamers is t

In [46]:
# Text search covers the review body and its language label;
# recommendationid is registered as a keyword field.
index = minsearch.Index(
    keyword_fields=["recommendationid"],
    text_fields=["language", "review"],
)

In [47]:
# Fit the search index on the reviews loaded from the JSON dump
index.fit(reviews)

<minsearch.Index at 0x7fe530cb0f10>

# RAG flow

In [48]:
# Do not enable in a shared notebook: the printed key would be stored in the cell output.
# print(os.environ['OPENAI_API_KEY'])

In [49]:
from openai import OpenAI

# The key is read explicitly from the environment (the client would also pick
# up OPENAI_API_KEY on its own, so the argument could be omitted).
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])


In [50]:
def search(query):
    """
    Query the module-level ``index`` for reviews matching ``query``.

    No filters and no field boosts are applied; at most ten results are requested.

    :param query: Free-text search string.
    :return: Whatever ``index.search`` returns for the query.
    """
    return index.search(
        query=query,
        filter_dict={},
        boost_dict={},
        num_results=10,
    )

In [51]:
# Instruction block sent to the LLM: persona, the user's question and the
# retrieved reviews. The CONTEXT consists of Steam reviews, so the wording
# refers to a reviews database.
prompt_template = """
You're a conservative father of little children who is not aware of how the gaming industry works, who is not up to date with what titles are released on daily basis. Answer the QUESTION based on the CONTEXT from our Steam reviews database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

# How a single retrieved review is rendered inside the CONTEXT section.
entry_template = """
language: {language}
review: {review}
""".strip()

def build_prompt(query, search_results):
    """
    Render the final LLM prompt for a question and its retrieved reviews.

    :param query: The user's question, inserted after ``QUESTION:``.
    :param search_results: Iterable of review dictionaries; each must provide
        the ``language`` and ``review`` keys (extra keys are ignored).
    :return: The prompt text with surrounding whitespace stripped.
    :raises KeyError: If a result lacks ``language`` or ``review``.
    """
    # Entries are separated by one blank line; joining once avoids building
    # the context through repeated string concatenation.
    context = "\n\n".join(entry_template.format(**doc) for doc in search_results)

    return prompt_template.format(question=query, context=context).strip()

In [52]:
def llm(prompt, model='gpt-4o-mini'):
    """
    Send ``prompt`` as a single user message to the chat-completions API.

    Uses the module-level ``client``.

    :param prompt: Full prompt text.
    :param model: Name of the chat model to call.
    :return: Text content of the first returned choice.
    """
    messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [53]:
def rag(query, model='gpt-4o-mini'):
    """
    Answer ``query`` with retrieval-augmented generation.

    Retrieves matching reviews with ``search``, renders them into a prompt
    with ``build_prompt`` and returns the reply produced by ``llm``.

    :param query: The user's question.
    :param model: Chat model name forwarded to ``llm``.
    :return: The model's answer text.
    """
    hits = search(query)
    return llm(build_prompt(query, hits), model=model)

In [54]:
# NOTE(review): the index was fit on the God of War Ragnarök dump only, so the
# CONTEXT retrieved for this Baldur's Gate 3 question comes from reviews of a
# different game.
question = "Is Baldur's Gate 3 a game for kids?"
answer = rag(question)
print(answer)

Baldur's Gate 3 is not a game for kids. Based on the audience designation in the context, it is intended for teens and adults.


In [55]:
# NOTE(review): the index was fit on the God of War Ragnarök dump only, so the
# CONTEXT retrieved for this Baldur's Gate 3 question comes from reviews of a
# different game.
question = "Is Baldur's Gate 3 a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Based on the context provided, there is no specific mention of Baldur's Gate 3 or any discussion surrounding diversity, equity, and inclusion (DEI) concepts in regard to this game. Therefore, I cannot confirm whether Baldur's Gate 3 follows DEI concepts as there is simply no information available about the game in the provided reviews.


In [56]:
# Run the RAG pipeline on a God of War Ragnarök question; the answer is
# generated from the reviews retrieved out of the index.
question = "Is God of War Ragnarök a game for kids?"
answer = rag(question)
print(answer)

God of War Ragnarök is not a game geared toward children. It is part of a series known for its mature themes, complex narratives, and intense combat. The game involves exploring mythology, dealing with serious plotlines, and contains elements that may not be suitable for younger audiences. Additionally, there are mentions of "woke" influences and narrative complexities, which further indicate that it is targeted at a more mature audience. Therefore, as a conservative father, you may want to consider this before allowing your little children to play it.


In [57]:
# Run the RAG pipeline on a God of War Ragnarök question; the answer is
# generated from the reviews retrieved out of the index.
question = "Is God of War Ragnarök a game following DEI concepts, namely diversity, equity and inclusion?"
answer = rag(question)
print(answer)

Yes, God of War Ragnarök does follow DEI concepts, specifically regarding diversity, equity, and inclusion. This is evident through the game’s collaboration with Sweet Baby Inc., a company known for its focus on inclusive storytelling. However, some reviews mention that elements of the story and character decisions feel forced, which some players perceive as a "woke influence." This sentiment indicates that while the game aims to incorporate DEI concepts, it has sparked mixed reactions among players regarding the execution.
