In [1]:
2+2

4

In [2]:
import os
import json

In [27]:
# Directory containing the data files
data_dir = os.path.abspath('../reviews-assistant/data/reviews')

# Initialize an empty list to hold all reviews
reviews = []

# List objects in the directory
objects_in_directory = os.listdir(data_dir)

# Iterate over the files in the directory
for obj in objects_in_directory:
    if obj.endswith('.json'):  # Check if the file is a JSON file
        file_path = os.path.join(data_dir, obj)
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            # Load the reviews from the JSON file
            file_reviews = json.load(jsonfile)
            reviews.extend(file_reviews)  # Append reviews to the main list

# Print the first i reviews
i = 2  # Change this to print more reviews if needed
for review in reviews[:i]:
    print(f"Author ID: {review['author.steamid']}")
    print(f"Review: {review.get('review', 'No text')}")
    print(f"Timestamp Created: {review['timestamp_created']}")
    print("-" * 79)

Author ID: 76561197996873645
Review: Awesome game, but I held off playing it because of the top-down/isometric camera - didn't like the camera in DOS2 either.  Then I found the "Native Camera Tweaks" mod on Nexus Mods.

This totally changes the game, allowing you to use standard 3rd person camera controls, and actually see what's ahead when moving up-hill.

It turns out all outdoor areas have sky boxes (including sun and moon), and almost all interior areas have ceilings.  Trust me, you'll hardly notice the few times a ceiling is missing or "pops in", unless you're looking for it.  And there's no performance hit that I've noticed.

Makes me wander why Larian didn't give an unlocked camera by default...

Also you have full control over zoom/pitch/FoV/invert axis/dead zones etc through the mod's configuration file.

[b][i]EDIT:  The mod has been updated (8 SEP 24) to work with patch 7.  Many thanks to Ersh for updating it so promptly!
Timestamp Created: 1724579126
-----------------------

In [42]:
reviews

[{'appid': '1086940',
  'timestamp_query': 1727034887,
  'title': "Baldur's Gate 3",
  'recommendationid': '173318245',
  'author.steamid': '76561197996873645',
  'author.playtimeforever': None,
  'author.playtime_last_two_weeks': 445,
  'author.playtime_at_review': 7247,
  'author.last_played': 1726859390,
  'language': 'english',
  'review': 'Awesome game, but I held off playing it because of the top-down/isometric camera - didn\'t like the camera in DOS2 either.  Then I found the "Native Camera Tweaks" mod on Nexus Mods.\n\nThis totally changes the game, allowing you to use standard 3rd person camera controls, and actually see what\'s ahead when moving up-hill.\n\nIt turns out all outdoor areas have sky boxes (including sun and moon), and almost all interior areas have ceilings.  Trust me, you\'ll hardly notice the few times a ceiling is missing or "pops in", unless you\'re looking for it.  And there\'s no performance hit that I\'ve noticed.\n\nMakes me wander why Larian didn\'t giv

In [29]:
from openai import OpenAI

client = OpenAI()

In [30]:
prompt_template = """
You are emulating a user of our computer game preselection application.
The application and its recommendations are based on reviews from an online shop called Steam.
The application is designed for conservative fathers of young children who are not aware of how the modern gaming industry works.
This application is for fathers who are not up to date with the latest titles released on a daily basis.
Formulate 5 questions this user might ask based on a provided exercise.
Make the questions specific to this exercise.
The record should contain the answer to the questions, and the questions should
be complete and not too short. Use as fewer words as possible from the record. 

The record:

title: {title}
language: {language}
review: {review}

Provide the output in parsable JSON without using code blocks:

{{"questions": ["question1", "question2", ..., "question5"]}}
""".strip()

In [31]:
prompt = prompt_template.format(**reviews[0])

In [32]:
prompt

'You are emulating a user of our computer game preselection application.\nThe application and its recommendations are based on reviews from an online shop called Steam.\nThe application is designed for conservative fathers of young children who are not aware of how the modern gaming industry works.\nThis application is for fathers who are not up to date with the latest titles released on a daily basis.\nFormulate 5 questions this user might ask based on a provided exercise.\nMake the questions specific to this exercise.\nThe record should contain the answer to the questions, and the questions should\nbe complete and not too short. Use as fewer words as possible from the record. \n\nThe record:\n\ntitle: Baldur\'s Gate 3\nlanguage: english\nreview: Awesome game, but I held off playing it because of the top-down/isometric camera - didn\'t like the camera in DOS2 either.  Then I found the "Native Camera Tweaks" mod on Nexus Mods.\n\nThis totally changes the game, allowing you to use stand

In [34]:
def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [35]:
questions = llm(prompt)

In [36]:
json.loads(questions)

{'questions': ["What is the main appeal of Baldur's Gate 3 according to the reviews?",
  "How does the 'Native Camera Tweaks' mod enhance the gameplay experience?",
  'Are there any noticeable downsides to the camera changes suggested in the mod?',
  "Why might someone question the decision not to include an unlocked camera by default in Baldur's Gate 3?",
  'Has the mod been kept up to date with the latest game patches?']}

In [37]:
def generate_questions(doc):
    prompt = prompt_template.format(**doc)

    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )

    json_response = response.choices[0].message.content
    return json_response

In [39]:
from tqdm.auto import tqdm

In [40]:
results = {}

In [43]:
for doc in tqdm(reviews): 
    doc_id = doc['appid']
    if doc_id in results:
        continue

    questions_raw = generate_questions(doc)
    questions = json.loads(questions_raw)
    results[doc_id] = questions['questions']

  0%|          | 0/200 [00:00<?, ?it/s]

In [44]:
final_results = []

for doc_id, reviews in results.items():
    for q in reviews:
        final_results.append((doc_id, q))

In [45]:
final_results[0]

('1086940',
 "What is Baldur's Gate 3 about and why should I consider it for my children?")

In [46]:
final_results[-1]

('2322010',
 'Why might the developers have imposed a VRAM limitation on the game, despite evidence that it runs well on certain systems?')

In [49]:
import json
import os

# Assuming final_results is defined as a list of dictionaries
# Example: final_results = [{'recommendationid': 1, 'question': 'What is the best game?'}, ...]
data_dir = os.path.abspath('../reviews-assistant/data/ground_truth')

# Define the output file path
output_file = os.path.join(data_dir, "ground-truth-retrieval.json")

# Save the results to a JSON file
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(final_results, json_file, ensure_ascii=False, indent=4)

print(f"Data saved to {output_file}")


Data saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/ground_truth/ground-truth-retrieval.json
