In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
2+2

4

In [3]:
from tqdm.auto import tqdm
import pandas as pd

In [4]:
import sys
import os
import json

# Construct the path to the scripts directory
script_path = os.path.abspath('../reviews-assistant/scripts')

# Add the path to sys.path
if script_path not in sys.path:
    sys.path.append(script_path)
    
import minsearch

In [5]:
# Directory containing the ground truth file
data_dir = os.path.abspath('../reviews-assistant/data/ground_truth')

# Path to the ground_truth_retrieval.json file
file_path = os.path.join(data_dir, 'ground_truth_retrieval.json')

# Check if the file exists
if os.path.exists(file_path):
    try:
        # Open and load the JSON file
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            ground_truth_data = json.load(jsonfile)  # Load the JSON data into a Python object

        # Ensure each item is a dictionary and contains 'id' and 'question'
        if all(isinstance(item, dict) and 'appid' in item and 'question' in item for item in ground_truth_data):
            print("Data successfully loaded and is in the correct format.")

            # Example: Print a sample of the data
            for item in ground_truth_data[:5]:  # Print the first 5 questions
                print(f"ID: {item['appid']}, Question: {item['question']}")
        else:
            print("Error: The data format is incorrect or missing required fields ('id', 'question').")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
else:
    print(f"File {file_path} does not exist.")

Data successfully loaded and is in the correct format.
ID: 1817070, Question: What kind of violent content is present in Marvel's Spider-Man Remastered that I should be concerned about?
ID: 1817070, Question: Are there any graphic depictions of gore or blood in the gameplay of Marvel's Spider-Man Remastered?
ID: 1817070, Question: Does the game contain any sexual content or suggestive themes that could be inappropriate for children?
ID: 1817070, Question: Is there any use of profanity or offensive language found in Marvel's Spider-Man Remastered?
ID: 1817070, Question: Are there depictions of substance abuse, like drugs or alcohol, within the story or gameplay?


In [6]:
len(ground_truth_data)

210

In [7]:
# Directory containing the data files
data_dir = os.path.abspath('../reviews-assistant/data/reviews')

# Initialize an empty list to hold all reviews
reviews = []

# List objects in the directory
objects_in_directory = os.listdir(data_dir)

# Iterate over the files in the directory
for obj in objects_in_directory:
    if obj.endswith('.json'):  # Check if the file is a JSON file
        file_path = os.path.join(data_dir, obj)
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            # Load the reviews from the JSON file
            file_reviews = json.load(jsonfile)
            reviews.extend(file_reviews)  # Append reviews to the main list
# Print the first i reviews
i = 2  # Change this to print more reviews if needed
for review in reviews[:i]:
    print(f"Author ID: {review['author.steamid']}")
    print(f"Review: {review.get('review', 'No text')}")
    print(f"Timestamp Created: {review['timestamp_created']}")
    print("-" * 79)

Author ID: 76561198420943538
Review: ---{ Graphics }---
✅ You forget what reality is
☐ Beautiful
☐ Good
☐ Decent
☐ Bad
☐ You will get eye cancer
☐ Get a pepper spray for your eye instead

---{ Gameplay }---
☐ Won’t ever touch any other game anymore
✅ Very good
☐ Good
☐ It's just gameplay
☐ Mehh
☐ Watch paint dry instead
☐ Tic Tac toe is better

---{ Audio }---
☐ Eargasm
✅ Very good
☐ Good
☐ Not too bad
☐ Bad
☐ I'm now deaf

---{ Audience }---
☐ Kids
✅Teens
✅ Adults
☐ Grandma

---{ PC Requirements }---
☐ Check if you can run paint
☐ Potato
☐ Decent
✅ Fast
☐ Rich boi
☐ Ask NASA if they have a spare computer
☐ Search the galaxy for dark matter fuel to run

---{ Difficulty }---
☐ Just press 'W'
☐ Easy
✅ Easy to learn / Hard to master
☐ Significant brain usage
☐ Difficult
☐ Dark Souls

---{ Grind }---
☐ Nothing to grind
☐ Only if u care about leaderboards/ranks
✅ Isn't necessary to progress
☐ Average grind level
☐ Too much grind
☐ You'll need a second life for grinding

---{ Story }---
☐ No

In [8]:
len(reviews)

1784

In [9]:
index = minsearch.Index(
    # text_fields=["author.steamid", "author.playtimeforever", "author.playtime_last_two_weeks", "author.playtime_at_review", "author.last_played", "language", "review", "timestamp_created", "timestamp_updated"],
    text_fields=["title", "language", "review"],
    keyword_fields=["appid", "recommendationid"]
)

index.fit(reviews)

<minsearch.Index at 0x7ff9b0959420>

In [10]:
from openai import OpenAI

client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # this is also the default, it can be omitted
)

In [11]:
def build_prompt(query, search_results):
    answer_llm = ""

    if 'entry_template' in globals():  # Check if entry_template exists in global scope
        for doc in search_results:
            answer_llm += entry_template.format(**doc) + "\n\n"
    else:
        for doc in search_results:
            # Fallback formatting if entry_template is missing
            answer_llm += str(doc) + "\n\n"

    prompt = prompt_template.format(question=query, answer_llm=answer_llm).strip()
    return prompt

In [12]:
def llm(prompt, model='gpt-4o-mini'):
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [13]:
def search(query):
    boost = {'exercise_name': 1.5333100039172263,
  'type_of_activity': 1.5395670128097776,
  'type_of_equipment': 2.39211934690812,
  'body_part': 1.6244459844173096,
  'type': 2.7948598963438456,
  'muscle_groups_activated': 1.4303057412632778,
  'instructions': 1.46575166599529}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [14]:
def rag(query, model='gpt-4o-mini'):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    #print(prompt)
    answer = llm(prompt, model=model)
    return answer

In [15]:
prompt_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()

In [16]:
# sample = ground_truth_data

In [17]:
# record['question']

In [18]:
# rag(question)

In [19]:
# answer_llm = rag(question)

In [21]:
evaluations = []

for record in tqdm(ground_truth_data):
    question = record['question']
    answer_llm = rag(question) 

    prompt = prompt_template.format(
        question=question,
        answer_llm=answer_llm
    )

    evaluation = llm(prompt)
    evaluation = json.loads(evaluation)

    evaluations.append((record, answer_llm, evaluation))

  0%|          | 0/210 [00:00<?, ?it/s]

In [22]:
evaluations[0][0]['question']

"What kind of violent content is present in Marvel's Spider-Man Remastered that I should be concerned about?"

In [23]:
evaluations[:1]

[({'appid': '1817070',
   'question': "What kind of violent content is present in Marvel's Spider-Man Remastered that I should be concerned about?"},
  '{\n  "relevance": "NON_RELEVANT",\n  "explanation": "The generated answer does not address the question about the types of violent content present in Marvel\'s Spider-Man Remastered. Instead, it focuses entirely on the game\'s positive aspects, such as graphics, story, and gameplay, without mentioning any concerns about violent content."\n}',
  {'relevance': 'NON_RELEVANT',
   'explanation': "The generated answer fails to address the specific inquiry regarding the types of violent content in Marvel's Spider-Man Remastered. Instead, it emphasizes the game's positive features, which does not fulfill the request for information on potentially concerning violent elements."})]

In [25]:
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])

df_eval['appid'] = df_eval.record.apply(lambda d: d['appid'])
df_eval['question'] = df_eval.record.apply(lambda d: d['question'])

df_eval['relevance'] = df_eval.evaluation.apply(lambda d: d['relevance'])
df_eval['explanation'] = df_eval.evaluation.apply(lambda d: d['explanation'])

del df_eval['record']
del df_eval['evaluation']

In [26]:
df_eval.relevance.value_counts()

relevance
NON_RELEVANT       165
RELEVANT            24
PARTLY_RELEVANT     21
Name: count, dtype: int64

In [27]:
df_eval.relevance.value_counts(normalize=True)

relevance
NON_RELEVANT       0.785714
RELEVANT           0.114286
PARTLY_RELEVANT    0.100000
Name: proportion, dtype: float64

In [28]:
import json
import os
import pandas as pd

# Sample DataFrame for demonstration
# df_eval = pd.DataFrame({'recommendationid': [1, 2], 'question': ['What is the best game?', 'What is the worst game?']})

data_dir = os.path.abspath('../reviews-assistant/data/ground_truth')

# Define the output file path
output_file = os.path.join(data_dir, "ground_truth_evaluation.json")

# Convert DataFrame to a list of dictionaries including index
df_eval_list = df_eval.reset_index().to_dict(orient='records')

# Save the results to a JSON file
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(df_eval_list, json_file, ensure_ascii=False, indent=4)

print(f"Data saved to {output_file}")

Data saved to /mnt/c/Users/KonuTech/llm-zoomcamp-capstone-01/reviews-assistant/data/ground_truth/ground_truth_evaluation.json


In [29]:
df_eval[df_eval["relevance"]=="RELEVANT"]

Unnamed: 0,answer,appid,question,relevance,explanation
1,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1817070,Are there any graphic depictions of gore or bl...,RELEVANT,The generated answer correctly identifies that...
7,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1817070,Does Marvel's Spider-Man Remastered contain ho...,RELEVANT,The generated answer accurately assesses that ...
21,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2698940,Is there any graphic gore or blood featured in...,RELEVANT,The generated answer correctly assesses the re...
60,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",304390,What kind of community reputation does the gam...,RELEVANT,The generated answer addresses the question ab...
61,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",304390,Does FOR HONOR have any features that might pr...,RELEVANT,The generated answer directly addresses the qu...
72,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2208920,Are there any explicit or suggestive sexual th...,RELEVANT,The generated answer directly addresses the qu...
105,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",447040,Are there any elements of discrimination or of...,RELEVANT,The generated answer effectively addresses the...
109,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",447040,Is there any criticism regarding the incorpora...,RELEVANT,The generated answer directly addresses the cr...
112,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",2322010,What kind of practices does the reviewer say s...,RELEVANT,The generated answer directly responds to the ...
118,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",2322010,Should I be concerned about in-game purchases ...,RELEVANT,The generated answer directly addresses the us...


In [30]:
df_eval[df_eval["relevance"]=="NON_RELEVANT"]

Unnamed: 0,answer,appid,question,relevance,explanation
0,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1817070,What kind of violent content is present in Mar...,NON_RELEVANT,The generated answer fails to address the spec...
3,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1817070,Is there any use of profanity or offensive lan...,NON_RELEVANT,The generated answer does not contain any info...
4,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1817070,"Are there depictions of substance abuse, like ...",NON_RELEVANT,The generated answer does not address the ques...
5,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1817070,Does the game feature any elements of discrimi...,NON_RELEVANT,The generated answer does not address the ques...
6,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1817070,Is there any gambling or loot box mechanics pr...,NON_RELEVANT,The generated answer fails to address the spec...
...,...,...,...,...,...
205,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1832040,Is there any discrimination or discriminatory ...,NON_RELEVANT,The generated answer does not address the spec...
206,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1832040,Does Flintlock: The Siege of Dawn include any ...,NON_RELEVANT,The generated answer fails to address the spec...
207,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1832040,Are there any horror or disturbing imagery asp...,NON_RELEVANT,The generated answer does not address the spec...
208,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",1832040,Does Flintlock: The Siege of Dawn have any fea...,NON_RELEVANT,The generated answer does not address the ques...


In [31]:
df_eval[df_eval["appid"]=="2322010"]

Unnamed: 0,answer,appid,question,relevance,explanation
110,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2322010,What does the review mean by using a mod to by...,NON_RELEVANT,The generated answer does not address the spec...
111,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2322010,Why do some games require separate accounts fo...,NON_RELEVANT,The generated answer fails to address the ques...
112,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",2322010,What kind of practices does the reviewer say s...,RELEVANT,The generated answer directly responds to the ...
113,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2322010,"Is God of War: Ragnarok known for its content,...",NON_RELEVANT,The generated answer fails to address the main...
114,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2322010,Does this game involve any graphic depictions ...,NON_RELEVANT,The generated answer fails to address the spec...
115,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2322010,Are there any offensive words or strong langua...,NON_RELEVANT,The generated answer does not address the spec...
116,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2322010,Can you explain the types of content that coul...,NON_RELEVANT,The generated answer does not address the ques...
117,"{\n ""relevance"": ""NON_RELEVANT"",\n ""explanat...",2322010,Does the requirement for separate accounts imp...,NON_RELEVANT,The generated answer discusses reviews of the ...
118,"{\n ""relevance"": ""RELEVANT"",\n ""explanation""...",2322010,Should I be concerned about in-game purchases ...,RELEVANT,The generated answer directly addresses the us...
119,"{\n ""relevance"": ""PARTLY_RELEVANT"",\n ""expla...",2322010,Does God of War: Ragnarok incorporate any cont...,PARTLY_RELEVANT,The generated answer mentions criticisms relat...


In [32]:
df_eval.iloc[8:9]

Unnamed: 0,answer,appid,question,relevance,explanation
8,"{\n ""relevance"": ""PARTLY_RELEVANT"",\n ""expla...",1817070,Are there any mechanisms in the game that enco...,PARTLY_RELEVANT,The generated answer provides some insight int...


In [None]:
df_eval.iloc[8:9]["question"]