In [1]:
# https://github.com/DataTalksClub/llm-zoomcamp/blob/main/03-vector-search/eval/evaluation-metrics.md

In [2]:
2+2

4

In [3]:
import pandas as pd
from tqdm.auto import tqdm
import json
import sys
import os
from dotenv import load_dotenv
# Load environment variables from .env
load_dotenv("/home/jovyan/.envrc")

True

In [4]:
from elasticsearch import Elasticsearch

# Create an instance of the Elasticsearch client
es = Elasticsearch(['http://elasticsearch:9200'])

# Check if the connection is established
if es.ping():
    print("Elasticsearch is connected!")
else:
    print("Elasticsearch is not reachable.")

Elasticsearch is connected!


In [5]:
from sentence_transformers import SentenceTransformer

In [6]:
from elasticsearch import Elasticsearch

In [7]:
# # Directory containing the data files
# data_dir = os.path.abspath('../reviews-assistant/data/reviews')

# # Initialize an empty list to hold all reviews
# reviews = []

# # List objects in the directory
# objects_in_directory = os.listdir(data_dir)

# # Iterate over the files in the directory
# for obj in objects_in_directory:
#     if obj.endswith('.json'):  # Check if the file is a JSON file
#         file_path = os.path.join(data_dir, obj)
#         with open(file_path, 'r', encoding='utf-8') as jsonfile:
#             # Load the reviews from the JSON file
#             file_reviews = json.load(jsonfile)
#             reviews.extend(file_reviews)  # Append reviews to the main list
# # Print the first i reviews
# i = 2  # Change this to print more reviews if needed
# for review in reviews[:i]:
#     print(f"Author ID: {review['author.steamid']}")
#     print(f"Review: {review.get('review', 'No text')}")
#     print(f"Timestamp Created: {review['timestamp_created']}")
#     print("-" * 79)

In [8]:
# len(reviews)

In [9]:
# reviews[1:2]

In [10]:
model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)



In [11]:
# for doc in tqdm(reviews):
#     title = doc["title"]
#     review = doc['review']
#     tr = title + ' ' + review

#     doc['title_vector'] = model.encode(title)
#     doc['review_vector'] = model.encode(review)
#     doc['title_review_vector'] = model.encode(tr)

In [12]:
# doc.keys()

In [13]:
from elasticsearch import Elasticsearch, ConnectionError

try:
    es = Elasticsearch(['http://elasticsearch:9200'])  # Use the service name
    if es.ping():
        print("Connected to Elasticsearch")
    else:
        print("Could not connect to Elasticsearch")
except ConnectionError as e:
    print(f"Connection error: {e}")


Connected to Elasticsearch


In [14]:
from elasticsearch import Elasticsearch, NotFoundError, ConnectionError
from tqdm import tqdm

class ReviewIndexer:
    def __init__(self, es_host='http://elasticsearch:9200', index_name='steam-reviews', model=None):
        self.es = Elasticsearch([es_host])
        self.index_name = index_name
        self.model = model  # Expecting a model to encode text
        
        # Check the connection upon initialization
        self.check_connection()
        
        self.index_settings = {
            "settings": {
                "number_of_shards": 1,
                "number_of_replicas": 0
            },
            "mappings": {
                "properties": {
                    "appid": {"type": "keyword"},
                    "timestamp_query": {"type": "integer"},
                    "title": {"type": "keyword"},
                    "recommendationid": {"type": "keyword"},  # Changed to keyword for consistency
                    "author.steamid": {"type": "keyword"},
                    "author.playtimeforever": {"type": "integer"},
                    "author.playtime_last_two_weeks": {"type": "integer"},
                    "author.playtime_at_review": {"type": "integer"},
                    "author.last_played": {"type": "integer"},
                    "language": {"type": "keyword"},
                    "review": {"type": "text"},
                    "voted_up": {"type": "boolean"},  # Changed to boolean for consistency
                    "votes_up": {"type": "integer"},
                    "timestamp_created": {"type": "integer"},
                    "timestamp_updated": {"type": "integer"},
                    "title_vector": {
                        "type": "dense_vector",
                        "dims": 384,
                        "index": True,
                        "similarity": "cosine"
                    },
                    "review_vector": {
                        "type": "dense_vector",
                        "dims": 384,
                        "index": True,
                        "similarity": "cosine"
                    },
                    "title_review_vector": {
                        "type": "dense_vector",
                        "dims": 384,
                        "index": True,
                        "similarity": "cosine"
                    },
                }
            }
        }

    def check_connection(self):
        try:
            if not self.es.ping():
                raise ConnectionError("Elasticsearch cluster is down!")
            print("Successfully connected to Elasticsearch.")
        except ConnectionError as e:
            print(f"Connection error: {e}")
            raise

    def create_index(self):
        try:
            self.es.indices.delete(index=self.index_name, ignore_unavailable=True)
            print(f"Deleted index: {self.index_name} (if it existed)")
        except NotFoundError:
            print(f"Index {self.index_name} does not exist, nothing to delete.")
        except ConnectionError as e:
            print(f"Error deleting index: {e}")

        try:
            self.es.indices.create(index=self.index_name, body=self.index_settings)
            print(f"Created index: {self.index_name}")
        except ConnectionError as e:
            print(f"Error creating index: {e}")

    def prepare_and_index_documents(self, reviews):
        """
        Prepare documents by encoding and indexing them into Elasticsearch.
        
        :param reviews: List of review documents to be indexed.
        """
        for doc in tqdm(reviews):
            # Handle None values for numerical fields
            doc['author.playtimeforever'] = doc.get('author.playtimeforever', 0) or 0
            doc['author.playtime_last_two_weeks'] = doc.get('author.playtime_last_two_weeks', 0) or 0
            doc['author.playtime_at_review'] = doc.get('author.playtime_at_review', 0) or 0
            doc['author.last_played'] = doc.get('author.last_played', 0) or 0
            doc['votes_up'] = doc.get('votes_up', 0) or 0
            doc['timestamp_created'] = doc.get('timestamp_created', 0) or 0
            doc['timestamp_updated'] = doc.get('timestamp_updated', 0) or 0

            # Encode title and review if a model is provided
            if self.model:
                title = doc["title"]
                review = doc['review']
                tr = title + ' ' + review

                # Encode the text and convert to lists
                doc['title_vector'] = self.model.encode(title).tolist()
                doc['review_vector'] = self.model.encode(review).tolist()
                doc['title_review_vector'] = self.model.encode(tr).tolist()
            else:
                # Placeholder if no model is provided
                doc['title_vector'] = []
                doc['review_vector'] = []
                doc['title_review_vector'] = []

            # Index the document
            try:
                self.es.index(index=self.index_name, document=doc)
            except Exception as e:
                print(f"Failed to index document {doc}: {e}")

    def fetch_first_documents(self, size=10):
        body = {
            "query": {
                "match_all": {}
            },
            "size": size
        }

        try:
            response = self.es.search(index=self.index_name, body=body)
            return response['hits']['hits']
        except NotFoundError:
            print(f"Index {self.index_name} not found.")
        except ConnectionError as e:
            print(f"Error searching index: {e}")
        except Exception as e:
            print(f"An error occurred: {e}")
        return None

    def query_reviews(self, title, query, k=5, num_candidates=10000):
        """
        Query reviews using both KNN and keyword search.
    
        :param title: The title of the game to filter results.
        :param query: The natural language query for searching.
        :param k: The number of nearest neighbors to retrieve.
        :param num_candidates: Number of candidates for KNN search.
        :return: The combined search results from KNN and keyword queries.
        """
        # Encode the query
        v_q = self.model.encode(query).tolist()  # Ensure it is a list
    
        # KNN Query
        knn_query = {
            "field": "review_vector",
            "query_vector": v_q,
            "k": k,
            "num_candidates": num_candidates,
            "boost": 0.5,
            "filter": {
                "term": {
                    "title": title  # Using title for filtering
                }
            }
        }
    
        # Keyword Query
        keyword_query = {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["review^3", "title"],
                        "type": "best_fields",
                        "boost": 0.5,
                    }
                },
                "filter": {
                    "term": {
                        "title": title  # Using title for filtering
                    }
                }
            }
        }
    
        # Execute the keyword query
        keyword_results = []
        try:
            keyword_response = self.es.search(index=self.index_name, body={"query": keyword_query}, size=k)
            keyword_results = keyword_response["hits"]["hits"]
        except Exception as e:
            print(f"An error occurred during keyword search: {e}")
    
        # Execute the KNN query
        knn_results = []
        try:
            knn_response = self.es.search(index=self.index_name, body={"knn": knn_query}, size=k)
            knn_results = knn_response["hits"]["hits"]
        except Exception as e:
            print(f"An error occurred during KNN search: {e}")
    
        # Combine results
        combined_results = keyword_results + knn_results  # You may want to deduplicate or sort these results
    
        return combined_results

    def query_reviews_hybrid(self, title, query, k=5, num_candidates=10000, vector_field="review_vector"):
        """
        Query reviews using both KNN and keyword search with an option to select the vector field.
    
        :param title: The title of the game to filter results.
        :param query: The natural language query for searching.
        :param k: The number of nearest neighbors to retrieve.
        :param num_candidates: Number of candidates for KNN search.
        :param vector_field: The field to use for KNN vector search. Can be 'review_vector', 'title_vector', or 'title_review_vector'.
        :return: The combined search results from KNN and keyword queries.
        """
        
        # Validate the vector field
        if vector_field not in ["review_vector", "title_vector", "title_review_vector"]:
            raise ValueError(f"Invalid vector field: {vector_field}. Must be 'review_vector', 'title_vector', or 'title_review_vector'.")
        
        # Encode the query
        v_q = self.model.encode(query).tolist()  # Ensure it is a list
    
        # KNN Query
        knn_query = {
            "field": vector_field,  # Use the selected vector field
            "query_vector": v_q,
            "k": k,
            "num_candidates": num_candidates,
            "boost": 0.5,
            "filter": {
                "term": {
                    "title": title  # Using title for filtering
                }
            }
        }
    
        # Keyword Query
        keyword_query = {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["review^3", "title"],
                        "type": "best_fields",
                        "boost": 0.5,
                    }
                },
                "filter": {
                    "term": {
                        "title": title  # Using title for filtering
                    }
                }
            }
        }
    
        # Execute the keyword query
        keyword_results = []
        try:
            keyword_response = self.es.search(index=self.index_name, body={"query": keyword_query}, size=k)
            keyword_results = keyword_response["hits"]["hits"]
        except Exception as e:
            print(f"An error occurred during keyword search: {e}")
    
        # Execute the KNN query
        knn_results = []
        try:
            knn_response = self.es.search(index=self.index_name, body={"knn": knn_query}, size=k)
            knn_results = knn_response["hits"]["hits"]
        except Exception as e:
            print(f"An error occurred during KNN search: {e}")
    
        # Combine results
        combined_results = keyword_results + knn_results  # You may want to deduplicate or sort these results
    
        return combined_results


    def get_distinct_titles(self):
        """
        Get distinct titles from the Elasticsearch index.
    
        :return: A list of distinct titles.
        """
        try:
            # Define the aggregation query
            aggs = {
                "distinct_titles": {
                    "terms": {
                        "field": "title.keyword",  # Use ".keyword" for exact matches on keyword fields
                        "size": 1000  # Adjust this size as needed
                    }
                }
            }
    
            # Execute the aggregation query
            response = self.es.search(index=self.index_name, body={"size": 0, "aggs": aggs})
            distinct_titles = response['aggregations']['distinct_titles']['buckets']
    
            # Extract titles from the buckets
            titles_list = [bucket['key'] for bucket in distinct_titles]
            return titles_list
    
        except Exception as e:
            print(f"An error occurred while fetching distinct titles: {e}")
            return []


# Usage example
# Assuming you have a list of reviews and a model for encoding
indexer = ReviewIndexer(model=model)  # Pass your model here
indexer.create_index()  # Ensure the index is created first

# Sample input for indexing
# reviews = [
#     {
#         'appid': '721180',
#         'timestamp_query': 1727608376,
#         'title': 'Dustborn',
#         'recommendationid': '173934401',
#         'author.steamid': '76561198028269344',
#         'author.playtimeforever': None,
#         'author.playtime_last_two_weeks': 0,
#         'author.playtime_at_review': 90,
#         'author.last_played': 1725140143,
#         'language': 'english',
#         'review': 'I went into this with an open mind...',
#         'voted_up': False,
#         'votes_up': 401,
#         'timestamp_created': 1725142580,
#         'timestamp_updated': 1725142580,
#         'title_vector': [],  # Initial placeholders
#         'review_vector': [],
#         'title_review_vector': []
#     }
# ]

# # Prepare and index documents
# indexer.prepare_and_index_documents(reviews)

# # Fetch and print the first documents after indexing
# first_documents = indexer.fetch_first_documents(size=10)
# if first_documents:
#     for doc in first_documents:
#         print(doc['_source'])  # Print the document details


Successfully connected to Elasticsearch.
Deleted index: steam-reviews (if it existed)
Created index: steam-reviews


In [15]:
# Directory containing the data files
data_dir = os.path.abspath('../reviews-assistant/data/reviews')

# Initialize an empty list to hold all reviews
reviews = []

# List objects in the directory
objects_in_directory = os.listdir(data_dir)

# Iterate over the files in the directory
for obj in objects_in_directory:
    if obj.endswith('.json'):  # Check if the file is a JSON file
        file_path = os.path.join(data_dir, obj)
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            # Load the reviews from the JSON file
            file_reviews = json.load(jsonfile)
            reviews.extend(file_reviews)  # Append reviews to the main list
# Print the first i reviews
i = 2  # Change this to print more reviews if needed
for review in reviews[:i]:
    print(f"Author ID: {review['author.steamid']}")
    print(f"Review: {review.get('review', 'No text')}")
    print(f"Timestamp Created: {review['timestamp_created']}")
    print("-" * 79)

Author ID: 76561198420943538
Review: ---{ Graphics }---
✅ You forget what reality is
☐ Beautiful
☐ Good
☐ Decent
☐ Bad
☐ You will get eye cancer
☐ Get a pepper spray for your eye instead

---{ Gameplay }---
☐ Won’t ever touch any other game anymore
✅ Very good
☐ Good
☐ It's just gameplay
☐ Mehh
☐ Watch paint dry instead
☐ Tic Tac toe is better

---{ Audio }---
☐ Eargasm
✅ Very good
☐ Good
☐ Not too bad
☐ Bad
☐ I'm now deaf

---{ Audience }---
☐ Kids
✅Teens
✅ Adults
☐ Grandma

---{ PC Requirements }---
☐ Check if you can run paint
☐ Potato
☐ Decent
✅ Fast
☐ Rich boi
☐ Ask NASA if they have a spare computer
☐ Search the galaxy for dark matter fuel to run

---{ Difficulty }---
☐ Just press 'W'
☐ Easy
✅ Easy to learn / Hard to master
☐ Significant brain usage
☐ Difficult
☐ Dark Souls

---{ Grind }---
☐ Nothing to grind
☐ Only if u care about leaderboards/ranks
✅ Isn't necessary to progress
☐ Average grind level
☐ Too much grind
☐ You'll need a second life for grinding

---{ Story }---
☐ No

In [16]:
len(reviews)

1782

In [17]:
# Prepare and index documents
indexer.prepare_and_index_documents(reviews)

100%|██████████| 1782/1782 [02:45<00:00, 10.79it/s]


In [18]:
# print(doc['_source'].keys())

In [19]:
# Fetch and print the first documents after indexing
# first_documents = indexer.fetch_first_documents(size=10)
# if first_documents:
#     for doc in first_documents:
#         print(doc['_source'])  # Print the document details

In [20]:
# Create an instance of ReviewIndexer
indexer = ReviewIndexer()

# Get distinct titles
distinct_titles = indexer.get_distinct_titles()

# Print distinct titles
for title in distinct_titles:
    print(title)

Successfully connected to Elasticsearch.


In [21]:
# Usage example
indexer = ReviewIndexer(model=model)  # Ensure your model is passed here
# indexer.create_index()  # Ensure the index is created first

# Sample input for querying
title = "God of War: Ragnarok"  # Example game title
query = 'Is God of War: Ragnarok a game for kids?'

# Perform the query
results = indexer.query_reviews(title, query)

# Convert the results to a pandas DataFrame
if results:  # Check if results are not empty
    # Extract the source data from the Elasticsearch response
    data = [doc['_source'] for doc in results]
    
    # Create a DataFrame
    df_results = pd.DataFrame(data)
    
    # Set pandas options to display full content of the 'review' column
    # pd.set_option('display.max_colwidth', None)  # or use a larger number to set a specific width
    # pd.set_option('display.max_rows', None)  # Show all rows if needed
    pd.reset_option('all')

    # Display the DataFrame
    # print(df_results.head())  # or use df_results to view the entire DataFrame
else:
    print("No results found.")

Successfully connected to Elasticsearch.


  keyword_response = self.es.search(index=self.index_name, body={"query": keyword_query}, size=k)
  knn_response = self.es.search(index=self.index_name, body={"knn": knn_query}, size=k)
  pd.reset_option('all')


In [22]:
df_results

Unnamed: 0,appid,timestamp_query,title,recommendationid,author.steamid,author.playtimeforever,author.playtime_last_two_weeks,author.playtime_at_review,author.last_played,language,review,voted_up,votes_up,timestamp_created,timestamp_updated,title_vector,review_vector,title_review_vector
0,2322010,1727608376,God of War: Ragnarok,175347860,76561198354466742,0,827,250,1727081833,english,"to God of War: Ragnarok Dev. Team,\n\nI’m writ...",True,989,1726803482,1726816251,"[-0.030018333345651627, 0.03668512776494026, -...","[0.07209023833274841, -0.03378300368785858, -0...","[0.07215426117181778, -0.027821576222777367, -..."
1,2322010,1727608376,God of War: Ragnarok,175313650,76561198053124774,0,80,72,1727116275,english,God of War Ragnarok could have been an unforge...,False,1080,1726761245,1726884791,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.027094047516584396, -0.07563106715679169, ...","[-0.029218800365924835, -0.06694116443395615, ..."
2,2322010,1727608376,God of War: Ragnarok,175731340,76561199237205683,0,3321,2062,1727606415,english,"If you played God of War (2018) and liked it, ...",True,11,1727249712,1727249712,"[-0.030018333345651627, 0.03668512776494026, -...","[0.011011939495801926, -0.039762064814567566, ...","[0.018289970234036446, -0.027919186279177666, ..."
3,2322010,1727608376,God of War: Ragnarok,175635671,76561199170479681,0,1555,303,1727565301,english,Why is this 100% single player experience forc...,False,261,1727120061,1727217923,"[-0.030018333345651627, 0.03668512776494026, -...","[0.009475771337747574, -0.08857275545597076, 0...","[0.02115711197257042, -0.08730130642652512, 0...."
4,2322010,1727608376,God of War: Ragnarok,175320197,76561198342881763,0,1011,60,1727471180,english,"The days of searching for ""Spider-Man PC, Unch...",True,401,1726767566,1726767566,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.05745435878634453, -0.029408765956759453, ...","[-0.049518052488565445, -0.01963861472904682, ..."
5,2322010,1727608376,God of War: Ragnarok,175731340,76561199237205683,0,3321,2062,1727606415,english,"If you played God of War (2018) and liked it, ...",True,11,1727249712,1727249712,"[-0.030018333345651627, 0.03668512776494026, -...","[0.011011939495801926, -0.039762064814567566, ...","[0.018289970234036446, -0.027919186279177666, ..."
6,2322010,1727608376,God of War: Ragnarok,175310996,76561198124407909,0,4221,6,1727449949,english,"[h1]A Great Sequel, Held Back by Woke[/h1]\n\n...",True,384,1726758795,1726764427,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.035042811185121536, -0.041156742721796036,...","[-0.02929731272161007, -0.034129079431295395, ..."
7,2322010,1727608376,God of War: Ragnarok,175699724,76561198089929485,0,2681,2605,1727214509,english,after hearing so many negative things about ra...,True,19,1727204312,1727204312,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.021168339997529984, -0.013783842325210571,...","[-0.00749595882371068, 0.014234294183552265, 0..."
8,2322010,1727608376,God of War: Ragnarok,175324218,76561198191868681,0,695,68,1727606965,english,"Loved the first God of War, played it multiple...",True,3,1726771543,1726772222,"[-0.030018333345651627, 0.03668512776494026, -...","[0.05148572474718094, -0.01094889733940363, -5...","[0.02400249056518078, -0.0324452705681324, -0...."
9,2322010,1727608376,God of War: Ragnarok,175736674,76561197984102076,0,568,568,1727002220,english,a children's game for little babies. every puz...,False,13,1727259027,1727259092,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.05048228055238724, -0.016842171549797058, ...","[-0.03342920169234276, -0.018790561705827713, ..."


In [23]:
# Usage example
indexer = ReviewIndexer(model=model)  # Ensure your model is passed here
# indexer.create_index()  # Ensure the index is created first

# Sample input for querying
title = "God of War: Ragnarok"  # Example game title
query = 'Is God of War: Ragnarok a game for kids?'

# Perform the hybrid query using title_review_vector
results = indexer.query_reviews_hybrid(title, query, vector_field="title_review_vector")

# Convert the results to a pandas DataFrame
if results:  # Check if results are not empty
    # Extract the source data from the Elasticsearch response
    data = [doc['_source'] for doc in results]
    
    # Create a DataFrame
    df_results = pd.DataFrame(data)
    
    # Set pandas options to display full content of the 'review' column
    # pd.set_option('display.max_colwidth', None)  # or use a larger number to set a specific width
    # pd.set_option('display.max_rows', None)  # Show all rows if needed
    pd.reset_option('all')

    # Display the DataFrame
    # print(df_results.head())  # or use df_results to view the entire DataFrame
else:
    print("No results found.")

Successfully connected to Elasticsearch.


  keyword_response = self.es.search(index=self.index_name, body={"query": keyword_query}, size=k)
  knn_response = self.es.search(index=self.index_name, body={"knn": knn_query}, size=k)
  pd.reset_option('all')


In [24]:
df_results

Unnamed: 0,appid,timestamp_query,title,recommendationid,author.steamid,author.playtimeforever,author.playtime_last_two_weeks,author.playtime_at_review,author.last_played,language,review,voted_up,votes_up,timestamp_created,timestamp_updated,title_vector,review_vector,title_review_vector
0,2322010,1727608376,God of War: Ragnarok,175347860,76561198354466742,0,827,250,1727081833,english,"to God of War: Ragnarok Dev. Team,\n\nI’m writ...",True,989,1726803482,1726816251,"[-0.030018333345651627, 0.03668512776494026, -...","[0.07209023833274841, -0.03378300368785858, -0...","[0.07215426117181778, -0.027821576222777367, -..."
1,2322010,1727608376,God of War: Ragnarok,175313650,76561198053124774,0,80,72,1727116275,english,God of War Ragnarok could have been an unforge...,False,1080,1726761245,1726884791,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.027094047516584396, -0.07563106715679169, ...","[-0.029218800365924835, -0.06694116443395615, ..."
2,2322010,1727608376,God of War: Ragnarok,175731340,76561199237205683,0,3321,2062,1727606415,english,"If you played God of War (2018) and liked it, ...",True,11,1727249712,1727249712,"[-0.030018333345651627, 0.03668512776494026, -...","[0.011011939495801926, -0.039762064814567566, ...","[0.018289970234036446, -0.027919186279177666, ..."
3,2322010,1727608376,God of War: Ragnarok,175635671,76561199170479681,0,1555,303,1727565301,english,Why is this 100% single player experience forc...,False,261,1727120061,1727217923,"[-0.030018333345651627, 0.03668512776494026, -...","[0.009475771337747574, -0.08857275545597076, 0...","[0.02115711197257042, -0.08730130642652512, 0...."
4,2322010,1727608376,God of War: Ragnarok,175320197,76561198342881763,0,1011,60,1727471180,english,"The days of searching for ""Spider-Man PC, Unch...",True,401,1726767566,1726767566,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.05745435878634453, -0.029408765956759453, ...","[-0.049518052488565445, -0.01963861472904682, ..."
5,2322010,1727608376,God of War: Ragnarok,175736674,76561197984102076,0,568,568,1727002220,english,a children's game for little babies. every puz...,False,13,1727259027,1727259092,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.05048228055238724, -0.016842171549797058, ...","[-0.03342920169234276, -0.018790561705827713, ..."
6,2322010,1727608376,God of War: Ragnarok,175723966,76561198032937507,0,958,667,1727565176,english,absolutely love this game! 10x better than the...,True,11,1727236740,1727236740,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.05623815953731537, 0.037968963384628296, 0...","[-0.04380227252840996, 0.007600828073918819, -..."
7,2322010,1727608376,God of War: Ragnarok,175324218,76561198191868681,0,695,68,1727606965,english,"Loved the first God of War, played it multiple...",True,3,1726771543,1726772222,"[-0.030018333345651627, 0.03668512776494026, -...","[0.05148572474718094, -0.01094889733940363, -5...","[0.02400249056518078, -0.0324452705681324, -0...."
8,2322010,1727608376,God of War: Ragnarok,175692493,76561198329851243,0,2684,2461,1727287811,english,"This is more then just a game, it's an experie...",True,12,1727196752,1727196752,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.0069306050427258015, -0.038518667221069336...","[-0.01866106316447258, -0.05488935485482216, -..."
9,2322010,1727608376,God of War: Ragnarok,175697832,76561198263367035,0,3136,2494,1727477460,english,This is a great game. Everything is perfect ex...,False,17,1727202169,1727202169,"[-0.030018333345651627, 0.03668512776494026, -...","[-0.03265281021595001, -0.031750183552503586, ...","[-0.06248420476913452, -0.029605818912386894, ..."


In [25]:
import json
import os

# Define the data directory path
data_dir = os.path.abspath('../reviews-assistant/data/ground_truth')

# Define the input file path
input_file = os.path.join(data_dir, "ground_truth_retrieval.json")

# Check if the file exists before attempting to read
if os.path.exists(input_file):
    # Read the results from the JSON file
    with open(input_file, 'r', encoding='utf-8') as json_file:
        ground_truth = json.load(json_file)  # Load the JSON data into a Python object

    # Now ground_truth contains the data from ground_truth_retrieval.json
    print(f"Data loaded from {input_file}")
    print(f"Number of entries: {len(ground_truth)}")
    # Optionally display the first few records
    print(ground_truth[:5])  # Display the first 5 entries
else:
    print(f"File {input_file} does not exist!")

Data loaded from /home/jovyan/reviews-assistant/data/ground_truth/ground_truth_retrieval.json
Number of entries: 105
[{'appid': '2239550', 'question': 'What are the content themes present in Watch Dogs: Legion that I should be aware of?'}, {'appid': '2239550', 'question': 'Does Watch Dogs: Legion contain any excessive violence or graphic depictions?'}, {'appid': '2239550', 'question': 'Are there any instances of strong language or profanity in Watch Dogs: Legion?'}, {'appid': '2239550', 'question': 'Does Watch Dogs: Legion include any substance abuse or gambling mechanics?'}, {'appid': '2239550', 'question': 'Is there any disturbing imagery or horror elements in Watch Dogs: Legion?'}]


In [26]:
ground_truth[:2]

[{'appid': '2239550',
  'question': 'What are the content themes present in Watch Dogs: Legion that I should be aware of?'},
 {'appid': '2239550',
  'question': 'Does Watch Dogs: Legion contain any excessive violence or graphic depictions?'}]

In [45]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

In [46]:
def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [151]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [152]:
def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score += 1 / (rank + 1)
                break  # Stop after finding the first relevant document

    return total_score / len(relevance_total)


In [153]:
def elastic_search_hybrid(field, query, vector, title):

    index_name='steam-reviews'
    
    knn_query = {
        "field": field,  # The vector field to search (e.g., "title_review_vector")
        "query_vector": vector,
        "k": 5,
        "num_candidates": 10000,
        "boost": 0.5,
        "filter": {
            "term": {
                "title": title  # Filter by "title" (game title)
            }
        }
    }

    keyword_query = {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["review^3", "title"],  # Replace with relevant fields
                    "type": "best_fields",
                    "boost": 0.5,
                }
            },
            "filter": {
                "term": {
                    "title": title  # Filter by "title" (game title)
                }
            }
        }
    }

    search_query = {
        "knn": knn_query,
        "query": keyword_query,
        "size": 5,
        "_source": ["review", "title", "appid"]  # Fields to return
    }

    es_results = es.search(
        index=index_name,
        body=search_query
    )
    
    result_docs = []
    
    for hit in es_results['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

In [154]:
def elastic_search_hybrid(field, query, vector, title):
    index_name = 'steam-reviews'

    # KNN Query
    knn_query = {
        "field": field,  # The vector field to search (e.g., "title_review_vector")
        "query_vector": vector,
        "k": 5,
        "num_candidates": 10000,
        "boost": 0.5
    }

    # Keyword Query
    keyword_query = {
        "bool": {
            "must": [
                {
                    "multi_match": {
                        "query": query,
                        "fields": ["review^3", "title"],  # Fields for keyword search
                        "type": "best_fields",
                        "boost": 0.5
                    }
                }
            ],
            "filter": {
                "term": {
                    "title": title  # Filter by game title
                }
            }
        }
    }

    # Execute the KNN search first
    knn_search_query = {
        "knn": knn_query,
        "_source": ["review", "title", "appid"],  # Fields to return
        "size": 5
    }

    try:
        knn_results = es.search(
            index=index_name,
            body=knn_search_query
        )
        knn_docs = [hit['_source'] for hit in knn_results['hits']['hits']]
    except Exception as e:
        print(f"An error occurred during KNN search: {e}")
        knn_docs = []

    # Execute the keyword search
    keyword_search_query = {
        "query": keyword_query,
        "_source": ["review", "title", "appid"],  # Fields to return
        "size": 5
    }

    try:
        keyword_results = es.search(
            index=index_name,
            body=keyword_search_query
        )
        keyword_docs = [hit['_source'] for hit in keyword_results['hits']['hits']]
    except Exception as e:
        print(f"An error occurred during keyword search: {e}")
        keyword_docs = []

    # Combine the results from both queries (KNN and keyword)
    combined_results = knn_docs + keyword_docs

    return combined_results


In [155]:
def elastic_search_hybrid(field, query, vector, appid):

    index_name='steam-reviews'
    
    knn_query = {
        "field": field,
        "query_vector": vector,
        "k": 5,
        "num_candidates": 10000,
        "boost": 0.5,
        "filter": {
            "term": {
                "appid": appid
            }
        }
    }

    keyword_query = {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question", "appid"],
                    "type": "best_fields",
                    "boost": 0.5,
                }
            },
            "filter": {
                "term": {
                    "appid": appid
                }
            }
        }
    }

    # reviews:
    search_query = {
        "knn": knn_query,
        "query": keyword_query,
        "size": 5,
        "_source": ["appid",
        "timestamp_query",
        "title",
        "recommendationid",
        "author.steamid",
        "author.playtimeforever",
        "author.playtime_last_two_weeks",
        "author.playtime_at_review",
        "author.last_played",
        "language",
        "review",
        "voted_up",
        "votes_up",
        "timestamp_created",
        "timestamp_updated"
        ]
    }

    es_results = es.search(
        index=index_name,
        body=search_query
    )
    
    result_docs = []
    
    for hit in es_results['hits']['hits']:
        result_docs.append(hit['_source'])

    return result_docs

In [156]:
def question_hybrid(q):
    question = q['question']
    appid = q['appid']

    v_q = model.encode(question)

    return elastic_search_hybrid('review_vector', question, v_q, appid)

In [157]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['question']
        results = search_function(q)
        relevance = [d['appid'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [158]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        print(q)
        
        # doc_id = q['question']
        doc_id = q['appid']
        
        results = search_function(q)
        print(results[-1])
        
        relevance = [d['appid'] == doc_id for d in results]
        print(relevance[-1])
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [159]:
evaluate(ground_truth, question_hybrid)

  5%|▍         | 5/105 [00:00<00:02, 45.32it/s]

{'appid': '2239550', 'question': 'What are the content themes present in Watch Dogs: Legion that I should be aware of?'}
{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs: Legion', 'recommendationid': '175787501', 'author.steamid': '76561199140202822', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 876, 'author.playtime_at_review': 2436, 'author.last_played': 1727320474, 'language': 'english', 'review': "Story – 6/10 - Overall not too much of a breathtaking story. The story could get quite repetitive, not something that is too crazy. An average story. \r\n\r\nMechanics – 9/10 - Really solid mechanics. Things like unlocking new tech, parkour, and the seamless hacking throughout the game is very smooth and enjoyable, lovely mechanics. \r\n\r\nCharacters – 6/10 - Had a few great characters like Wrench and Aiden. \r\n\r\nControls – 7/10 - Great smooth controls. No complaints. \r\n\r\nGraphics – 8/10 - Beautiful graphics that really capture London. Was

 15%|█▌        | 16/105 [00:00<00:01, 49.87it/s]

{'appid': '243470', 'timestamp_query': 1727608376, 'title': 'Watch_Dogs', 'recommendationid': '175716824', 'author.steamid': '76561199388285711', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 2386, 'author.last_played': 1725148835, 'language': 'english', 'review': 'Watch dogs is definitely in my top 5 best games I have ever played, I really liked the overall story, I also liked the many "side quests" you can do in the game (my favourites are the fixer contracts and the one where you intercept criminals), the gameplay is pretty solid, the movement is smooth and the graphics are stunning even for today\'s standards, though I have to say the lack of modding capability brings the game down a bit.\n\nI recommend waiting for a promotion when the full game including DLC\'s are around £5, and for some reason multiplayer doesn\'t work when you buy the game through steam', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727225641, 'timestam

 25%|██▍       | 26/105 [00:00<00:01, 41.18it/s]

{'appid': '1680880', 'timestamp_query': 1727608376, 'title': 'Forspoken', 'recommendationid': '175846278', 'author.steamid': '76561199203223557', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 15, 'author.playtime_at_review': 115, 'author.last_played': 1726956105, 'language': 'english', 'review': "if forspoken has a million fans, i'm one of them\nif forspoken has one fan, than i am that one.\nif forspoken has no fans, that means i'm dead.", 'voted_up': True, 'votes_up': 3, 'timestamp_created': 1727399058, 'timestamp_updated': 1727399058}
True
{'appid': '1680880', 'question': 'Does Forspoken contain any graphic violence or disturbing imagery?'}
{'appid': '1680880', 'timestamp_query': 1727608376, 'title': 'Forspoken', 'recommendationid': '175057713', 'author.steamid': '76561198017438051', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 228, 'author.last_played': 1719797750, 'language': 'english', 'review': "Horrible gameplay 

 34%|███▍      | 36/105 [00:00<00:01, 44.88it/s]

{'appid': '2369390', 'timestamp_query': 1727608376, 'title': 'Far Cry 6', 'recommendationid': '175768817', 'author.steamid': '76561197970790375', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 206, 'author.playtime_at_review': 8058, 'author.last_played': 1727295531, 'language': 'english', 'review': "Gorgeous graphics cannot improve a game with a poor plot, repetitive gameplay and characters I just don't care that much about. Did I ever tell you the definition of insanity? Ubisoft churning out the same game 4 times after Farcry 2. Bethesda and Obsidian have wiped the floor with Ubisoft with the immersive Fallout series.", 'voted_up': False, 'votes_up': 3, 'timestamp_created': 1727294210, 'timestamp_updated': 1727294210}
True
{'appid': '2369390', 'question': 'Is there any indication that the game promotes discriminatory themes or ideologies?'}
{'appid': '2369390', 'timestamp_query': 1727608376, 'title': 'Far Cry 6', 'recommendationid': '175768817', 'author.steamid': '7656

 44%|████▍     | 46/105 [00:01<00:01, 46.05it/s]

{'appid': '1086940', 'timestamp_query': 1727608376, 'title': "Baldur's Gate 3", 'recommendationid': '175156733', 'author.steamid': '76561198963772352', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 4075, 'author.playtime_at_review': 24010, 'author.last_played': 1727605416, 'language': 'english', 'review': "Baldurs Gate 3 is by far one of the best games I've ever played in my life so far...I could stop there but I'll elaborate. This game just warms and hugs my D&D nerd heart...for this a game made for fans of D&D and every fantasy lover out there. The game lore is rich with so much details and bit and bobs to discover, the story is epic and will stay with me for the rest of life. Some of the best turn-based combat I've ever seen in a video game you have so many options of what you can do and you don't have to simply attack you can throw shit, jump, shove, light your sword on fire, throw a barrel of explosives, heal your ally by throwing a potion at them (it just works l

 53%|█████▎    | 56/105 [00:01<00:01, 44.22it/s]

{'appid': '1832040', 'timestamp_query': 1727608376, 'title': 'Flintlock: The Siege of Dawn', 'recommendationid': '174837953', 'author.steamid': '76561197995199618', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 530, 'author.last_played': 1726197057, 'language': 'english', 'review': "Really fun game, no complaints. Plays like the newer God of War games mixed with a pinch of Souls. Not nearly as hard as Souls games with a much more comprehensible story. Really enjoyed this, doesn't overstay its welcome either.", 'voted_up': True, 'votes_up': 8, 'timestamp_created': 1726197658, 'timestamp_updated': 1726197658}
True
{'appid': '1832040', 'question': 'Are there any elements in this game that promote gambling or mimic gambling mechanics?'}
{'appid': '1832040', 'timestamp_query': 1727608376, 'title': 'Flintlock: The Siege of Dawn', 'recommendationid': '174837953', 'author.steamid': '76561197995199618', 'author.playtimeforever': 0, 'author.playti

 58%|█████▊    | 61/105 [00:01<00:01, 35.61it/s]

{'appid': '552520', 'timestamp_query': 1727608376, 'title': 'Far Cry 5', 'recommendationid': '175992224', 'author.steamid': '76561198115721523', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 225, 'author.playtime_at_review': 225, 'author.last_played': 1727575524, 'language': 'english', 'review': "Far Cry 4 was a great game. Most of the characters were pretty interesting, there were some puzzles to do, some interesting challenges, you could walk from one point of the mission to another without feeling the need to get a vehicle because things were close to each other. Albeit not the best open world, it was better designed than many others.\r\n\r\nFar Cry 5, on the other hand, is an open world FPS with a very big and a very empty map. All missions seem to be over 1 km away from each other, and without a vehicle or fast travel, getting from point A to point B is daunting. Not only is the game unnecessarily big, it's also empty. At best you'll get to see a barricade and may

 68%|██████▊   | 71/105 [00:01<00:00, 39.93it/s]

{'appid': '1496790', 'timestamp_query': 1727608376, 'title': 'Gotham Knights', 'recommendationid': '175211922', 'author.steamid': '76561198052281702', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 168, 'author.playtime_at_review': 313, 'author.last_played': 1726622750, 'language': 'english', 'review': "I really wanted to like this game but it's just terrible. I might give it another shot, but the story isn't great, the combat sucks, the movement isn't nice and fluid like in every other game. It's like a bad Spiderman knock off.\r\n\r\nIt's online required, excessive loading, yeah, I'm going to go play something else. Maybe it's fun with a friend, I dunno but I doubt it tbh, the combat is pretty stale.", 'voted_up': False, 'votes_up': 1, 'timestamp_created': 1726622898, 'timestamp_updated': 1726622898}
True
{'appid': '1496790', 'question': 'Are there any mechanics related to gambling or in-game purchases that could be concerning?'}
{'appid': '1496790', 'timestamp_query'

 79%|███████▉  | 83/105 [00:01<00:00, 45.08it/s]

{'appid': '582160', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Origin", 'recommendationid': '174624611', 'author.steamid': '76561198186248867', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 4182, 'author.last_played': 1722465056, 'language': 'english', 'review': "Assassin's Creed Origins is a great game with a great story who’s held back from going from “a great game” to “a timeless classic” by a weak narrative last arc and a repetitive gameplay once you’ve attained the higher levels. \nIt's still a really good game with beatiful environments and a really good cast of characters that are sublimed by a voice acting as close as perfection as it can get.\n", 'voted_up': True, 'votes_up': 2, 'timestamp_created': 1725916353, 'timestamp_updated': 1725916353}
True
{'appid': '582160', 'question': 'What is the game length, and does it have any features that might encourage addiction or excessive play?'}
{'appid': '582160', 'timesta

 89%|████████▊ | 93/105 [00:02<00:00, 45.26it/s]

{'appid': '2208920', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Valhalla", 'recommendationid': '174726349', 'author.steamid': '76561198157744810', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 8221, 'author.last_played': 1726053949, 'language': 'english', 'review': "I bought the Complete Edition of AC Valhalla for $40 as many people on the internet recommended it. One thing I took note of was the constant mixed reviews regarding the game play and the storyline that was given to the players which was repetitive and boring as compared to the earlier versions of Assassins Creed (E.g Black flag). So I decided to just play it with an unbiased view of the game as I have with other Assassins Creed games.\n\nHere's what I love about this game - \n\nThe Long History of the game: Although they were straying far (hack and slash, assassinations, and the likes) from the original feels from the older versions like AC Unity or Black flag

 93%|█████████▎| 98/105 [00:02<00:00, 45.59it/s]

{'appid': '812140', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Odyssey", 'recommendationid': '175493044', 'author.steamid': '76561198287547255', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 10, 'author.playtime_at_review': 2377, 'author.last_played': 1726996240, 'language': 'english', 'review': 'Very nice game, and though it is too big, i still remember the shock when the whole map is opened for the first time', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726958393, 'timestamp_updated': 1726958393}
True
{'appid': '812140', 'question': "What are the potential addictive elements in Assassin's Creed Odyssey that could encourage excessive gameplay?"}
{'appid': '812140', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Odyssey", 'recommendationid': '174948606', 'author.steamid': '76561198035391503', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 4337, 'author.last_played': 1674829362, 'langu

100%|██████████| 105/105 [00:02<00:00, 43.73it/s]

{'appid': '794540', 'timestamp_query': 1727608376, 'title': 'Neo Cab', 'recommendationid': '173843207', 'author.steamid': '76561198126586236', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 878, 'author.last_played': 1725053394, 'language': 'english', 'review': 'A fantastic visual novel with some of the most relatable and witty dialogue I\'ve seen in any game. A mix of political commentary on the advances of technology, how that affects our humanity and the struggle to maintain a connection with people in an ever-evolving social scene. The fact that you see the world from your car\'s seat through the eyes of your passengers, each with their own bias, wants and needs gives you a very nuanced and believable description of the world. In other words, worldbuilding at its finest, which fits perfectly with the cyberpunk theme! I can\'t help but fell some Taxi Driver vibes, but in a less violent, more wholesome way.\n\nAlmost each passenger/cust




{'hit_rate': 1.0, 'mrr': 1.0}

In [160]:
def text_hybrid(q):
    question = q['question']
    appid = q['appid']

    v_q = model.encode(question)

    return elastic_search_hybrid('title_vector', question, v_q, appid)

In [161]:
evaluate(ground_truth, text_hybrid)

  0%|          | 0/105 [00:00<?, ?it/s]

{'appid': '2239550', 'question': 'What are the content themes present in Watch Dogs: Legion that I should be aware of?'}
{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs: Legion', 'recommendationid': '175051728', 'author.steamid': '76561198108355517', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 775, 'author.last_played': 1726081501, 'language': 'english', 'review': "As a big WatchDogs fan I must say, Legion is quite disappointing.It’s a decent game on its own, but as part of the franchise it’s certainly the weakest. The story took the wrong turn somewhere, dropping the lore and themes of the first two games completely, plus lots of gameplay features which made the first two games stand out were dropped completely.It just isn't WatchDogs anymore.", 'voted_up': False, 'votes_up': 8, 'timestamp_created': 1726432099, 'timestamp_updated': 1726432099}
True
{'appid': '2239550', 'question': 'Does Watch Dogs: Legion cont

  5%|▍         | 5/105 [00:00<00:02, 42.67it/s]

{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs: Legion', 'recommendationid': '175051728', 'author.steamid': '76561198108355517', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 775, 'author.last_played': 1726081501, 'language': 'english', 'review': "As a big WatchDogs fan I must say, Legion is quite disappointing.It’s a decent game on its own, but as part of the franchise it’s certainly the weakest. The story took the wrong turn somewhere, dropping the lore and themes of the first two games completely, plus lots of gameplay features which made the first two games stand out were dropped completely.It just isn't WatchDogs anymore.", 'voted_up': False, 'votes_up': 8, 'timestamp_created': 1726432099, 'timestamp_updated': 1726432099}
True
{'appid': '2239550', 'question': 'Is there any disturbing imagery or horror elements in Watch Dogs: Legion?'}
{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs:

 10%|▉         | 10/105 [00:00<00:02, 46.20it/s]

{'appid': '243470', 'timestamp_query': 1727608376, 'title': 'Watch_Dogs', 'recommendationid': '175789409', 'author.steamid': '76561199039812852', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1983, 'author.playtime_at_review': 1460, 'author.last_played': 1727579851, 'language': 'english', 'review': 'The only way to enjoy a ubislop game nowadays is to throw on one nearly a decade or more old. Would recommend with mods.', 'voted_up': True, 'votes_up': 1, 'timestamp_created': 1727325744, 'timestamp_updated': 1727325744}
True
{'appid': '2443720', 'question': 'Is Concord appropriate for children regarding violence or gore?'}
{'appid': '2443720', 'timestamp_query': 1727608376, 'title': 'Concord', 'recommendationid': '174050963', 'author.steamid': '76561197989869264', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1044, 'author.last_played': 1725218177, 'language': 'english', 'review': "As a Destiny 2 Crucible player, there are 

 15%|█▌        | 16/105 [00:00<00:01, 48.86it/s]

{'appid': '2443720', 'timestamp_query': 1727608376, 'title': 'Concord', 'recommendationid': '174050963', 'author.steamid': '76561197989869264', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1044, 'author.last_played': 1725218177, 'language': 'english', 'review': "As a Destiny 2 Crucible player, there are many things I like about Concord, yet I cannot in good consciousness leave a positive review due to some fundamental issues with the launch version of the game.\r\n\r\nFirst, the obvious problem is a lack of players. This means when you do find a game, you will most likely find the same players. There can be no doubt that Concord has really clicked for a dozen of players, which you will find out, when your entire team of level 2 accounts blindly rushes into the open, to get absolutely destroyed in seconds. I suppose the only upside of the long queue times, is that by the time people actually get into a game, the Steam refund window will 

 21%|██        | 22/105 [00:00<00:01, 50.06it/s]

{'appid': '1680880', 'timestamp_query': 1727608376, 'title': 'Forspoken', 'recommendationid': '174101702', 'author.steamid': '76561198134475716', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 734, 'author.last_played': 1725323334, 'language': 'english', 'review': 'i waited to buy this game because i saw a lot of bad reviews about it. picked it up on sale and honestly it is a pretty good game. combat fun and story wasnt the greatest but i thought it was pretty good. boss fights are really fun. all and all a pretty good game', 'voted_up': True, 'votes_up': 18, 'timestamp_created': 1725323496, 'timestamp_updated': 1725323496}
True
{'appid': '1680880', 'question': 'Does Forspoken contain any graphic violence or disturbing imagery?'}
{'appid': '1680880', 'timestamp_query': 1727608376, 'title': 'Forspoken', 'recommendationid': '174101702', 'author.steamid': '76561198134475716', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 

 27%|██▋       | 28/105 [00:00<00:01, 49.96it/s]

{'appid': '2369390', 'timestamp_query': 1727608376, 'title': 'Far Cry 6', 'recommendationid': '173935992', 'author.steamid': '76561198081019780', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1820, 'author.last_played': 1725408351, 'language': 'english', 'review': 'kinda boring and repeditive they give you everything you need in the first  20 min of the game no real reason to  upgrade  and seems like they  just  tryed to  put in abunch of filler   over all its playable but  its  very  meh. wouldnt pay  full price for it', 'voted_up': False, 'votes_up': 2, 'timestamp_created': 1725144416, 'timestamp_updated': 1725144416}
True
{'appid': '2369390', 'question': 'Are there any elements in Far Cry 6 that could be deemed inappropriate for children?'}
{'appid': '2369390', 'timestamp_query': 1727608376, 'title': 'Far Cry 6', 'recommendationid': '173935992', 'author.steamid': '76561198081019780', 'author.playtimeforever': 0, 'author.playtime_last_

 31%|███▏      | 33/105 [00:00<00:01, 49.23it/s]

{'appid': '315210', 'timestamp_query': 1727608376, 'title': 'Suicide Squad: Kill the Justice League', 'recommendationid': '174539768', 'author.steamid': '76561198034435234', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1754, 'author.last_played': 1725812427, 'language': 'english', 'review': "Save yourself time, trouble and money and find a cut scene movie of this game on YouTube.\n\nThough I really like the story the game play is not fun, repetitive quests to level up makes the game become very grindy very quickly.\n\nI probably brought this game 2+ months ago and though I've completed it in 29 ish hours, I kept getting so bored I just didn't want to play it after I've stopped for the day.\n\nAnd the Superman fight is just BS.", 'voted_up': False, 'votes_up': 19, 'timestamp_created': 1725812803, 'timestamp_updated': 1725812803}
True
{'appid': '315210', 'question': 'Does this game feature any graphic depictions of blood or gore that migh

 36%|███▌      | 38/105 [00:00<00:01, 47.63it/s]

{'appid': '1086940', 'timestamp_query': 1727608376, 'title': "Baldur's Gate 3", 'recommendationid': '174018405', 'author.steamid': '76561198800043197', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 601, 'author.playtime_at_review': 18391, 'author.last_played': 1727505590, 'language': 'english', 'review': "Best Game I have ever played. \n\nI knew about D&D before playing this but I didn't enjoy it because of my lack of imagination, but this game did a tremendous job at getting me to like the world of D&D more even with my lack of imagination I now see why so many people love D&D and I'm even considering to learn more about it outside of this game. \n\nIf you enjoy story driven games you must give this game a chance, that's the only reason I did and I don't regret it. \n\nLooking forward the update for the mods to go for another campaign.", 'voted_up': True, 'votes_up': 66, 'timestamp_created': 1725223295, 'timestamp_updated': 1725223295}
True
{'appid': '1086940', 'quest

 41%|████      | 43/105 [00:00<00:01, 45.59it/s]

{'appid': '1086940', 'timestamp_query': 1727608376, 'title': "Baldur's Gate 3", 'recommendationid': '174018405', 'author.steamid': '76561198800043197', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 601, 'author.playtime_at_review': 18391, 'author.last_played': 1727505590, 'language': 'english', 'review': "Best Game I have ever played. \n\nI knew about D&D before playing this but I didn't enjoy it because of my lack of imagination, but this game did a tremendous job at getting me to like the world of D&D more even with my lack of imagination I now see why so many people love D&D and I'm even considering to learn more about it outside of this game. \n\nIf you enjoy story driven games you must give this game a chance, that's the only reason I did and I don't regret it. \n\nLooking forward the update for the mods to go for another campaign.", 'voted_up': True, 'votes_up': 66, 'timestamp_created': 1725223295, 'timestamp_updated': 1725223295}
True
{'appid': '2322010', 'quest

 46%|████▌     | 48/105 [00:01<00:01, 44.17it/s]

{'appid': '2322010', 'timestamp_query': 1727608376, 'title': 'God of War: Ragnarok', 'recommendationid': '175347860', 'author.steamid': '76561198354466742', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 827, 'author.playtime_at_review': 250, 'author.last_played': 1727081833, 'language': 'english', 'review': "to God of War: Ragnarok Dev. Team,\n\nI’m writing to bring to your attention an issue regarding the current PC port of God of War: Ragnarok that affects users with 4GB VRAM graphics cards. When attempting to launch the game on such systems, an error message is displayed, indicating that there is not enough VRAM, and the game closes without allowing users to proceed. It is my humble request to kindly reconsider or remove this VRAM limitation. From personal experience and community feedback, I can confirm that the game runs well on certain integrated graphics processors (such as the Ryzen 3 3200G, Ryzen 5 3400G, and Ryzen 7 5700G), which are generally weaker than man

 50%|█████     | 53/105 [00:01<00:01, 45.30it/s]

{'appid': '1832040', 'timestamp_query': 1727608376, 'title': 'Flintlock: The Siege of Dawn', 'recommendationid': '174536481', 'author.steamid': '76561198355764501', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1414, 'author.last_played': 1725810005, 'language': 'english', 'review': "Has it's shortcomings, but overall fun/satisfying combat with great voice acting. Did have a few game crashes, but loading back in was fast. Game did feel short. Could benefit from NG+. Overall would recommend. Good for a AA game.", 'voted_up': True, 'votes_up': 2, 'timestamp_created': 1725810181, 'timestamp_updated': 1725810181}
True
{'appid': '1832040', 'question': 'Are there any elements in this game that promote gambling or mimic gambling mechanics?'}
{'appid': '1832040', 'timestamp_query': 1727608376, 'title': 'Flintlock: The Siege of Dawn', 'recommendationid': '174536481', 'author.steamid': '76561198355764501', 'author.playtimeforever': 0, 'author.play

 55%|█████▌    | 58/105 [00:01<00:01, 46.21it/s]

{'appid': '304390', 'timestamp_query': 1727608376, 'title': 'FOR HONOR', 'recommendationid': '175748495', 'author.steamid': '76561198148864047', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 65685, 'author.last_played': 1719249076, 'language': 'english', 'review': "Listen. Imma keep it real with all of you who is reading this. The concept of the game is not bad and the game-play itself is one of a kind, hell it has the potential to be the best of it's kind (Swordplay) but the overall mechanics and balance of the game is trash. First thing first, The majority of the people who play this game are Viking lovers, so that faction gets the most upgrades and buffs of all kinds, while everyone else gets Nerf. Now I play the samurai faction, Orochi (a samurai hero) who literally gets Nerf after every patch, for really no reason at all while OP characters continuously gets buffs. It's crazy. Here's an example of the unfair balance, Berserker, a vi

 60%|██████    | 63/105 [00:01<00:00, 46.69it/s]

{'appid': '552520', 'timestamp_query': 1727608376, 'title': 'Far Cry 5', 'recommendationid': '175147123', 'author.steamid': '76561198214420220', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 65, 'author.playtime_at_review': 1990, 'author.last_played': 1726553446, 'language': 'english', 'review': "TL-DR:  If you like GTA5 and don't mind some abused religious references, you will enjoy this game.\r\n\r\nGame is reasonably stable, but if you jump to different things, the aging game system starts to show its weaknesses.\r\n\r\nRequires a Ubisoft account to access the game properly. I found this to be disappointing. I don't want/need yet another storefront on my computer to try to toss advertisements my way.\r\n\r\nThe pacing is a bit forced, but there are lots of interesting characters to meet and deal with in this game. There are a lot of religious references here. It is implemented as a way to guide the personas of the BBG and his various henchmen.\r\n\r\nThe guns are in

 66%|██████▌   | 69/105 [00:01<00:00, 48.01it/s]

{'appid': '1496790', 'timestamp_query': 1727608376, 'title': 'Gotham Knights', 'recommendationid': '174266225', 'author.steamid': '76561198072033688', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1180, 'author.last_played': 1725525394, 'language': 'english', 'review': '[h1]Superheroes in training[/h1]\n\nAn experience that might be enjoyable with a coop partner, but not worth it if you are going solo.\n\nRepetitiveness is the keyword. There are many activities or "patrols" as they call them, to gain levels and grind materials for equipment crafting. They all turn out pretty much samey, in that it all ends in an all out brawl, pummeling enemies that feel like punchbags, soaking a lot of damage. Yet these patrols are short and way too simplistic in design. More handcrafted experiences would\'ve been nice. It felt like a copy-paste each night.\n\nWithout knowing the actual intent of the designers, it felt like one of those MMORPGs that has

 70%|███████   | 74/105 [00:01<00:00, 48.08it/s]

{'appid': '2698940', 'timestamp_query': 1727608376, 'title': 'The Crew Motorfest', 'recommendationid': '174702196', 'author.steamid': '76561199148282477', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1337, 'author.playtime_at_review': 1356, 'author.last_played': 1727589658, 'language': 'english', 'review': '(Yap fest incoming) (428 hours on Xbox) When I heard this game was coming out, the only reason I was hyped about it was because it was based on one of my favorite Hawaiian Islands, but I could tell they took one too many pages out of horizons book, on top of the $70 price tag and the removal of The Crew 1, I was pretty disinterested pretty quickly.\n\nA few weeks passed by and I was getting pretty tired of Forza Horizon 5, I started seeing videos about how lackluster the game really was and how flavorless the map is, not to mention the reused content and constant flow of stupid car passes that seem cheap, but add up over time. I love Arcade Racers and having to dro

 75%|███████▌  | 79/105 [00:01<00:00, 47.56it/s]

{'appid': '582160', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Origin", 'recommendationid': '174100836', 'author.steamid': '76561198261392141', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 85, 'author.playtime_at_review': 3873, 'author.last_played': 1726873662, 'language': 'english', 'review': "Really good open world assassin's creed game. After this is where they went down hill for me. One of the main cons for me in this game are the damn prices for DLC. $40?! thats a whole nother game. What stopped me from playing more and getting all the achievements. ill just wait for a sale to eventually happen", 'voted_up': True, 'votes_up': 3, 'timestamp_created': 1725322207, 'timestamp_updated': 1725322207}
True
{'appid': '1817070', 'question': 'Is Marvel’s Spider-Man Remastered appropriate for children, considering it may include any harmful content?'}
{'appid': '1817070', 'timestamp_query': 1727608376, 'title': 'Marvel’s Spider-Man Remastered', 'recommendation

 80%|████████  | 84/105 [00:01<00:00, 47.08it/s]

{'appid': '1817070', 'timestamp_query': 1727608376, 'title': 'Marvel’s Spider-Man Remastered', 'recommendationid': '175839114', 'author.steamid': '76561197960799839', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 652, 'author.playtime_at_review': 2419, 'author.last_played': 1727387986, 'language': 'english', 'review': 'Really takes you back to where just swinging around the town as spider man is fun.', 'voted_up': True, 'votes_up': 2, 'timestamp_created': 1727388022, 'timestamp_updated': 1727388022}
True
{'appid': '2208920', 'question': "What kind of violent content is present in Assassin's Creed Valhalla?"}
{'appid': '2208920', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Valhalla", 'recommendationid': '174992328', 'author.steamid': '76561198087537828', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 251, 'author.playtime_at_review': 5061, 'author.last_played': 1726815822, 'language': 'english', 'review': 'As always another banger from ubis

 85%|████████▍ | 89/105 [00:01<00:00, 45.65it/s]

{'appid': '2208920', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Valhalla", 'recommendationid': '174992328', 'author.steamid': '76561198087537828', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 251, 'author.playtime_at_review': 5061, 'author.last_played': 1726815822, 'language': 'english', 'review': 'As always another banger from ubisoft. They really know how to make awesome games with plenty of content that is well worth the money. the only cons is its just a little glitchy and it doesnt tell you whether the wealth or mystery requires you to be further in the story. If you have bad internet you may have a hard time because it will crash every so often possibly even constantly.', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726376676, 'timestamp_updated': 1726376676}
True
{'appid': '721180', 'question': 'What is the overall content rating of Dustborn, and does it contain any excessive violence or gore that could be harmful to children?'}
{'appid

 90%|████████▉ | 94/105 [00:02<00:00, 45.28it/s]

{'appid': '812140', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Odyssey", 'recommendationid': '174820205', 'author.steamid': '76561197971019080', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 2073, 'author.playtime_at_review': 9550, 'author.last_played': 1727111157, 'language': 'english', 'review': 'Completed this on ps4 and just had to play it again on the deck.', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726171711, 'timestamp_updated': 1726171711}
True
{'appid': '812140', 'question': "What are the potential addictive elements in Assassin's Creed Odyssey that could encourage excessive gameplay?"}
{'appid': '812140', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Odyssey", 'recommendationid': '174820205', 'author.steamid': '76561197971019080', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 2073, 'author.playtime_at_review': 9550, 'author.last_played': 1727111157, 'language': 'english', 'review': 'Completed this on p

 94%|█████████▍| 99/105 [00:02<00:00, 41.89it/s]

{'appid': '1545560', 'timestamp_query': 1727608376, 'title': 'Shadow Gambit: The Cursed Crew', 'recommendationid': '175933124', 'author.steamid': '76561198071794257', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 957, 'author.playtime_at_review': 278, 'author.last_played': 1727602746, 'language': 'english', 'review': "I enjoy most stealth games. This is yet another one. Story wise, not my first choice, I preferred Desperados more. But it's similar gameplay and always looking for new challenges.\r\nSomehow the graphics are nicer than other games, but at the same time I find it more distracting.", 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727512147, 'timestamp_updated': 1727512147}
True
{'appid': '1545560', 'question': 'Is there any addictive gameplay mechanics in Shadow Gambit: The Cursed Crew, such as microtransactions or loot boxes that could encourage excessive play?'}
{'appid': '1545560', 'timestamp_query': 1727608376, 'title': 'Shadow Gambit: The Curse

 99%|█████████▉| 104/105 [00:02<00:00, 35.41it/s]

{'appid': '794540', 'timestamp_query': 1727608376, 'title': 'Neo Cab', 'recommendationid': '175310048', 'author.steamid': '76561199200313777', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 204, 'author.playtime_at_review': 267, 'author.last_played': 1726757626, 'language': 'english', 'review': "I wasn't expecting much when I bought it, I thought I'd like the games vibes and the world, but the story was surpisingly very gripping :}\r\n\r\nindie games sure keep surprising me, and you can tell there was much love poured into this. def reccomend.", 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726757920, 'timestamp_updated': 1726757920}
True
{'appid': '794540', 'question': 'Does the game include any explicit or suggestive sexual themes or imagery that I should be aware of?'}
{'appid': '794540', 'timestamp_query': 1727608376, 'title': 'Neo Cab', 'recommendationid': '175310048', 'author.steamid': '76561199200313777', 'author.playtimeforever': 0, 'author.playtime_las

100%|██████████| 105/105 [00:02<00:00, 44.23it/s]

{'appid': '794540', 'timestamp_query': 1727608376, 'title': 'Neo Cab', 'recommendationid': '175310048', 'author.steamid': '76561199200313777', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 204, 'author.playtime_at_review': 267, 'author.last_played': 1726757626, 'language': 'english', 'review': "I wasn't expecting much when I bought it, I thought I'd like the games vibes and the world, but the story was surpisingly very gripping :}\r\n\r\nindie games sure keep surprising me, and you can tell there was much love poured into this. def reccomend.", 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726757920, 'timestamp_updated': 1726757920}
True





{'hit_rate': 1.0, 'mrr': 1.0}

In [162]:
def question_text_hybrid(q):
    question = q['question']
    appid = q['appid']

    v_q = model.encode(question)

    return elastic_search_hybrid('title_review_vector', question, v_q, appid)

In [163]:
evaluate(ground_truth, question_text_hybrid)

  0%|          | 0/105 [00:00<?, ?it/s]

{'appid': '2239550', 'question': 'What are the content themes present in Watch Dogs: Legion that I should be aware of?'}
{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs: Legion', 'recommendationid': '175993111', 'author.steamid': '76561199316658748', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 352, 'author.playtime_at_review': 359, 'author.last_played': 1726973926, 'language': 'english', 'review': '', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727577823, 'timestamp_updated': 1727577823}
True
{'appid': '2239550', 'question': 'Does Watch Dogs: Legion contain any excessive violence or graphic depictions?'}


  5%|▍         | 5/105 [00:00<00:02, 46.07it/s]

{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs: Legion', 'recommendationid': '175534814', 'author.steamid': '76561199447171311', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 688, 'author.playtime_at_review': 2784, 'author.last_played': 1727008072, 'language': 'english', 'review': ' b', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727007114, 'timestamp_updated': 1727007114}
True
{'appid': '2239550', 'question': 'Are there any instances of strong language or profanity in Watch Dogs: Legion?'}
{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs: Legion', 'recommendationid': '174705048', 'author.steamid': '76561198403118121', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 49, 'author.last_played': 1726020439, 'language': 'english', 'review': 'It is literally everything Watch dogs shouldnt be', 'voted_up': False, 'votes_up': 1, 'timestamp_created': 1726020478, 'timestam

 10%|▉         | 10/105 [00:00<00:02, 44.95it/s]

{'appid': '243470', 'timestamp_query': 1727608376, 'title': 'Watch_Dogs', 'recommendationid': '174903499', 'author.steamid': '76561198362774266', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 121, 'author.playtime_at_review': 552, 'author.last_played': 1726774017, 'language': 'english', 'review': 'lOW RESPON', 'voted_up': False, 'votes_up': 0, 'timestamp_created': 1726278317, 'timestamp_updated': 1726278317}
True
{'appid': '2443720', 'question': 'Is Concord appropriate for children regarding violence or gore?'}
{'appid': '2443720', 'timestamp_query': 1727608376, 'title': 'Concord', 'recommendationid': '174169422', 'author.steamid': '76561197989938411', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1447, 'author.last_played': 1725647742, 'language': 'english', 'review': "Concord didn't get a proper chance. Bad marketing and art direction are defintely points as to why it didn't take off as intended. But the gameplay is so

 14%|█▍        | 15/105 [00:00<00:01, 47.02it/s]

{'appid': '2443720', 'timestamp_query': 1727608376, 'title': 'Concord', 'recommendationid': '174344517', 'author.steamid': '76561197960335317', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 143, 'author.last_played': 1725656308, 'language': 'english', 'review': "Good game. Fun mechanics, beautiful graphics, great music and sound. I love the Disney-like cutscenes and art style, it immediately feels warm and familiar. No bugs to be seen, very surprising for a new game. The theme is Guardians of the Galaxy meets Overwatch, but the gameplay is fresh and interesting. I love the timed racing mode, it reminds me of Mirror's Edge parkour. More games should have skill trials like this. \n\nI'm sad to hear about Concord's early end. I guess people don't spend much money up front on multiplayer games since so many are F2P now. I don't blame anyone for saving money, but I guess we're stuck with loot boxes and virtual currencies forever. I hope Conco

 19%|█▉        | 20/105 [00:00<00:01, 45.07it/s]

{'appid': '447040', 'timestamp_query': 1727608376, 'title': 'Watch_Dogs 2', 'recommendationid': '175685647', 'author.steamid': '76561199154817640', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1693, 'author.last_played': 1721214240, 'language': 'english', 'review': 'very fun game. the story isnt that good, but the gameplay and graphics make up for it. the side content is pretty fun too', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727190545, 'timestamp_updated': 1727190545}
True
{'appid': '1680880', 'question': 'Is Forspoken suitable for my children given the terrible dialogue mentioned in reviews?'}


 24%|██▍       | 25/105 [00:00<00:01, 43.63it/s]

{'appid': '1680880', 'timestamp_query': 1727608376, 'title': 'Forspoken', 'recommendationid': '175934375', 'author.steamid': '76561198111147540', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 2679, 'author.playtime_at_review': 2679, 'author.last_played': 1727406287, 'language': 'english', 'review': "Freaking fun af game to be playing on my RoG ALLY. Beat the game in 3 days speed running everything as my favourite game has always been the Infamous Second Son on my PS4 console.\n\nPlaying Forspoken is just like home. Sifting through the magical parkour with whatever skills I have is childsplay. \n\nThose who down votes the game are just asswacked boring classical players who never played good titles before. Storyline were good as well just as Horizon Zero Dawn.\n\nMediocre graphics unlike Horizon Zero, who the eff gives a damn about going all shiate on graphics like the PS5 pro. Its stoopid. I'd rather a good content then lame arse bling bling.", 'voted_up': True, 'votes

 33%|███▎      | 35/105 [00:00<00:01, 43.42it/s]

{'appid': '2369390', 'timestamp_query': 1727608376, 'title': 'Far Cry 6', 'recommendationid': '174413216', 'author.steamid': '76561197998630247', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 337, 'author.playtime_at_review': 1468, 'author.last_played': 1727595510, 'language': 'english', 'review': "If you enjoyed Far Cry 5 you will enjoy Far Cry 6.  It's an iteration of the formula found in FC5.  Open world, bases to conquer, smaller side quests and larger main story quests.  Many hours of entertainment.\n\nBe sure to install the Libertad mod to disable chromatic aberration and the narrow FOV in bases.  Not sure why Ubisoft decided for us that we cannot disable those in the settings menu.\n\nGodspeed, Guerrilla", 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1725687624, 'timestamp_updated': 1725687624}
True
{'appid': '315210', 'question': 'Is the game Suicide Squad: Kill the Justice League appropriate for children, or does it contain violent content?'}
{'appid'

 43%|████▎     | 45/105 [00:01<00:01, 44.39it/s]

{'appid': '1086940', 'timestamp_query': 1727608376, 'title': "Baldur's Gate 3", 'recommendationid': '174437210', 'author.steamid': '76561199069379712', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 861, 'author.playtime_at_review': 11796, 'author.last_played': 1727477751, 'language': 'english', 'review': 'I love literally everything I have no complaints. The devs always listen to our needs, adding mod downloading directly from the game, more evil endings, more romantic scenes. Honestly to spend up to 60 euros for this game and have EVERYTHING, no paywall whatsoever, is a luxury.', 'voted_up': True, 'votes_up': 41, 'timestamp_created': 1725713267, 'timestamp_updated': 1725713267}
True
{'appid': '1086940', 'question': "Does Baldur's Gate 3 include any elements related to gambling or addictive mechanics that I should consider before allowing my children to play?"}
{'appid': '1086940', 'timestamp_query': 1727608376, 'title': "Baldur's Gate 3", 'recommendationid': '17417946

 52%|█████▏    | 55/105 [00:01<00:01, 46.04it/s]

{'appid': '1832040', 'timestamp_query': 1727608376, 'title': 'Flintlock: The Siege of Dawn', 'recommendationid': '175854095', 'author.steamid': '76561199490283861', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1557, 'author.playtime_at_review': 1577, 'author.last_played': 1727415780, 'language': 'english', 'review': 'having fun on this game the skill tree is so good not bad but soo good to play', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727411645, 'timestamp_updated': 1727411645}
True
{'appid': '1832040', 'question': 'Are there any elements in this game that promote gambling or mimic gambling mechanics?'}
{'appid': '1832040', 'timestamp_query': 1727608376, 'title': 'Flintlock: The Siege of Dawn', 'recommendationid': '174837953', 'author.steamid': '76561197995199618', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 530, 'author.last_played': 1726197057, 'language': 'english', 'review': "Really fun game, no co

 62%|██████▏   | 65/105 [00:01<00:00, 45.83it/s]

{'appid': '552520', 'timestamp_query': 1727608376, 'title': 'Far Cry 5', 'recommendationid': '174234036', 'author.steamid': '76561198280170159', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 198, 'author.last_played': 1725570462, 'language': 'english', 'review': 'clearly not as good as previous titles exchanging the main character focus for customization seems to have somewhat crippled (imo) their ability to put you in the seat of a character, its possible the writers just need a bit of work on getting custom characters to feel more "alive"/involved, but its not a big issue in my eyes. im not seeing alot of glaring issues really yet, so far ive had a blast watching how the villans act with their crazy eyes during cutscenes. the enemies are a tiny bit repetitive but as long as i get to slaughter mindlessly im satisfied.\r\n\r\nnot a bad game with the sale price.', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1725483172, 'timestam

 67%|██████▋   | 70/105 [00:01<00:00, 43.91it/s]

{'appid': '2698940', 'timestamp_query': 1727608376, 'title': 'The Crew Motorfest', 'recommendationid': '175064806', 'author.steamid': '76561199107009002', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1410, 'author.playtime_at_review': 455, 'author.last_played': 1727564624, 'language': 'english', 'review': "Let me draw an abstract comparison. Watch Dogs 2, a game great for many reasons, one of which is that it made 'nerdy' characters relatable. The conversations the protagonists in that game had were dumb, but they were relatable in a way. \n\nThis game tries to do that and by God it does such a fucking horrible job at it. It's a DRIVING game. And a good one! Genuinely without the horribly written dialogue it would be so much better. \n\nAnyway, aside from that, it's a good game! The driving feels fucking amazing, the environment is great, the animations are great in first person, they feel somewhat natural unlike most games where you move the wheel like, 10°. I can't 

 71%|███████▏  | 75/105 [00:01<00:00, 40.11it/s]

{'appid': '582160', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Origin", 'recommendationid': '174824313', 'author.steamid': '76561199498081110', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 3899, 'author.last_played': 1721407557, 'language': 'english', 'review': 'Literally such a good game, from graphics to story. Simply amazing.', 'voted_up': True, 'votes_up': 1, 'timestamp_created': 1726176927, 'timestamp_updated': 1726176927}
True
{'appid': '1817070', 'question': 'Is Marvel’s Spider-Man Remastered appropriate for children, considering it may include any harmful content?'}


 76%|███████▌  | 80/105 [00:01<00:00, 39.70it/s]

{'appid': '1817070', 'timestamp_query': 1727608376, 'title': 'Marvel’s Spider-Man Remastered', 'recommendationid': '175643497', 'author.steamid': '76561198353471728', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1519, 'author.playtime_at_review': 723, 'author.last_played': 1727544111, 'language': 'english', 'review': "Every time i booted up the game, my inner child came out and i couldn't help but simply enjoy being in Spidey's shoes again. Last time i played anything Spider-Man related was PS2 games. It felt amazing. Truly a wonderful game", 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727129663, 'timestamp_updated': 1727129663}
True
{'appid': '1817070', 'question': 'What is the general gameplay experience like in Marvel’s Spider-Man Remastered?'}
{'appid': '1817070', 'timestamp_query': 1727608376, 'title': 'Marvel’s Spider-Man Remastered', 'recommendationid': '175850023', 'author.steamid': '76561199128780971', 'author.playtimeforever': 0, 'author.playtime_

 86%|████████▌ | 90/105 [00:02<00:00, 43.09it/s]

{'appid': '2208920', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Valhalla", 'recommendationid': '174538274', 'author.steamid': '76561199194035883', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 4554, 'author.last_played': 1687609385, 'language': 'english', 'review': "This game is a dissapointment in everyway\n\nAs a long-time fan of the Assassin's Creed franchise, I had high hopes for Assassin’s Creed Valhalla, but after spending countless hours in this game, I can't help but regret buying it. Ubisoft has turned what could have been a great Viking adventure into a drawn-out, bloated, and lifeless experience.\n\nFirst and foremost, the length of the game is absolutely unnecessary. Ubisoft has padded the game with an endless list of fetch quests, useless side missions, and tedious grinding that doesn't feel rewarding. Instead of focusing on meaningful content, Valhalla is stuffed with filler that only serves to artificially i

 95%|█████████▌| 100/105 [00:02<00:00, 42.61it/s]

{'appid': '812140', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Odyssey", 'recommendationid': '175511886', 'author.steamid': '76561199192781600', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1288, 'author.last_played': 1696623952, 'language': 'english', 'review': 'the abilitys are cool. doesnt feel very assasin like but still very fun and over all easy to enjoy.', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726982140, 'timestamp_updated': 1726982140}
True
{'appid': '1545560', 'question': 'Does Shadow Gambit: The Cursed Crew contain excessive violence or graphic depictions of harm?'}
{'appid': '1545560', 'timestamp_query': 1727608376, 'title': 'Shadow Gambit: The Cursed Crew', 'recommendationid': '174788064', 'author.steamid': '76561198040356406', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 37, 'author.playtime_at_review': 347, 'author.last_played': 1726513207, 'language': 'english', 'review': 

100%|██████████| 105/105 [00:02<00:00, 43.47it/s]

{'appid': '794540', 'timestamp_query': 1727608376, 'title': 'Neo Cab', 'recommendationid': '175171850', 'author.steamid': '76561198024806714', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 227, 'author.playtime_at_review': 235, 'author.last_played': 1726577013, 'language': 'english', 'review': "Cyberpunk Uber simulator but as a San Francisco visual novel set in cyber Los Ochos.  It's obviously written by Leigh Alexander and her narrative voice shines through.  The graphics are beautiful especially for something made in Unity.  The atmosphere and music is nice although it can be slow at times.  You're basically exploring the narratives of your customers which are delivered episodically.  The final emotional battle between Savvy and Lina owes a lot to the insult sword fights from Monkey Island.\n\nWorth playing if you've got it in your backlog and worth a buy if it's 85% off.  It'll last you a quick night or two.  It just kind of ends without anything visible occurring o




{'hit_rate': 1.0, 'mrr': 1.0}

In [164]:
def compute_rrf(rank, k=60):
    """ Our own implementation of the relevance score """
    return 1 / (k + rank)

def elastic_search_hybrid_rrf(field, query, vector, appid, k=60):

    index_name = 'steam-reviews'
    
    knn_query = {
        "field": field,
        "query_vector": vector,
        "k": 10,
        "num_candidates": 10000,
        "boost": 0.5,
        "filter": {
            "term": {
                "appid": appid
            }
        }
    }

    keyword_query = {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question", "appid"],
                    "type": "best_fields",
                    "boost": 0.5,
                }
            },
            "filter": {
                "term": {
                    "appid": appid
                }
            }
        }
    }

    knn_results = es.search(
        index=index_name, 
        body={
            "knn": knn_query, 
            "size": 10
        }
    )['hits']['hits']
    
    keyword_results = es.search(
        index=index_name, 
        body={
            "query": keyword_query, 
            "size": 10
        }
    )['hits']['hits']
    
    rrf_scores = {}
    # Calculate RRF using vector search results
    for rank, hit in enumerate(knn_results):
        doc_id = hit['_id']
        rrf_scores[doc_id] = compute_rrf(rank + 1, k)

    # Adding keyword search result scores
    for rank, hit in enumerate(keyword_results):
        doc_id = hit['_id']
        if doc_id in rrf_scores:
            rrf_scores[doc_id] += compute_rrf(rank + 1, k)
        else:
            rrf_scores[doc_id] = compute_rrf(rank + 1, k)

    # Sort RRF scores in descending order
    reranked_docs = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
    
    # Get top-K documents by the score
    final_results = []
    for doc_id, score in reranked_docs[:5]:
        doc = es.get(index=index_name, id=doc_id)
        final_results.append(doc['_source'])
    
    return final_results

In [165]:
def question_text_hybrid_rrf(q):
    question = q['question']
    appid = q['appid']

    v_q = model.encode(question)

    return elastic_search_hybrid_rrf('title_review_vector', question, v_q, appid)

evaluate(ground_truth, question_text_hybrid_rrf)

  3%|▎         | 3/105 [00:00<00:03, 26.67it/s]

{'appid': '2239550', 'question': 'What are the content themes present in Watch Dogs: Legion that I should be aware of?'}
{'appid': '2239550', 'timestamp_query': 1727608376, 'title': 'Watch Dogs: Legion', 'recommendationid': '175993111', 'author.steamid': '76561199316658748', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 352, 'author.playtime_at_review': 359, 'author.last_played': 1726973926, 'language': 'english', 'review': '', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727577823, 'timestamp_updated': 1727577823, 'title_vector': [-0.031208040192723274, -0.04140456020832062, 0.007626562379300594, -0.05640276148915291, 0.017036626115441322, 0.07028614729642868, -0.02655501663684845, -0.03512350469827652, 0.05046771839261055, -0.004683449864387512, 0.0064065903425216675, -0.034316763281822205, 0.06852371990680695, -0.0006094787968322635, 0.007349507883191109, -0.016963135451078415, 0.0868942141532898, 0.009798023849725723, 0.03648601472377777, -0.0172030739486

 10%|█         | 11/105 [00:00<00:03, 29.99it/s]

{'appid': '243470', 'timestamp_query': 1727608376, 'title': 'Watch_Dogs', 'recommendationid': '174359056', 'author.steamid': '76561198981002312', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1348, 'author.last_played': 1725810208, 'language': 'english', 'review': 'Its pretty good', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1725631311, 'timestamp_updated': 1725631311, 'title_vector': [-0.024610770866274834, -0.07019967585802078, 0.013932466506958008, 0.00626661442220211, 0.013111689127981663, 0.10618165135383606, 0.018596932291984558, -0.08027968555688858, 0.06686598807573318, -0.006215371191501617, 0.033393051475286484, -0.012798620387911797, 0.003700600005686283, -0.019148286432027817, -0.010120592080056667, -0.02861606702208519, 0.05672342702746391, 0.0318467877805233, 0.02216988056898117, -0.04864049702882767, -0.04951934888958931, -0.017381399869918823, 0.01824028603732586, 0.08887158334255219, -0.12655402719974518, -0.0

 16%|█▌        | 17/105 [00:00<00:03, 28.33it/s]

{'appid': '2443720', 'timestamp_query': 1727608376, 'title': 'Concord', 'recommendationid': '174146257', 'author.steamid': '76561198280783242', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 75, 'author.last_played': 1724937106, 'language': 'english', 'review': "Concord is a victim of a long dev cycle and uninspired direction. If this was free-to-play, it might've had a chance to cultivate a dedicated player base, but even then those players may have moved on to better games. Concord has a cool galaxy and lore that I wish was used for a single-player game.\r\nActually, forget that. \r\nSony, just fund that [b] Bloodborne PC port [/b] pls and drown in money? [i] Pretty please [/i]? Thanks.", 'voted_up': False, 'votes_up': 47, 'timestamp_created': 1725381205, 'timestamp_updated': 1725381205, 'title_vector': [0.04651255905628204, 0.021019315347075462, -0.013262023217976093, 0.015302128158509731, -0.07626236230134964, 0.0690762847661972, -0.0

 23%|██▎       | 24/105 [00:00<00:02, 28.68it/s]

{'appid': '447040', 'timestamp_query': 1727608376, 'title': 'Watch_Dogs 2', 'recommendationid': '175648303', 'author.steamid': '76561199570796457', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 97, 'author.playtime_at_review': 2858, 'author.last_played': 1727401478, 'language': 'english', 'review': 'very fun even after completing the story', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727137014, 'timestamp_updated': 1727137014, 'title_vector': [-0.035487089306116104, -0.08131872862577438, 0.019066616892814636, 0.001918379683047533, -0.007544113788753748, 0.1006850004196167, -0.007168173789978027, -0.06107202172279358, 0.0700518935918808, -0.009436942636966705, 0.03450194001197815, -0.01907002367079258, 0.012965372763574123, -0.027325915172696114, 0.005738384556025267, -0.02274613082408905, 0.07131781429052353, 0.04021658003330231, 0.03399556502699852, -0.050705764442682266, -0.041504621505737305, -0.034933075308799744, 0.02806973271071911, 0.0694481208920478

 29%|██▊       | 30/105 [00:01<00:02, 27.07it/s]

{'appid': '1680880', 'timestamp_query': 1727608376, 'title': 'Forspoken', 'recommendationid': '175538865', 'author.steamid': '76561198072779782', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 25, 'author.playtime_at_review': 25, 'author.last_played': 1726762241, 'language': 'english', 'review': "Not great , not the worst. It's somewhere in the middle . It has some great moments , but also some really bad one . Buy it on sale , maybe when it's 20-30 pounds , including the dlc.", 'voted_up': True, 'votes_up': 1, 'timestamp_created': 1727010771, 'timestamp_updated': 1727010771, 'title_vector': [-0.07209538668394089, -0.1020004004240036, -0.06395156681537628, 0.0031834165565669537, -0.0293442290276289, 0.12226997315883636, 0.09427493810653687, -0.011234881356358528, 0.016394803300499916, -0.020399102941155434, 0.021283546462655067, 0.05384467914700508, -0.017657820135354996, -0.04331977292895317, 0.011689926497638226, 0.00032379734329879284, 0.14626933634281158, 0.07546304

 32%|███▏      | 34/105 [00:01<00:02, 28.29it/s]

{'appid': '315210', 'timestamp_query': 1727608376, 'title': 'Suicide Squad: Kill the Justice League', 'recommendationid': '175338752', 'author.steamid': '76561198398223166', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 3929, 'author.playtime_at_review': 1731, 'author.last_played': 1726925781, 'language': 'english', 'review': 'great game can be repetitive but its fun', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726790296, 'timestamp_updated': 1726790296, 'title_vector': [0.018467215821146965, 0.024351824074983597, -0.06513151526451111, -0.10613536834716797, 0.09964432567358017, 0.08851472288370132, 0.07750560343265533, 0.04421189799904823, 0.036054112017154694, 0.038830872625112534, 0.02860449068248272, -0.004575718659907579, 0.010037116706371307, 0.03576195612549782, 0.05619358643889427, 0.00469497125595808, 0.061923135071992874, -0.04587120935320854, -0.03221171349287033, -0.0046423617750406265, -0.005464145913720131, -0.032773829996585846, 0.020340057089

 39%|███▉      | 41/105 [00:01<00:02, 26.19it/s]

{'appid': '1086940', 'timestamp_query': 1727608376, 'title': "Baldur's Gate 3", 'recommendationid': '175922191', 'author.steamid': '76561198066424986', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1514, 'author.playtime_at_review': 3586, 'author.last_played': 1727577002, 'language': 'english', 'review': 'before i bought this game i was very hesitant on if i would like it just because of the combat style but after playing through a full save i believe this is one of the greatest games i have ever played. You can play the game however you want and there is always loads of content no matter which direction you go. i believe everyone and their mothers should play this game at least once. 10/10', 'voted_up': True, 'votes_up': 2, 'timestamp_created': 1727496773, 'timestamp_updated': 1727496773, 'title_vector': [-0.0021828575991094112, -0.012875277549028397, -0.04406603053212166, -0.04100967198610306, -0.009287942200899124, -0.03674192354083061, -0.012858791276812553, -0.103

 45%|████▍     | 47/105 [00:01<00:02, 25.25it/s]

{'appid': '2322010', 'timestamp_query': 1727608376, 'title': 'God of War: Ragnarok', 'recommendationid': '175377339', 'author.steamid': '76561198033159972', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 731, 'author.playtime_at_review': 161, 'author.last_played': 1727384956, 'language': 'english', 'review': "Good job sony, if I hadn't bought the game from instant gaming, I would have asked for a refund. Giving my passport ID just to access one of your game is nonsense and it will not only tarnish your already piss poor reputation but also get you more people to never buy one of your game, even tho they're masterpiece.", 'voted_up': False, 'votes_up': 119, 'timestamp_created': 1726842982, 'timestamp_updated': 1726842982, 'title_vector': [-0.030018333345651627, 0.03668512776494026, -0.06068187206983566, 0.0030811249744147062, -0.00488177640363574, 0.0607912614941597, 0.08302398025989532, -0.010123156942427158, 0.1018785685300827, -0.0035371033009141684, -0.05385114625096

 50%|█████     | 53/105 [00:01<00:01, 26.77it/s]

{'appid': '1832040', 'timestamp_query': 1727608376, 'title': 'Flintlock: The Siege of Dawn', 'recommendationid': '175854095', 'author.steamid': '76561199490283861', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1557, 'author.playtime_at_review': 1577, 'author.last_played': 1727415780, 'language': 'english', 'review': 'having fun on this game the skill tree is so good not bad but soo good to play', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727411645, 'timestamp_updated': 1727411645, 'title_vector': [-0.13268093764781952, 0.07841040194034576, 0.00474591925740242, 0.032247066497802734, -0.01931910030543804, 0.04207124933600426, 0.002141091274097562, 0.015311146154999733, -0.08628498762845993, -0.014378827065229416, -0.0445815809071064, 0.004941070917993784, -0.05287206918001175, 0.008067778311669827, -0.0008252341067418456, -0.028431806713342667, 0.004139703232795, 0.002813011175021529, 0.08388656377792358, 0.015977490693330765, -0.008670437149703503, 0.01920

 56%|█████▌    | 59/105 [00:02<00:01, 26.17it/s]

{'appid': '304390', 'timestamp_query': 1727608376, 'title': 'FOR HONOR', 'recommendationid': '175863945', 'author.steamid': '76561198439414268', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 454, 'author.last_played': 1652135314, 'language': 'english', 'review': 'Campaign is ok, but multiplayer, not so great. Like someone said "This game is like Fentanyl, it feels good in minor doses but if you take to much you will kill yourself." - S3RKT', 'voted_up': False, 'votes_up': 2, 'timestamp_created': 1727428862, 'timestamp_updated': 1727428862, 'title_vector': [-0.036877479404211044, 0.19749166071414948, 0.005078896414488554, -0.04963330924510956, -0.048484377562999725, 0.07398693263530731, 0.07931988686323166, -0.07489237934350967, 0.012428680434823036, 0.04433915391564369, -0.003651408711448312, -0.05001228302717209, 0.02306411787867546, -0.010812114924192429, -0.0049315886572003365, 0.05519746616482735, 0.04435193911194801, 0.0802418664097

 59%|█████▉    | 62/105 [00:02<00:01, 23.69it/s]

{'appid': '552520', 'timestamp_query': 1727608376, 'title': 'Far Cry 5', 'recommendationid': '175586182', 'author.steamid': '76561199444212653', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 12, 'author.playtime_at_review': 12, 'author.last_played': 1727056766, 'language': 'english', 'review': "Far Cry 5 is my favorite farcry as it is base don a non-pussy generation and is the most like real life in the west. I was excited to download this game and play it on PC. That is until a popup for ubersoft came up, requiring me to make an ubersoft account to even install and play the game. This is called bait and switch. I paid my money to own my copy of this game on my computer and they did not disclose ahead of time that you are required to sell your data with another account, made in your name, with another company. Being forced to make a steam account to get PC games now days isn't enough, Now you have to have a steam account and then on top of that make accounts with all t

 65%|██████▍   | 68/105 [00:02<00:01, 24.98it/s]

{'appid': '1496790', 'timestamp_query': 1727608376, 'title': 'Gotham Knights', 'recommendationid': '174859931', 'author.steamid': '76561198159341422', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1321, 'author.last_played': 1726366445, 'language': 'english', 'review': "When you open the social menu, the game crashes. This game should never exist. I paid 2 dollars for this game and it was still too much money. The only reason why I'm playing this game is because I had a bet and I had to keep my word.", 'voted_up': False, 'votes_up': 2, 'timestamp_created': 1726231124, 'timestamp_updated': 1726231124, 'title_vector': [0.03399800881743431, -0.037514470517635345, -0.10720440745353699, 0.0398595854640007, -0.030118297785520554, -0.01693931221961975, -0.04038681462407112, -0.03909076750278473, -0.04976147413253784, -0.050662823021411896, 0.00416133739054203, -0.028917450457811356, 0.026187341660261154, 0.003583624493330717, -0.036022435873746

 72%|███████▏  | 76/105 [00:02<00:01, 27.75it/s]

{'appid': '582160', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Origin", 'recommendationid': '174746478', 'author.steamid': '76561199493504352', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 280, 'author.playtime_at_review': 329, 'author.last_played': 1726843144, 'language': 'english', 'review': 'I would 100% Recommend this to people who want to buy it\r\nBut its alot of Money but if they just lower the price it would be great\r\nand if you want the game Then when there is an sale going on\r\nlike 85% I would 100% recomend this to you/people who want to buy it\r\n :D', 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1726076267, 'timestamp_updated': 1726076267, 'title_vector': [0.013862956315279007, -0.024770470336079597, -0.16092175245285034, -0.05314734950661659, -0.02757808193564415, 0.057654496282339096, 0.025549544021487236, -0.08206751197576523, 0.046947404742240906, 0.010089526884257793, -0.0004057683690916747, 0.024576663970947266, -0.0430389

 78%|███████▊  | 82/105 [00:03<00:00, 27.63it/s]

{'appid': '1817070', 'timestamp_query': 1727608376, 'title': 'Marvel’s Spider-Man Remastered', 'recommendationid': '175850023', 'author.steamid': '76561199128780971', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1330, 'author.playtime_at_review': 959, 'author.last_played': 1727481530, 'language': 'english', 'review': "Amazing!! Love the graphics and the game play. I've been wanting this since it came out on play station. I even bought a gaming computer to play this. I've also already bought the Marvel's Spider-Man: Miles Morales because I knew I would love this game. I highly recommend buying this.", 'voted_up': True, 'votes_up': 0, 'timestamp_created': 1727404991, 'timestamp_updated': 1727404991, 'title_vector': [-0.10311632603406906, -0.053698938339948654, 0.021640196442604065, -0.005982811097055674, -0.0766599103808403, -0.0031886762008070946, -0.05408354476094246, -0.011845272034406662, -0.059839047491550446, -0.005345311481505632, -0.03592531383037567, 0.08319286

 81%|████████  | 85/105 [00:03<00:00, 26.87it/s]

{'appid': '2208920', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Valhalla", 'recommendationid': '174101248', 'author.steamid': '76561198086713481', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 240, 'author.playtime_at_review': 2497, 'author.last_played': 1727570993, 'language': 'english', 'review': 'I\'ve played every Assassin\'s Creed game, but I wish I had skipped this one—it\'s mediocre at best.\nThe first thing that struck me was the poor voice acting. Both the male and female leads are unremarkable , though the male is slightly better.\n\nAs for the story, the main plot is passable, but it feels repetitive. The characters are pretty forgettable, with only a few exceptions like Sigurd.\n\nThe side quests, however, are where the game truly lost me. The Niflheim segment, in particular, motivated me to write this review. Who thought this was a good idea? It’s like a frustrating \'Hades\' mode—if you die, you start from scratch, buying upgrades and fight

 87%|████████▋ | 91/105 [00:03<00:00, 24.65it/s]

{'appid': '721180', 'timestamp_query': 1727608376, 'title': 'Dustborn', 'recommendationid': '173814391', 'author.steamid': '76561199049713186', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 0, 'author.playtime_at_review': 1287, 'author.last_played': 1725027231, 'language': 'english', 'review': 'Dustborn has the vibe of a "be gay do crime" sticker slapped on a graffiti-covered wall - at first! And then it gets deep.\n\nA fun ride that tugged at my heartstrings a lot. The ending felt good, but it was also bittersweet, saying goodbye to this world and the story! The STORY is the part that really won me over. It got better the longer I stuck with it. I enjoyed the ability to make a LOT of choices, to influence my companions in their attitudes and ultimately myself - Pax.\n\nTakeaway: Found family touring across a really intense - intense mostly in how dangerous it felt - America as a mediocre punk rock band (mediocre because the BEST I could do in the rhythm minigames was 

 92%|█████████▏| 97/105 [00:03<00:00, 26.62it/s]

{'appid': '812140', 'timestamp_query': 1727608376, 'title': "Assassin's Creed Odyssey", 'recommendationid': '175243766', 'author.steamid': '76561198835479154', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 1, 'author.playtime_at_review': 9510, 'author.last_played': 1726863394, 'language': 'english', 'review': "Despite being an ambitious game, Assassin's Creed Odyssey often feels bloated. While the vast, open-world is beautiful, it can feel repetitive and is filled with unnecessary quests that drag down the experience. The RPG mechanics, a fresh idea, sometimes take away from the Assassin's Creed experience, making it feel more like an open-world checklist simulator. That said, the story is engaging and the historical setting is captivating, especially for fans of Ancient Greece. If you have the patience for the grind, there's still fun to be had", 'voted_up': True, 'votes_up': 18, 'timestamp_created': 1726669780, 'timestamp_updated': 1726938113, 'title_vector': [0.0182

100%|██████████| 105/105 [00:03<00:00, 26.90it/s]

{'appid': '1545560', 'timestamp_query': 1727608376, 'title': 'Shadow Gambit: The Cursed Crew', 'recommendationid': '174788064', 'author.steamid': '76561198040356406', 'author.playtimeforever': 0, 'author.playtime_last_two_weeks': 37, 'author.playtime_at_review': 347, 'author.last_played': 1726513207, 'language': 'english', 'review': 'It is the same game as the previous entries (Desperados 3 and Shadow Gambit), only with far more abilities and content\n\nIf you liked the previous, this one is worth it for sure', 'voted_up': True, 'votes_up': 1, 'timestamp_created': 1726138497, 'timestamp_updated': 1726138497, 'title_vector': [-0.06712406128644943, 0.009421498514711857, 0.0073942262679338455, -0.043221961706876755, -0.05410829931497574, 0.07348617911338806, 0.049635160714387894, -0.024856140837073326, -0.025988273322582245, -0.019289180636405945, -0.022457605227828026, 0.006011052057147026, 0.02052491344511509, -0.027519918978214264, -0.04368121549487114, 0.011507898569107056, 0.12379150




{'hit_rate': 1.0, 'mrr': 1.0}

In [None]:
index = minsearch.Index(
    # text_fields=["author.steamid", "author.playtimeforever", "author.playtime_last_two_weeks", "author.playtime_at_review", "author.last_played", "language", "review", "timestamp_created", "timestamp_updated"],
    text_fields=["title", "language", "review"],
    keyword_fields=["appid", "recommendationid"]
)

index.fit(reviews)

# Retrieval evaluation

In [None]:
# Directory containing the ground truth file
data_dir = os.path.abspath('../reviews-assistant/data/ground_truth')

# Path to the ground_truth_retrieval.json file
file_path = os.path.join(data_dir, 'ground_truth_retrieval.json')

# Check if the file exists
if os.path.exists(file_path):
    try:
        # Open and load the JSON file
        with open(file_path, 'r', encoding='utf-8') as jsonfile:
            ground_truth_data = json.load(jsonfile)  # Load the JSON data into a Python object

        # Ensure each item is a dictionary and contains 'id' and 'question'
        if all(isinstance(item, dict) and 'appid' in item and 'question' in item for item in ground_truth_data):
            print("Data successfully loaded and is in the correct format.")

            # Example: Print a sample of the data
            for item in ground_truth_data[:5]:  # Print the first 5 questions
                print(f"ID: {item['appid']}, Question: {item['question']}")
        else:
            print("Error: The data format is incorrect or missing required fields ('id', 'question').")
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
else:
    print(f"File {file_path} does not exist.")

In [None]:
ground_truth_data[:1]

In [None]:
for doc in tqdm(reviews):
    question = doc['review']
    text = doc['text']
    qt = question + ' ' + text

    doc['question_vector'] = model.encode(question)
    doc['text_vector'] = model.encode(text)
    doc['question_text_vector'] = model.encode(qt)

In [None]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

In [None]:
def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score += 1 / (rank + 1)
                break  # Stop after finding the first relevant document

    return total_score / len(relevance_total)


In [None]:
def minsearch_search(query):
    boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [None]:
def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['appid']
        results = search_function(q)
        relevance = [d['appid'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [None]:
from tqdm.auto import tqdm

In [None]:
evaluate(ground_truth_data, lambda q: minsearch_search(q['question']))

# Finding the best parameters

In [None]:
len(ground_truth_data)

In [None]:
# Find the midpoint of the dataset
midpoint = len(ground_truth_data) // 2  # integer division to get the midpoint

# Split the dataset into two equal halves
df_validation = ground_truth_data[:midpoint]
df_test = ground_truth_data[midpoint:]

In [None]:
df_validation[-1]

In [None]:
df_test[-1]

In [None]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10):
    best_params = None
    best_score = float('-inf')  # Assuming we're minimizing. Use float('-inf') if maximizing.

    for _ in range(n_iterations):
        # Generate random parameters
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = random.randint(min_val, max_val)
            else:
                current_params[param] = random.uniform(min_val, max_val)
        
        # Evaluate the objective function
        current_score = objective_function(current_params)
        
        # Update best if current is better
        if current_score > best_score:  # Change to > if maximizing
            best_score = current_score
            best_params = current_params
    
    return best_params, best_score

In [None]:
def minsearch_search(query, boost=None):
    if boost is None:
        boost = {}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

In [None]:
param_ranges = {
    'exercise_name': (0.0, 3.0),
    'type_of_activity': (0.0, 3.0),
    'type_of_equipment': (0.0, 3.0),
    'body_part': (0.0, 3.0),
    'type': (0.0, 3.0),
    'muscle_groups_activated': (0.0, 3.0),
    'instructions': (0.0, 3.0),
}

def objective(boost_params):
    def search_function(q):
        return minsearch_search(q['question'], boost_params)

    results = evaluate(df_validation, search_function)
    return results['mrr']

In [None]:
simple_optimize(param_ranges, objective, n_iterations=20)

In [None]:
def minsearch_improved(query):
    boost = {'exercise_name': 1.5333100039172263,
  'type_of_activity': 1.5395670128097776,
  'type_of_equipment': 2.39211934690812,
  'body_part': 1.6244459844173096,
  'type': 2.7948598963438456,
  'muscle_groups_activated': 1.4303057412632778,
  'instructions': 1.46575166599529}

    results = index.search(
        query=query,
        filter_dict={},
        boost_dict=boost,
        num_results=10
    )

    return results

evaluate(ground_truth_data, lambda q: minsearch_improved(q['question']))