In [35]:
demo = False

# IMPORT

In [2]:
# Import necessary modules
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json
import pprint 
import os

# Import progress bar module
from alive_progress import alive_bar

# Import time and sys modules
import time
import sys

# Import BoardGameGeek client
from boardgamegeek import BGGClient

# Create an instance of the BoardGameGeek client
bgg = BGGClient()

# EXTRACTION

In [9]:
# Retrieve the hot items in the 'boardgame' category from BoardGameGeek
hot_items = bgg.hot_items('boardgame')

# Create an empty dictionary to store items that encounter errors
miss = {}

# Create an empty list to store the extracted data
data = []

count = 0

# Iterate over each hot item
for item in hot_items:
    try:
        count = count + 1
        print(f'[{count}]')
        
        # Retrieve the game details, including comments, for the current item
        game = bgg.game(game_id=item.id, comments=True)
        
        # Create a progress bar with the length of the comments
        with alive_bar(len(game.comments), force_tty=True) as bar:
            
            # Iterate over each comment in the game
            for comment in game.comments:
                # Create a dictionary to store the comment data
                com_data = {
                    "id": item.id,
                    "title": item.name,
                    "user": comment.commenter,
                    "comment": comment.comment,
                    "rating": comment.rating
                }
                
                # Append the comment data to the list
                data.append(com_data)
                
                # time.sleep(0.01)
                
                # Update the progress bar
                bar()
        
    except:
        # If an error occurs, print 'error' and add the item to the 'miss' dictionary
        print('ERROR - Skipping the rest of the comments...\n')
        miss[item.id] = item.name

if demo:
    # Specify the folder path where you want to save the file
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Specify the filename
    filename = "comment_data_demo.json"
else:
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    filename = "comment_data.json"

# Construct the full file path
file_path = os.path.join(folder_path, filename)

# Save the extracted data to the JSON file
with open(file_path, 'w') as f:
    json.dump(data, f, indent=2)  # indent=2 is not needed but makes the file human-readable if the data is nested

ERROR - Skipping the rest of the comments...

[9]
|████████████████████████████████████████| 15/15 [100%] in 0.0s (118779.57/s)   

[10]
|████████████████████████████████████████| 14/14 [100%] in 0.0s (88800.16/s)    

[11]
ERROR - Skipping the rest of the comments...

[12]
|████████████████████████████████████████| 101/101 [100%] in 0.0s (236519.44/s) 

[13]
ERROR - Skipping the rest of the comments...

[14]
ERROR - Skipping the rest of the comments...

[15]
|████████████████████████████████████████| 17/17 [100%] in 0.0s (120718.09/s)   

[16]
ERROR - Skipping the rest of the comments...

[17]
|████████████████████████████████████████| 93/93 [100%] in 0.0s (162720.01/s)   

[18]
ERROR - Skipping the rest of the comments...

[19]
|████████████████████████████████████████| 272/272 [100%] in 0.0s (227420.96/s) 

[20]
ERROR - Skipping the rest of the comments...

[21]
ERROR - Skipping the rest of the comments...

[22]
ERROR - Skipping the rest of the comments...

[23]
ERROR - Skipping the

# WRANGLING

In [4]:
import pandas as pd
import json
import os

# Set the display option to show all columns in pandas DataFrame
pd.set_option('display.max_columns', None)

if demo:
    # Specify the folder path where you want to save the file
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Specify the filename
    filename = "comment_data_demo.json"
else:
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    filename = "comment_data.json"

# Construct the full file path
file_path = os.path.join(folder_path, filename)

# Read the JSON file containing the comment data
with open(file_path, 'r') as f:
    post_list = json.load(f)
    
# Print the number of comments before any formatting
print(f'Amount of comments before any formatting: {len(post_list)}')

# Convert the JSON data into a pandas DataFrame
df = pd.json_normalize(post_list)

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Save the DataFrame as a CSV file in the specified directory
    df.to_csv(os.path.join(path_original_data, 'comment_data_demo.csv'), index=False)
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df.to_csv(os.path.join(path_original_data, 'comment_data.csv'), index=False)

# Display the first row of the DataFrame
df.head(1)

Amount of comments before any formatting: 136806


Unnamed: 0,id,title,user,comment,rating
0,390478,Gloomhaven: Second Edition,amusedleg,Day 1 of the GH 2.0 campaign. Can't wait.,


# CLEANING

## Reading raw data

In [5]:
import pandas as pd
import numpy as np
import json
import os

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Read the CSV file into a DataFrame
    df = pd.read_csv(os.path.join(path_original_data, 'comment_data_demo.csv'), low_memory=False)
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df = pd.read_csv(os.path.join(path_original_data, 'comment_data.csv'), low_memory=False)

df.head(1)

Unnamed: 0,id,title,user,comment,rating
0,390478,Gloomhaven: Second Edition,amusedleg,Day 1 of the GH 2.0 campaign. Can't wait.,


In [6]:
# Print some statistics of the 'comment' field

# Calculate the percentage of non-null comments
comment_percentage = round(df.comment.notnull().mean() * 100, 2)
print(str(comment_percentage) + '%')

# Calculate the maximum, minimum, and mean length of comments
max_length = df.comment.str.len().max()
min_length = df.comment.str.len().min()
mean_length = df.comment.str.len().mean()
print(max_length)
print(min_length)
print(mean_length)
print()

# Search the number of comments containing searched words within the text of the message

pattern = "random"

# Count comments containing the search pattern
contains_pattern = df.comment.str.contains(pattern, na=False).sum()

# Count comments starting with the search pattern
starts_with_pattern = df.comment.str.startswith(pattern, na=False).sum()

# Count comments exactly matching the search pattern
exact_match_pattern = df.comment.str.fullmatch(pattern, na=False).sum()

print(contains_pattern)
print(starts_with_pattern)
print(exact_match_pattern)

# Filter out rows with null comments and reset the index
df = df[df.comment.notnull()]
df.reset_index(drop=True, inplace=True)

# Print the first messages that contain the pattern
matching_comments = df.loc[df.comment.str.contains(pattern, na=False), 'comment']
print(matching_comments)

99.99%
18588.0
1.0
202.97686183628562

2851
2
0
102       After just one play I get the feeling I have s...
146       I really don’t get the hype around Heat, but t...
183       An okay "deck management” game with a relatabl...
326       Update: After some more plays I feel that some...
435       [imageID=6940449small inline] This really feel...
                                ...                        
136562    The character progression system is very inter...
136659    These are thoughts from a single play through ...
136667    This is pure a take that game! don't try to "p...
136718    Too long with 2P. Better with 3P, but the card...
136745    Review: Highly interactive, tactical, and stre...
Name: comment, Length: 2851, dtype: object


## Filtering the data

In [7]:
from guess_language import guess_language
import enchant
import string
import re

# Function to check if a comment is in English
def is_english_batch(batch):
    # Create a batch of processed texts
    processed_texts = batch['comment'].str.lower().str.findall(r"[a-zA-Z0-9']+")

    # Create an English dictionary
    english_dictionary = enchant.Dict("en_US")

    # Check if any comment in the batch is in English
    is_english = processed_texts.apply(lambda text: sum(english_dictionary.check(word) for word in text) >= len(text) / 2)

    # Return a boolean Series indicating if each comment is in English
    return is_english

In [8]:
from IPython.display import display, HTML
from alive_progress import alive_bar
from tqdm import tqdm
import pandas as pd
import time
import sys

# Batch processing
batch_size = 1000  # Number of rows to process in each batch
num_rows = len(df)
result = pd.Series([], dtype='float64')  # Store the results

# Calculate the number of batches
num_batches = (num_rows // batch_size) + 1

# Initialize a progress bar
with tqdm(total=num_batches, ncols=num_batches) as pbar:
    # Process each batch
    for i in range(0, num_rows, batch_size):
        # Extract a batch of rows from the DataFrame
        batch = df.iloc[i:i+batch_size]
        
        # Filter out non-English rows in the batch
        batch_english = batch.loc[is_english_batch(batch)]
        
        # Concatenate the English rows to the result
        result = pd.concat([result, batch_english])
        
        # Update the progress bar
        pbar.update(1)

# Reset the index of the resulting DataFrame
result.reset_index(drop=True, inplace=True)

# Print the updated DataFrame
result.head(5)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 137/137 [06:55<00:00,  3.04s/it]


Unnamed: 0,0,id,title,user,comment,rating
0,,390478.0,Gloomhaven: Second Edition,amusedleg,Day 1 of the GH 2.0 campaign. Can't wait.,
1,,390478.0,Gloomhaven: Second Edition,bark,BGCJ made me do it. Stop making fake dungeon c...,1.0
2,,390478.0,Gloomhaven: Second Edition,Brefs,"Cant wait for It, thanks for the amazing job s...",10.0
3,,390478.0,Gloomhaven: Second Edition,Dali187,A perfect non greed driven game made even more...,10.0
4,,390478.0,Gloomhaven: Second Edition,DJ_Tsaladi_Tjatekok,"Nem vagyok egy Homályrév-rajongó, de ha már me...",


In [12]:
import pandas as pd

# Assuming you have two DataFrames: df1 and df2 representing the two databases
df1 = df
# print('Check.')
df2 = result
# print('Check.')

# Print the size of each database
print('Current database:', len(result))
print('Original database:', len(df))
print('Difference:', len(df)-len(result))

# Find rows with differing 'comment' in df1 compared to df2
diff_df1 = df1[~df1['comment'].isin(df2['comment'])]

# Find rows with differing 'comment' in df2 compared to df1
diff_df2 = df2[~df2['comment'].isin(df1['comment'])]

# Concatenate the differing rows into a single DataFrame
diff_combined = pd.concat([diff_df1, diff_df2])

# Reset the index of the resulting DataFrame
diff_combined.reset_index(drop=True, inplace=True)

# Print the differences
diff_combined.head(5)

Check.
Check.
Current database: 125426
Original database: 136787
Difference: 11361


Unnamed: 0,id,title,user,comment,rating,0
0,390478.0,Gloomhaven: Second Edition,Fuzzel,Unnötige Geldmache mit der erfolgreichen Marke.,1.0,
1,366013.0,Heat: Pedal to the Metal,a2greg,nyp,,
2,366013.0,Heat: Pedal to the Metal,Abri,56x87mm cards (330pcs),,
3,366013.0,Heat: Pedal to the Metal,alexbatbee,zatu,,
4,366013.0,Heat: Pedal to the Metal,ANDREWSOFT,Jugadas varias partidas en solitario con el mó...,8.2,


In [13]:
# Add a new column with the length of each comment
result['text_length'] = result['comment'].apply(lambda x: len(x))  

# Add a new column with the word count of each comment
result['word_count'] = result['comment'].apply(lambda x: len(x.split())) 

# Filter out rows with word count less than or equal to 5
result = result[result['word_count'] > 5]  

# Drop the first column (assumed to be unnecessary)
result = result.drop(result.columns[0], axis=1)  

# Print the first 5 rows of the resulting DataFrame
result.head(5)  

# Save the pre-processed DataFrame to a CSV file
if demo:
    result.to_csv(os.path.join(path_original_data, 'pre_processed_comment_data_demo.csv'), index=False)
else:
    result.to_csv(os.path.join(path_original_data, 'pre_processed_comment_data.csv'), index=False)

# PREPROCESS

In [25]:
# Import necessary modules
import pandas as pd
import numpy as np
import json
import os

# Set the display option to show all columns in pandas DataFrame
pd.set_option('display.max_columns', None)

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Construct the file path to the CSV file
    csv_file_path = os.path.join(path_original_data, 'pre_processed_comment_data_demo.csv')
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    csv_file_path = os.path.join(path_original_data, 'pre_processed_comment_data.csv')

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path, low_memory=False)

## Punctuation Removal

In [26]:
import string

# Function to remove punctuation from a text
def remove_punctuation(text):
    # Create a set of allowed characters (letters, numbers, and space)
    allowed_chars = set(string.ascii_letters + string.digits + ' ')
    
    # Remove punctuation characters not in the allowed set
    processed_text = ''.join(char for char in text if char in allowed_chars)
    
    return processed_text

# Apply the remove_punctuation() function to the 'comment' column and store the result in a new column 'processed_comment'
df['processed_comment'] = df['comment'].apply(remove_punctuation)

# Lower case all the messages
df['processed_comment'] = df['processed_comment'].str.lower()

## Tokenization

In [27]:
import re

# Function to tokenize a text
def tokenization(text):
    # Split the text on spaces to create tokens
    tokens = text.split()
    
    return tokens

# Apply the tokenization() function to the 'processed_comment' column and store the result in a new column 'comment_tokenized'
df['comment_tokenized'] = df['processed_comment'].apply(lambda x: tokenization(x))

## Stopword Removal

In [28]:
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
import nltk
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np

# Download the required NLTK resources (uncomment if needed)
# nltk.download('wordnet')
# nltk.download('omw-1.4')

# Stop words present in the library
stopwords = nltk.corpus.stopwords.words('english')
print(stopwords[0:10])

# Defining the function to remove stopwords from tokenized text
def remove_stopwords(text):
    output= [i for i in text if i not in stopwords]
    return output

# Applying the function
df['comment_key_words']= df['comment_tokenized'].apply(lambda x:remove_stopwords(x))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]


## Stemming

## Lemmatization

## Gensim preprocessing

In [29]:
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np
import nltk
import json

np.random.seed(400)

# Download the required NLTK resource (uncomment if needed)
# nltk.download('wordnet')

reference_sheet = {}  # Dictionary to store word reference sheet

stemmer = SnowballStemmer("english")

# Function to lemmatize and stem a word
def lemmatize_stemming(text):
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))

# Tokenize, lemmatize, and filter stopwords
def preprocess(text):
    result = []
    for token in gensim.utils.simple_preprocess(text):
        if token not in STOPWORDS and len(token) > 2:
            word = lemmatize_stemming(token)
            if word in reference_sheet:
                if token not in reference_sheet[word]:
                    reference_sheet[word].append(token)
            else:
                reference_sheet[word] = [token]
            result.append(word)
    return result

# Tokenize, lemmatize, and filter verbs
def preprocess_verbs(text):
    text = gensim.utils.simple_preprocess(text)
    tagged_tokens = nltk.pos_tag(text)
    filtered_tokens = [token for token, pos_tag in tagged_tokens if pos_tag not in ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']]
    
    result = []
    for token in filtered_tokens:
        if token not in STOPWORDS and len(token) > 2:
            result.append(lemmatize_stemming(token))
    return result

# print('Start.')
df['gensim_comment'] = df['comment'].apply(preprocess)
# print('Next.')
df['gensim_comment_verbs'] = df['comment'].apply(preprocess_verbs)
# print('Finish.')

# Save reference sheet as a JSON file
json_data = json.dumps(reference_sheet)
with open('reference_sheet.json', 'w') as file:
    file.write(json_data)

## Restructring the dataset

In [37]:
# Get the list of column names
columns = list(df.columns)
print(columns, '\n')

# Filter the DataFrame based on the length of 'gensim_comment' column
df = df[df['gensim_comment'].map(lambda d: len(d)) >= 5]
df = df.reset_index(drop=True)

# Calculate the average length of 'gensim_comment' column
average_length = df['gensim_comment'].apply(lambda x: len(x)).mean()
print(average_length)

if demo:
     # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Save the DataFrame to a CSV file
    df.to_csv(os.path.join(path_original_data, 'post_processed_comment_data_demo.csv'), index=False)
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df.to_csv(os.path.join(path_original_data, 'post_processed_comment_data.csv'), index=False)

# Display a sample of 5 rows from the DataFrame
df.sample(5)

['id', 'title', 'user', 'comment', 'rating', 'text_length', 'word_count', 'processed_comment', 'comment_tokenized', 'comment_key_words', 'gensim_comment', 'gensim_comment_verbs'] 

242.00339533107888


Unnamed: 0,id,title,user,comment,rating,text_length,word_count,processed_comment,comment_tokenized,comment_key_words,gensim_comment,gensim_comment_verbs
9500,224517.0,Brass: Birmingham,Northwest Smith,"Sleeved (Sleeve kings), Folded Space organizer",8.0,46,6,sleeved sleeve kings folded space organizer,"['sleeved', 'sleeve', 'kings', 'folded', 'spac...","['sleeved', 'sleeve', 'kings', 'folded', 'spac...","['sleev', 'sleev', 'king', 'fold', 'space', 'o...","['sleev', 'king', 'space', 'organ']"
71405,291457.0,Gloomhaven: Jaws of the Lion,casadeisogniburritts,Initial rating. Very good game with just the ...,7.5,114,20,initial rating very good game with just the r...,"['initial', 'rating', 'very', 'good', 'game', ...","['initial', 'rating', 'good', 'game', 'right',...","['initi', 'rat', 'good', 'game', 'right', 'mix...","['initi', 'rat', 'good', 'game', 'right', 'mix..."
36422,174430.0,Gloomhaven,grunner,My wife and I have fallen in love with every a...,10.0,138,29,my wife and i have fallen in love with every a...,"['my', 'wife', 'and', 'i', 'have', 'fallen', '...","['wife', 'fallen', 'love', 'every', 'aspect', ...","['wife', 'fall', 'love', 'aspect', 'game', 'sp...","['wife', 'love', 'aspect', 'game', 'hour']"
24587,266192.0,Wingspan,1234567,"Boring on my initial plays, though I'm sure wo...",6.0,172,29,boring on my initial plays though im sure woul...,"['boring', 'on', 'my', 'initial', 'plays', 'th...","['boring', 'initial', 'plays', 'though', 'im',...","['bore', 'initi', 'play', 'sure', 'better', 'l...","['initi', 'play', 'sure', 'better', 'strategi'..."
62061,230802.0,Azul,LionPacifique,"Meh. I mean, don't get me wrong, the design an...",6.5,160,30,meh i mean dont get me wrong the design and pr...,"['meh', 'i', 'mean', 'dont', 'get', 'me', 'wro...","['meh', 'mean', 'dont', 'get', 'wrong', 'desig...","['meh', 'mean', 'wrong', 'design', 'present', ...","['meh', 'mean', 'wrong', 'design', 'present', ..."


In [39]:
import pandas as pd
import numpy as np
import json
import os

pd.set_option('display.max_columns', None)

# Set the maximum number of columns to display
pd.set_option('display.max_columns', None)

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Read the CSV file into a DataFrame
    df = pd.read_csv(os.path.join(path_original_data, 'post_processed_comment_data_demo.csv'), low_memory=False)
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df = pd.read_csv(os.path.join(path_original_data, 'post_processed_comment_data.csv'), low_memory=False)

# Display a sample of 10 rows from the DataFrame
df.sample(5)

Unnamed: 0,id,title,user,comment,rating,text_length,word_count,processed_comment,comment_tokenized,comment_key_words,gensim_comment,gensim_comment_verbs
47191,199792.0,Everdell,ytjunkies,The amazing art cannot be overstated. The game...,9.0,1028,171,the amazing art cannot be overstated the game ...,"['the', 'amazing', 'art', 'cannot', 'be', 'ove...","['amazing', 'art', 'cannot', 'overstated', 'ga...","['amaz', 'art', 'overst', 'game', 'gorgeous', ...","['amaz', 'art', 'game', 'gorgeous', 'compon', ..."
26093,266192.0,Wingspan,dustmonster,"Great, clever game. Different than so many oth...",10.0,55,9,great clever game different than so many other...,"['great', 'clever', 'game', 'different', 'than...","['great', 'clever', 'game', 'different', 'many...","['great', 'clever', 'game', 'differ', 'game']","['great', 'clever', 'game', 'differ', 'game']"
71408,291457.0,Gloomhaven: Jaws of the Lion,Castanza,This is my first time playing a campaign game....,8.5,228,40,this is my first time playing a campaign game ...,"['this', 'is', 'my', 'first', 'time', 'playing...","['first', 'time', 'playing', 'campaign', 'game...","['time', 'play', 'campaign', 'game', 'great', ...","['time', 'campaign', 'game', 'great', 'experi'..."
334,366013.0,Heat: Pedal to the Metal,MacTele,Great game. The system is great. Heat is brill...,6.0,143,27,great game the system is great heat is brillan...,"['great', 'game', 'the', 'system', 'is', 'grea...","['great', 'game', 'system', 'great', 'heat', '...","['great', 'game', 'great', 'heat', 'brillant',...","['great', 'game', 'great', 'heat', 'brillant',..."
44868,199792.0,Everdell,Elonka,"On my first play, quite enjoyed it, there were...",9.0,141,27,on my first play quite enjoyed it there were m...,"['on', 'my', 'first', 'play', 'quite', 'enjoye...","['first', 'play', 'quite', 'enjoyed', 'many', ...","['play', 'enjoy', 'way', 'thing', 'work', 'art...","['play', 'way', 'thing', 'art', 'love', 'happi']"


In [40]:
# Check the number of posts that contain specific words
print(len(df[df.comment.str.contains('luck')]))
print(len(df[df.comment.str.contains('random')]))
print(len(df[df.comment.str.contains('boring')]))
print(len(df[df.comment.str.contains('complex')]))
print(len(df[df.comment.str.contains('complicated')]))
print(len(df[df.comment.str.contains('bookkeeping')]))
print()
print(len(df[df.comment.str.contains('edition')]))
print(len(df[df.comment.str.contains('version')]))
print(len(df[df.comment.str.contains('expansion')]))

# Display a sample of 5 rows from the DataFrame that contain the word 'boring'
df[df.comment.str.contains('boring')].sample(5)

3097
2755
1185
3057
868
130

1177
2136
5469


Unnamed: 0,id,title,user,comment,rating,text_length,word_count,processed_comment,comment_tokenized,comment_key_words,gensim_comment,gensim_comment_verbs
22692,312484.0,Lost Ruins of Arnak,EHngel,I'm loving this at the moment. I was initially...,10.0,585,93,im loving this at the moment i was initially d...,"['im', 'loving', 'this', 'at', 'the', 'moment'...","['im', 'loving', 'moment', 'initially', 'doubt...","['love', 'moment', 'initi', 'doubt', 'enjoy', ...","['moment', 'initi', 'doubt', 'game', 'solo', '..."
61603,230802.0,Azul,Jmccue,A tile selecting and placing game with extreme...,3.0,583,110,a tile selecting and placing game with extreme...,"['a', 'tile', 'selecting', 'and', 'placing', '...","['tile', 'selecting', 'placing', 'game', 'extr...","['tile', 'select', 'place', 'game', 'extrem', ...","['tile', 'select', 'place', 'game', 'extrem', ..."
47106,199792.0,Everdell,Werbaer,"base rating: 5.5 - average, slightly boring, l...",4.0,177,34,base rating 55 average slightly boring luck f...,"['base', 'rating', '55', 'average', 'slightly'...","['base', 'rating', '55', 'average', 'slightly'...","['base', 'rat', 'averag', 'slight', 'bore', 'l...","['base', 'rat', 'averag', 'slight', 'bore', 'l..."
6784,316554.0,Dune: Imperium,SirHandsome,"Removes agency from worker placement, defeatin...",3.0,674,120,removes agency from worker placement defeating...,"['removes', 'agency', 'from', 'worker', 'place...","['removes', 'agency', 'worker', 'placement', '...","['remov', 'agenc', 'worker', 'placement', 'def...","['remov', 'agenc', 'worker', 'placement', 'pur..."
40029,174430.0,Gloomhaven,Uncivil,"Not my type of game, I don't like long sloggis...",3.0,983,176,not my type of game i dont like long sloggish ...,"['not', 'my', 'type', 'of', 'game', 'i', 'dont...","['type', 'game', 'dont', 'like', 'long', 'slog...","['type', 'game', 'like', 'long', 'sloggish', '...","['type', 'game', 'like', 'long', 'sloggish', '..."
