In [82]:
demo = False

# IMPORT

In [58]:
# Import necessary modules
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json
import pprint 
import os

# Import progress bar module
from alive_progress import alive_bar

# Import time and sys modules
import time
import sys

# Import BoardGameGeek client
from boardgamegeek import BGGClient

# Create an instance of the BoardGameGeek client
bgg = BGGClient()

# EXTRACTION

In [None]:
# Retrieve the hot items in the 'boardgame' category from BoardGameGeek
hot_items = bgg.hot_items('boardgame')

# Create an empty dictionary to store items that encounter errors
miss = {}

# Create an empty list to store the extracted data
data = []

count = 0

# Iterate over each hot item
for item in hot_items:
    try:
        count = count + 1
        print(f'[{count}]')
        
        # Retrieve the game details, including comments, for the current item
        game = bgg.game(game_id=item.id, comments=True)
        
        # Create a progress bar with the length of the comments
        with alive_bar(len(game.comments), force_tty=True) as bar:
            
            # Iterate over each comment in the game
            for comment in game.comments:
                # Create a dictionary to store the comment data
                com_data = {
                    "id": item.id,
                    "title": item.name,
                    "user": comment.commenter,
                    "comment": comment.comment,
                    "rating": comment.rating
                }
                
                # Append the comment data to the list
                data.append(com_data)
                
                # time.sleep(0.01)
                
                # Update the progress bar
                bar()
        
    except:
        # If an error occurs, print 'error' and add the item to the 'miss' dictionary
        print('ERROR - Skipping the rest of the comments...\n')
        miss[item.id] = item.name

if demo:
    # Specify the folder path where you want to save the file
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Specify the filename
    filename = "comment_data_demo.json"
else:
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    filename = "comment_data.json"

# Construct the full file path
file_path = os.path.join(folder_path, filename)

# Save the extracted data to the JSON file
with open(file_path, 'w') as f:
    json.dump(data, f, indent=2)  # indent=2 is not needed but makes the file human-readable if the data is nested

# WRANGLING

In [59]:
import pandas as pd
import json
import os

# Set the display option to show all columns in pandas DataFrame
pd.set_option('display.max_columns', None)

if demo:
    # Specify the folder path where you want to save the file
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Specify the filename
    filename = "comment_data_demo.json"
    print('Using demo...')
else:
    folder_path = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    filename = "comment_data.json"

# Construct the full file path
file_path = os.path.join(folder_path, filename)

# Read the JSON file containing the comment data
with open(file_path, 'r') as f:
    post_list = json.load(f)
    
# Print the number of comments before any formatting
print(f'Amount of comments before any formatting: {len(post_list)}')

# Convert the JSON data into a pandas DataFrame
df = pd.json_normalize(post_list)

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Save the DataFrame as a CSV file in the specified directory
    df.to_csv(os.path.join(path_original_data, 'comment_data_demo.csv'), index=False)
    rint('Using demo...')
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df.to_csv(os.path.join(path_original_data, 'comment_data.csv'), index=False)

# Display the first row of the DataFrame
df.head(1)

Amount of comments before any formatting: 72651


Unnamed: 0,id,title,user,comment,rating
0,390478,Gloomhaven: Second Edition,amusedleg,Day 1 of the GH 2.0 campaign. Can't wait.,


# CLEANING

## Reading raw data

In [60]:
import pandas as pd
import numpy as np
import json
import os

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Read the CSV file into a DataFrame
    df = pd.read_csv(os.path.join(path_original_data, 'comment_data_demo.csv'), low_memory=False)
    rint('Using demo...')
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df = pd.read_csv(os.path.join(path_original_data, 'comment_data.csv'), low_memory=False)

print('[*]', len(df))    
df.head(1)

72651


Unnamed: 0,id,title,user,comment,rating
0,390478,Gloomhaven: Second Edition,amusedleg,Day 1 of the GH 2.0 campaign. Can't wait.,


In [62]:
# Print some statistics of the 'comment' field

# Calculate the percentage of non-null comments
comment_percentage = round(df.comment.notnull().mean() * 100, 2)
print(str(comment_percentage) + '%')

# Calculate the maximum, minimum, and mean length of comments
max_length = df.comment.str.len().max()
min_length = df.comment.str.len().min()
mean_length = df.comment.str.len().mean()
print(max_length)
print(min_length)
print(mean_length)
print()

# Search the number of comments containing searched words within the text of the message
pattern = "random"

# Count comments containing the search pattern
contains_pattern = df.comment.str.contains(pattern, na=False).sum()

# Count comments starting with the search pattern
starts_with_pattern = df.comment.str.startswith(pattern, na=False).sum()

# Count comments exactly matching the search pattern
exact_match_pattern = df.comment.str.fullmatch(pattern, na=False).sum()

print(contains_pattern)
print(starts_with_pattern)
print(exact_match_pattern)

# Filter out rows with null comments and reset the index
df = df[df.comment.notnull()]
df.reset_index(drop=True, inplace=True)

# Print the first messages that contain the pattern
matching_comments = df.loc[df.comment.str.contains(pattern, na=False), 'comment']
print(matching_comments)

print('[*]', len(df))    

100.0%
18588
1
199.270062641977

1326
1
0
102      After just one play I get the feeling I have s...
146      I really don’t get the hype around Heat, but t...
183      An okay "deck management” game with a relatabl...
326      Update: After some more plays I feel that some...
435      [imageID=6940449small inline] This really feel...
                               ...                        
72410    The character progression system is very inter...
72507    These are thoughts from a single play through ...
72515    This is pure a take that game! don't try to "p...
72566    Too long with 2P. Better with 3P, but the card...
72593    Review: Highly interactive, tactical, and stre...
Name: comment, Length: 1326, dtype: object
[*] 72635


## Filtering the data

In [63]:
from guess_language import guess_language
import enchant
import string
import re

# Function to check if a comment is in English
def is_english_batch(batch):
    # Create a batch of processed texts
    processed_texts = batch['comment'].str.lower().str.findall(r"[a-zA-Z0-9']+")

    # Create an English dictionary
    english_dictionary = enchant.Dict("en_US")

    # Check if any comment in the batch is in English
    is_english = processed_texts.apply(lambda text: sum(english_dictionary.check(word) for word in text) >= len(text) / 2)

    # Return a boolean Series indicating if each comment is in English
    return is_english

In [69]:
from IPython.display import display, HTML
from alive_progress import alive_bar
from tqdm import tqdm
import pandas as pd
import time
import sys

# Batch processing
batch_size = 1000  # Number of rows to process in each batch
num_rows = len(df)
result = pd.Series([], dtype='float64')  # Store the results

# Calculate the number of batches
num_batches = (num_rows // batch_size) + 1

# Initialize a progress bar
with tqdm(total=num_batches, ncols=num_batches) as pbar:
    # Process each batch
    for i in range(0, num_rows, batch_size):
        # Extract a batch of rows from the DataFrame
        batch = df.iloc[i:i+batch_size]
        
        # Filter out non-English rows in the batch
        batch_english = batch.loc[is_english_batch(batch)]
        
        # Concatenate the English rows to the result
        result = pd.concat([result, batch_english])
        
        # Update the progress bar
        pbar.update(1)

# Reset the index of the resulting DataFrame
result.reset_index(drop=True, inplace=True)

# Print the updated DataFrame
result.head(5)

100%|████████████████████████████████████| 73/73 [03:37<00:00,  2.98s/it]


Unnamed: 0,0,id,title,user,comment,rating
0,,390478.0,Gloomhaven: Second Edition,amusedleg,Day 1 of the GH 2.0 campaign. Can't wait.,
1,,390478.0,Gloomhaven: Second Edition,bark,BGCJ made me do it. Stop making fake dungeon c...,1.0
2,,390478.0,Gloomhaven: Second Edition,Brefs,"Cant wait for It, thanks for the amazing job s...",10.0
3,,390478.0,Gloomhaven: Second Edition,Dali187,A perfect non greed driven game made even more...,10.0
4,,390478.0,Gloomhaven: Second Edition,DJ_Tsaladi_Tjatekok,"Nem vagyok egy Homályrév-rajongó, de ha már me...",


In [70]:
import pandas as pd

# Print the size of each database
print('Current database:', len(result))
print('Original database:', len(df))
print('Difference:', len(df)-len(result))

# Assuming you have two DataFrames: df1 and df2 representing the two databases
df1 = df
# print('Check.')
df2 = result
# print('Check.')

# Find rows with differing 'comment' in df1 compared to df2
diff_df1 = df1[~df1['comment'].isin(df2['comment'])]

# Find rows with differing 'comment' in df2 compared to df1
diff_df2 = df2[~df2['comment'].isin(df1['comment'])]

# Concatenate the differing rows into a single DataFrame
diff_combined = pd.concat([diff_df1, diff_df2])

# Reset the index of the resulting DataFrame
diff_combined.reset_index(drop=True, inplace=True)

# Print the differences
diff_combined.head(5)

Current database: 66267
Original database: 72635
Difference: 6368


Unnamed: 0,id,title,user,comment,rating,0
0,390478.0,Gloomhaven: Second Edition,Fuzzel,Unnötige Geldmache mit der erfolgreichen Marke.,1.0,
1,366013.0,Heat: Pedal to the Metal,a2greg,nyp,,
2,366013.0,Heat: Pedal to the Metal,Abri,56x87mm cards (330pcs),,
3,366013.0,Heat: Pedal to the Metal,alexbatbee,zatu,,
4,366013.0,Heat: Pedal to the Metal,ANDREWSOFT,Jugadas varias partidas en solitario con el mó...,8.2,


In [71]:
# Add a new column with the length of each comment
result['text_length'] = result['comment'].apply(lambda x: len(x))  

# Add a new column with the word count of each comment
result['word_count'] = result['comment'].apply(lambda x: len(x.split())) 

# Filter out rows with word count less than or equal to 5
print('Original database:', len(result))
result = result[result['word_count'] > 5]  

# Print the size of each database
print('Current database:', len(result))

# Drop the first column (assumed to be unnecessary)
result = result.drop(result.columns[0], axis=1)  

# Print the first 5 rows of the resulting DataFrame
result.head(5)  

# Save the pre-processed DataFrame to a CSV file
if demo:
    result.to_csv(os.path.join(path_original_data, 'pre_processed_comment_data_demo.csv'), index=False)
else:
    result.to_csv(os.path.join(path_original_data, 'pre_processed_comment_data.csv'), index=False)

Original database: 66267
Current database: 45247


# PREPROCESS

In [72]:
# Import necessary modules
import pandas as pd
import numpy as np
import json
import os

# Set the display option to show all columns in pandas DataFrame
pd.set_option('display.max_columns', None)

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Construct the file path to the CSV file
    csv_file_path = os.path.join(path_original_data, 'pre_processed_comment_data_demo.csv')
    print('Using demo...')
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    csv_file_path = os.path.join(path_original_data, 'pre_processed_comment_data.csv')

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_file_path, low_memory=False)

## Punctuation Removal

In [73]:
import string

# Function to remove punctuation from a text
def remove_punctuation(text):
    # Create a set of allowed characters (letters, numbers, and space)
    allowed_chars = set(string.ascii_letters + string.digits + ' ')
    
    # Remove punctuation characters not in the allowed set
    processed_text = ''.join(char for char in text if char in allowed_chars)
    
    return processed_text

# Apply the remove_punctuation() function to the 'comment' column and store the result in a new column 'processed_comment'
df['processed_comment'] = df['comment'].apply(remove_punctuation)

# Lower case all the messages
df['processed_comment'] = df['processed_comment'].str.lower()

## Tokenization

In [74]:
import re

# Function to tokenize a text
def tokenization(text):
    # Split the text on spaces to create tokens
    tokens = text.split()
    
    return tokens

# Apply the tokenization() function to the 'processed_comment' column and store the result in a new column 'comment_tokenized'
df['comment_tokenized'] = df['processed_comment'].apply(lambda x: tokenization(x))

## Stopword Removal

In [75]:
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
import nltk
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np

# Download the required NLTK resources (uncomment if needed)
# nltk.download('wordnet')
# nltk.download('omw-1.4')

# Stop words present in the library
stopwords = nltk.corpus.stopwords.words('english')
print(stopwords[0:10])

# Defining the function to remove stopwords from tokenized text
def remove_stopwords(text):
    output= [i for i in text if i not in stopwords]
    return output

# Applying the function
df['comment_key_words']= df['comment_tokenized'].apply(lambda x:remove_stopwords(x))

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're"]


## Stemming

## Lemmatization

## Gensim preprocessing

In [76]:
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np
import nltk
import json

np.random.seed(400)

# Download the required NLTK resource (uncomment if needed)
# nltk.download('wordnet')

reference_sheet = {}  # Dictionary to store word reference sheet

stemmer = SnowballStemmer("english")

# Function to lemmatize and stem a word
def lemmatize_stemming(text):
    return stemmer.stem(WordNetLemmatizer().lemmatize(text, pos='v'))

# Tokenize, lemmatize, and filter stopwords
def preprocess(text):
    result = []
    for token in gensim.utils.simple_preprocess(text):
        if token not in STOPWORDS and len(token) > 2:
            word = lemmatize_stemming(token)
            if word in reference_sheet:
                if token not in reference_sheet[word]:
                    reference_sheet[word].append(token)
            else:
                reference_sheet[word] = [token]
            result.append(word)
    return result

# Tokenize, lemmatize, and filter verbs
def preprocess_verbs(text):
    text = gensim.utils.simple_preprocess(text)
    tagged_tokens = nltk.pos_tag(text)
    filtered_tokens = [token for token, pos_tag in tagged_tokens if pos_tag not in ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']]
    
    result = []
    for token in filtered_tokens:
        if token not in STOPWORDS and len(token) > 2:
            result.append(lemmatize_stemming(token))
    return result

# print('Start.')
df['gensim_comment'] = df['comment'].apply(preprocess)
# print('Next.')
df['gensim_comment_verbs'] = df['comment'].apply(preprocess_verbs)
# print('Finish.')

# Save reference sheet as a JSON file
json_data = json.dumps(reference_sheet)
with open('reference_sheet.json', 'w') as file:
    file.write(json_data)

## Restructring the dataset

In [84]:
# At this point is necesary to check if the variable 'demo' has not being changed
print(demo)
#demo = False

False


In [86]:
# Get the list of column names
columns = list(df.columns)
print(columns, '\n')

print('Original database:', len(df))
# Filter the DataFrame based on the length of 'gensim_comment' column
df = df[df['gensim_comment'].map(lambda d: len(d)) >= 5]
df = df.reset_index(drop=True)

# Print the size of each database
print('Current database:', len(df))


# Calculate the average length of 'gensim_comment' column
average_length = df['gensim_comment'].apply(lambda x: len(x)).mean()
print(average_length)

if demo:
     # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Save the DataFrame to a CSV file
    df.to_csv(os.path.join(path_original_data, 'post_processed_comment_data_demo.csv'), index=False)
    print('Using demo...')
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df.to_csv(os.path.join(path_original_data, 'post_processed_comment_data.csv'), index=False)

# Display a sample of 5 rows from the DataFrame
df.sample(5)

['id', 'title', 'user', 'comment', 'rating', 'text_length', 'word_count', 'processed_comment', 'comment_tokenized', 'comment_key_words', 'gensim_comment', 'gensim_comment_verbs'] 

Original database: 40175
Current database: 40175
242.03462352209084


Unnamed: 0,id,title,user,comment,rating,text_length,word_count,processed_comment,comment_tokenized,comment_key_words,gensim_comment,gensim_comment_verbs
16698,295947.0,Cascadia,Urtho,I have a feeling this would fill the same nich...,6.0,171,37,i have a feeling this would fill the same nich...,"['i', 'have', 'a', 'feeling', 'this', 'would',...","['feeling', 'would', 'fill', 'niche', 'shelf',...","['feel', 'nich', 'shelf', 'park', 'amaz', 'lon...","['feel', 'nich', 'shelf', 'park', 'amaz', 'lon..."
1326,331106.0,The Witcher: Old World,ahazperutz,Figure set for Witcher fans (you can even play...,1.0,55,10,figure set for witcher fans you can even play ...,"['figure', 'set', 'for', 'witcher', 'fans', 'y...","['figure', 'set', 'witcher', 'fans', 'even', '...","['figur', 'set', 'witcher', 'fan', 'play', 'ga...","['figur', 'witcher', 'fan', 'game']"
312,366013.0,Heat: Pedal to the Metal,jonathangmeyer,"Having now played this, I can see where all th...",9.0,589,101,having now played this i can see where all the...,"['having', 'now', 'played', 'this', 'i', 'can'...","['played', 'see', 'hype', 'comes', 'solid', 'g...","['have', 'play', 'hype', 'come', 'solid', 'gam...","['hype', 'solid', 'game', 'easi', 'gear', 'pre..."
15797,295947.0,Cascadia,jjvvhh,Abstract 1. Family 5. 2022 Spiel des Jahres,,43,8,abstract 1 family 5 2022 spiel des jahres,"['abstract', '1', 'family', '5', '2022', 'spie...","['abstract', '1', 'family', '5', '2022', 'spie...","['abstract', 'famili', 'spiel', 'des', 'jahr']","['abstract', 'famili', 'spiel', 'des', 'jahr']"
2905,342942.0,Ark Nova,mil05006,"This game is a ton of fun, though the endgame ...",9.0,189,38,this game is a ton of fun though the endgame c...,"['this', 'game', 'is', 'a', 'ton', 'of', 'fun'...","['game', 'ton', 'fun', 'though', 'endgame', 'c...","['game', 'ton', 'fun', 'endgam', 'come', 'pret...","['game', 'ton', 'fun', 'endgam', 'pretti', 'ab..."


In [87]:
import pandas as pd
import numpy as np
import json
import os

pd.set_option('display.max_columns', None)

# Set the maximum number of columns to display
pd.set_option('display.max_columns', None)

if demo:
    # Set the path for the original data directory
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv\version_demo'
    # Read the CSV file into a DataFrame
    df = pd.read_csv(os.path.join(path_original_data, 'post_processed_comment_data_demo.csv'), low_memory=False)
    print('Using demo...')
else:
    path_original_data = r'C:\Users\Usuario\Documents\JupyterFolder\unimi_files\IR\files_csv'
    df = pd.read_csv(os.path.join(path_original_data, 'post_processed_comment_data.csv'), low_memory=False)

# Display a sample of 10 rows from the DataFrame
df.sample(5)

Unnamed: 0,id,title,user,comment,rating,text_length,word_count,processed_comment,comment_tokenized,comment_key_words,gensim_comment,gensim_comment_verbs
15866,295947.0,Cascadia,Kankui,"Purchased from GameNerdz, Nerdz Day sale - $22.",8.0,47,8,purchased from gamenerdz nerdz day sale 22,"['purchased', 'from', 'gamenerdz', 'nerdz', 'd...","['purchased', 'gamenerdz', 'nerdz', 'day', 'sa...","['purchas', 'gamenerdz', 'nerdz', 'day', 'sale']","['gamenerdz', 'nerdz', 'day', 'sale']"
39243,192135.0,Too Many Bones,fiatkid55,"Quite a unique game. Tense, building difficult...",9.0,221,38,quite a unique game tense building difficulty ...,"['quite', 'a', 'unique', 'game', 'tense', 'bui...","['quite', 'unique', 'game', 'tense', 'building...","['uniqu', 'game', 'tens', 'build', 'difficulti...","['uniqu', 'game', 'tens', 'difficulti', 'tyran..."
21533,169786.0,Scythe,PolterGhost,"This is a miniature 4X game, basically reducin...",7.0,612,103,this is a miniature 4x game basically reducing...,"['this', 'is', 'a', 'miniature', '4x', 'game',...","['miniature', '4x', 'game', 'basically', 'redu...","['miniatur', 'game', 'basic', 'reduc', 'explor...","['miniatur', 'game', 'basic', 'explor', 'event..."
29035,255984.0,Sleeping Gods,Stonebeard,It can't be a 10 because I will eventually hav...,9.5,371,71,it cant be a 10 because i will eventually have...,"['it', 'cant', 'be', 'a', '10', 'because', 'i'...","['cant', '10', 'eventually', 'everywhere', 'fi...","['eventu', 'game', 'teach', 'readi', 'second',...","['eventu', 'game', 'readi', 'second', 'game', ..."
14203,205637.0,Arkham Horror: The Card Game,the_horror,This quickly became one our top 3 favorite co-...,9.0,110,20,this quickly became one our top 3 favorite coo...,"['this', 'quickly', 'became', 'one', 'our', 't...","['quickly', 'became', 'one', 'top', '3', 'favo...","['quick', 'favorit', 'game', 'fantast', 'desig...","['quick', 'favorit', 'game', 'fantast', 'desig..."


In [88]:
print(len(df))
print()

# Check the number of posts that contain specific words
print(len(df[df.comment.str.contains('luck')]))
print(len(df[df.comment.str.contains('random')]))
print(len(df[df.comment.str.contains('boring')]))
print(len(df[df.comment.str.contains('complex')]))
print(len(df[df.comment.str.contains('complicated')]))
print(len(df[df.comment.str.contains('bookkeeping')]))
print()
print(len(df[df.comment.str.contains('edition')]))
print(len(df[df.comment.str.contains('version')]))
print(len(df[df.comment.str.contains('expansion')]))

# Display a sample of 5 rows from the DataFrame that contain the word 'boring'
df[df.comment.str.contains('boring')].sample(5)

40175

1453
1305
616
1528
437
41

569
1050
3111


Unnamed: 0,id,title,user,comment,rating,text_length,word_count,processed_comment,comment_tokenized,comment_key_words,gensim_comment,gensim_comment_verbs
28508,255984.0,Sleeping Gods,EdwardZ,I pretty much love everything I have from Red ...,4.0,597,107,i pretty much love everything i have from red ...,"['i', 'pretty', 'much', 'love', 'everything', ...","['pretty', 'much', 'love', 'everything', 'red'...","['pretti', 'love', 'red', 'raven', 'game', 're...","['pretti', 'love', 'red', 'raven', 'game', 'pe..."
25453,285774.0,Marvel Champions: The Card Game,rolfisrolf,FFG is running out of ideas. They've taken ele...,2.0,171,31,ffg is running out of ideas theyve taken eleme...,"['ffg', 'is', 'running', 'out', 'of', 'ideas',...","['ffg', 'running', 'ideas', 'theyve', 'taken',...","['ffg', 'run', 'idea', 'take', 'element', 'pre...","['ffg', 'idea', 'element', 'previous', 'lcgs',..."
37014,291457.0,Gloomhaven: Jaws of the Lion,fenwayfrank,This game is a revelation. This is my first ti...,10.0,522,89,this game is a revelation this is my first tim...,"['this', 'game', 'is', 'a', 'revelation', 'thi...","['game', 'revelation', 'first', 'time', 'combi...","['game', 'revel', 'time', 'combin', 'charact',...","['game', 'revel', 'time', 'charact', 'build', ..."
22706,169786.0,Scythe,tbpinter,Ugh. Dry and boring. Sold. Sigh...Giving it a...,8.0,113,21,ugh dry and boring sold sighgiving it a secon...,"['ugh', 'dry', 'and', 'boring', 'sold', 'sighg...","['ugh', 'dry', 'boring', 'sold', 'sighgiving',...","['ugh', 'dri', 'bore', 'sell', 'sigh', 'give',...","['ugh', 'dri', 'bore', 'sigh', 'second', 'tri'..."
17466,169786.0,Scythe,Bjorne,Not a good euro. Not a good direct conflict ga...,5.0,83,16,not a good euro not a good direct conflict gam...,"['not', 'a', 'good', 'euro', 'not', 'a', 'good...","['good', 'euro', 'good', 'direct', 'conflict',...","['good', 'euro', 'good', 'direct', 'conflict',...","['good', 'euro', 'good', 'direct', 'conflict',..."
