In [1]:
import dask.dataframe as dd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm.auto import tqdm

# Configure INFO-level logging so progress messages show up in the cell output
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Open every parquet file matching the glob as a single Dask dataframe,
# then keep only the rows whose review_language is 'english'
logger.info("Loading parquet files...")
df = dd.read_parquet("length_check_parquet/*.parquet")
english_only = df['review_language'] == 'english'
df = df[english_only]

# Row count of the filtered data: measure each partition's length,
# then add the per-partition counts together
partition_sizes = df.map_partitions(len).compute()
total_rows = partition_sizes.sum()
logger.info(f"Total rows before sampling: {total_rows}")

# Take a 10% random sample (fixed random_state so reruns draw the same rows)
# and materialize it in memory via .compute()
logger.info("Sampling data...")
sampled = df.sample(frac=0.1, random_state=42)
df_sample = sampled.compute()

logger.info(f"Sample shape: {df_sample.shape}")
# Bare trailing expression: the notebook renders the first rows of the sample
df_sample.head()

  from .autonotebook import tqdm as notebook_tqdm
INFO:__main__:Loading parquet files...
INFO:__main__:Total rows before sampling: 575391
INFO:__main__:Sampling data...
INFO:__main__:Sample shape: (57538, 29)


Unnamed: 0,name,steam_appid,required_age,is_free,controller_support,detailed_description,about_the_game,short_description,price_overview,metacritic_score,...,author_num_reviews,author_playtime_forever,author_play_time_last_two_weeks,author_playtime_at_review,author_last_played,review,voted_up,votes_up,votes_funny,weighted_vote_score
27065,Hollow Knight,367520,0,False,full,"<h2 class=""bb_tag"">Hollow Knight Expands with ...","<h2 class=""bb_tag"">Hollow Knight Expands with ...",Forge your own path in Hollow Knight! An epic ...,$14.99,87,...,1,4903.0,0.0,1977.0,1724703000.0,"I'm normally not a big metroidvannia fan, howe...",True,0,0,0.5
92223,Hollow Knight,367520,0,False,full,"<h2 class=""bb_tag"">Hollow Knight Expands with ...","<h2 class=""bb_tag"">Hollow Knight Expands with ...",Forge your own path in Hollow Knight! An epic ...,$14.99,87,...,5,2380.0,0.0,2379.0,1516734000.0,"I won't repeat common remarks. In my eyes, wh...",True,0,0,0.5
87621,Hollow Knight,367520,0,False,full,"<h2 class=""bb_tag"">Hollow Knight Expands with ...","<h2 class=""bb_tag"">Hollow Knight Expands with ...",Forge your own path in Hollow Knight! An epic ...,$14.99,87,...,34,5091.0,0.0,2576.0,1685726000.0,"Gorgeous graphics. Amazing music. Challenging,...",True,0,0,0.5
17858,Hollow Knight,367520,0,False,full,"<h2 class=""bb_tag"">Hollow Knight Expands with ...","<h2 class=""bb_tag"">Hollow Knight Expands with ...",Forge your own path in Hollow Knight! An epic ...,$14.99,87,...,29,3263.0,0.0,3263.0,1581268000.0,Decent platformer. Great combat. Fun explorati...,True,0,0,0.5
87719,Hollow Knight,367520,0,False,full,"<h2 class=""bb_tag"">Hollow Knight Expands with ...","<h2 class=""bb_tag"">Hollow Knight Expands with ...",Forge your own path in Hollow Knight! An epic ...,$14.99,87,...,8,2051.0,0.0,1913.0,1646817000.0,A very fun metroidvania type game. 40+ hours o...,True,0,0,0.5
