In [4]:
import requests
import polars as pl
import os
import time
from urllib.parse import quote

In [5]:
def get_steam_reviews(app_id, cursor='*', timeout=30):
    """Fetch a single page of reviews for one app from the Steam store.

    The request asks the ``appreviews`` endpoint for up to 100 recent,
    English-language reviews in JSON form.

    Args:
        app_id: Steam app ID; interpolated into the URL as-is.
        cursor: Paging cursor. ``'*'`` requests the first page. The value is
            placed into the query string verbatim, so any cursor taken from a
            previous response must already be URL-encoded by the caller.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would block
            the whole crawl indefinitely.

    Returns:
        The decoded JSON payload (a dict) when the server answers with HTTP
        200. This is also the case when the payload's ``success`` field is not
        1; a diagnostic is printed and the payload is still returned so the
        caller can inspect it. ``None`` when the HTTP status is not 200.

    Raises:
        Exceptions from ``requests`` (connection errors, timeouts) and JSON
        decoding errors are not caught here and propagate to the caller.
    """
    url = f"https://store.steampowered.com/appreviews/{app_id}?json=1&filter=recent&language=english&cursor={cursor}&num_per_page=100"
    response = requests.get(url, timeout=timeout)

    if response.status_code != 200:
        print(f"Failed to get reviews for app ID {app_id}. Status code: {response.status_code}")
        return None

    data = response.json()
    # Use .get so a payload without a 'success' key is reported as a failure
    # instead of raising KeyError.
    if data.get('success') != 1:
        print(f"Failed to get valid reviews for app ID {app_id}.")
        print(data)
    return data

def fetch_all_reviews(app_id, output_dir="data\\parquets"):
    """Download every available review page for an app and store it as parquet.

    Pages are requested through ``get_steam_reviews`` until one of the stop
    conditions below is hit; the collected review records are then written to
    ``<output_dir>/<app_id>_reviews_<count>.parquet``.

    Paging stops when:
      * the request failed (``None``) or the payload reports ``success != 1``,
      * the page reports zero reviews,
      * the payload carries no cursor, or
      * the next cursor has already been used (prevents an endless loop if
        the server keeps handing back the same cursor).

    Args:
        app_id: Steam app ID; also used as the file-name prefix.
        output_dir: Directory that receives the parquet file. It is created
            if it does not exist. NOTE(review): the default keeps the original
            backslash-separated relative path, which is only a nested path on
            Windows — pass an explicit directory on other platforms.

    Returns:
        The number of reviews collected. When no review was collected, no
        file is written and 0 is returned, so a failed download does not
        leave behind an empty parquet that looks like a finished download.
    """
    cursor = '*'
    seen_cursors = {cursor}
    all_reviews = []

    while True:
        data = get_steam_reviews(app_id, cursor)
        # get_steam_reviews returns the payload even when 'success' is not 1;
        # such a payload may lack 'query_summary', so stop before indexing it.
        if data is None or data.get('success') != 1:
            break
        if data.get('query_summary', {}).get('num_reviews', 0) == 0:
            break

        # Collect reviews
        all_reviews.extend(data.get('reviews', []))

        # Update cursor; it is URL-encoded because get_steam_reviews inserts
        # it into the query string verbatim.
        next_cursor = data.get('cursor')
        if not next_cursor:
            break
        cursor = quote(next_cursor)
        if cursor in seen_cursors:
            break
        seen_cursors.add(cursor)

    if not all_reviews:
        print(f"No reviews collected for app ID {app_id}; nothing written.")
        return 0

    # Create a DataFrame from reviews
    reviews_df = pl.DataFrame(all_reviews)
    num_entries = len(reviews_df)

    # Make sure the target directory exists before writing into it
    os.makedirs(output_dir, exist_ok=True)
    file_name = f"{app_id}_reviews_{num_entries}.parquet"
    file_path = os.path.join(output_dir, file_name)

    # Save to parquet
    reviews_df.write_parquet(file_path)

    return num_entries

In [6]:
# start_time = time.time()
# num_entries = fetch_all_reviews(553850)
# elapsed_time = time.time() - start_time

# print(f"Die Ausführungszeit betrug {elapsed_time:.2f} Sekunden.")
# print(f"Anzahl Einträge:{num_entries}")

In [21]:
# Read the top-seller JSON into a frame and take the values of its first
# column as a plain Python list.
topseller_frame = pl.read_json('data/SteamTopSellers.json')
first_column = list(topseller_frame)[0]
topseller = list(first_column)
topseller

['730',
 '1145350',
 '1604030',
 '281990',
 '1840080',
 '553850',
 '2195250',
 '2215430',
 '1158310',
 '1363080',
 '2479810',
 '236390',
 '1172470',
 '1145360',
 '1599340',
 '236850',
 '1151340',
 '983870',
 '1086940',
 '394360',
 '374320',
 '813230',
 '1142710',
 '570',
 '1222670',
 '1812450',
 '381210',
 '2881650',
 '1677280',
 '306130',
 '1085660',
 '1245620',
 '529340',
 '39210',
 '1177980',
 '377160',
 '805550',
 '230410',
 '1774580',
 '1449850',
 '1669000',
 '1954200',
 '2670630',
 '1962663',
 '582660',
 '255710',
 '227300',
 '294100',
 '1172620',
 '2519060',
 '1248130',
 '570940',
 '1794680',
 '2426960',
 '359550',
 '578080',
 '271590',
 '1203620',
 '1517290',
 '1426210',
 '1250410',
 '435150',
 '552990',
 '1476970',
 '1284190',
 '1222700',
 '1091500',
 '870780',
 '252490',
 '2381740',
 '1326470',
 '413150',
 '2418520',
 '2399830',
 '1018830',
 '238960',
 '427410',
 '1172380',
 '761890',
 '756800',
 '1284210',
 '1407200',
 '1328670',
 '1693980',
 '1237950',
 '1887840',
 '2362300

In [8]:
# Build the list of app IDs that already have a parquet file in the folder.
# The ID is taken as the part of the file name before the first underscore.
folder_path = 'data\\parquets'
downloaded = [
    parquet_name.split('_')[0]
    for parquet_name in os.listdir(folder_path)
    if parquet_name.endswith('.parquet')
]
downloaded

['10', '1245620', '2066020', '377160', '553850']

In [23]:
# Download the reviews of every top seller that is not on disk yet and report
# how long each download took.
already_downloaded = set(downloaded)  # set membership instead of scanning the list each time
for game in topseller:
    if game in already_downloaded:
        continue
    start_time = time.time()
    # Fetch the reviews of the game currently being iterated. The previous
    # hard-coded ID downloaded the same app on every pass while the report
    # below claimed it was `game`.
    num_entries = fetch_all_reviews(game)
    elapsed_time = time.time() - start_time

    # Convert the duration from seconds into hours, minutes and seconds
    hours, remainder = divmod(elapsed_time, 3600)
    minutes, seconds = divmod(remainder, 60)

    print(f"""AppID: {game}
    Dauer: {int(hours)} Stunden, {int(minutes)} Minuten, {seconds:.2f} Sekunden
    Anzahl: {num_entries}
    ####################""")