# Coletar Informações Gerais dos Jogos Steam

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import time

import requests
import json
import html

from dbgpu import GPUDatabase


In [2]:
# Load the yearly "most played" CSVs for 2019-2024. Each element of all_ids is
# the list of row dicts read from one year's file.
all_ids = [
    pd.read_csv(f"data/most_played_{year}.csv").to_dict('records')
    for year in range(2019, 2025)
]

# Collect the distinct values of the 'id' column across all years, so an id
# present in several yearly files is kept only once. The iteration variable is
# deliberately not named `id`: at notebook scope that would shadow the builtin
# for every later cell.
id_all_games = list({row['id'] for ids_year in all_ids for row in ids_year})
print(id_all_games)

[629760, 427520, 1551360, 582660, 1716740, 264710, 231430, 1145350, 2215430, 230410, 10, 1366540, 1293830, 1506830, 1222670, 1145360, 1203220, 250900, 252950, 526870, 1149460, 1222680, 1328670, 578080, 239140, 1217060, 550, 2670630, 632360, 892970, 1196590, 678960, 374320, 552500, 1282100, 346110, 201270, 108600, 1501750, 570, 601150, 2321470, 1128000, 280640, 848450, 440900, 1313860, 552520, 242760, 252490, 1222730, 779340, 2344520, 755790, 548430, 1607250, 2399830, 960090, 2050650, 1483870, 1816670, 648800, 1919590, 703080, 1250410, 1973530, 812140, 1623660, 692850, 244850, 1184370, 671860, 2881650, 3070070, 394360, 1794680, 359550, 1818750, 105600, 1966720, 2322560, 1790600, 1363080, 594570, 1172620, 1325200, 920210, 1845910, 1151640, 841370, 218620, 1593500, 606880, 1174180, 2679460, 1158310, 1282730, 1938090, 1286830, 2507950, 492720, 1337520, 251570, 1623730, 2767030, 1030840, 700600, 444090, 858810, 899770, 292030, 2567870, 1832640, 2479810, 221380, 858820, 2429640, 997070, 8383

In [3]:
# Steam store endpoint queried once per app id.
GAME_INFO_URL = "https://store.steampowered.com/api/appdetails?"

# Titles whose price is forced to 50 USD regardless of what the store reports
# (any title containing "FIFA" or "NBA" is treated the same way).
PRICE_OVERRIDE_TITLES = frozenset({
    "Horizon Zero Dawn™ Complete Edition",
    "Grand Theft Auto V Legacy",
    "Forza Horizon 4",
    "The Sims™ 4",
    "Marvel's Avengers - The Definitive Edition",
})


def _minimum_graphics(data):
    """Return the 'Graphics' line of the minimum PC requirements, or None.

    The requirements arrive as an HTML fragment whose <li> items look like
    "Key: value". None is returned when the requirements are absent or
    malformed, or when no item is labelled "graphics" (case-insensitive), so
    a value can never leak over from a previously processed game.
    """
    try:
        requirements = data['pc_requirements']['minimum']
        # json.loads has already resolved every escape sequence, so the text
        # is parsed as-is. Re-decoding it with 'unicode_escape' would
        # reinterpret its UTF-8 bytes as Latin-1 (corrupting non-ASCII
        # characters) and raise on any stray backslash.
        soup = BeautifulSoup(html.unescape(requirements), "html.parser")
    except (KeyError, TypeError, AttributeError):
        return None

    graphics = None
    for item in soup.find_all('li'):
        key, separator, value = item.get_text().partition(":")
        # No break: when several items match, the last one wins.
        if separator and key.strip().lower() == 'graphics':
            graphics = value.strip()
    return graphics


def _price_usd(data, name):
    """Return the game's base price in USD.

    Titles in PRICE_OVERRIDE_TITLES, and any title containing "FIFA" or
    "NBA", are fixed at 50. Otherwise the store's initial price (given in
    cents) is converted to dollars, falling back to 0 when the store reports
    no price.
    """
    if name in PRICE_OVERRIDE_TITLES or "FIFA" in name or "NBA" in name:
        return 50
    try:
        return float(data['price_overview']['initial']) / 100
    except (KeyError, TypeError, ValueError):
        return 0


info_games = []
for app_id in id_all_games:
    # The response is keyed by the app id as a string.
    app_id = str(app_id)
    params = {
        'appids': app_id,
        'cc': 'us',
        'l': 'english',
        'success': '1'
    }

    # A timeout, HTTP error or non-JSON body only skips this game; letting it
    # propagate would abort the run before anything is written to disk.
    try:
        response = requests.get(GAME_INFO_URL, params=params, timeout=10)
        response.raise_for_status()
        game_data = json.loads(response.text)
    except (requests.RequestException, ValueError):
        print(f"Erro ao processar ID {app_id}.")
        continue
    finally:
        # Pause between requests whether or not this one succeeded.
        time.sleep(1)

    if not game_data:
        continue

    # Per-game boundary: any unexpected payload shape is reported and the
    # game is skipped so the remaining ids are still processed.
    try:
        data = game_data[app_id]['data']
        # Collected fields: name, release date, genres, minimum graphics
        # requirement and price. To collect more, read them from `data` here.
        name = data['name']
        print(name)

        release_date = data['release_date']['date']

        # A missing or empty genre list is recorded as None instead of
        # discarding the whole game.
        genres_list = data.get('genres')
        genres = ', '.join([g['description'] for g in genres_list]) if genres_list else None

        # Build the record for the current game and add it to the list.
        info_games.append({
            'id': app_id,
            'game': name,
            'release_date': release_date,
            'price_USD': _price_usd(data, name),
            'genres': genres,
            'pc_requirements': _minimum_graphics(data)
        })
    except Exception:
        print(f"Erro ao processar ID {app_id}.")

df = pd.DataFrame(info_games)
df.to_csv("data/all_games_info.csv", index=False, encoding="utf-8")
print("ARQUIVO all_games_info.csv CRIADO.")

MORDHAU
Factorio
Forza Horizon 5
Black Desert
Starfield
Subnautica
Company of Heroes 2
Hades II
Ghost of Tsushima DIRECTOR'S CUT
Warframe
Counter-Strike
Dyson Sphere Program
Forza Horizon 4
FIFA 22
The Sims™ 4
Hades
NARAKA: BLADEPOINT
The Binding of Isaac: Rebirth
Rocket League®
Satisfactory
ICARUS
Need for Speed™ Heat
Mass Effect™ Legendary Edition
PUBG: BATTLEGROUNDS
Dying Light
Gunfire Reborn
Left 4 Dead 2
Supermarket Simulator
Risk of Rain 2
Valheim
Resident Evil Village
CODE VEIN
DARK SOULS™ III
Warhammer: Vermintide 2
REMNANT II®
ARK: Survival Evolved
Total War: SHOGUN 2
Project Zomboid
Lords of the Fallen
Dota 2
Devil May Cry 5
Deep Rock Galactic: Survivor
Cube World
Dark Shadows - Army of Evil
Subnautica: Below Zero
Conan Exiles
EA SPORTS™ FIFA 21
Far Cry® 5
The Forest
Rust
STAR WARS™: Squadrons
Total War: THREE KINGDOMS
Diablo® IV
Ring of Elysium
Deep Rock Galactic
MY HERO ULTRA RUMBLE
ARK: Survival Ascended
Bloons TD 6
Resident Evil 4
Draw & Guess
GUNDAM EVOLUTION
Raft
NBA 2K