# Análise de Jogos Populares da Steam
---

## Importações

In [17]:
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

import requests
import json

import re
import unicodedata
from howlongtobeatpy import HowLongToBeat



---

## Coletando Dados e Armazenando em Arquivo CSV

In [18]:
# Configurações da janela do Selenium
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")

### Coletando IDs dos Jogos

In [19]:
year = 2019
all_ids = []

# inicializar navegador
driver = webdriver.Chrome(options=options)

while year <= 2024:
    # os links de 2019, 2020 e 2021+ são diferentes, então:
    if year == 2019:
        game_id_url = f"https://store.steampowered.com/sale/{year}_most_played/"
    elif year == 2020:
        game_id_url = f"https://store.steampowered.com/sale/BestOf{year}?tab=1"
    else:
        game_id_url = f"https://store.steampowered.com/sale/BestOf{year}?tab=3"
        
    driver.get(game_id_url)

    time.sleep(5)

    # Encontrando IDs
    soup = BeautifulSoup(driver.page_source, "html.parser")

    a_tags = soup.find_all("a", class_="Focusable")
    a_tags = a_tags[:50]

    ids_year = []        
    for tag in a_tags:
        # 35 chars antes do id
        id = ''
        for num in (tag['href'][35:]):
            if num == "/":
                ids_year.append({'id' : id})
                break
            id += num
            
    all_ids.append(ids_year)
    year += 1
    
# liberando memória
driver.quit()

### Armazenando IDs dos jogos de cada ano em arquivos csv

In [20]:
for i in range(len(all_ids)):
    year = 2019 + i
    df = pd.DataFrame(all_ids[i])
    df.to_csv(f'data/most_played_{year}', index=False)
    print(f"ARQUIVO most_played_{year}.csv CRIADO.")
    

ARQUIVO most_played_2019.csv CRIADO.
ARQUIVO most_played_2020.csv CRIADO.
ARQUIVO most_played_2021.csv CRIADO.
ARQUIVO most_played_2022.csv CRIADO.
ARQUIVO most_played_2023.csv CRIADO.
ARQUIVO most_played_2024.csv CRIADO.


### Montando Lista com todos os jogos

In [21]:
id_all_games = set()
for ids_year in all_ids:
    for id in ids_year:
        id_all_games.add(id['id'])

### Coletando Informações e Armazenando em Arquivo csv

In [22]:
info_games = []
for id in id_all_games:
    game_info_url = f"https://store.steampowered.com/api/appdetails?appids={id}&cc=us"
        
    request = requests.get(game_info_url, timeout=10)
    time.sleep(1)
    
    game_data = json.loads(request.text)
    
    if (game_data) and (id in game_data) and (game_data[id]['success']):
        try:
            data = game_data[id]['data']
            
            # coletando: nome, data de lançamento, idade mínima, se é gratuito, tipo da moeda e preço.
            # caso queira coletar mais informações, atribuir variáveis abaixo.
            name = data['name']
            release_date = data['release_date']['date']
            min_age = data['required_age']
            
            genres_list = data['genres']
            genres = ', '.join([g['description'] for g in genres_list]) if genres_list else None
            
            # Se existir preco, atribua a variável preço
            # Caso contrário, atrivua a variável preço None  
            try:
                preco = float(data['price_overview']['initial']) / 100
            except Exception:
                preco = 0

            # Criando dicionário do jogo atual e adicionando na lista
            info_games.append({
                'id' : id, 
                'game' : name, 
                'release_date' : release_date, 
                'price_USD' : preco,
                'genres' : genres,
                'min_age' : min_age,
                })
        except Exception:
            print(f"Erro ao processar ID {id}.")
            
df = pd.DataFrame(info_games)
df.to_csv("data/all_games_info", index=False, encoding="utf-8")
print("ARQUIVO all_games_info.csv CRIADO.")
            
    

ARQUIVO all_games_info.csv CRIADO.


#### Coletando Análises dos Jogos

In [23]:
df = pd.read_csv("data/all_games_info")

reviews_all_games = []
for id in df['id']:
    params = {
    "json": 1,
    "language": "english",
    }
    resp = requests.get(f"https://store.steampowered.com/appreviews/{id}", params=params)
    data = resp.json()
    summary = data['query_summary']
    
    reviews_all_games.append({
        'id' : id,
        'score' : summary['review_score'],
        'total_reviews' : summary['total_reviews'],
        'total_positive' : summary['total_positive'],
        'total_negative' : summary['total_negative']
    })
    
df = pd.DataFrame(reviews_all_games)
df.to_csv('data/all_games_review', index=False)

#### Coletando Tempo de jogo médio

In [60]:
df_game = pd.read_csv('data/all_games_info')


all_games_time = []
for id, game_name in zip(df_game['id'], df_game['game']):
    
    main_story = None
    completionist = None
    
    try:    
        game_search = HowLongToBeat().search(game_name, similarity_case_sensitive=False)
        
        if not game_search:
            # Normalizando o nome
            clean_game_name = unicodedata.normalize("NFKD", game_name)
            clean_game_name = clean_game_name.encode("ascii", "ignore").decode("utf-8")
            clean_game_name = re.sub(r"[^\w\s]", "", game_name)
            
            game_search = HowLongToBeat().search(clean_game_name, similarity_case_sensitive=False)
            
            if not game_search:
                clean_game_name = clean_game_name.split(" ")
                clean_game_name = " ".join(clean_game_name[:3])
                game_search = HowLongToBeat().search(clean_game_name, similarity_case_sensitive=False)

        entry = max(game_search, key=lambda x: x.similarity)
        
        main_story = entry.main_story
        completionist = entry.completionist
        
    except Exception as error:
        print("---------------------------------------------------------------|")
        print(f"Erro: {error}")
        print(f"Pesquisa mal sucedida para {game_name}.")
    
    all_games_time.append({
        "id" : id,
        "main_story" : main_story,
        "completionist" : completionist
        })
        
df_time = pd.DataFrame(all_games_time)
df_time.to_csv("data/all_games_time", index=False)

---------------------------------------------------------------|
Erro: max() iterable argument is empty
Pesquisa mal sucedida para 鬼谷八荒 Tale of Immortal.
---------------------------------------------------------------|
Erro: max() iterable argument is empty
Pesquisa mal sucedida para EA SPORTS™ FIFA 23.
---------------------------------------------------------------|
Erro: max() iterable argument is empty
Pesquisa mal sucedida para Wallpaper Engine.


### Visualizando/Manipulando Dados Coletados

In [57]:
# etc

159
