# Análise de Jogos Populares da Steam
---

## Importações

In [2]:
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

import requests
import json
import html

import re
import unicodedata
from howlongtobeatpy import HowLongToBeat


import matplotlib.pyplot as plt



---

## Coletando Dados e Armazenando em Arquivo CSV

In [144]:
# Configurações da janela do Selenium
options = Options()
options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")

### Coletando IDs dos Jogos

In [145]:
year = 2019
all_ids = []

# inicializar navegador
driver = webdriver.Chrome(options=options)

while year <= 2024:
    # os links de 2019, 2020 e 2021+ são diferentes, então:
    if year == 2019:
        game_id_url = f"https://store.steampowered.com/sale/{year}_most_played/"
    elif year == 2020:
        game_id_url = f"https://store.steampowered.com/sale/BestOf{year}?tab=1"
    else:
        game_id_url = f"https://store.steampowered.com/sale/BestOf{year}?tab=3"
        
    driver.get(game_id_url)

    time.sleep(5)

    # Encontrando IDs
    soup = BeautifulSoup(driver.page_source, "html.parser")

    a_tags = soup.find_all("a", class_="Focusable")
    a_tags = a_tags[:50]

    ids_year = []        
    for tag in a_tags:
        # 35 chars antes do id
        id = ''
        for num in (tag['href'][35:]):
            if num == "/":
                ids_year.append({"id": id})
                break
            id += num
            
    all_ids.append(ids_year)
    year += 1
    
# liberando memória
driver.quit()

### Armazenando IDs dos jogos de cada ano em arquivos csv

In [146]:
for i in range(len(all_ids)):
    year = 2019 + i
    df = pd.DataFrame(all_ids[i])
    df.to_csv(f'data/most_played_{year}.csv', index=False)
    print(f"ARQUIVO most_played_{year}.csv CRIADO.")

ARQUIVO most_played_2019.csv CRIADO.
ARQUIVO most_played_2020.csv CRIADO.
ARQUIVO most_played_2021.csv CRIADO.
ARQUIVO most_played_2022.csv CRIADO.
ARQUIVO most_played_2023.csv CRIADO.
ARQUIVO most_played_2024.csv CRIADO.


### Montando Lista com todos os jogos

In [147]:
id_all_games = set()
for ids_year in all_ids:
    for id in ids_year:
        id_all_games.add(id['id'])

### Coletando Informações e Armazenando em Arquivo csv

In [148]:
info_games = []
for id in id_all_games:
    game_info_url = f"https://store.steampowered.com/api/appdetails?"
    params = {
        'appids': id,
        'cc': 'us',
        'l': 'english',
        'success': '1'
    }
        
    request = requests.get(game_info_url, params=params, timeout=10)
    time.sleep(1)
    
    game_data = json.loads(request.text)
    
    if game_data:
        try:
            data = game_data[id]['data']
            
            # coletando: nome, data de lançamento, idade mínima, se é gratuito, tipo da moeda e preço.
            # caso queira coletar mais informações, atribuir variáveis abaixo.
            name = data['name']
            print(name)

            release_date = data['release_date']['date']
            
            genres_list = data['genres']
            genres = ', '.join([g['description'] for g in genres_list]) if genres_list else None
            
            try:
                requirements = data['pc_requirements']['minimum']
                decoded_html = requirements.encode().decode('unicode_escape')
                decoded_html = html.unescape(decoded_html)
                soup = BeautifulSoup(decoded_html, "html.parser")
                
                graphic_list = soup.find_all('li')
                for item in graphic_list:
                    if ':' in item.get_text():
                        key, value = item.get_text().split(":", 1)
                        if key.strip().lower() == 'graphics':         
                            minimum = value.strip()
                        
            except Exception:
                minimum = None
                
            # Se existir preco, atribua a variável preço
            # Caso contrário, atrivua a variável preço None  
            try:
                preco = float(data['price_overview']['initial']) / 100
            except Exception:
                preco = 0

            # Criando dicionário do jogo atual e adicionando na lista
            info_games.append({
                'id' : id, 
                'game' : name, 
                'release_date' : release_date, 
                'price_USD' : preco,
                'genres' : genres,
                'pc_requirements' : minimum
                })
        except Exception:
            print(f"Erro ao processar ID {id}.")
            
df = pd.DataFrame(info_games)
df.to_csv("data/all_games_info.csv", index=False, encoding="utf-8")
print("ARQUIVO all_games_info.csv CRIADO.")
            
    

ARK: Survival Evolved
Call of Duty®: Black Ops 6
MultiVersus
Farming Simulator 19
Life is Strange 2
Delta Force
Grand Theft Auto V Legacy
Borderlands Game of the Year Enhanced
V Rising
Don't Starve Together
Total War: THREE KINGDOMS
BattleBit Remastered
Vampire Survivors
Path of Exile 2
The Forest
Tom Clancy's Rainbow Six® Siege X
Age of Empires IV: Anniversary Edition
ARMORED CORE™ VI FIRES OF RUBICON™
Cookie Clicker
Cube World
Hunt: Showdown 1896
Counter-Strike 2
Crusader Kings III
Dead by Daylight
Resident Evil 3
Dragon's Dogma 2
HELLDIVERS™ 2
Team Fortress 2
Football Manager 2024
DOOM Eternal
Goose Goose Duck
DRAGON BALL: Sparking! ZERO
Palworld
Once Human
Red Dead Redemption 2
EA SPORTS FC™ 24
Remnant: From the Ashes
Sid Meier's Civilization® V
Lost Ark
The Witcher 3: Wild Hunt
Battlefield™ V
Marvel Rivals
PAYDAY 2
Total War: SHOGUN 2
Mount & Blade II: Bannerlord
The Sims™ 4
DayZ
Sekiro™: Shadows Die Twice - GOTY Edition
Warhammer: Vermintide 2
Destiny 2
REMNANT II®
Eternal Return

#### Coletando Análises dos Jogos

In [149]:
df = pd.read_csv("data/all_games_info.csv")

reviews_all_games = []
for id in df['id']:
    params = {
    "json": 1,
    "language": "english",
    }
    resp = requests.get(f"https://store.steampowered.com/appreviews/{id}", params=params)
    data = resp.json()
    summary = data['query_summary']
    
    reviews_all_games.append({
        'id' : id,
        'score' : summary['review_score'],
        'total_reviews' : summary['total_reviews'],
        'total_positive' : summary['total_positive'],
        'total_negative' : summary['total_negative']
    })
    
df = pd.DataFrame(reviews_all_games)
df.to_csv('data/all_games_review.csv', index=False)

#### Coletando Tempo de jogo médio

In [150]:
df_game = pd.read_csv('data/all_games_info.csv')
all_games_time = []
for id, game_name in zip(df_game['id'], df_game['game']):
    
    main_story = None
    completionist = None
    
    try:
        if game_name == "鬼谷八荒 Tale of Immortal":
            game_name = "Tale of Immortal"
        elif game_name == "EA SPORTS™ FIFA 23":
            game_name = "FIFA 23"
        elif game_name == "暖雪 Warm Snow":
            game_name = "Warm Snow"
        
        game_search = HowLongToBeat().search(game_name, similarity_case_sensitive=False)
        
        if not game_search:
            # Normalizando o nome
            clean_game_name = unicodedata.normalize("NFKD", game_name)
            clean_game_name = clean_game_name.encode("ascii", "ignore").decode("utf-8")
            clean_game_name = re.sub(r"[^\w\s]", "", game_name)
            
            game_search = HowLongToBeat().search(clean_game_name, similarity_case_sensitive=False)
            
            if not game_search:
                clean_game_name = clean_game_name.split(" ")
                clean_game_name = " ".join(clean_game_name[:3])
                game_search = HowLongToBeat().search(clean_game_name, similarity_case_sensitive=False)

        entry = max(game_search, key=lambda x: x.similarity)
        
        main_story = entry.main_story
        completionist = entry.completionist
        
    except Exception as error:
        print("---------------------------------------------------------------|")
        print(f"Erro: {error}")
        print(f"Pesquisa mal sucedida para {game_name}.")
    
    all_games_time.append({
        "id" : id,
        "main_story" : main_story,
        "completionist" : completionist
        })
        
df_time = pd.DataFrame(all_games_time)
df_time.to_csv("data/all_games_time.csv", index=False)

---------------------------------------------------------------|
Erro: max() iterable argument is empty
Pesquisa mal sucedida para Wallpaper Engine.


#### Classificando GPUs

In [8]:
gpu_classification = {
    "very low": [
        "Intel GMA X4500, NVIDIA GeForce 9600M GT, AMD/ATI Mobility Radeon HD 3650 - requires 256MB VRAM and DirectXÂ® 11",
        "Integrated Graphics",
        "128 MB of Video RAM and support for Pixel Shader 3.0. Supported Video Cards: NVIDIA GeForce 6600 or better, ATI Radeon X1300 or better, Intel GMA X4500 or better",
        "NVIDIA Geforce GTS 450 / AMD Radeon HD 5570",
        "NVIDIA GeForce 8600/9600GT, ATI/AMD Radeon HD2600/3600",
        "NVIDIA GeForce GT 730 or equivalent",
        "1 GB & AMD 5570 or nVidia 450 or Intel Integrated Graphics 530",
        "Video card with 128 MB, Shader model 2.0. ATI X800, NVidia 6600 or better",
        "DirectX 9 graphics card with 512Mb Video RAM: AMD Radeon HD 2600 XT, nVidia 8600"
    ],
    "low": [
        "Nvidia GeForce GTX 650 Ti / AMD Radeon HD 7850",
        "Nvidia Geforce GTX 650, AMD Radeon HD 7770 graphics card or better (min. 2 GB VRAM, DX11 support)",
        "Nvidia GeForce GTX 650 2GB or AMD Radeon HD 7770 2GB",
        "Nvidia Geforce GTX 660 / AMD HD 7870 / Intel Arc A380",
        "NVIDIA GeForce GTX 750 Ti (Maxwell or newer), 2 GB or AMD Radeon R7 360, 2 GB",
        "AMD Radeon R9 285, NVIDIA GeForce GTX 660",
        "NVIDIA GeForce GTX 460 / AMD HD6850",
        "NVIDIA GeForce GTX 760 or AMD R9 270X",
        "NVIDIA GeForce GTX 760 | AMD Radeon HD 7950",
        "NVIDIA GeForce GTX 460 or AMD Radeon HD 5870",
        "NVIDIAÂ® GeForceÂ® GTX 660 2GB or GTX 1050 2GB / AMD Radeon HD 7850 2GB",
        "NVIDIA GeForce GTX 660, ATI Radeon HD 7850",
        "AMD Radeon R9 285, NVIDIA GeForce GTX 660 (2GB VRAM with Shader Model 5.0) (MORE DETAILS HERE)",
        "NVIDIA GeForce GTX 560 Ti / AMD Radeon HD 6850",
        "DirectX 11 level video card: AMD Radeon 77XX / NVIDIA GeForce GTX 660. The minimum supported resolution for the game is 720p.",
        "NVIDIA GeForce GTX 660 1GB",
        "GTX 580 / AMD HD 7870",
        "NVIDIA GTX 660 2GB or AMD Radeon HD 7850 2GB"
    ],
    "medium": [
        "NVIDIA GeForce GTX 1050 ti or AMD R9 380",
        "Nvidia GTX 1650 4GB / AMD RX 5500XT 4GB / Intel ARC A380 6GB",
        "NVIDIA GeForce GTX 1650, 4 GB or AMD Radeon RX 480, 4 GB",
        "NVIDIA GTX 1650 Super or AMD Radeon RX 5500 XT, with a minimum of 4GB of VRAM",
        "NVIDIA GeForce GTX 1050 Ti or AMD Radeon RX 470",
        "NVIDIA GeForce GTX 1050Ti (4GB), GTX 1060 (3GB), GTX 1650 (4GB) or AMD Radeon R9 280(3GB), AMD Radeon R9 290 (4GB), RX 470 (4GB)",
        "Nvidia GeForce GTX 980 / AMD Radeon RX 590 / Intel Arc A750",
        "NVIDIA GeForce GTX 1050 Ti 4GB or AMD Radeon RX 570 4GB",
        "AMD Radeon RX 470",
        "NVIDIA GeForce GTX 1060 3GB or AMD Radeon RX 570 4GB",
        "NVIDIA GeForce 760 GTX or AMD Radeon R9 280",
        "AMD Radeon RX 560 with 4GB VRAM / NVIDIA GeForce GTX 1050 Ti with 4GB VRAM",
        "Nvidia GeForce GTX 1060 6GB / AMD Radeon RX 580 8GB / Intel Arc A750",
        "Nvidia GeForce GTX 780 (3 GB) or AMD Radeon R9 290 (4GB)",
        "GeForce GTX 1050Ti / AMD Radeon RX 570 / Intel Arc A380",
        "NVIDIA GTX 770 | AMD Radeon RX 570",
        "NVIDIA GeForce GTX 960 2GB / AMD Radeon R7 370 2GB",
        "NVIDIA GeForce GTX 970 or higher, AMD Radeon RX 480 or higher",
        "NVIDIA GeForce GTX 1050 Ti or AMD Radeon RX 580",
        "NVIDIA GeForce GTX 960 4GB or AMD Radeon RX 470 4GB",
        "Nvidia GeForce GTX 1050Ti, AMD Radeon RX 470 graphics card or better (min 3GB VRAM, DX12 support)",
        "NVIDIA GeForce GTX 1060 (req. 6GB VRAM) / AMD Radeon RX 580 (req. 6GB VRAM) / Intel ARC A380",
        "AMD Radeon RX 5600 XT, NVIDIA GeForce 1080",
        "AMD Radeon RX 560,Nvidia GeForce GTX 1050 Ti",
        "NVIDIA GeForce GTX 970 OR AMD Radeon RX 570",
        "NVIDIA GeForce GTX 1650",
        "NVIDIA GeForce GTX 1060 6GB / AMD Radeon RX 580 8GB",
        "NVIDIA GeForce GTX 1050 Ti OR AMD Radeon RX 570"
    ],
    "high": [
        "NVIDIA GTX 1650 Super or AMD Radeon RX 5500 XT, with a minimum of 4GB of VRAM",
        "NVIDIA GeForce GTX 1070 / AMD Radeon RX 5500 XT with 8GB VRAM",
        "GeForce GTX 1650  / AMD Radeon RX 590",
        "NVIDIA GTX 1060 3 GB / AMD Radeon RX 590"
    ],
    "very high": [
        "AMD Radeon RX 5700, NVIDIA GeForce 1070 Ti"
    ]
}

df_game_info = pd.read_csv("data/all_games_info.csv")
gpus_class = []
for gpu in df_game_info['pc_requirements']:
    for cls, gpus in gpu_classification.items():
        if any(g in gpu for g in gpus):
            gpus_class.append({"gpu": gpu, "classification": cls})
            break
        
df_gpu_class = pd.DataFrame(gpus_class)
df_gpu_class.to_csv("data/gpu_classification.csv", index=False)


### Visualizando/Manipulando Dados Coletados

In [152]:
df_info = pd.read_csv('data/all_games_info.csv')
df_review = pd.read_csv('data/all_games_review.csv')
df_time = pd.read_csv('data/all_games_time.csv')
df_gpu_class = pd.read_csv('data/gpu_classification.csv')

df_year_2019 = pd.read_csv('data/most_played_2019.csv')
df_year_2020 = pd.read_csv('data/most_played_2020.csv')
df_year_2021 = pd.read_csv('data/most_played_2021.csv')
df_year_2022 = pd.read_csv('data/most_played_2022.csv')
df_year_2023 = pd.read_csv('data/most_played_2023.csv')
df_year_2024 = pd.read_csv('data/most_played_2024.csv')

In [153]:
df_info

Unnamed: 0,id,game,release_date,price_USD,genres,pc_requirements
0,346110,ARK: Survival Evolved,"Aug 27, 2017",14.99,"Action, Adventure, Indie, Massively Multiplaye...",NVIDIA GTX 670 2GB/AMD Radeon HD 7870 2GB or b...
1,2881650,Content Warning,"Apr 1, 2024",7.99,"Action, Adventure, Indie",NVIDIA GeForce GTX 1050 ti or AMD R9 380
2,2933620,Call of Duty®: Black Ops 6,"Oct 24, 2024",69.99,Action,"AMD Radeonâ¢ RX 470, NVIDIAÂ® GeForceÂ® GTX 9..."
3,1818750,MultiVersus,"Jul 19, 2022",0.00,"Action, Free To Play",Nvidia GeForce GTX 650 Ti / AMD Radeon HD 7850
4,787860,Farming Simulator 19,"Nov 19, 2018",17.99,Simulation,"Nvidia Geforce GTX 650, AMD Radeon HD 7770 gra..."
...,...,...,...,...,...,...
158,976730,Halo: The Master Chief Collection,"Dec 3, 2019",39.99,Action,AMD HD 6850 ; NVIDIA GeForce GTS 450
159,1282730,Loop Hero,"Mar 4, 2021",14.99,"Indie, RPG, Strategy","GeForce 7300 GT (512 MB), Radeon X1300 Pro (25..."
160,1920960,VPet-Simulator,"Aug 13, 2023",0.00,"Casual, Indie, RPG, Simulation, Free To Play",Iris or better
161,1446780,MONSTER HUNTER RISE,"Jan 12, 2022",39.99,Action,NVIDIAÂ® GeForceÂ® GT 1030 (DDR4) or AMD Radeo...


In [154]:
df_review

Unnamed: 0,id,score,total_reviews,total_positive,total_negative
0,346110,8,248825,201021,47804
1,2881650,8,29773,28021,1752
2,2933620,5,8345,3472,4873
3,1818750,6,5317,3765,1552
4,787860,8,23757,22262,1495
...,...,...,...,...,...
158,976730,8,153314,141365,11949
159,1282730,8,15456,14128,1328
160,1920960,0,0,0,0
161,1446780,8,32024,27729,4295


In [155]:
df_time

Unnamed: 0,id,main_story,completionist
0,346110,67.56,507.37
1,2881650,2.23,19.73
2,2933620,8.47,35.21
3,1818750,5.98,88.55
4,787860,113.38,240.38
...,...,...,...
158,976730,43.07,307.94
159,1282730,28.51,55.22
160,1920960,0.00,1293.25
161,1446780,23.11,182.06


In [156]:
df_gpu_class

Unnamed: 0,gpu,classification
0,NVIDIA GeForce GTX 1050 ti or AMD R9 380,Medium
1,Nvidia GeForce GTX 650 Ti / AMD Radeon HD 7850,Low
2,"Nvidia Geforce GTX 650, AMD Radeon HD 7770 gra...",Low
3,Nvidia GeForce GTX 650 2GB or AMD Radeon HD 77...,Low
4,Nvidia Geforce GTX 660 / AMD HD 7870 / Intel A...,High
...,...,...
67,AMD Radeon R9 280X / GeForce GTX 760,Medium
68,DirectX 9 graphics card with 512Mb Video RAM: ...,Very Low
69,NVIDIA GTX 660 2GB or AMD Radeon HD 7850 2GB,Low
70,"NVidia GTX 970, AMD RX 470, OR Intel Arc A380",High


In [157]:
df_year_2019
# df_year_2020, etc...

Unnamed: 0,id
0,779340
1,578080
2,570
3,359550
4,271590
5,814380
6,730
7,230410
8,1085660
9,238960
