In [2]:
import requests
import time
import csv
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

## Pegar os jogos mais vendidos

In [None]:
# 1. Pega os top 100 jogos mais jogados (de todos os tempos) da SteamSpy
def get_top_100_steamspy():
    url = "https://steamspy.com/api.php?request=top100forever"
    response = requests.get(url)
    if response.status_code != 200:
        raise Exception(f"Erro ao acessar SteamSpy: {response.status_code}")
    return response.json()

# 2. Consulta detalhes do jogo na Steam Store API
def get_steam_store_details(app_id):
    url = f"https://store.steampowered.com/api/appdetails?appids={app_id}&cc=us&l=en"
    response = requests.get(url)
    if response.status_code != 200:
        return None
    data = response.json()
    if not data.get(str(app_id), {}).get("success", False):
        return None
    return data[str(app_id)]["data"]

# 3. Junta as informações num dicionário para exportar
def collect_game_data():
    top_games = get_top_100_steamspy()
    consolidated = []

    for app_id_str, game in top_games.items():
        app_id = int(app_id_str)
        name = game.get("name", "")
        owners = game.get("owners", "")
        players_2weeks = game.get("players_2weeks", 0)

        store = get_steam_store_details(app_id)
        if not store:
            continue

        price = 0.0
        if store.get("is_free", False):
            price = 0.0
        elif "price_overview" in store:
            price = store["price_overview"]["final"] / 100.0

        genres = ", ".join(g["description"] for g in store.get("genres", []))
        release_date = store.get("release_date", {}).get("date", "")
        release_year = release_date[-4:] if release_date else ""
        rating = store.get("metacritic", {}).get("score", "N/A")

        consolidated.append({
            "app_id": app_id,
            "name": name,
            "owners": owners,
            "players_2weeks": players_2weeks,
            "price_usd": price,
            "release_year": release_year,
            "genres": genres,
            "rating": rating
        })

        time.sleep(1)  # para respeitar limites da API

    return consolidated

# 4. Exporta tudo para um CSV
def save_to_csv(data, filename="steam_top100_data.csv"):
    if not data:
        print("Nenhum dado para salvar.")
        return
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=data[0].keys())
        writer.writeheader()
        writer.writerows(data)
    print(f"Arquivo salvo: {filename}")

# Execução
if __name__ == "__main__":
    data = collect_game_data()
    save_to_csv(data)

In [4]:
sales = pd.read_csv("steam_top100_sales.csv")
sales

Unnamed: 0,app_id,name,owners,players_2weeks,price_usd,release_year,genres,rating
0,570,Dota 2,"200,000,000 .. 500,000,000",0,0.00,2013.0,"Action, Strategy, Free To Play",90.0
1,730,Counter-Strike: Global Offensive,"100,000,000 .. 200,000,000",0,0.00,2012.0,"Action, Free To Play",
2,578080,PUBG: BATTLEGROUNDS,"50,000,000 .. 100,000,000",0,0.00,2017.0,"Action, Adventure, Massively Multiplayer, Free...",
3,1623730,Palworld,"50,000,000 .. 100,000,000",0,29.99,2024.0,"Action, Adventure, Indie, RPG, Early Access",
4,1172470,Apex Legends,"50,000,000 .. 100,000,000",0,0.00,2020.0,"Action, Adventure, Free To Play",88.0
...,...,...,...,...,...,...,...,...
95,632360,Risk of Rain 2,"5,000,000 .. 10,000,000",0,24.99,2020.0,"Action, Indie",85.0
96,96000,The Tiny Bang Story,"5,000,000 .. 10,000,000",0,4.99,2011.0,"Adventure, Casual, Indie",63.0
97,275850,No Man's Sky,"5,000,000 .. 10,000,000",0,23.99,2016.0,"Action, Adventure",61.0
98,1811260,EA SPORTS FIFA 23,"5,000,000 .. 10,000,000",0,0.00,2022.0,"Simulation, Sports",


## Pegar os jogos em alta

In [2]:
def scrape_steamcharts_page(page_num=1):
    if page_num == 1:
        url = "https://steamcharts.com/top"
    else:
        url = f"https://steamcharts.com/top/p.{page_num}"
    
    print(f"Raspando página {page_num}: {url}")
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    games = []
    table = soup.find("table", id="top-games")
    rows = table.tbody.find_all("tr")

    for row in rows:
        cols = row.find_all("td")
        rank = cols[0].text.strip().replace(".", "").replace("\xa0", "")
        name_col = cols[1]
        name = name_col.text.strip()
        link = name_col.find("a")["href"]  # ex: "/app/570"
        app_id = link.split("/app/")[1]
        peak_24h = cols[2].text.strip()
        avg_2weeks = cols[3].text.strip()

        games.append({
            "rank": int(rank),
            "app_id": int(app_id),
            "name": name,
            "peak_24h": peak_24h,
            "avg_2weeks": avg_2weeks
        })
    return games

def scrape_steamcharts_top_100():
    all_games = []
    for page in range(1, 5):
        games = scrape_steamcharts_page(page)
        all_games.extend(games)
        time.sleep(1)
    return all_games

def get_steam_store_details(app_id):
    url = f"https://store.steampowered.com/api/appdetails?appids={app_id}&cc=us&l=en"
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        if not data.get(str(app_id), {}).get("success", False):
            return None
        return data[str(app_id)]["data"]
    except Exception as e:
        print(f"Erro ao buscar dados do app {app_id}: {e}")
        return None

def enrich_games_with_store_data(games):
    enriched = []
    scrape_date = datetime.now().strftime("%Y-%m-%d")
    for game in games:
        app_id = game["app_id"]
        store_data = get_steam_store_details(app_id)
        if not store_data:
            enriched.append({**game, "scrape_date": scrape_date})
            continue

        price = 0.0
        if store_data.get("is_free", False):
            price = 0.0
        elif "price_overview" in store_data:
            price = store_data["price_overview"]["final"] / 100.0

        genres = ", ".join(g["description"] for g in store_data.get("genres", []))
        release_date = store_data.get("release_date", {}).get("date", "")
        release_year = release_date[-4:] if release_date else ""
        rating = store_data.get("metacritic", {}).get("score", "N/A")

        enriched.append({
            **game,
            "price_usd": price,
            "genres": genres,
            "release_year": release_year,
            "metacritic_score": rating,
            "scrape_date": scrape_date
        })

        time.sleep(1)

    return enriched

def save_to_csv(data, filename="jogosEmAlta24h.csv"):
    if not data:
        print("Nenhum dado para salvar.")
        return
    keys = data[0].keys()
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        writer.writerows(data)
    print(f"Arquivo salvo: {filename}")

if __name__ == "__main__":
    print("Coletando top 100 do Steam Charts...")
    top_100 = scrape_steamcharts_top_100()

    print("Buscando dados adicionais da Steam Store API...")
    enriched_top_100 = enrich_games_with_store_data(top_100)

    save_to_csv(enriched_top_100)


Coletando top 100 do Steam Charts...
Raspando página 1: https://steamcharts.com/top
Raspando página 2: https://steamcharts.com/top/p.2
Raspando página 3: https://steamcharts.com/top/p.3
Raspando página 4: https://steamcharts.com/top/p.4
Buscando dados adicionais da Steam Store API...
Arquivo salvo: jogosEmAlta24h.csv


In [3]:
steamCharts = pd.read_csv('trendingJunho.csv')
steamCharts

Unnamed: 0,Unnamed: 0.19,Unnamed: 0.18,Unnamed: 0.17,Unnamed: 0.16,Unnamed: 0.15,Unnamed: 0.14,Unnamed: 0.13,Unnamed: 0.12,Unnamed: 0.11,Unnamed: 0.10,...,rank,app_id,name,peak_24h,avg_2weeks,price_usd,genres,release_year,metacritic_score,scrape_date
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1,730,Counter-Strike 2,1470932,,0.00,"Action, Free To Play",2012.0,,2025-05-29
1,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,2,578080,PUBG: BATTLEGROUNDS,633653,,0.00,"Action, Adventure, Massively Multiplayer, Free...",2017.0,,2025-05-29
2,2,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,3,570,Dota 2,523963,,0.00,"Action, Strategy, Free To Play",2013.0,90.0,2025-05-29
3,3,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,3.0,...,4,1172470,Apex Legends,213082,,0.00,"Action, Adventure, Free To Play",2020.0,88.0,2025-05-29
4,4,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,5,3419430,Bongo Cat,189752,,0.00,"Casual, Indie, Simulation, Free To Play",2025.0,,2025-05-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2095,2095,,,,,,,,,,...,96,291550,Brawlhalla,14995,,0.00,"Action, Indie, Free To Play",2017.0,,2025-06-27
2096,2096,,,,,,,,,,...,97,1905180,OBS Studio,14494,,0.00,"Utilities, Video Production",2022.0,,2025-06-27
2097,2097,,,,,,,,,,...,98,306130,The Elder Scrolls Online,14323,,4.99,"Action, Adventure, Massively Multiplayer, RPG",2017.0,80.0,2025-06-27
2098,2098,,,,,,,,,,...,99,1671210,DELTARUNE,14266,,24.99,"Indie, RPG",2025.0,,2025-06-27


In [4]:
steamCharts2 = pd.read_csv('jogosEmAlta24h.csv')
steamCharts2

Unnamed: 0,rank,app_id,name,peak_24h,avg_2weeks,price_usd,genres,release_year,metacritic_score,scrape_date
0,1,730,Counter-Strike 2,1033046,,0.00,"Action, Free To Play",2012.0,,2025-06-28
1,2,570,Dota 2,473130,,0.00,"Action, Strategy, Free To Play",2013.0,90.0,2025-06-28
2,3,578080,PUBG: BATTLEGROUNDS,188331,,0.00,"Action, Adventure, Massively Multiplayer, Free...",2017.0,,2025-06-28
3,4,3419430,Bongo Cat,150472,,0.00,"Casual, Indie, Simulation, Free To Play",2025.0,,2025-06-28
4,5,1172710,Dune: Awakening,131151,,49.99,"Action, Adventure, Massively Multiplayer, RPG",2025.0,,2025-06-28
...,...,...,...,...,...,...,...,...,...,...
95,96,377160,Fallout 4,15934,,7.99,RPG,2015.0,84.0,2025-06-28
96,97,107410,Arma 3,15835,,2.99,"Action, Simulation, Strategy",2013.0,74.0,2025-06-28
97,98,281990,Stellaris,15600,,9.99,"Simulation, Strategy",2016.0,78.0,2025-06-28
98,99,306130,The Elder Scrolls Online,15576,,4.99,"Action, Adventure, Massively Multiplayer, RPG",2017.0,80.0,2025-06-28


In [5]:
df_completo = pd.concat([steamCharts, steamCharts2], ignore_index=True)
df_completo.to_csv("trendingJunho.csv")

In [16]:
import requests
from bs4 import BeautifulSoup
import time
import csv

HEADERS = {
    "User-Agent": "Mozilla/5.0"
}

def parse_game_row(row):
    try:
        title = row.find("span", class_="title").text.strip()
        app_link = row["href"]
        app_id = app_link.split("/app/")[1].split("/")[0]
        review_info = row.find("span", class_="search_review_summary")["data-tooltip-html"]
        if "Overwhelmingly Positive" not in review_info:
            return None
        reviews_text = review_info.split("<br>")[1]
        reviews_count = int(reviews_text.strip().split(" ")[0].replace(",", "").replace(".", ""))
        return {
            "app_id": int(app_id),
            "title": title,
            "reviews_count": reviews_count,
            "review_summary": "Overwhelmingly Positive"
        }
    except Exception:
        return None

def scrape_overwhelmingly_positive(limit=100):
    results = []
    page = 1

    while len(results) < limit:
        url = f"https://store.steampowered.com/search/?filter=globaltopsellers&sort_by=Reviews_DESC&review_type=positive&category1=998&page={page}"
        print(f"Raspando página {page}...")
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        rows = soup.find_all("a", class_="search_result_row")

        if not rows:
            break

        for row in rows:
            game = parse_game_row(row)
            if game:
                results.append(game)
                if len(results) >= limit:
                    break

        page += 1
        time.sleep(1)

    return results[:limit]

def enrich_game_with_store_data(game):
    app_id = game["app_id"]
    url = f"https://store.steampowered.com/api/appdetails?appids={app_id}&cc=us&l=en"
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        if not data or not data.get(str(app_id), {}).get("success"):
            return game

        details = data[str(app_id)]["data"]

        game["release_date"] = details.get("release_date", {}).get("date")
        game["price"] = details.get("price_overview", {}).get("final_formatted") if details.get("price_overview") else "Free" if details.get("is_free") else "N/A"
        game["genres"] = ", ".join([g["description"] for g in details.get("genres", [])])
        game["metacritic_score"] = details.get("metacritic", {}).get("score", "N/A")

    except Exception:
        pass

    return game

def enrich_all(games):
    enriched = []
    for idx, game in enumerate(games):
        print(f"[{idx+1}/{len(games)}] Enriquecendo: {game['title']}")
        enriched_game = enrich_game_with_store_data(game)
        enriched.append(enriched_game)
        time.sleep(0.5)  # respeitar limites de requisição
    return enriched

def save_to_csv(data, filename="top100_overwhelmingly_positive_enriched.csv"):
    if not data:
        return
    keys = data[0].keys()
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=keys)
        writer.writeheader()
        writer.writerows(data)
    print(f"Arquivo salvo: {filename}")

if __name__ == "__main__":
    print("Coletando jogos com avaliação 'Extremamente positivas'...")
    top_positive = scrape_overwhelmingly_positive(limit=100)

    print("Enriquecendo com dados da Steam Store API...")
    enriched = enrich_all(top_positive)

    save_to_csv(enriched)


Coletando jogos com avaliação 'Extremamente positivas'...
Raspando página 1...
Raspando página 2...
Raspando página 3...
Raspando página 4...
Raspando página 5...
Raspando página 6...
Raspando página 7...
Raspando página 8...
Raspando página 9...
Raspando página 10...
Raspando página 11...
Raspando página 12...
Raspando página 13...
Raspando página 14...
Raspando página 15...
Raspando página 16...
Raspando página 17...
Raspando página 18...
Raspando página 19...
Raspando página 20...
Raspando página 21...
Raspando página 22...
Raspando página 23...
Raspando página 24...
Raspando página 25...
Raspando página 26...
Raspando página 27...
Raspando página 28...
Raspando página 29...
Raspando página 30...
Raspando página 31...
Raspando página 32...
Raspando página 33...
Raspando página 34...
Raspando página 35...
Raspando página 36...
Raspando página 37...
Raspando página 38...
Raspando página 39...
Raspando página 40...
Raspando página 41...
Raspando página 42...
Raspando página 43...
Raspa

In [40]:
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}

TARGET_GAME = "Your Smile Beyond Twilight:黄昏下的月台上"

def scrape_extremely_positive_games():
    base_url = "https://store.steampowered.com/search/?sort_by=Reviews_DESC&category1=998&page={}"
    games = []
    page = 1
    found_target = False

    while not found_target:
        print(f"Raspando página {page}...")
        url = base_url.format(page)
        try:
            response = requests.get(url, headers=HEADERS)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Erro ao acessar {url}: {e}")
            break

        soup = BeautifulSoup(response.text, "html.parser")
        game_rows = soup.select("a.search_result_row")

        if not game_rows:
            print("Nenhum resultado encontrado na página. Encerrando...")
            break

        for game_div in game_rows:
            title = game_div.select_one("span.title").text.strip()
            app_id = game_div.get("data-ds-appid")
            if not app_id:
                continue

            games.append({
                "name": title,
                "app_id": app_id
            })

            if title == TARGET_GAME:
                print(f"Jogo alvo encontrado: {title}")
                found_target = True
                break

        page += 1
        time.sleep(1)

    return games

def save_to_csv(games, filename="extremely_positive_games.csv"):
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["name", "app_id"])
        writer.writeheader()
        writer.writerows(games)

if __name__ == "__main__":
    games = scrape_extremely_positive_games()
    save_to_csv(games)
    print(f"{len(games)} jogos salvos no CSV.")


Raspando página 1...
Raspando página 2...
Raspando página 3...
Raspando página 4...
Raspando página 5...
Raspando página 6...
Raspando página 7...
Raspando página 8...
Raspando página 9...
Raspando página 10...
Raspando página 11...
Raspando página 12...
Raspando página 13...
Raspando página 14...
Raspando página 15...
Raspando página 16...
Raspando página 17...
Raspando página 18...
Raspando página 19...
Raspando página 20...
Raspando página 21...
Raspando página 22...
Raspando página 23...
Raspando página 24...
Raspando página 25...
Raspando página 26...
Raspando página 27...
Raspando página 28...
Raspando página 29...
Raspando página 30...
Raspando página 31...
Raspando página 32...
Raspando página 33...
Raspando página 34...
Raspando página 35...
Raspando página 36...
Raspando página 37...
Raspando página 38...
Raspando página 39...
Raspando página 40...
Raspando página 41...
Raspando página 42...
Raspando página 43...
Raspando página 44...
Raspando página 45...
Raspando página 46.

In [41]:
HEADERS = {
    "User-Agent": "Mozilla/5.0"
}

def load_games_from_csv(filename):
    with open(filename, newline='', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        return list(reader)

def enrich_with_review_count(games):
    enriched = []
    for i, game in enumerate(games):
        app_id = game['app_id']
        print(f"({i + 1}/{len(games)}) Buscando: {game['name']}")
        try:
            url = f"https://store.steampowered.com/api/appdetails?appids={app_id}&cc=us&l=en"
            resp = requests.get(url, headers=HEADERS)
            data = resp.json().get(app_id, {}).get("data")
            if data and isinstance(data, dict):
                game['reviews_count'] = data.get('recommendations', {}).get('total', 0)
                enriched.append(game)
        except Exception as e:
            print(f"Erro ao processar {game['name']}: {e}")
        time.sleep(1)
    return enriched

def save_top_100_by_reviews(games, filename="top_100_extremely_positive.csv"):
    df = pd.DataFrame(games)
    df['reviews_count'] = pd.to_numeric(df['reviews_count'], errors='coerce').fillna(0).astype(int)
    df = df.sort_values(by='reviews_count', ascending=False).head(100)
    df.to_csv(filename, index=False)
    print(f"Top 100 jogos com mais análises salvos em {filename}.")

if __name__ == "__main__":
    games = load_games_from_csv("extremely_positive_games.csv")
    enriched_games = enrich_with_review_count(games)
    save_top_100_by_reviews(enriched_games)


(1/1335) Buscando: HoloCure - Save the Fans!
(2/1335) Buscando: A Short Hike
(3/1335) Buscando: Papa's Freezeria Deluxe
(4/1335) Buscando: Cats Hidden in Jingle Jam
(5/1335) Buscando: The WereCleaner
(6/1335) Buscando: Paper Lily - Chapter 1
(7/1335) Buscando: TOEM: A Photo Adventure
(8/1335) Buscando: Project Kat - Paper Lily Prologue
(9/1335) Buscando: Please, Touch The Artwork 2
(10/1335) Buscando: Ib
(11/1335) Buscando: Lil Gator Game
(12/1335) Buscando: Patrick's Parabox
(13/1335) Buscando: A Castle Full of Cats
(14/1335) Buscando: Look Outside
(15/1335) Buscando: The Upturned
(16/1335) Buscando: A Tower Full of Cats
(17/1335) Buscando: Dialtown: Phone Dating Sim
(18/1335) Buscando: Smushi Come Home
(19/1335) Buscando: South Scrimshaw, Part One
(20/1335) Buscando: Angel at Dusk
(21/1335) Buscando: The Void Rains Upon Her Heart
(22/1335) Buscando: Monster Prom 4: Monster Con
(23/1335) Buscando: Super Lesbian Animal RPG
(24/1335) Buscando: Aokana - Four Rhythms Across the Blue - EXT

In [80]:
df_completo = df_completo.sort_values(by=["review_counts"], ascending= False).reset_index(drop=True)
df_completo.to_csv("top100_extreme_positive_certo.csv", index= False)

In [None]:
df = pd.read_csv("top100_extreme_positive_certo.csv", dtype={"app_id": str})
games = df.to_dict(orient="records")
enriched = enrich_games_with_store_data(games)
save_top_100_by_reviews(enriched)

(1/100) Enriquecendo: Terraria
(2/100) Enriquecendo: Garry's Mod
(3/100) Enriquecendo: Black Myth: Wukong
(4/100) Enriquecendo: The Witcher 3: Wild Hunt
(5/100) Enriquecendo: Stardew Valley
(6/100) Enriquecendo: Left 4 Dead 2
(7/100) Enriquecendo: Euro Truck Simulator 2
(8/100) Enriquecendo: Baldur's Gate 3
(9/100) Enriquecendo: Phasmophobia
(10/100) Enriquecendo: The Forest
(11/100) Enriquecendo: Lethal Company
(12/100) Enriquecendo: Hollow Knight
(13/100) Enriquecendo: Portal 2
(14/100) Enriquecendo: Dying Light
(15/100) Enriquecendo: Don't Starve Together
(16/100) Enriquecendo: Bloons TD 6
(17/100) Enriquecendo: The Binding of Isaac: Rebirth
(18/100) Enriquecendo: BeamNG.drive
(19/100) Enriquecendo: Subnautica
(20/100) Enriquecendo: Deep Rock Galactic
(21/100) Enriquecendo: People Playground
(22/100) Enriquecendo: Hades
(23/100) Enriquecendo: Undertale
(24/100) Enriquecendo: Vampire Survivors
(25/100) Enriquecendo: Sekiro™: Shadows Die Twice - GOTY Edition
(26/100) Enriquecendo: Tit

In [None]:
bemAvaliados = pd.read_csv('top_100_extremely_positive_enriched.csv')
bemAvaliados.head()

Unnamed: 0,name,app_id,reviews_count,price_usd,currency,release_year,genres,short_description
0,Terraria,105600,0,9.99,USD,2011,"Action, Adventure, Indie, RPG","Dig, fight, explore, build! Nothing is impossi..."
1,Garry's Mod,4000,0,5.99,USD,2006,"Casual, Indie, Simulation",Garry's Mod is a physics sandbox. There aren't...
2,Black Myth: Wukong,2358720,0,59.99,USD,2024,"Action, Adventure, RPG",Black Myth: Wukong is an action RPG rooted in ...
3,The Witcher 3: Wild Hunt,292030,0,39.99,USD,2015,RPG,"You are Geralt of Rivia, mercenary monster sla..."
4,Stardew Valley,413150,0,14.99,USD,2016,"Indie, RPG, Simulation",You've inherited your grandfather's old farm p...


## Limpeza

In [14]:
final = pd.read_csv("trendingJunho_com_metacritic.csv")
final

Unnamed: 0,rank,app_id,name,peak_24h,price_usd,genres,release_year,scrape_date,metacritic
0,1,730,Counter-Strike 2,1470932,0.00,"Action, Free To Play",2012.0,2025-05-29,
1,2,578080,PUBG: BATTLEGROUNDS,633653,0.00,"Action, Adventure, Massively Multiplayer, Free...",2017.0,2025-05-29,
2,3,570,Dota 2,523963,0.00,"Action, Strategy, Free To Play",2013.0,2025-05-29,90.0
3,4,1172470,Apex Legends,213082,0.00,"Action, Adventure, Free To Play",2020.0,2025-05-29,88.0
4,5,3419430,Bongo Cat,189752,0.00,"Casual, Indie, Simulation, Free To Play",2025.0,2025-05-29,
...,...,...,...,...,...,...,...,...,...
2195,96,377160,Fallout 4,15934,7.99,RPG,2015.0,2025-06-28,
2196,97,107410,Arma 3,15835,2.99,"Action, Simulation, Strategy",2013.0,2025-06-28,
2197,98,281990,Stellaris,15600,9.99,"Simulation, Strategy",2016.0,2025-06-28,
2198,99,306130,The Elder Scrolls Online,15576,4.99,"Action, Adventure, Massively Multiplayer, RPG",2017.0,2025-06-28,


In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
from urllib.parse import quote

def format_game_name_for_url(game_name):
    """Formata o nome do jogo para URL do Metacritic"""
    clean_name = re.sub(r'[™®]', '', game_name)
    clean_name = re.sub(r'[^a-zA-Z0-9]+', '-', clean_name)
    return clean_name.strip('-').lower()

def get_metacritic_scores(game_name):
    """Busca metascore e user score no Metacritic"""
    formatted_name = format_game_name_for_url(game_name)
    base_url = f"https://www.metacritic.com/game/{formatted_name}"
    
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    metascore = None
    user_score = None
    
    try:
        # Tentar página específica para PC primeiro
        pc_url = f"{base_url}/critic-reviews/?platform=pc"
        response = requests.get(pc_url, headers=headers)
        
        if response.status_code == 404:
            response = requests.get(base_url, headers=headers)
        
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Extrair metascore
        metascore_element = soup.find('span', class_='c-siteReviewScore_background')
        if metascore_element:
            metascore = metascore_element.get_text(strip=True)
        
        # Extrair user score
        user_score_element = soup.find('div', class_='c-siteReviewScore_user')
        if user_score_element:
            user_score = user_score_element.get_text(strip=True)
            user_score = re.sub(r'\s+', '', user_score)
        
        # Se não encontrou na página de PC, tentar página geral
        if metascore is None or user_score is None:
            response = requests.get(base_url, headers=headers)
            soup = BeautifulSoup(response.text, 'html.parser')
            
            if metascore is None:
                metascore_element = soup.find('span', class_='c-siteReviewScore_background')
                if metascore_element:
                    metascore = metascore_element.get_text(strip=True)
            
            if user_score is None:
                user_score_element = soup.find('div', class_='c-siteReviewScore_user')
                if user_score_element:
                    user_score = user_score_element.get_text(strip=True)
                    user_score = re.sub(r'\s+', '', user_score)
    
    except Exception as e:
        print(f"\nErro ao buscar {game_name}: {str(e)}")
    
    return metascore, user_score

# Carregar dataset
df = pd.read_csv("bemAvaliadosCleanGen_withCountry.csv")

# 1. Identificar jogos únicos que precisam de notas
unique_games = df[df['metacritic_metascore'].isna() | df['metacritic_user_score'].isna()]
unique_games = unique_games['name'].unique()

print(f"Total de jogos únicos para buscar: {len(unique_games)}")

# 2. Criar dicionário para armazenar as notas
game_scores = {}

# 3. Buscar notas para cada jogo único (com barra de progresso)
for game_name in tqdm(unique_games, desc="Buscando notas"):
    if game_name not in game_scores:  # Apenas se ainda não foi buscado
        metascore, user_score = get_metacritic_scores(game_name)
        game_scores[game_name] = {'metascore': metascore, 'user_score': user_score}
        time.sleep(8)  # Delay para evitar bloqueio

# 4. Preencher todas as ocorrências de cada jogo no DataFrame
for game_name, scores in game_scores.items():
    mask = df['name'] == game_name
    if scores['metascore'] is not None:
        df.loc[mask, 'metacritic_metascore'] = scores['metascore']
    if scores['user_score'] is not None:
        df.loc[mask, 'metacritic_user_score'] = scores['user_score']

# 5. Salvar o dataset atualizado
df.to_csv('bemAvaliadosCleanCountryMetacritic.csv', index=False)
print("\nProcesso concluído! Todas as ocorrências de cada jogo foram atualizadas.")

KeyError: 'metacritic_metascore'