1. 코드 개요 및 목적 
DB 테이블 생성(필요 시)  

CREATE TABLE IF NOT EXISTS …를 사용해, 해당 테이블이 없으면 자동 생성  
이미 있으면 건너뜀  
스키마 충돌 자동 해결  

“조건부 ALTER TABLE” 로직을 넣어, “컬럼이 없으면 추가”로 DB 스키마를 업그레이드  
예: updated_at 같은 새 컬럼이 과거 테이블엔 없을 때 → 자동으로 ALTER TABLE하여 충돌 방지  
기존 데이터는 그대로 유지  

결과
팀원이 코드를 실행할 때마다, DB 스키마가 코드에서 기대하는 형태와 “동기화”됨
운영환경에서 “Unknown column …” 오류를 예방, 반복 작업에 대응

2. 흐름 요약    
load_dotenv()  
.env 파일에서 dbuser, password, host, port, dbname 등을 로딩  
get_connection()  
PyMySQL로 DB 접속 (UTF-8 설정)  
create_tables()  
CREATE TABLE IF NOT EXISTS LIST_OF_MOBA_INDI / LIST_OF_MOBA_INDI_HISTORY  
테이블이 없으면 생성  
이미 있다면 생략  
DESC LIST_OF_MOBA_INDI (컬럼 목록) → 만약 updated_at 없으면 ALTER TABLE로 추가  
DESC LIST_OF_MOBA_INDI_HISTORY → 만약 changed_at 없으면 ALTER TABLE로 추가  
실행 결과   
콘솔에 “[DB] Added 'updated_at' to LIST_OF_MOBA_INDI” 처럼 뜨면, 새로 컬럼이 추가된 것   
최종 “[DB] Done ensuring…” 메시지로 스키마 동기화 완료   

3. 장점 / 의의  
기존 데이터 보존   
Drop 대신 ALTER TABLE로 부족한 컬럼만 추가하므로, 이미 Insert된 데이터가 없어지지 않음
지속 가능   
앞으로 컬럼이 더 생겨도 비슷한 if col not in existing_cols: ALTER TABLE 로직을 추가 가능  
마이그레이션 툴(Alembic 등)을 쓰지 않아도, 최소한의 자동화가 가능  
협업 환경  
팀원들 중 누가 코드를 실행해도, DB 스키마가 “필요 컬럼”만큼은 자동 업데이트 → 충돌 적음   
“Unknown column …” 같은 오류를 줄여줌   

4. 실제 사용 시 주의점
컬럼이 자주 많이 바뀌면?  
코드가 길어질 수 있음 → 전문 마이그레이션 툴(예: Alembic) 고려  
기존 테이블과 새로 추가되는 컬럼의 default 값, 자료형 등에 유의  
데이터베이스 권한  
ALTER TABLE을 수행하려면 DB 계정에 충분한 권한이 있어야 함(GRANT ALTER ON …)   

결론   
이 코드는 **“기존 테이블과의 충돌을 자동으로 해결”**하면서, **“미래에도 컬럼이 추가될 가능성”**을 염두에 둔 간이 마이그레이션 솔루션입니다.
팀원들은 앞으로 이 코드를 실행하기만 하면, DB 스키마가 필요한 컬럼까지 자동으로 업데이트되므로, 오류 없이 데이터를 Insert/Update/History 관리할 수 있게 됩니다.

In [None]:
import os
import time
import requests
import pymysql
from datetime import datetime
from dotenv import load_dotenv

##########################
# 1) load env variables  #
##########################
load_dotenv()

# Database settings, read from the process environment (populated from .env
# by the load_dotenv() call that precedes this section).
dbuser = os.getenv('dbuser')
password = os.getenv('password')
host = os.getenv('host')
# A missing *or empty* `port` entry falls back to MySQL's default 3306;
# int('') would otherwise raise ValueError at import time.
port = int(os.getenv('port') or 3306)
# Accept the `dbname` key, while still honouring the legacy `name` key so
# existing .env files keep working.
dbname = os.getenv('dbname') or os.getenv('name')

# Algolia credentials used to build the search endpoint URL.
ALGOLIA_APP_ID = os.getenv("ALGOLIA_APP_ID", "94HE6YATEI")
ALGOLIA_API_KEY = os.getenv("ALGOLIA_API_KEY", "")

##########################
# 2) DB connection       #
##########################
def get_connection():
    """Open a new PyMySQL connection using the module-level DB settings.

    Returns:
        The connection object returned by ``pymysql.connect``; the caller is
        responsible for closing it.
    """
    connect_kwargs = {
        "host": host,
        "user": dbuser,
        "password": password,
        "database": dbname,
        "port": port,
        "charset": "utf8mb4",
    }
    return pymysql.connect(**connect_kwargs)

##########################
# 3) CREATE TABLES (조건부 ALTER)
##########################
def _ensure_column(cur, table, column, definition):
    """Add ``column`` to ``table`` unless ``DESC`` already lists it.

    Column names are compared case-insensitively because MySQL treats column
    names as case-insensitive; a differently-cased existing column would
    otherwise trigger an ALTER that fails with a duplicate-column error.

    ``table``, ``column`` and ``definition`` are interpolated into the SQL
    text, so only trusted constants may be passed.

    Args:
        cur: Open cursor that yields rows as sequences (column name first).
        table: Name of the table to inspect.
        column: Name of the column that must exist.
        definition: Column type/default clause used when adding the column.

    Returns:
        True when the column was added, False when it already existed.
    """
    cur.execute(f"DESC {table}")
    existing = {row[0].lower() for row in cur.fetchall()}
    if column.lower() in existing:
        return False

    cur.execute(f"ALTER TABLE {table} ADD COLUMN {column} {definition}")
    print(f"[DB] Added '{column}' to {table}")
    return True


def create_tables():
    """Create the two tables if needed and add any columns they are missing.

    1) ``CREATE TABLE IF NOT EXISTS`` for LIST_OF_MOBA_INDI and
       LIST_OF_MOBA_INDI_HISTORY.
    2) For tables that already existed with an older layout, add the columns
       listed in ``required_columns`` via ``ALTER TABLE`` (existing rows are
       kept).

    To support a new column later, append one entry to ``required_columns``.
    """

    # (A) CREATE TABLE IF NOT EXISTS
    create_current_sql = """
    CREATE TABLE IF NOT EXISTS LIST_OF_MOBA_INDI (
      app_id BIGINT NOT NULL,
      name VARCHAR(255),
      price_us FLOAT,
      releaseYear VARCHAR(10),
      userScore FLOAT,
      updated_at DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
      PRIMARY KEY (app_id)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    """

    create_history_sql = """
    CREATE TABLE IF NOT EXISTS LIST_OF_MOBA_INDI_HISTORY (
      id BIGINT NOT NULL AUTO_INCREMENT,
      app_id BIGINT,
      name VARCHAR(255),
      price_us FLOAT,
      releaseYear VARCHAR(10),
      userScore FLOAT,
      changed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
      PRIMARY KEY (id)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
    """

    # (B) Columns that may be absent from tables created by older versions:
    # (table, column, definition used when the column has to be added).
    required_columns = [
        (
            "LIST_OF_MOBA_INDI",
            "updated_at",
            "DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP",
        ),
        (
            "LIST_OF_MOBA_INDI_HISTORY",
            "changed_at",
            "DATETIME DEFAULT CURRENT_TIMESTAMP",
        ),
    ]

    conn = get_connection()
    try:
        with conn.cursor() as cur:
            # 1) Create the tables when they do not exist yet.
            cur.execute(create_current_sql)
            cur.execute(create_history_sql)
            conn.commit()

            # 2) Bring pre-existing tables up to the expected column set.
            for table, column, definition in required_columns:
                if _ensure_column(cur, table, column, definition):
                    conn.commit()

        print("[DB] Done ensuring LIST_OF_MOBA_INDI & LIST_OF_MOBA_INDI_HISTORY schema.")
    finally:
        conn.close()

##########################
# 4) ALGOLIA Setup       #
##########################
# Algolia multi-index query endpoint; the agent string, API key and
# application id are passed as URL query parameters.
BASE_URL = "https://{}-dsn.algolia.net/1/indexes/*/queries?".format(
    ALGOLIA_APP_ID.lower()
) + "&".join(
    (
        "x-algolia-agent=Algolia%20for%20JavaScript%20(5.21.0)%3B%20Lite%20(5.21.0)"
        "%3B%20Browser%3B%20instantsearch.js%20(4.78.0)%3B%20JS%20Helper%20(3.24.2)",
        f"x-algolia-api-key={ALGOLIA_API_KEY}",
        f"x-algolia-application-id={ALGOLIA_APP_ID}",
    )
)

# Shared HTTP session; every request carries JSON content headers plus the
# Referer/Origin of the SteamDB instant-search page.
session = requests.Session()
session.headers.update(
    [
        ("Content-Type", "application/json"),
        ("Accept", "application/json"),
        ("User-Agent", "Mozilla/5.0"),
        ("Referer", "https://steamdb.info/instantsearch/"),
        ("Origin", "https://steamdb.info"),
    ]
)

##########################
# 5) Crawling Functions  # 
##########################
def fetch_page(page, hits_per_page=50):
    """Request one result page (Indie + MOBA + Game filter) from Algolia.

    Args:
        page: Zero-based page index sent as the ``page`` query field.
        hits_per_page: Value sent as ``hitsPerPage``.

    Returns:
        The decoded JSON response, or None when the request fails with a
        ``requests`` exception (the error is printed).
    """
    retrieved_attributes = [
        "lastUpdated",
        "small_capsule",
        "name",
        "price_us",
        "releaseYear",
        "userScore",
    ]
    facet_names = [
        "appType", "categories", "developer", "followers", "hardwareCategories",
        "languages", "languagesAudio", "languagesSubtitles", "multiplayerCategories",
        "price_us", "publisher", "releaseYear", "reviews", "tags", "technologies", "userScore",
    ]
    query = {
        "indexName": "steamdb",
        "attributesToHighlight": ["name"],
        "attributesToRetrieve": retrieved_attributes,
        # Edit these filters to target different tags.
        "facetFilters": ["tags:Indie", "tags:MOBA", ["appType:Game"]],
        "facets": facet_names,
        "highlightPostTag": "__/ais-highlight__",
        "highlightPreTag": "__ais-highlight__",
        "hitsPerPage": hits_per_page,
        "maxValuesPerFacet": 200,
        "page": page,
        "query": "",
    }

    try:
        response = session.post(BASE_URL, json={"requests": [query]}, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"[ERROR] fetch_page({page}) failed: {e}")
        return None

def crawl_all_pages(hits_per_page=100, delay=0.5):
    """Walk through the result pages and collect every hit.

    Paging stops when a fetch fails, when a page has no results or hits, or
    when the page counter reaches the ``nbPages`` value reported by the first
    result set.

    Args:
        hits_per_page: Page size forwarded to ``fetch_page``.
        delay: Seconds to sleep between two page requests.

    Returns:
        A list with the hits of all pages fetched so far.
    """
    collected = []
    current_page = 0

    while True:
        print(f"[INFO] Fetching page={current_page} (hitsPerPage={hits_per_page})")
        response = fetch_page(current_page, hits_per_page=hits_per_page)

        # Any missing level (response, results, hits) ends the crawl.
        result_sets = response.get("results", []) if response else []
        first_result = result_sets[0] if result_sets else {}
        page_hits = first_result.get("hits", [])
        if not page_hits:
            break

        collected.extend(page_hits)
        print(f"[INFO] page={current_page} => {len(page_hits)} hits (total so far: {len(collected)})")

        current_page += 1
        if current_page >= first_result.get("nbPages", 1):
            break

        time.sleep(delay)

    return collected

##########################
# 6) Upsert + History
##########################
def _values_differ(old, new):
    """Return True when a stored value and a crawled value really differ.

    The main table stores ``price_us`` and ``userScore`` as FLOAT, so the
    database hands back e.g. ``10.0`` where the crawler delivered ``10``.
    Comparing the string forms would flag that as a change on every run, so
    numeric pairs are compared numerically with a small relative tolerance
    (single-precision FLOAT keeps roughly six significant digits). All other
    pairs keep the string comparison, which lets a stored ``'2023'`` match a
    crawled ``2023``.
    """
    import math  # local import: not among the file's top-level imports

    if old is None or new is None:
        return old is not new

    numeric = (int, float)
    if isinstance(old, numeric) and isinstance(new, numeric):
        return not math.isclose(old, new, rel_tol=1e-5, abs_tol=1e-9)

    return str(old) != str(new)


def upsert_game_row(conn, game_data):
    """Insert or update one game row and archive the previous values.

    The tracked columns are ``name``, ``price_us``, ``releaseYear`` and
    ``userScore``.

    * No row for the id yet: INSERT into LIST_OF_MOBA_INDI.
    * Row exists and at least one tracked column changed: copy the old values
      into LIST_OF_MOBA_INDI_HISTORY, then UPDATE the main row.
    * Row exists and nothing changed: only a "[NO CHANGE]" line is printed.

    The work is committed at the end; if any statement fails, the transaction
    is rolled back and the exception is re-raised, so a history row can never
    be committed without its matching update.

    Args:
        conn: Open DB connection providing ``cursor()``, ``commit()`` and
            ``rollback()``.
        game_data: Mapping with the tracked columns and the primary key under
            either ``app_id`` or ``objectID``.

    Returns:
        None. When no id is present, an error is printed and nothing is
        written.
    """
    columns = ["name", "price_us", "releaseYear", "userScore"]

    # Primary key: prefer `app_id`, fall back to `objectID`.
    app_id = game_data.get("app_id") or game_data.get("objectID")
    if not app_id:
        print("[ERROR] No app_id found in data!")
        return

    new_data = {col: game_data.get(col) for col in columns}

    try:
        with conn.cursor() as cur:
            # 1) Fetch the current row from the main table.
            sel_sql = f"SELECT {', '.join(columns)} FROM LIST_OF_MOBA_INDI WHERE app_id=%s"
            cur.execute(sel_sql, (app_id,))
            row = cur.fetchone()

            if not row:
                # 2a) Unknown id: insert a fresh row.
                ins_cols = ["app_id"] + columns + ["updated_at"]
                placeholders = ", ".join(["%s"] * len(ins_cols))
                ins_sql = f"INSERT INTO LIST_OF_MOBA_INDI ({', '.join(ins_cols)}) VALUES ({placeholders})"
                insert_values = [app_id] + [new_data[c] for c in columns] + [datetime.now()]

                cur.execute(ins_sql, insert_values)
                print(f"[INSERT] app_id={app_id}")
            else:
                # 2b) Known id: work out which tracked columns changed.
                old_data = dict(zip(columns, row))
                changed_cols = {
                    col: f"{old_data[col]} -> {new_data[col]}"
                    for col in columns
                    if _values_differ(old_data[col], new_data[col])
                }

                if changed_cols:
                    # (A) Archive the previous values in the history table.
                    hist_cols = ["app_id"] + columns + ["changed_at"]
                    hist_sql = f"INSERT INTO LIST_OF_MOBA_INDI_HISTORY ({', '.join(hist_cols)}) VALUES ({', '.join(['%s']*len(hist_cols))})"
                    hist_vals = [app_id] + [old_data[c] for c in columns] + [datetime.now()]
                    cur.execute(hist_sql, hist_vals)

                    # (B) Overwrite the main row with the new values.
                    set_clause = ", ".join([f"{c}=%s" for c in columns])
                    upd_sql = f"UPDATE LIST_OF_MOBA_INDI SET {set_clause}, updated_at=NOW() WHERE app_id=%s"
                    upd_vals = [new_data[c] for c in columns] + [app_id]
                    cur.execute(upd_sql, upd_vals)

                    print(f"[UPDATE] app_id={app_id}, changed={changed_cols}")
                else:
                    print(f"[NO CHANGE] app_id={app_id}")

        conn.commit()
    except Exception:
        # Discard partial work (e.g. history inserted but update failed).
        conn.rollback()
        raise

##########################
# 7) main()
##########################
def main():
    """Run the whole pipeline: sync the schema, crawl, then upsert every hit."""
    # Step 1: create the tables / add missing columns (conditional ALTER TABLE).
    create_tables()

    # Step 2: crawl every result page.
    crawled = crawl_all_pages(hits_per_page=100, delay=0.5)
    print(f"\n[INFO] total crawled: {len(crawled)}")

    # Step 3: write each hit through a single connection, always closing it.
    db = get_connection()
    try:
        for game in crawled:
            upsert_game_row(db, game)
        print("[DONE] All hits upserted.")
    finally:
        db.close()


if __name__ == "__main__":
    main()


In [None]:
# hot release game/ trending game 추가하기 #

import requests
from bs4 import BeautifulSoup

def crawl_trendingfollowers():
    """POST to the SteamDB trending-followers page and print each table row.

    Every ``table tbody tr`` row of the returned HTML is printed as a list of
    its cell texts. Request errors (including HTTP error statuses) are caught
    and printed.
    """
    target = "https://steamdb.info/stats/trendingfollowers/?displayOnly=Game&tagid=1718%2C492"

    # Browser-like request headers.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8"
    }

    # The form parameters of the real request are unknown, so an empty
    # form body is sent as an experiment.
    form = {}

    try:
        reply = requests.post(target, headers=request_headers, data=form, timeout=10)
        reply.raise_for_status()

        # The response is expected to be HTML; parse its text.
        page = BeautifulSoup(reply.text, "html.parser")

        # Print the cell texts of every table row.
        for tr in page.select("table tbody tr"):
            print([td.get_text(strip=True) for td in tr.select("td")])

    except requests.exceptions.RequestException as e:
        print(f"[ERROR] {e}")


if __name__ == "__main__":
    crawl_trendingfollowers()


[ERROR] 403 Client Error: Forbidden for url: https://steamdb.info/stats/trendingfollowers/?displayOnly=Game&tagid=1718%2C492


In [4]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup

def crawl_trendingfollowers_selenium():
    """Load the trending-followers page in headless Chrome and print its rows.

    The page source is parsed after a fixed 5 second wait; the number of
    ``table tbody tr`` rows is printed, followed by the cell texts of each
    row. Any exception is caught and printed, and the driver is always quit.
    """
    # Page to crawl (filters: displayOnly=Game, tagid=1718,492).
    target_url = "https://steamdb.info/stats/trendingfollowers/?displayOnly=Game&tagid=1718,492"

    # Headless Chrome configuration.
    chrome_options = webdriver.ChromeOptions()
    for flag in ("--headless", "--disable-gpu"):
        chrome_options.add_argument(flag)

    # webdriver_manager installs/locates a chromedriver binary.
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )

    try:
        driver.get(target_url)
        # Fixed wait for page load / dynamic rendering (increase if needed).
        time.sleep(5)

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # The selector depends on the page structure and may need adjusting.
        rows = soup.select("table tbody tr")
        print(f"[INFO] Found {len(rows)} rows in the trending table.")

        for index, row in enumerate(rows, start=1):
            cell_texts = [td.get_text(strip=True) for td in row.find_all("td")]
            print(f"{index}. {cell_texts}")

    except Exception as e:
        print(f"[ERROR] {e}")
    finally:
        driver.quit()


if __name__ == "__main__":
    crawl_trendingfollowers_selenium()


[INFO] Found 0 rows in the trending table.


In [None]:
from bs4 import BeautifulSoup

html = """
<tbody>
<tr class="app" data-appid="2444750" data-capsule="02d7e564acfe5b703574a504858f5fe4019c2b92/capsule_231x87.jpg">
<td data-sort="1" class="dt-type-numeric">1.</td>
<td class="applogo">
  <a target="_blank" href="/app/2444750/" tabindex="-1" aria-hidden="true">
    <img src="https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/2444750/02d7e564acfe5b703574a504858f5fe4019c2b92/capsule_231x87.jpg" alt="">
  </a>
</td>
<td>
  <a target="_blank" class="b" href="/app/2444750/">Shape of Dreams</a>
  <div class="subinfo">
    <span class="cat">#121 in top wishlisted</span>
  </div>
</td>
<td data-sort="0" class="dt-type-numeric"></td>
<td data-sort="" class="dt-type-numeric">—</td>
<td data-sort="-1" class="dt-type-numeric">—</td>
<td data-sort="1748649599" class="muted dt-type-numeric">May 2025</td>
<td data-sort="12825" class="dt-type-numeric">12,825</td>
<td data-sort="630" class="green dt-type-numeric">+630</td>
</tr>
<tr class="app" data-appid="3267430" data-capsule="86e996f401bb9348d04c61f645095b7175082b66/capsule_231x87.jpg">
<td data-sort="2" class="dt-type-numeric">2.</td>
<td class="applogo">
  <a target="_blank" href="/app/3267430/" tabindex="-1" aria-hidden="true">
    <img src="https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/3267430/86e996f401bb9348d04c61f645095b7175082b66/capsule_231x87.jpg" alt="">
  </a>
</td>
<td>
  <a target="_blank" class="b" href="/app/3267430/">Last Remains</a>
  <div class="subinfo">
    <span class="cat">#2653 in top sellers</span>
  </div>
</td>
<td data-sort="0" class="dt-type-numeric"></td>
<td data-sort="2150000" class="dt-type-numeric">₩ 21500</td>
<td data-sort="68.17" class="dt-type-numeric">68.17%</td>
<td data-sort="1739836800" class="dt-type-numeric">Feb 2025</td>
<td data-sort="20247" class="dt-type-numeric">20,247</td>
<td data-sort="238" class="green dt-type-numeric">+238</td>
</tr>
<tr class="app" data-appid="1049590" data-capsule="4f01eaedca464dffc8b589fa4ef67d8cb23529d3/capsule_231x87.jpg">
<td data-sort="3" class="dt-type-numeric">3.</td>
<td class="applogo">
  <a target="_blank" href="/app/1049590/" tabindex="-1" aria-hidden="true">
    <img src="https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/1049590/4f01eaedca464dffc8b589fa4ef67d8cb23529d3/capsule_231x87.jpg" alt="">
  </a>
</td>
<td>
  <a target="_blank" class="b" href="/app/1049590/">Eternal Return</a>
  <div class="subinfo">
    <span class="cat">#76 in top sellers</span>
  </div>
</td>
<td data-sort="0" class="dt-type-numeric"></td>
<td data-sort="0" class="dt-type-numeric">Free</td>
<td data-sort="78.62" class="dt-type-numeric">78.62%</td>
<td data-sort="1689811200" class="dt-type-numeric">Jul 2023</td>
<td data-sort="90844" class="dt-type-numeric">90,844</td>
<td data-sort="195" class="green dt-type-numeric">+195</td>
</tr>
</tbody>
"""

# Parse the sample table and print one dict per <tr class="app"> row.
soup = BeautifulSoup(html, "lxml")
rows = soup.find_all("tr", class_="app")

for row in rows:
    # The app id is stored in the row's data-appid attribute.
    appid = row.get("data-appid")
    cells = row.find_all("td")

    # Cell layout: 0 rank, 1 logo, 2 name + link, 3 empty, 4 price,
    # 5 discount, 6 release date, 7 followers, 8 seven-day increase.
    rank = cells[0].get_text(strip=True)
    game_name = row.find("a", class_="b").get_text(strip=True)
    price = cells[4].get_text(strip=True)
    discount = cells[5].get_text(strip=True)
    release_date = cells[6].get_text(strip=True)

    # Followers: drop thousands separators; keep the raw text if not numeric.
    followers_text = cells[7].get_text(strip=True)
    try:
        followers = int(followers_text.replace(",", ""))
    except ValueError:
        followers = followers_text

    # Increase: drop one leading '+', then convert like the follower count.
    increase_text = cells[8].get_text(strip=True)
    increase_text = increase_text[1:] if increase_text.startswith('+') else increase_text
    try:
        followers_increase = int(increase_text.replace(",", ""))
    except ValueError:
        followers_increase = increase_text

    result = dict(
        appid=appid,
        rank=rank,
        game_name=game_name,
        price=price,
        discount=discount,
        release_date=release_date,
        followers=followers,
        followers_increase=followers_increase,
    )
    print(result)


Task exception was never retrieved
future: <Task finished name='Task-8' coro=<Connection.run() done, defined at c:\Users\02105\anaconda3\envs\T1\Lib\site-packages\playwright\_impl\_connection.py:272> exception=NotImplementedError()>
Traceback (most recent call last):
  File "c:\Users\02105\anaconda3\envs\T1\Lib\asyncio\tasks.py", line 314, in __step_run_and_handle_result
    result = coro.send(None)
             ^^^^^^^^^^^^^^^
  File "c:\Users\02105\anaconda3\envs\T1\Lib\site-packages\playwright\_impl\_connection.py", line 279, in run
    await self._transport.connect()
  File "c:\Users\02105\anaconda3\envs\T1\Lib\site-packages\playwright\_impl\_transport.py", line 133, in connect
    raise exc
  File "c:\Users\02105\anaconda3\envs\T1\Lib\site-packages\playwright\_impl\_transport.py", line 120, in connect
    self._proc = await asyncio.create_subprocess_exec(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\02105\anaconda3\envs\T1\Lib\asyncio\subprocess.py", lin

NotImplementedError: 

In [None]:
# 우회 실패!!!!!!!!!!!!!!!!!!!!!!

import requests

# STEP 1. Cookies copied from the browser (DevTools -> Application -> Cookies).
# They are session credentials, so they are read from the environment
# (STEAMDB_CF_CLEARANCE / STEAMDB_SESSION) instead of being committed to the
# source file.
import os

cookies = {
    "cf_clearance": os.getenv("STEAMDB_CF_CLEARANCE", ""),
    "__Host-steamdb": os.getenv("STEAMDB_SESSION", ""),
}

# STEP 2. Browser-like User-Agent header.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

# STEP 3. Target URL.
url = "https://steamdb.info/stats/trendingfollowers/?displayOnly=Game&tagid=1718,492"

# STEP 4. Send the request. A timeout is set (as in the other requests in this
# file) so the cell cannot hang forever; network errors are reported.
try:
    response = requests.get(url, headers=headers, cookies=cookies, timeout=10)
except requests.exceptions.RequestException as e:
    print(f"[ERROR] {e}")
else:
    # STEP 5. Check the response; on success keep the HTML for inspection.
    if response.status_code == 200:
        print("[SUCCESS] 정상 응답 수신 ✅")
        with open("steamdb_trending.html", "w", encoding="utf-8") as f:
            f.write(response.text)
    else:
        print(f"[FAIL] 상태 코드: {response.status_code}")


[FAIL] 상태 코드: 403


In [None]:
# 실패
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import time
import json

def get_cookies_after_cloudflare():
    """Open a visible Chrome window, wait for the user, then dump the cookies.

    After the user presses Enter, the driver's cookies are written to
    ``steamdb_cookies.json``. The driver is always quit.
    """
    target_url = "https://steamdb.info/stats/trendingfollowers/?displayOnly=Game&tagid=1718,492"

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--start-maximized")  # visible window, not headless
    chromedriver = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=chromedriver, options=chrome_options)

    try:
        print("[INFO] 브라우저가 열립니다. Cloudflare 인증을 수동으로 통과하세요.")
        driver.get(target_url)

        # Block until the user confirms the check has been passed.
        input("[WAIT] 인증 통과 후 엔터 누르세요...")

        saved = driver.get_cookies()
        with open("steamdb_cookies.json", "w", encoding="utf-8") as out:
            json.dump(saved, out, indent=2)

        print("[✅] 쿠키 저장 완료 → steamdb_cookies.json")
    finally:
        driver.quit()


if __name__ == "__main__":
    get_cookies_after_cloudflare()


[INFO] 브라우저가 열립니다. Cloudflare 인증을 수동으로 통과하세요.
[✅] 쿠키 저장 완료 → steamdb_cookies.json


In [None]:

# save_cookies.py
from playwright.sync_api import sync_playwright
import json

def save_steamdb_cookies():
    """Open SteamDB in a visible Chromium, wait for the user, save the cookies.

    After the user presses Enter, the browser context's cookies are written
    to ``steamdb_cookies.json`` and the browser is closed.
    """
    target_url = "https://steamdb.info/stats/trendingfollowers/?displayOnly=Game&tagid=1718,492"

    with sync_playwright() as pw:
        chromium = pw.chromium.launch(headless=False)
        ctx = chromium.new_context()
        tab = ctx.new_page()

        print(f"[INFO] 수동으로 Cloudflare 인증 통과 후 엔터를 누르세요: {target_url}")
        tab.goto(target_url)
        # Block until the user confirms the game list is visible.
        input("[WAIT] 인증 통과하고 게임 리스트가 보이면 Enter 누르세요...")

        # Persist the context's cookies.
        saved = ctx.cookies()
        with open("steamdb_cookies.json", "w", encoding="utf-8") as out:
            json.dump(saved, out, indent=2)
        print("[✅] 인증된 쿠키 저장 완료: steamdb_cookies.json")

        chromium.close()


if __name__ == "__main__":
    save_steamdb_cookies()

In [None]:
 # crawl_with_cookies.py  못뚫고 ban 당함
from playwright.sync_api import sync_playwright
import json
from bs4 import BeautifulSoup

def crawl_steamdb_with_saved_cookies():
    """Load the page in headless Chromium with saved cookies and print rows.

    Cookies are read from ``steamdb_cookies.json`` and added to the browser
    context before navigation. The number of ``table tbody tr`` rows is
    printed, followed by the cell texts of each row, and a screenshot is
    written to ``result.png``.
    """
    target_url = "https://steamdb.info/stats/trendingfollowers/?displayOnly=Game&tagid=1718,492"

    with sync_playwright() as pw:
        chromium = pw.chromium.launch(headless=True)
        ctx = chromium.new_context()

        # Restore the previously saved cookies into the context.
        with open("steamdb_cookies.json", "r", encoding="utf-8") as src:
            ctx.add_cookies(json.load(src))

        tab = ctx.new_page()
        tab.goto(target_url, timeout=60000)
        tab.wait_for_timeout(5000)  # fixed wait after navigation

        soup = BeautifulSoup(tab.content(), "html.parser")

        rows = soup.select("table tbody tr")
        print(f"[INFO] 테이블 행 수: {len(rows)}")

        for index, row in enumerate(rows, start=1):
            cell_texts = [td.get_text(strip=True) for td in row.find_all("td")]
            print(f"{index}. {cell_texts}")

        tab.screenshot(path="result.png")
        chromium.close()


if __name__ == "__main__":
    crawl_steamdb_with_saved_cookies()