In [None]:
import requests

# Download the Steam app list and keep the list of app entries in `apps`.
url = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error directly instead of failing inside .json().
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
apps = data["applist"]["apps"]
# Preview only a few entries: echoing the whole payload floods the notebook output.
apps[:5]

In [None]:
# Probe one appid against the appdetails endpoint and report whether it is a game.
appid = 150
details_url = f"https://store.steampowered.com/api/appdetails?appids={appid}"
# Time-box the request and surface HTTP errors before trying to parse the body.
details_resp = requests.get(details_url, timeout=10)
details_resp.raise_for_status()
details_data = details_resp.json()

print(details_data)
# A missing or null entry is treated as "not successful" instead of raising.
entry = (details_data or {}).get(str(appid)) or {}
if entry.get('success'):
    app_info = entry["data"]
    if app_info.get('type') == 'game':
        print('게임입니다')
    else:
        print('제외대상입니다')

In [None]:
import csv
import time
import random
import requests
from bs4 import BeautifulSoup

def get_korean_store_info(appid):
    """Scrape the Korean name and short description from an app's store page.

    Args:
        appid: Steam application id used to build the store page URL.

    Returns:
        A ``(name_kr, desc_kr)`` tuple of stripped strings. An element is ``""``
        when its tag is not present on the page; both are ``""`` when the
        request or the parsing raises (a warning is printed in that case).
    """
    # Steam's language identifier for Korean is "koreana" (not "korean").
    url = f"https://store.steampowered.com/app/{appid}/?l=koreana"
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        # Name: guard against a missing tag so a page without the title block
        # does not discard a description that is available.
        name_tag = soup.find("div", {"id": "appHubAppName"})
        name_kr = name_tag.text.strip() if name_tag is not None else ""

        # Short description
        desc_tag = soup.find("div", {"class": "game_description_snippet"})
        desc_kr = desc_tag.text.strip() if desc_tag is not None else ""

        return name_kr, desc_kr

    except Exception as e:
        # Best-effort scrape: report the failure and fall back to empty values.
        print(f"[WARN] 한국어 크롤링 실패 - appid {appid}: {e}")
        return "", ""


# Fetch every Steam AppID.
list_url = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"
# Time-box the request and fail loudly on an HTTP error instead of parsing an error body.
resp = requests.get(list_url, timeout=30)
resp.raise_for_status()
app_list_data = resp.json()
apps = app_list_data["applist"]["apps"]
print(f"전체 앱ID 수: {len(apps)}")

fail_log = []
count = 0

# The failure log is opened together with the CSV and written as failures happen,
# so an interrupted run (e.g. KeyboardInterrupt) still leaves the failed ids on disk.
with open('Steam_games_full_korean.csv', 'w', newline='', encoding='utf-8') as csvfile, \
        open('fail_log.txt', 'w') as fail_file:
    fieldnames = ['appid', 'name', 'name_kr', 'genres', 'release_date', 'is_coming_soon',
                  'initial_price', 'final_price', 'discount_percent', 'description', 'description_kr']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for app in apps:
        appid = app['appid']

        # Spread the server load: pause before EVERY request. Sleeping only after a
        # saved row would let the `continue` paths below fire requests back to back.
        time.sleep(random.uniform(0.5, 1.5))

        try:
            # (1) AppDetails API
            url = f"https://store.steampowered.com/api/appdetails?appids={appid}"
            resp = requests.get(url, timeout=10)
            parsing = resp.json()

            if not parsing.get(str(appid), {}).get('success'):
                continue

            info = parsing[str(appid)]['data']

            # (2) Filter: keep only entries whose type is 'game'
            if info.get('type') != 'game':
                continue

            # (3) Price information (API values are divided by 100 before saving)
            price_info = info.get("price_overview", {})
            initial_price = price_info.get("initial", 0) / 100
            final_price = price_info.get("final", 0) / 100
            discount = price_info.get("discount_percent", 0)

            # (4) Release date
            release_date_info = info.get("release_date", {})
            release_date = release_date_info.get("date", "")
            is_coming_soon = release_date_info.get("coming_soon", False)

            # (5) Genres, flattened to a comma-separated string
            genres_list = [g['description'] for g in info.get('genres', [])]
            genres = ', '.join(genres_list)

            # (6) Name / description as returned by the API
            name = info.get('name', '')
            description = info.get('short_description', '')

            # (7) Korean store page scrape
            name_kr, description_kr = get_korean_store_info(appid)

            # (8) Write the CSV row
            writer.writerow({
                'appid': appid,
                'name': name,
                'name_kr': name_kr,
                'genres': genres,
                'release_date': release_date,
                'is_coming_soon': is_coming_soon,
                'initial_price': initial_price,
                'final_price': final_price,
                'discount_percent': discount,
                'description': description,
                'description_kr': description_kr
            })

            count += 1
            if count % 100 == 0:
                # Push buffered rows to disk periodically so little is lost on a crash.
                csvfile.flush()
                print(f"✅ {count} games saved so far.")

        except Exception as e:
            print(f"❌ Error for {appid}: {e}")
            fail_log.append(appid)
            fail_file.write(str(appid) + '\n')

print("✅ All done!")


In [1]:
import csv
import time
import random
import requests
from bs4 import BeautifulSoup

# ---------- Settings ----------
# Column order of the crawl CSV (passed to csv.DictWriter as the header).
FIELDNAMES = [
    'appid',
    'name',
    'name_kr',
    'genres',
    'release_date',
    'is_coming_soon',
    'initial_price',
    'final_price',
    'discount_percent',
    'description',
    'description_kr',
]

# Crawl output, and the log of appids that should not be retried.
CSV_FILE = 'Steam_games_full_korean.csv'
FAIL_LOG_FILE = 'fail_log.txt'

# ---------- Helper ----------
def get_korean_store_info(appid):
    """Scrape the Korean name and short description from an app's store page.

    Args:
        appid: Steam application id used to build the store page URL.

    Returns:
        A ``(name_kr, desc_kr)`` tuple of stripped strings. An element is ``""``
        when its tag is not present on the page; both are ``""`` when the
        request or the parsing raises (a warning is printed in that case).
    """
    # Steam's language identifier for Korean is "koreana" (not "korean").
    url = f"https://store.steampowered.com/app/{appid}/?l=koreana"
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, 'html.parser')

        name_kr_tag = soup.find("div", {"id": "appHubAppName"})
        name_kr = name_kr_tag.text.strip() if name_kr_tag is not None else ""

        desc_tag = soup.find("div", {"class": "game_description_snippet"})
        desc_kr = desc_tag.text.strip() if desc_tag is not None else ""

        return name_kr, desc_kr
    except Exception as e:
        # Best-effort scrape: report the failure and fall back to empty values.
        print(f"[WARN] 한국어 크롤링 실패 - appid {appid}: {e}")
        return "", ""

def load_id_file(filename):
    """Read a text file holding one integer id per line.

    Lines are stripped of surrounding whitespace; lines that are not made up
    solely of digits are ignored.

    Args:
        filename: Path of the id file.

    Returns:
        A set of ints. The set is empty when the file does not exist.
    """
    try:
        with open(filename, 'r') as handle:
            tokens = (raw.strip() for raw in handle)
            return {int(token) for token in tokens if token.isdigit()}
    except FileNotFoundError:
        # No file yet simply means nothing has been recorded.
        return set()

def append_to_fail_log(appid):
    """Append ``appid`` on its own line to the file named by FAIL_LOG_FILE."""
    with open(FAIL_LOG_FILE, mode='a') as log_file:
        log_file.write("{}\n".format(appid))

# ---------- 1. Read the appids that are already saved ----------
saved_ids = set()
try:
    with open(CSV_FILE, newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Strip whitespace and stray quotes; cells that are not purely numeric are skipped.
            raw = row.get('appid', '').strip().replace('"', '')
            if raw.isdigit():
                saved_ids.add(int(raw))
except FileNotFoundError:
    # First run: there is no CSV yet.
    pass

print(f"✅ 이미 저장된 게임 수: {len(saved_ids)}")

# ---------- 2. Read the appids that failed before ----------
failed_ids = load_id_file(FAIL_LOG_FILE)
print(f"✅ 이전 실패 AppID 수: {len(failed_ids)}")

# ---------- 3. Fetch every Steam AppID ----------
list_url = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"
# Time-box the request and fail loudly on an HTTP error instead of parsing an error body.
resp = requests.get(list_url, timeout=30)
resp.raise_for_status()
app_list_data = resp.json()
apps = app_list_data["applist"]["apps"]
print(f"✅ 전체 AppID 수: {len(apps)}")

# ---------- 4. Filter out everything already handled ----------
to_process_apps = [
    app for app in apps
    if app['appid'] not in saved_ids
    and app['appid'] not in failed_ids
]
print(f"✅ 이번에 새로 크롤링할 AppID 수: {len(to_process_apps)}")

# ---------- 5. Crawl ----------
count = 0

with open(CSV_FILE, 'a', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)

    # Write the header only when the file is new (empty).
    if csvfile.tell() == 0:
        writer.writeheader()

    for app in to_process_apps:
        appid = app['appid']

        # Spread the server load: pause before EVERY request. Sleeping only after a
        # saved row would let the `continue` paths below fire requests back to back.
        time.sleep(random.uniform(0.5, 1.5))

        try:
            # AppDetails API
            url = f"https://store.steampowered.com/api/appdetails?appids={appid}"
            resp = requests.get(url, timeout=10)
            parsing = resp.json()

            # Defensive response check
            result = parsing.get(str(appid))
            if not result or not result.get('success'):
                print(f"❌ API 응답 실패 for {appid}")
                append_to_fail_log(appid)
                continue

            info = result.get('data')
            if not info:
                print(f"❌ API 데이터 None for {appid}")
                append_to_fail_log(appid)
                continue

            # Non-game entries are logged too, so later sessions skip them.
            if info.get('type') != 'game':
                print(f"❌ 타입이 'game'이 아님: {appid} → 즉시 실패 로그에 기록")
                append_to_fail_log(appid)
                continue

            # Price information (API values are divided by 100 before saving)
            price_info = info.get("price_overview", {})
            initial_price = price_info.get("initial", 0) / 100
            final_price = price_info.get("final", 0) / 100
            discount = price_info.get("discount_percent", 0)

            # Release date
            release_date_info = info.get("release_date", {})
            release_date = release_date_info.get("date", "")
            is_coming_soon = release_date_info.get("coming_soon", False)

            # Genres, flattened to a comma-separated string
            genres_list = [g['description'] for g in info.get('genres', [])]
            genres = ', '.join(genres_list)

            # Name / description as returned by the API
            name = info.get('name', '')
            description = info.get('short_description', '')

            # Korean store page scrape
            name_kr, description_kr = get_korean_store_info(appid)

            # Write the CSV row
            writer.writerow({
                'appid': appid,
                'name': name,
                'name_kr': name_kr,
                'genres': genres,
                'release_date': release_date,
                'is_coming_soon': is_coming_soon,
                'initial_price': initial_price,
                'final_price': final_price,
                'discount_percent': discount,
                'description': description,
                'description_kr': description_kr
            })

            count += 1
            if count % 100 == 0:
                # Push buffered rows to disk periodically so little is lost on a crash.
                csvfile.flush()
                print(f"✅ {count} games saved so far.")

        except Exception as e:
            print(f"❌ Error for {appid}: {e}")
            append_to_fail_log(appid)

print(f"✅ 크롤링 완료! 이번 세션에서 저장한 게임 수: {count}")


✅ 이미 저장된 게임 수: 5703
✅ 이전 실패 AppID 수: 20532
✅ 전체 AppID 수: 256704
✅ 이번에 새로 크롤링할 AppID 수: 230469
❌ 타입이 'game'이 아님: 497890 → 즉시 실패 로그에 기록
❌ API 응답 실패 for 497900
❌ 타입이 'game'이 아님: 497910 → 즉시 실패 로그에 기록
❌ 타입이 'game'이 아님: 497930 → 즉시 실패 로그에 기록
❌ API 응답 실패 for 497960
❌ 타입이 'game'이 아님: 497970 → 즉시 실패 로그에 기록
❌ API 응답 실패 for 497990
❌ 타입이 'game'이 아님: 498010 → 즉시 실패 로그에 기록
❌ API 응답 실패 for 498020
❌ API 응답 실패 for 498100
❌ API 응답 실패 for 498101
❌ API 응답 실패 for 498102
❌ API 응답 실패 for 498140
❌ API 응답 실패 for 498141
❌ API 응답 실패 for 498142
❌ 타입이 'game'이 아님: 498200 → 즉시 실패 로그에 기록
❌ API 응답 실패 for 498220
❌ API 응답 실패 for 498221
❌ API 응답 실패 for 498222
❌ API 응답 실패 for 498223
❌ 타입이 'game'이 아님: 498290 → 즉시 실패 로그에 기록
❌ API 응답 실패 for 498390
❌ 타입이 'game'이 아님: 498400 → 즉시 실패 로그에 기록
❌ 타입이 'game'이 아님: 498410 → 즉시 실패 로그에 기록
❌ 타입이 'game'이 아님: 498420 → 즉시 실패 로그에 기록
❌ 타입이 'game'이 아님: 498520 → 즉시 실패 로그에 기록
❌ 타입이 'game'이 아님: 498540 → 즉시 실패 로그에 기록
❌ 타입이 'game'이 아님: 498590 → 즉시 실패 로그에 기록
❌ 타입이 'game'이 아님: 498640 → 즉시 실패 로그에 기록
❌

KeyboardInterrupt: 

In [4]:
# Imported here so the cell also runs on a fresh kernel: no earlier cell imports pandas.
import pandas as pd

# Load the crawl output and show column dtypes / non-null counts.
df = pd.read_csv('Steam_games_full_korean.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7331 entries, 0 to 7330
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   appid             7331 non-null   object
 1   name              7331 non-null   object
 2   name_kr           7330 non-null   object
 3   genres            7307 non-null   object
 4   release_date      7286 non-null   object
 5   is_coming_soon    7331 non-null   object
 6   initial_price     7331 non-null   object
 7   final_price       7331 non-null   object
 8   discount_percent  7331 non-null   object
 9   description       7323 non-null   object
 10  description_kr    7322 non-null   object
dtypes: object(11)
memory usage: 630.1+ KB


In [1]:
import pandas as pd
import requests
import time
import os

# Input: the crawl CSV. Output: the enriched CSV written row by row.
INPUT_CSV = 'Steam_games_full_korean.csv'
OUTPUT_CSV = '스팀게임파일.csv'
# Seconds to pause after each Storefront API request.
API_DELAY_SEC = 1


def _pipe_join(entries, field):
    """Collect ``entry[field]`` for each entry and join with '|'; None when there are none."""
    values = [entry[field] for entry in entries]
    return '|'.join(values) if values else None


def fetch_steam_data(appid, delay=API_DELAY_SEC):
    """Fetch categories, descriptions and screenshots for one app (Korean locale).

    Queries the Steam Storefront ``appdetails`` endpoint with ``cc=kr`` and
    ``l=koreana``, then sleeps ``delay`` seconds.

    Args:
        appid: Steam application id (anything that formats to the id string).
        delay: Seconds to sleep after the HTTP request returns.

    Returns:
        A dict with the keys ``category`` and ``screenshots`` ('|'-joined strings,
        or None when the list is empty), ``description_kr`` (short description)
        and ``new_description`` (detailed description). Returns None when the
        HTTP status is not 200, when the entry has no truthy ``success`` flag,
        or when any exception is raised; a message is printed in each case.
    """
    endpoint = f"https://store.steampowered.com/api/appdetails?appids={appid}&cc=kr&l=koreana"
    try:
        response = requests.get(endpoint, timeout=5)
        time.sleep(delay)
        if response.status_code != 200:
            print(f"[ERROR] HTTP {response.status_code} for appid {appid}")
            return None

        payload = response.json()
        entry_key = str(appid)
        if not payload.get(entry_key, {}).get('success'):
            print(f"[ERROR] No success flag for appid {appid}")
            return None

        details = payload[entry_key]['data']

        # Categories
        category_text = _pipe_join(details.get('categories', []), 'description')
        # Short description
        short_text = details.get('short_description')
        # Detailed (body) description
        detailed_text = details.get('detailed_description')
        # Screenshots (full-size paths)
        screenshot_text = _pipe_join(details.get('screenshots', []), 'path_full')

        return {
            "category": category_text,
            "description_kr": short_text,
            "new_description": detailed_text,
            "screenshots": screenshot_text,
        }

    except Exception as exc:
        # Best-effort fetch: report and let the caller decide how to fill the gap.
        print(f"[EXCEPTION] {appid}: {exc}")
        return None


def main():
    """Enrich INPUT_CSV with Storefront API data, appending rows to OUTPUT_CSV.

    Rows whose appid already appears in OUTPUT_CSV are skipped, so calling
    main() again after an interruption continues where the last run stopped.
    For each remaining row the columns name / final_price / discount_percent /
    description are dropped, the fields returned by fetch_steam_data() are
    merged in (all None when the fetch returns None), and the row is written
    to OUTPUT_CSV immediately.
    """
    print("\n=== CSV Enrichment Script Start ===")

    # (1) Read the input CSV
    print(f"Loading input: {INPUT_CSV}")
    df_all = pd.read_csv(INPUT_CSV)
    print(f"Total rows in input: {len(df_all)}")

    # (2) Collect the appids already present in an existing output CSV
    if not os.path.exists(OUTPUT_CSV):
        done_appids = set()
        print("[INFO] No existing output found. Starting fresh.")
    else:
        done_appids = set(pd.read_csv(OUTPUT_CSV)['appid'].astype(str))
        print(f"[INFO] Found existing output: {len(done_appids)} rows already processed.")

    # (3) Keep only the rows that still need processing (compared as strings)
    already_done = df_all['appid'].astype(str).isin(done_appids)
    df_pending = df_all[~already_done]
    print(f"[INFO] Remaining to process: {len(df_pending)} rows.")

    if df_pending.empty:
        print("[DONE] Nothing to process!")
        return

    columns_to_drop = ['name', 'final_price', 'discount_percent', 'description']
    enrichment_keys = ("category", "description_kr", "new_description", "screenshots")

    # (4) Main loop
    for _, source_row in df_pending.iterrows():
        appid = str(source_row['appid'])
        print(f"\n--- Processing appid: {appid} ---")

        # Steam API call; a failed fetch still yields a row, with empty enrichment fields
        fetched = fetch_steam_data(appid)
        if fetched is None:
            fetched = dict.fromkeys(enrichment_keys)

        # Drop the unwanted input columns (only those actually present)
        present = [col for col in columns_to_drop if col in source_row]
        record = source_row.drop(labels=present).to_dict()

        # Merge in the new fields
        record.update(fetched)

        # (5) One-row DataFrame, saved right away: header only when creating the file
        is_new_file = not os.path.exists(OUTPUT_CSV)
        pd.DataFrame([record]).to_csv(
            OUTPUT_CSV,
            mode='w' if is_new_file else 'a',
            header=is_new_file,
            index=False,
        )

        print(f"[SAVED] appid {appid}")

    print("\n=== All done! ===")


# Entry point: run the enrichment when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()



=== CSV Enrichment Script Start ===
Loading input: Steam_games_full_korean.csv
Total rows in input: 7331
[INFO] Found existing output: 1015 rows already processed.
[INFO] Remaining to process: 6266 rows.

--- Processing appid: 278080 ---
[SAVED] appid 278080

--- Processing appid: 278100 ---
[SAVED] appid 278100

--- Processing appid: 278190 ---
[SAVED] appid 278190

--- Processing appid: 278360 ---
[SAVED] appid 278360

--- Processing appid: 278420 ---
[SAVED] appid 278420

--- Processing appid: 278440 ---
[SAVED] appid 278440

--- Processing appid: 278460 ---
[SAVED] appid 278460

--- Processing appid: 278490 ---
[SAVED] appid 278490

--- Processing appid: 278510 ---
[SAVED] appid 278510

--- Processing appid: 278530 ---
[SAVED] appid 278530

--- Processing appid: 278570 ---
[SAVED] appid 278570

--- Processing appid: 278590 ---
[SAVED] appid 278590

--- Processing appid: 278620 ---
[SAVED] appid 278620

--- Processing appid: 278640 ---
[SAVED] appid 278640

--- Processing appid: 27

KeyboardInterrupt: 