In [1]:
import requests
import pandas as pd

def get_all_steam_apps():
    url = "https://api.steampowered.com/ISteamApps/GetAppList/v2/"
    response = requests.get(url)
    app_list = response.json()['applist']['apps']
    df_apps = pd.DataFrame(app_list)
    return df_apps

df_apps = get_all_steam_apps()
df_apps.to_csv("steam_applist.csv", index=False)
df_apps.head()

Unnamed: 0,appid,name
0,2006441,
1,2005791,
2,216938,Pieterw test app76 ( 216938 )
3,660010,test2
4,660130,test3


In [5]:
import time

def get_app_details(appid):
    url = f"https://store.steampowered.com/api/appdetails?appids={appid}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        if data[str(appid)]['success']:
            return data[str(appid)]['data']
    return None

# 테스트: 440번은 Team Fortress 2
app_detail = get_app_details(440)
print(app_detail['name'], app_detail['type'], app_detail['release_date']['date'])


Team Fortress 2 game 10. Okt. 2007


In [7]:
appids = df_apps['appid'].tolist()[:100]  # 우선 100개만 테스트

data_list = []

for appid in appids:
    details = get_app_details(appid)
    if details and details.get('type') == 'game':
        data_list.append({
            'appid': appid,
            'name': details.get('name'),
            'release_date': details.get('release_date', {}).get('date'),
            'is_free': details.get('is_free'),
            'type': details.get('type'),
            'genres': details.get('genres'),
            'price_overview': details.get('price_overview'),
        })
    time.sleep(0.2)  # 서버 부하 방지

df_details = pd.DataFrame(data_list)
df_details.to_csv("steam_app_details_sample.csv", index=False)
df_details.head()

Unnamed: 0,appid,name,release_date,is_free,type,genres,price_overview
0,835110,America's Retribution,"21 Apr, 2018",False,game,"[{'id': '1', 'description': 'Action'}, {'id': ...","{'currency': 'KRW', 'initial': 440000, 'final'..."
1,835130,Last Stanza,"9 Nov, 2018",False,game,"[{'id': '4', 'description': 'Casual'}, {'id': ...","{'currency': 'KRW', 'initial': 890000, 'final'..."
2,835040,美少女夏日欢乐!,"24 May, 2018",False,game,"[{'id': '71', 'description': 'Sexual Content'}...","{'currency': 'KRW', 'initial': 198000, 'final'..."
3,834880,Acro FS,"23 May, 2018",True,game,"[{'id': '28', 'description': 'Simulation'}, {'...",
4,834910,ATLAS,"22 Dec, 2018",False,game,"[{'id': '1', 'description': 'Action'}, {'id': ...","{'currency': 'KRW', 'initial': 3100000, 'final..."


In [9]:
df_details.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 7 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   appid           36 non-null     int64 
 1   name            36 non-null     object
 2   release_date    36 non-null     object
 3   is_free         36 non-null     bool  
 4   type            36 non-null     object
 5   genres          36 non-null     object
 6   price_overview  28 non-null     object
dtypes: bool(1), int64(1), object(5)
memory usage: 1.9+ KB


In [13]:
df_details.describe()

Unnamed: 0,appid
count,36.0
mean,834419.722222
std,418.715629
min,833810.0
25%,833987.5
50%,834430.0
75%,834767.5
max,835130.0


In [17]:
df_details.dtypes

appid              int64
name              object
release_date      object
is_free             bool
type              object
genres            object
price_overview    object
dtype: object

In [19]:
df_details.count()

appid             36
name              36
release_date      36
is_free           36
type              36
genres            36
price_overview    28
dtype: int64

In [21]:
df_details

Unnamed: 0,appid,name,release_date,is_free,type,genres,price_overview
0,835110,America's Retribution,"21 Apr, 2018",False,game,"[{'id': '1', 'description': 'Action'}, {'id': ...","{'currency': 'KRW', 'initial': 440000, 'final'..."
1,835130,Last Stanza,"9 Nov, 2018",False,game,"[{'id': '4', 'description': 'Casual'}, {'id': ...","{'currency': 'KRW', 'initial': 890000, 'final'..."
2,835040,美少女夏日欢乐!,"24 May, 2018",False,game,"[{'id': '71', 'description': 'Sexual Content'}...","{'currency': 'KRW', 'initial': 198000, 'final'..."
3,834880,Acro FS,"23 May, 2018",True,game,"[{'id': '28', 'description': 'Simulation'}, {'...",
4,834910,ATLAS,"22 Dec, 2018",False,game,"[{'id': '1', 'description': 'Action'}, {'id': ...","{'currency': 'KRW', 'initial': 3100000, 'final..."
5,834920,Operation Osama Bin Laden,"24 May, 2021",False,game,"[{'id': '1', 'description': 'Action'}, {'id': ...","{'currency': 'KRW', 'initial': 220000, 'final'..."
6,834930,Bishi and the Alien Slime Invasion!,"23 Apr, 2018",True,game,"[{'id': '1', 'description': 'Action'}, {'id': ...",
7,834730,Crazy Justice,Coming soon,True,game,"[{'id': '1', 'description': 'Action'}, {'id': ...",
8,834740,Circle Empires,"8 Aug, 2018",False,game,"[{'id': '4', 'description': 'Casual'}, {'id': ...","{'currency': 'KRW', 'initial': 990000, 'final'..."
9,834750,Crucible Falls: Together Forever,"21 Apr, 2018",False,game,"[{'id': '25', 'description': 'Adventure'}]","{'currency': 'KRW', 'initial': 550000, 'final'..."


In [23]:
import requests
import pandas as pd
import time

def get_app_details(appid):
    url = f"https://store.steampowered.com/api/appdetails?appids={appid}"
    try:
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            data = response.json()
            if data.get(str(appid), {}).get('success'):
                return data[str(appid)]['data']
    except Exception as e:
        print(f"[ERROR] appid {appid} - {e}")
    return None

def collect_game_data(df_apps, target_count=1000, save_interval=50):
    collected = []
    appids = df_apps['appid'].tolist()
    checked = 0

    for appid in appids:
        details = get_app_details(appid)
        checked += 1

        if details and details.get('type') == 'game':
            collected.append({
                'appid': appid,
                'name': details.get('name'),
                'release_date': details.get('release_date', {}).get('date'),
                'is_free': details.get('is_free'),
                'type': details.get('type'),
                'genres': details.get('genres'),
                'price_overview': details.get('price_overview'),
            })

            # 중간 저장
            if len(collected) % save_interval == 0:
                df_partial = pd.DataFrame(collected)
                df_partial.to_csv("steam_game_data_partial.csv", index=False)
                print(f"[INFO] Collected {len(collected)} valid games after checking {checked} apps")

            # 목표 수량 도달
            if len(collected) >= target_count:
                break

        # 요청 속도 제한
        time.sleep(0.2)

    df_result = pd.DataFrame(collected)
    df_result.to_csv("steam_game_data_final.csv", index=False)
    print(f"[DONE] Final saved: {len(df_result)} games from {checked} appids checked")
    return df_result


In [25]:
# 예: 목표 1000개, 50개마다 저장
df_result = collect_game_data(df_apps, target_count=1000, save_interval=50)

# 수집 데이터 미리보기
df_result.head()

[INFO] Collected 50 valid games after checking 120 apps
[INFO] Collected 100 valid games after checking 185 apps
[INFO] Collected 150 valid games after checking 677 apps
[INFO] Collected 200 valid games after checking 770 apps
[INFO] Collected 250 valid games after checking 845 apps
[INFO] Collected 300 valid games after checking 1228 apps
[INFO] Collected 350 valid games after checking 1301 apps
[INFO] Collected 400 valid games after checking 1396 apps
[INFO] Collected 450 valid games after checking 1861 apps
[INFO] Collected 500 valid games after checking 1945 apps


KeyboardInterrupt: 

In [27]:
df_result.head()

NameError: name 'df_result' is not defined