## 코드 설명  
  
웹크롤링, 스팀API Key, steam 라이브러리 이용
  
- input: 게임이름(영어) 리스트  
- output: 게임이름, 게임ID, 게임장르, 게임카테고리, 게임출시일 데이터셋  
  
#### URL = 'https://store.steampowered.com/search/results/?query&start='+str(start)+'&count=50&sort_by=Released_DESC&snr=1_7_7_popularnew_7&filter=popularnew&os=win&infinite=1'  

- 위 URL을 원하는 정렬 순서(예: 인기순위)에 맞게 바꾸면 이 코드를 그대로 사용할 수 있음  
  

### 인기게임 이름 가져오기

In [1]:
import requests
from bs4 import BeautifulSoup

**한국어 게임 이름 리스트가 필요하면 요청 부분을 아래 코드처럼 수정하면 되지만, 한국어 이름으로는 이후 단계에서 게임 정보를 가져올 수 없기 때문에 영어 리스트를 이용함**   
```python
res = requests.get(URL, cookies={
        'Steam_Language':'koreana'
    })
```

In [28]:
# Scrape game titles from the Steam store search endpoint, page by page.
# `result` ends up holding the title text of every row found, in page order.
result = []
rank_length = 200
# The endpoint returns its hidden (infinite-scroll) result list 50 rows at a time.
page_size = 50
for start in range(0, rank_length, page_size):
    URL = f'https://store.steampowered.com/search/results/?query&start={start}&count={page_size}&sort_by=Released_DESC&snr=1_7_7_popularnew_7&filter=popularnew&os=win&infinite=1'
    # A timeout keeps a stalled connection from hanging the notebook forever.
    res = requests.get(URL, timeout=30)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError below.
    res.raise_for_status()
    json_data = res.json()
    # The rendered result rows arrive as an HTML fragment under 'results_html'.
    soup = BeautifulSoup(json_data['results_html'], 'html.parser')
    titles = soup.select('.col.search_name.ellipsis .title')
    result.extend(title.text for title in titles)


In [None]:
# Preview the first 20 scraped game titles.
result[:20]

### 데이터셋 가져오기

In [3]:
import os, json

from dotenv import load_dotenv
from steam import Steam
from datetime import datetime

from tqdm import tqdm

In [4]:
# Load the local '.env' file, read STEAM_API_KEY from the environment
# (None when the variable is not set) and build the Steam client with it.
load_dotenv('.env')
KEY = os.environ.get('STEAM_API_KEY')
steam = Steam(KEY)

In [29]:
%%time
# Build the dataset: for every scraped title, look up its Steam app id and
# app details, tally genre/category descriptions, and keep a record for each
# game whose release date falls inside the window defined below.
cat_dict = {}   # category description -> number of times seen
gen_dict = {}   # genre description -> number of times seen
data_list = []  # (game_name, game_id, genres, categories, release_date) tuples
# Titles whose lookup/parsing failed, with the error, so skips stay visible.
skipped_games = []

# Release-date window. Both bounds are exclusive: a game released exactly on
# d1 or d2 is NOT kept.
# NOTE(review): confirm whether the end points were meant to be included.
d1 = datetime(2018, 6, 1)
d2 = datetime(2023, 6, 30)

for game_name in tqdm(result):
    # Some games have no (or incomplete) data, so each lookup is best-effort.
    try:
        # Take the id of the first search hit for this title.
        game_id = str(steam.apps.search_games(game_name)['apps'][0]['id'])
        # The details come back as a JSON string keyed by the app id.
        game_info = steam.apps.get_app_details(game_id)
        g_info = json.loads(game_info)

        # Genres of this game; the tally covers every fetched game,
        # including those later rejected by the date window.
        gens_list = []
        gens = g_info[game_id]['data']['genres']
        for gen in gens:
            gen_name = gen['description']
            gens_list.append(gen_name)
            gen_dict[gen_name] = gen_dict.get(gen_name, 0) + 1

        # Categories of this game, tallied the same way.
        cats_list = []
        cats = g_info[game_id]['data']['categories']
        for cat in cats:
            cat_name = cat['description']
            cats_list.append(cat_name)
            cat_dict[cat_name] = cat_dict.get(cat_name, 0) + 1

        # Release date string, e.g. '29 Jun, 2023'; other formats raise
        # ValueError and the game is skipped.
        g_date_str = g_info[game_id]['data']['release_date']['date']
        g_date = datetime.strptime(g_date_str, '%d %b, %Y')

        # All three datetimes are at midnight, so this strict comparison is
        # the same test as checking the sign of the day differences.
        if d1 < g_date < d2:
            record = (game_name, game_id, gens_list, cats_list, g_date)
            data_list.append(record)
    except Exception as exc:
        # Still best-effort, but Ctrl-C is no longer swallowed and the
        # reason for each skip is kept for inspection.
        skipped_games.append((game_name, repr(exc)))

100%|████████████████████████████████████████████████████████████████████████████████| 200/200 [04:08<00:00,  1.24s/it]

CPU times: total: 21.1 s
Wall time: 4min 8s





In [27]:
# Number of records kept, i.e. games whose release date fell inside the window.
len(data_list)

87

In [22]:
# Show how many times each category description was seen while fetching game details.
cat_dict

{'Single-player': 78,
 'Steam Achievements': 59,
 'Full controller support': 43,
 'Steam Cloud': 52,
 'Steam Trading Cards': 15,
 'Downloadable Content': 1,
 'Steam Workshop': 6,
 'Multi-player': 30,
 'Co-op': 23,
 'Online Co-op': 22,
 'Stats': 3,
 'Steam Leaderboards': 4,
 'PvP': 18,
 'Online PvP': 18,
 'Shared/Split Screen PvP': 6,
 'Shared/Split Screen Co-op': 3,
 'Shared/Split Screen': 6,
 'VR Supported': 2,
 'Remote Play Together': 4,
 'MMO': 5,
 'Partial Controller Support': 14,
 'Cross-Platform Multiplayer': 8,
 'In-App Purchases': 18,
 'Includes level editor': 2,
 'Remote Play on Phone': 2,
 'Remote Play on Tablet': 2,
 'Remote Play on TV': 3,
 'LAN PvP': 1,
 'Captions available': 2,
 'Tracked Controller Support': 2,
 'LAN Co-op': 2}

### 데이터셋으로 데이터프레임 만들기

In [25]:
import pandas as pd

In [23]:
# Column labels, in the same order as the fields of each record tuple in data_list.
column_name = ['game_name','game_id','game_genres','game_category','game_rel_d']

In [26]:
# Turn the collected record tuples into a table, one row per game, and display it.
result_df = pd.DataFrame(data=data_list, columns=column_name)
result_df

Unnamed: 0,game_name,game_id,game_genres,game_category,game_rel_d
0,Ghost Trick: Phantom Detective,1967430,"[Action, Adventure]","[Single-player, Steam Achievements, Full contr...",2023-06-29
1,DAVE THE DIVER Digital Extra,2492320,"[Adventure, Casual, Indie, RPG, Simulation]","[Single-player, Downloadable Content, Full con...",2023-06-28
2,DAVE THE DIVER,1868140,"[Adventure, Casual, Indie, RPG, Simulation]","[Single-player, Steam Achievements, Full contr...",2023-06-28
3,Shogun Showdown,2084000,"[Indie, RPG, Strategy, Early Access]","[Single-player, Steam Achievements, Steam Cloud]",2023-06-27
4,Kingdom Eighties,1956040,"[Action, Strategy]","[Single-player, Steam Achievements, Full contr...",2023-06-26
...,...,...,...,...,...
82,CRISIS CORE –FINAL FANTASY VII– REUNION,1608070,"[Action, RPG]","[Single-player, Steam Achievements, Full contr...",2022-12-13
83,PROJECT: PLAYTIME,1961460,"[Action, Free to Play, Indie, Early Access]","[Multi-player, Co-op, Online Co-op, In-App Pur...",2022-12-12
84,Chained Echoes,1229240,"[Indie, RPG]","[Single-player, Steam Achievements, Full contr...",2022-12-08
85,Knights of Honor II: Sovereign,736820,"[Simulation, Strategy]","[Single-player, Multi-player, PvP, Online PvP,...",2022-12-06
