## 从IGDB数据库爬取游戏种类和发行时间

In [None]:
## 提取所有游戏的列表
import pandas as pd

file_path = "Twitch_game_data.csv"
df = pd.read_csv(file_path, encoding='ISO-8859-1')
unique_games = df["Game"].unique()

unique_games_df = pd.DataFrame(unique_games, columns=["Game"])
unique_games_path = "unique_games.csv"
unique_games_df.to_csv(unique_games_path, index=False, encoding='ISO-8859-1')

print(f"Extracted {len(unique_games)} unique games. Saved to {unique_games_path}.")

Extracted 2360 unique games. Saved to unique_games.csv.


In [4]:
import requests
import pandas as pd
import time

# 你的 IGDB API 凭据
CLIENT_ID = "e1cvubjaunqp8g8rhyxmuog7ld4tsy"  # 这里替换成你的 Client ID
CLIENT_SECRET = "tnettt8ysiuybxxi1g8dlkzlwbzice"  # 这里替换成你的 Client Secret

# 获取 OAuth 令牌
def get_access_token():
    url = "https://id.twitch.tv/oauth2/token"
    params = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials"
    }
    response = requests.post(url, params=params)
    response.raise_for_status()
    return response.json()["access_token"]

# 查询 IGDB API 获取游戏分类和上市日期
def get_game_info(access_token, game_name):
    url = "https://api.igdb.com/v4/games"
    headers = {
        "Client-ID": CLIENT_ID,
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json"
    }
    data = f'fields name, genres.name, first_release_date; where name ~ "{game_name}"*; limit 1;'
    response = requests.post(url, headers=headers, data=data)
    response.raise_for_status()
    results = response.json()
    
    if results:
        game = results[0]
        genres = game.get("genres", [])
        genre_names = ", ".join([genre["name"] for genre in genres]) if genres else "Unknown"
        release_date = game.get("first_release_date")
        release_year_month = time.strftime('%Y-%m', time.gmtime(release_date)) if release_date else "Unknown"
        return genre_names, release_year_month
    return "Unknown", "Unknown"

# 读取唯一游戏列表
unique_games_path = "unique_games.csv"
df_games = pd.read_csv(unique_games_path, encoding='ISO-8859-1')

# 获取访问令牌
access_token = get_access_token()

# 创建游戏信息字典
game_info_dict = {}
for game in df_games["Game"].tolist():
    genre, release_date = get_game_info(access_token, game)
    game_info_dict[game] = {"Genre": genre, "Release Date": release_date}

# 生成包含游戏信息的 DataFrame
df_game_info = pd.DataFrame.from_dict(game_info_dict, orient='index').reset_index()
df_game_info.columns = ["Game", "Genre", "Release Date"]

# 保存更新后的游戏信息文件
output_path = "game_info.csv"
df_game_info.to_csv(output_path, index=False, encoding='ISO-8859-1')
print(f"Updated file saved: {output_path}")

Updated file saved: game_info.csv


In [None]:
import requests
import pandas as pd
import time
import re
from tqdm import tqdm

# IGDB API 凭据
CLIENT_ID = "e1cvubjaunqp8g8rhyxmuog7ld4tsy"
CLIENT_SECRET = "tnettt8ysiuybxxi1g8dlkzlwbzice"

# 获取 OAuth 令牌
def get_access_token():
    url = "https://id.twitch.tv/oauth2/token"
    params = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials"
    }
    response = requests.post(url, params=params)
    response.raise_for_status()
    return response.json()["access_token"]

# 查询 IGDB API 获取游戏信息
def get_game_info(access_token, game_name):
    url = "https://api.igdb.com/v4/games"
    headers = {
        "Client-ID": CLIENT_ID,
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json"
    }
    
    # 首先尝试精确匹配
    data = f'fields name, genres.name, first_release_date; where name = "{game_name}"; limit 5;'
    response = requests.post(url, headers=headers, data=data)
    response.raise_for_status()
    results = response.json()
    
    # 如果没有精确匹配结果，使用搜索功能
    if not results:
        data = f'fields name, genres.name, first_release_date; search "{game_name}"; limit 5;'
        response = requests.post(url, headers=headers, data=data)
        response.raise_for_status()
        results = response.json()
    
    if results:
        # 输出所有返回的结果，用于调试
        print(f"\n游戏名称: {game_name} 找到 {len(results)} 个结果:")
        for i, game in enumerate(results):
            release_date = game.get("first_release_date")
            release_str = time.strftime('%Y-%m', time.gmtime(release_date)) if release_date else "Unknown"
            print(f"  {i+1}. {game['name']} - 发布日期: {release_str}")
        
        # 保留第一个结果作为最佳匹配
        game = results[0]
        genres = game.get("genres", [])
        genre_names = ", ".join([genre["name"] for genre in genres]) if genres else "Unknown"
        release_date = game.get("first_release_date")
        
        if release_date:
            release_year = time.strftime('%Y', time.gmtime(release_date))
            release_month = time.strftime('%m', time.gmtime(release_date))
            return {
                "Genre": genre_names, 
                "Release Year": release_year,
                "Release Month": release_month,
                "Game Name in IGDB": game["name"]
            }
    
    return {
        "Genre": "Unknown", 
        "Release Year": "Unknown",
        "Release Month": "Unknown",
        "Game Name in IGDB": "Not Found"
    }

# 主函数：测试前50个游戏
def test_first_50_games():
    # 读取唯一游戏列表
    unique_games_path = "unique_games.csv"
    try:
        df_games = pd.read_csv(unique_games_path, encoding='UTF-8')
    except UnicodeDecodeError:
        df_games = pd.read_csv(unique_games_path, encoding='ISO-8859-1')
    
    # 获取前50个游戏
    test_games = df_games["Game"].head(50).tolist()
    
    # 获取访问令牌
    print("获取API访问令牌...")
    access_token = get_access_token()
    
    # 测试前50个游戏
    print("测试前50个游戏的信息获取...")
    test_results = []
    
    for game in tqdm(test_games, desc="测试游戏"):
        info = get_game_info(access_token, game)
        test_results.append({
            "Game": game,
            "Genre": info["Genre"],
            "Release Year": info["Release Year"],
            "Release Month": info["Release Month"],
            "Game Name in IGDB": info["Game Name in IGDB"]
        })
        time.sleep(0.3)  # 防止API限制
        
    df_test = pd.DataFrame(test_results)
    df_test.to_csv("test_first_50_games.csv", index=False, encoding='UTF-8')
    print("测试完成，结果已保存至 test_first_50_games.csv")
    
    # 显示测试结果摘要
    print("\n测试结果摘要:")
    print(df_test[["Game", "Release Year", "Release Month", "Game Name in IGDB"]].head(10))
    
    unknown_count = sum(df_test["Release Year"] == "Unknown")
    print(f"\n总共测试了 {len(df_test)} 个游戏，其中 {unknown_count} 个没有找到发行日期 ({unknown_count/len(df_test)*100:.1f}%)")

if __name__ == "__main__":
    test_first_50_games()

获取API访问令牌...
测试前50个游戏的信息获取...


测试游戏:   0%|          | 0/50 [00:00<?, ?it/s]


游戏名称: League of Legends 找到 1 个结果:
  1. League of Legends - 发布日期: 2009-10


测试游戏:   2%|▏         | 1/50 [00:00<00:32,  1.49it/s]


游戏名称: Counter-Strike: Global Offensive 找到 1 个结果:
  1. Counter-Strike: Global Offensive - 发布日期: 2012-08


测试游戏:   4%|▍         | 2/50 [00:01<00:30,  1.60it/s]


游戏名称: Dota 2 找到 1 个结果:
  1. Dota 2 - 发布日期: 2013-07


测试游戏:   6%|▌         | 3/50 [00:01<00:28,  1.65it/s]


游戏名称: Hearthstone 找到 1 个结果:
  1. Hearthstone - 发布日期: 2014-03


测试游戏:   8%|▊         | 4/50 [00:02<00:27,  1.68it/s]


游戏名称: Call of Duty: Black Ops III 找到 2 个结果:
  1. Call of Duty: Black Ops III - 发布日期: 2015-11
  2. Call of Duty: Black Ops III - 发布日期: 2015-11


测试游戏:  10%|█         | 5/50 [00:03<00:27,  1.65it/s]


游戏名称: Minecraft 找到 1 个结果:
  1. Minecraft - 发布日期: 2016-12


测试游戏:  12%|█▏        | 6/50 [00:03<00:25,  1.72it/s]


游戏名称: World of Warcraft 找到 1 个结果:
  1. World of Warcraft - 发布日期: 2004-11


测试游戏:  14%|█▍        | 7/50 [00:04<00:25,  1.70it/s]


游戏名称: Z1: Battle Royale 找到 1 个结果:
  1. Z1: Battle Royale - 发布日期: 2018-02


测试游戏:  18%|█▊        | 9/50 [00:05<00:28,  1.44it/s]


游戏名称: FIFA 16 找到 3 个结果:
  1. FIFA 16 - 发布日期: 2015-09
  2. FIFA 16 - 发布日期: 2015-09
  3. FIFA 16 - 发布日期: 2015-09


测试游戏:  20%|██        | 10/50 [00:06<00:26,  1.53it/s]


游戏名称: StarCraft II 找到 5 个结果:
  1. StarCraft II: Wings of Liberty - 发布日期: 2010-07
  2. StarCraft II: Heart of the Swarm - 发布日期: 2013-03
  3. StarCraft II: Legacy of the Void - 发布日期: 2015-11
  4. StarCraft II: Trilogy - 发布日期: 2015-12
  5. StarCraft II: Battle Chest - 发布日期: 2016-11


测试游戏:  22%|██▏       | 11/50 [00:07<00:27,  1.40it/s]


游戏名称: Diablo III 找到 1 个结果:
  1. Diablo III - 发布日期: 2012-05


测试游戏:  24%|██▍       | 12/50 [00:07<00:25,  1.50it/s]


游戏名称: Destiny 找到 3 个结果:
  1. Destiny - 发布日期: 1985-12
  2. Destiny - 发布日期: 1996-12
  3. Destiny - 发布日期: 2014-09


测试游戏:  26%|██▌       | 13/50 [00:08<00:23,  1.60it/s]


游戏名称: Blade & Soul 找到 1 个结果:
  1. Blade & Soul - 发布日期: 2012-06


测试游戏:  28%|██▊       | 14/50 [00:08<00:21,  1.68it/s]


游戏名称: World of Tanks 找到 1 个结果:
  1. World of Tanks - 发布日期: 2011-04


测试游戏:  30%|███       | 15/50 [00:09<00:20,  1.69it/s]


游戏名称: Tom Clancy's Rainbow Six Siege 找到 1 个结果:
  1. Tom Clancy's Rainbow Six Siege - 发布日期: 2015-12


测试游戏:  32%|███▏      | 16/50 [00:09<00:19,  1.72it/s]


游戏名称: RuneScape 找到 1 个结果:
  1. RuneScape - 发布日期: 2001-01


测试游戏:  34%|███▍      | 17/50 [00:10<00:18,  1.77it/s]


游戏名称: Tom Clancy's The Division 找到 1 个结果:
  1. Tom Clancy's The Division - 发布日期: 2016-03


测试游戏:  36%|███▌      | 18/50 [00:10<00:18,  1.77it/s]


游戏名称: Grand Theft Auto V 找到 3 个结果:
  1. Grand Theft Auto V - 发布日期: 2022-06
  2. Grand Theft Auto V - 发布日期: 2013-09
  3. Grand Theft Auto V - 发布日期: 2022-04


测试游戏:  38%|███▊      | 19/50 [00:11<00:17,  1.77it/s]


游戏名称: SMITE 找到 5 个结果:
  1. Smite - 发布日期: 2014-03
  2. Smite 2 - 发布日期: 2024-08
  3. Smite Blitz - 发布日期: Unknown
  4. Smite Rivals - 发布日期: Unknown
  5. Smite Tactics - 发布日期: Unknown


测试游戏:  40%|████      | 20/50 [00:12<00:20,  1.47it/s]


游戏名称: Creative 找到 5 个结果:
  1. Creative Runner - 发布日期: 2020-03
  2. Creative Console - 发布日期: 2023-01
  3. Hamidashi Creative - 发布日期: 2021-06
  4. Creative Kill Chamber - 发布日期: 2010-10
  5. Creative Contraptions - 发布日期: 1985-01


测试游戏:  42%|████▏     | 21/50 [00:13<00:21,  1.37it/s]


游戏名称: Poker 找到 5 个结果:
  1. HD Poker - 发布日期: 2018-09
  2. Jacks or Better Video Poker - 发布日期: 2024-02
  3. Poker 1 - 发布日期: Unknown
  4. Strip Poker Three - 发布日期: 1991-12
  5. Poker Poker Magic - 发布日期: 2024-06


测试游戏:  44%|████▍     | 22/50 [00:14<00:21,  1.32it/s]


游戏名称: Super Mario Maker 找到 1 个结果:
  1. Super Mario Maker - 发布日期: 2015-09


测试游戏:  46%|████▌     | 23/50 [00:14<00:18,  1.44it/s]


游戏名称: Heroes of the Storm 找到 1 个结果:
  1. Heroes of the Storm - 发布日期: 2015-06


测试游戏:  48%|████▊     | 24/50 [00:15<00:17,  1.52it/s]


游戏名称: Darkest Dungeon 找到 1 个结果:
  1. Darkest Dungeon - 发布日期: 2016-01


测试游戏:  50%|█████     | 25/50 [00:15<00:15,  1.59it/s]


游戏名称: Arma 3 找到 1 个结果:
  1. Arma 3 - 发布日期: 2013-09


测试游戏:  52%|█████▏    | 26/50 [00:16<00:14,  1.65it/s]


游戏名称: DayZ 找到 1 个结果:
  1. DayZ - 发布日期: 2018-12


测试游戏:  54%|█████▍    | 27/50 [00:16<00:13,  1.68it/s]


游戏名称: NBA 2K16 找到 1 个结果:
  1. NBA 2K16 - 发布日期: 2015-09


测试游戏:  56%|█████▌    | 28/50 [00:17<00:12,  1.72it/s]


游戏名称: Magic: The Gathering 找到 2 个结果:
  1. Magic: The Gathering - 发布日期: 1997-03
  2. Magic: The Gathering - 发布日期: 2001-12


测试游戏:  58%|█████▊    | 29/50 [00:18<00:12,  1.75it/s]


游戏名称: Ark: Survival Evolved 找到 1 个结果:
  1. Ark: Survival Evolved - 发布日期: 2017-08


测试游戏:  60%|██████    | 30/50 [00:18<00:11,  1.72it/s]


游戏名称: Halo 5: Guardians 找到 1 个结果:
  1. Halo 5: Guardians - 发布日期: 2015-10


测试游戏:  62%|██████▏   | 31/50 [00:19<00:10,  1.75it/s]


游戏名称: Super Smash Bros. Melee 找到 1 个结果:
  1. Super Smash Bros. Melee - 发布日期: 2001-11


测试游戏:  64%|██████▍   | 32/50 [00:19<00:10,  1.78it/s]


游戏名称: Music 找到 5 个结果:
  1. Music: Music Creation for the PlayStation - 发布日期: 1998-11
  2. Uta no Prince-sama: Music - 发布日期: 2011-11
  3. Uta no Prince-sama: Music 3 - 发布日期: 2016-01
  4. Uta no Prince-sama: Music 2 - 发布日期: 2013-09
  5. Touhou Spell Bubble: Shinra-Bansho Music Pack - 发布日期: 2020-08


测试游戏:  66%|██████▌   | 33/50 [00:20<00:11,  1.53it/s]


游戏名称: Super Smash Bros. for Wii U 找到 1 个结果:
  1. Super Smash Bros. for Wii U - 发布日期: 2014-11


测试游戏:  68%|██████▊   | 34/50 [00:21<00:09,  1.65it/s]


游戏名称: Path of Exile 找到 1 个结果:
  1. Path of Exile - 发布日期: 2013-10


测试游戏:  70%|███████   | 35/50 [00:21<00:08,  1.71it/s]


游戏名称: Fallout 4 找到 1 个结果:
  1. Fallout 4 - 发布日期: 2015-11


测试游戏:  72%|███████▏  | 36/50 [00:22<00:08,  1.70it/s]


游戏名称: Rocket League 找到 1 个结果:
  1. Rocket League - 发布日期: 2015-07


测试游戏:  74%|███████▍  | 37/50 [00:22<00:07,  1.76it/s]


游戏名称: Dragon's Dogma: Dark Arisen 找到 1 个结果:
  1. Dragon's Dogma: Dark Arisen - 发布日期: 2013-04


测试游戏:  76%|███████▌  | 38/50 [00:23<00:07,  1.68it/s]


游戏名称: The Legend of Zelda: Ocarina of Time 找到 1 个结果:
  1. The Legend of Zelda: Ocarina of Time - 发布日期: 1998-11


测试游戏:  78%|███████▊  | 39/50 [00:23<00:06,  1.69it/s]


游戏名称: Punch Club 找到 1 个结果:
  1. Punch Club - 发布日期: 2016-01


测试游戏:  80%|████████  | 40/50 [00:24<00:05,  1.70it/s]


游戏名称: The Binding of Isaac: Repentance 找到 3 个结果:
  1. The Binding of Isaac: Repentance - 发布日期: 2021-03
  2. The Binding of Isaac: Repentance - 发布日期: 2021-03
  3. The Binding of Isaac: Repentance - 发布日期: 2021-11


测试游戏:  82%|████████▏ | 41/50 [00:25<00:05,  1.71it/s]


游戏名称: Rust 找到 2 个结果:
  1. Rust - 发布日期: 2024-04
  2. Rust - 发布日期: 2018-02


测试游戏:  84%|████████▍ | 42/50 [00:25<00:04,  1.76it/s]


游戏名称: Bloodborne 找到 1 个结果:
  1. Bloodborne - 发布日期: 2015-03


测试游戏:  86%|████████▌ | 43/50 [00:26<00:03,  1.76it/s]


游戏名称: The Elder Scrolls V: Skyrim 找到 2 个结果:
  1. The Elder Scrolls V: Skyrim - 发布日期: 2011-11
  2. The Elder Scrolls V: Skyrim - 发布日期: 2017-11


测试游戏:  88%|████████▊ | 44/50 [00:26<00:03,  1.78it/s]


游戏名称: Madden NFL 16 找到 2 个结果:
  1. Madden NFL 16 - 发布日期: 2015-08
  2. Madden NFL 16 - 发布日期: 2015-08


测试游戏:  90%|█████████ | 45/50 [00:27<00:02,  1.82it/s]


游戏名称: Dark Souls 找到 1 个结果:
  1. Dark Souls - 发布日期: 2011-09


测试游戏:  92%|█████████▏| 46/50 [00:27<00:02,  1.85it/s]


游戏名称: XCOM: Enemy Within 找到 2 个结果:
  1. XCOM: Enemy Within - 发布日期: 2013-11
  2. XCOM: Enemy Within - 发布日期: 2013-11


测试游戏:  94%|█████████▍| 47/50 [00:28<00:01,  1.85it/s]


游戏名称: Lineage II: The Chaotic Throne - The 1st Throne: The Kamael 找到 1 个结果:
  1. Lineage II: The Chaotic Throne - The 1st Throne: The Kamael - 发布日期: Unknown


测试游戏:  96%|█████████▌| 48/50 [00:28<00:01,  1.83it/s]


游戏名称: Euro Truck Simulator 2 找到 1 个结果:
  1. Euro Truck Simulator 2 - 发布日期: 2012-10


测试游戏:  98%|█████████▊| 49/50 [00:29<00:00,  1.80it/s]


游戏名称: The Legend of Zelda: Majora's Mask 找到 1 个结果:
  1. The Legend of Zelda: Majora's Mask - 发布日期: 2000-04


测试游戏: 100%|██████████| 50/50 [00:30<00:00,  1.66it/s]

测试完成，结果已保存至 test_first_50_games.csv

测试结果摘要:
                               Game Release Year Release Month  \
0                 League of Legends         2009            10   
1  Counter-Strike: Global Offensive         2012            08   
2                            Dota 2         2013            07   
3                       Hearthstone         2014            03   
4       Call of Duty: Black Ops III         2015            11   
5                         Minecraft         2016            12   
6                 World of Warcraft         2004            11   
7                 Z1: Battle Royale         2018            02   
8             Talk Shows & Podcasts      Unknown       Unknown   
9                           FIFA 16         2015            09   

                  Game Name in IGDB  
0                 League of Legends  
1  Counter-Strike: Global Offensive  
2                            Dota 2  
3                       Hearthstone  
4       Call of Duty: Black Ops III  





In [None]:
import requests
import pandas as pd
import time
import re
from tqdm import tqdm

# IGDB API 凭据
CLIENT_ID = "e1cvubjaunqp8g8rhyxmuog7ld4tsy"
CLIENT_SECRET = "tnettt8ysiuybxxi1g8dlkzlwbzice"

# 获取 OAuth 令牌
def get_access_token():
    url = "https://id.twitch.tv/oauth2/token"
    params = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials"
    }
    response = requests.post(url, params=params)
    response.raise_for_status()
    return response.json()["access_token"]

# 查询 IGDB API 获取游戏信息，使用更精确的查询，并返回多个搜索结果数量
def get_game_info(access_token, game_name):
    url = "https://api.igdb.com/v4/games"
    headers = {
        "Client-ID": CLIENT_ID,
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json"
    }
    
    # 使用更精确的查询条件，并请求更多相关信息
    # 首先尝试完全匹配
    data = f'fields name, genres.name, first_release_date, rating, status, platforms.name; where name = "{game_name}"; limit 5;'
    response = requests.post(url, headers=headers, data=data)
    response.raise_for_status()
    exact_results = response.json()
    
    # 如果没有完全匹配的结果，尝试模糊匹配
    search_results = []
    if not exact_results:
        # 清理游戏名称，删除特殊字符
        clean_name = re.sub(r'[^\w\s]', '', game_name)
        data = f'fields name, genres.name, first_release_date, rating, status, platforms.name; search "{clean_name}"; limit 5;'
        response = requests.post(url, headers=headers, data=data)
        response.raise_for_status()
        search_results = response.json()
    
    # 合并结果 - 如果有精确匹配，就用精确匹配；否则用搜索结果
    results = exact_results if exact_results else search_results
    results_count = len(results)
    
    if results:
        # 选择评分最高或发布最早的游戏
        best_match = None
        highest_rating = -1
        
        for game in results:
            # 确保名称相似度高
            if game["name"].lower() == game_name.lower() or game["name"].lower() in game_name.lower() or game_name.lower() in game["name"].lower():
                rating = game.get("rating", 0)
                if rating > highest_rating:
                    highest_rating = rating
                    best_match = game
        
        # 如果没有找到好的匹配，选择第一个结果
        if not best_match and results:
            best_match = results[0]
            
        if best_match:
            genres = best_match.get("genres", [])
            genre_names = ", ".join([genre["name"] for genre in genres]) if genres else "Unknown"
            release_date = best_match.get("first_release_date")
            
            if release_date:
                # 将Unix时间戳转换为年份和月份
                release_year = time.strftime('%Y', time.gmtime(release_date))
                release_month = time.strftime('%m', time.gmtime(release_date))
                return {
                    "Genre": genre_names, 
                    "Release Year": release_year,
                    "Release Month": release_month,
                    "Game Name in IGDB": best_match["name"],
                    "Multiple Results": "是" if results_count > 1 else "否",
                    "Results Count": results_count
                }
    
    return {
        "Genre": "Unknown", 
        "Release Year": "Unknown",
        "Release Month": "Unknown",
        "Game Name in IGDB": "Not Found",
        "Multiple Results": "否",
        "Results Count": 0
    }

# 主函数：处理所有游戏
def process_all_games(df_games, access_token):
    print(f"开始处理全部 {len(df_games)} 个游戏...")
    game_info_list = []
    
    for game in tqdm(df_games["Game"].tolist(), desc="获取游戏信息"):
        try:
            info = get_game_info(access_token, game)
            game_info_list.append({
                "Game": game,
                "Genre": info["Genre"],
                "Release Year": info["Release Year"],
                "Release Month": info["Release Month"],
                "Game Name in IGDB": info["Game Name in IGDB"],
                "Multiple Results": info["Multiple Results"],
                "Results Count": info["Results Count"]
            })
            # 添加随机延迟0.2-0.5秒，防止API限制
            time.sleep(0.2 + 0.3 * (time.time() % 1))
        except Exception as e:
            print(f"\n处理游戏 '{game}' 时出错: {str(e)}")
            # 如果出错，添加空结果并继续
            game_info_list.append({
                "Game": game,
                "Genre": "Error",
                "Release Year": "Error",
                "Release Month": "Error",
                "Game Name in IGDB": f"Error: {str(e)}",
                "Multiple Results": "否",
                "Results Count": 0
            })
            # 发生错误时稍微多等一下
            time.sleep(1)
        
        # 每处理100个游戏保存一次中间结果，防止意外中断导致数据丢失
        if len(game_info_list) % 100 == 0:
            temp_df = pd.DataFrame(game_info_list)
            temp_df.to_csv(f"game_info_temp_{len(game_info_list)}.csv", index=False, encoding='UTF-8')
            print(f"\n已处理 {len(game_info_list)} 个游戏，临时保存结果")
            
    df_game_info = pd.DataFrame(game_info_list)
    output_path = "game_info_full.csv"
    df_game_info.to_csv(output_path, index=False, encoding='UTF-8')
    print(f"处理完成，文件已保存至 {output_path}")
    
    # 输出统计信息
    multiple_results = df_game_info[df_game_info["Multiple Results"] == "是"].shape[0]
    not_found = df_game_info[df_game_info["Game Name in IGDB"] == "Not Found"].shape[0]
    errors = df_game_info[df_game_info["Release Year"] == "Error"].shape[0]
    
    print("\n处理统计:")
    print(f"总游戏数: {len(df_game_info)}")
    print(f"有多个搜索结果的游戏: {multiple_results} ({multiple_results/len(df_game_info)*100:.1f}%)")
    print(f"未找到的游戏: {not_found} ({not_found/len(df_game_info)*100:.1f}%)")
    print(f"处理出错的游戏: {errors} ({errors/len(df_game_info)*100:.1f}%)")
    
    return df_game_info

if __name__ == "__main__":
    # 读取唯一游戏列表
    unique_games_path = "unique_games.csv"
    try:
        df_games = pd.read_csv(unique_games_path, encoding='UTF-8')
    except UnicodeDecodeError:
        df_games = pd.read_csv(unique_games_path, encoding='ISO-8859-1')
    
    # 获取访问令牌
    print("获取API访问令牌...")
    access_token = get_access_token()
    
    # 询问用户是否想处理整个文件或部分文件
    while True:
        process_option = input("请选择处理方式:\n1. 处理全部游戏\n2. 处理指定数量的游戏\n请输入选择 (1/2): ")
        if process_option == '1':
            # 处理所有游戏
            df_game_info = process_all_games(df_games, access_token)
            break
        elif process_option == '2':
            try:
                num_games = int(input("请输入要处理的游戏数量: "))
                if num_games > 0 and num_games <= len(df_games):
                    # 处理指定数量的游戏
                    df_partial = df_games.head(num_games)
                    df_game_info = process_all_games(df_partial, access_token)
                    break
                else:
                    print(f"请输入有效数字 (1-{len(df_games)})")
            except ValueError:
                print("请输入有效数字")
        else:
            print("无效选择，请重新输入")
    
    print("处理完成！")

获取API访问令牌...
开始处理全部 2360 个游戏...


获取游戏信息:   4%|▍         | 100/2360 [01:10<27:25,  1.37it/s]


已处理 100 个游戏，临时保存结果


获取游戏信息:   8%|▊         | 200/2360 [02:19<23:24,  1.54it/s]


已处理 200 个游戏，临时保存结果


获取游戏信息:  13%|█▎        | 300/2360 [03:31<22:09,  1.55it/s]


已处理 300 个游戏，临时保存结果


获取游戏信息:  17%|█▋        | 400/2360 [04:42<20:08,  1.62it/s]


已处理 400 个游戏，临时保存结果


获取游戏信息:  21%|██        | 500/2360 [05:54<19:30,  1.59it/s]


已处理 500 个游戏，临时保存结果


获取游戏信息:  25%|██▌       | 600/2360 [07:05<23:06,  1.27it/s]


已处理 600 个游戏，临时保存结果


获取游戏信息:  30%|██▉       | 700/2360 [08:14<17:37,  1.57it/s]


已处理 700 个游戏，临时保存结果


获取游戏信息:  34%|███▍      | 800/2360 [09:25<17:48,  1.46it/s]


已处理 800 个游戏，临时保存结果


获取游戏信息:  38%|███▊      | 900/2360 [10:33<16:07,  1.51it/s]


已处理 900 个游戏，临时保存结果


获取游戏信息:  42%|████▏     | 1000/2360 [11:41<16:25,  1.38it/s]


已处理 1000 个游戏，临时保存结果


获取游戏信息:  47%|████▋     | 1100/2360 [12:51<14:18,  1.47it/s]


已处理 1100 个游戏，临时保存结果


获取游戏信息:  51%|█████     | 1200/2360 [14:02<13:04,  1.48it/s]


已处理 1200 个游戏，临时保存结果


获取游戏信息:  52%|█████▏    | 1218/2360 [14:14<15:42,  1.21it/s]


处理游戏 'nan' 时出错: expected string or bytes-like object, got 'float'


获取游戏信息:  55%|█████▌    | 1300/2360 [15:15<11:54,  1.48it/s]


已处理 1300 个游戏，临时保存结果


获取游戏信息:  59%|█████▉    | 1400/2360 [16:25<11:32,  1.39it/s]


已处理 1400 个游戏，临时保存结果


获取游戏信息:  64%|██████▎   | 1500/2360 [17:34<09:12,  1.56it/s]


已处理 1500 个游戏，临时保存结果


获取游戏信息:  68%|██████▊   | 1600/2360 [18:46<08:03,  1.57it/s]


已处理 1600 个游戏，临时保存结果


获取游戏信息:  72%|███████▏  | 1700/2360 [19:57<08:34,  1.28it/s]


已处理 1700 个游戏，临时保存结果


获取游戏信息:  76%|███████▋  | 1800/2360 [21:08<05:43,  1.63it/s]


已处理 1800 个游戏，临时保存结果


获取游戏信息:  81%|████████  | 1900/2360 [22:15<05:12,  1.47it/s]


已处理 1900 个游戏，临时保存结果


获取游戏信息:  85%|████████▍ | 2000/2360 [23:23<04:34,  1.31it/s]


已处理 2000 个游戏，临时保存结果


获取游戏信息:  89%|████████▉ | 2100/2360 [24:35<03:28,  1.25it/s]


已处理 2100 个游戏，临时保存结果


获取游戏信息:  93%|█████████▎| 2200/2360 [25:45<01:56,  1.37it/s]


已处理 2200 个游戏，临时保存结果


获取游戏信息:  97%|█████████▋| 2300/2360 [26:56<00:37,  1.62it/s]


已处理 2300 个游戏，临时保存结果


获取游戏信息: 100%|██████████| 2360/2360 [27:38<00:00,  1.42it/s]

处理完成，文件已保存至 game_info_full.csv

处理统计:
总游戏数: 2360
有多个搜索结果的游戏: 397 (16.8%)
未找到的游戏: 144 (6.1%)
处理出错的游戏: 1 (0.0%)
处理完成！





In [1]:
import requests
import pandas as pd
import time
import re
from tqdm import tqdm

# IGDB API 凭据
CLIENT_ID = "e1cvubjaunqp8g8rhyxmuog7ld4tsy"
CLIENT_SECRET = "tnettt8ysiuybxxi1g8dlkzlwbzice"

# 获取 OAuth 令牌
def get_access_token():
    url = "https://id.twitch.tv/oauth2/token"
    params = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials"
    }
    response = requests.post(url, params=params)
    response.raise_for_status()
    return response.json()["access_token"]

# 查询 IGDB API 获取游戏信息
def get_game_info(access_token, game_name):
    url = "https://api.igdb.com/v4/games"
    headers = {
        "Client-ID": CLIENT_ID,
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/json"
    }
    
    data = f'fields name, genres.name, first_release_date, rating, status, platforms.name; where name = "{game_name}"; limit 5;'
    response = requests.post(url, headers=headers, data=data)
    response.raise_for_status()
    exact_results = response.json()
    
    search_results = []
    if not exact_results:
        clean_name = re.sub(r'[^\w\s]', '', game_name)
        data = f'fields name, genres.name, first_release_date, rating, status, platforms.name; search "{clean_name}"; limit 5;'
        response = requests.post(url, headers=headers, data=data)
        response.raise_for_status()
        search_results = response.json()
    
    results = exact_results if exact_results else search_results
    results_count = len(results)
    
    if results:
        best_match = None
        highest_rating = -1
        
        for game in results:
            if game["name"].lower() == game_name.lower() or game["name"].lower() in game_name.lower() or game_name.lower() in game["name"].lower():
                rating = game.get("rating", 0)
                if rating > highest_rating:
                    highest_rating = rating
                    best_match = game
        
        if not best_match and results:
            best_match = results[0]
            
        if best_match:
            genres = best_match.get("genres", [])
            genre_names = ", ".join([genre["name"] for genre in genres]) if genres else "Unknown"
            release_date = best_match.get("first_release_date")
            
            if release_date:
                release_year = time.strftime('%Y', time.gmtime(release_date))
                release_month = time.strftime('%m', time.gmtime(release_date))
                return {
                    "Genre": genre_names, 
                    "Release Year": release_year,
                    "Release Month": release_month,
                    "Game Name in IGDB": best_match["name"],
                    "Multiple Results": "是" if results_count > 1 else "否",
                    "Results Count": results_count
                }
    
    return {
        "Genre": "Unknown", 
        "Release Year": "Unknown",
        "Release Month": "Unknown",
        "Game Name in IGDB": "Not Found",
        "Multiple Results": "否",
        "Results Count": 0
    }

# 处理所有游戏
def process_all_games(df_games, access_token):
    print(f"开始处理全部 {len(df_games)} 个游戏...")
    game_info_list = []
    
    for game in tqdm(df_games["Game"].tolist(), desc="获取游戏信息"):
        try:
            info = get_game_info(access_token, game)
            game_info_list.append({
                "Game": game,
                "Genre": info["Genre"],
                "Release Year": info["Release Year"],
                "Release Month": info["Release Month"],
                "Game Name in IGDB": info["Game Name in IGDB"],
                "Multiple Results": info["Multiple Results"],
                "Results Count": info["Results Count"]
            })
            time.sleep(0.2 + 0.3 * (time.time() % 1))
        except Exception as e:
            print(f"\n处理游戏 '{game}' 时出错: {str(e)}")
            game_info_list.append({
                "Game": game,
                "Genre": "Error",
                "Release Year": "Error",
                "Release Month": "Error",
                "Game Name in IGDB": f"Error: {str(e)}",
                "Multiple Results": "否",
                "Results Count": 0
            })
            time.sleep(1)
            
    df_game_info = pd.DataFrame(game_info_list)
    output_path = "igdb_data.csv"
    df_game_info.to_csv(output_path, index=False, encoding='UTF-8')
    print(f"处理完成，文件已保存至 {output_path}")
    
    return df_game_info

if __name__ == "__main__":
    unique_games_path = "unique_games.csv"
    try:
        df_games = pd.read_csv(unique_games_path, encoding='UTF-8')
    except UnicodeDecodeError:
        df_games = pd.read_csv(unique_games_path, encoding='ISO-8859-1')
    
    print("获取API访问令牌...")
    access_token = get_access_token()
    
    while True:
        process_option = input("请选择处理方式:\n1. 处理全部游戏\n2. 处理指定数量的游戏\n请输入选择 (1/2): ")
        if process_option == '1':
            df_game_info = process_all_games(df_games, access_token)
            break
        elif process_option == '2':
            try:
                num_games = int(input("请输入要处理的游戏数量: "))
                if num_games > 0 and num_games <= len(df_games):
                    df_partial = df_games.head(num_games)
                    df_game_info = process_all_games(df_partial, access_token)
                    break
                else:
                    print(f"请输入有效数字 (1-{len(df_games)})")
            except ValueError:
                print("请输入有效数字")
        else:
            print("无效选择，请重新输入")
    
    print("处理完成！")

获取API访问令牌...
开始处理全部 2360 个游戏...


获取游戏信息:  52%|█████▏    | 1218/2360 [14:25<16:05,  1.18it/s]


处理游戏 'nan' 时出错: expected string or bytes-like object, got 'float'


获取游戏信息: 100%|██████████| 2360/2360 [27:35<00:00,  1.43it/s]

处理完成，文件已保存至 igdb_data.csv
处理完成！



