In [2]:
import requests
import pandas as pd

## 總表讀取

In [3]:
# Download selected fields of every document in the `games_reviews` index
# through the scroll API, then load them into the DataFrame `df`.

ES_BASE_URL = "http://192.168.31.130:32327"
SCROLL_KEEP_ALIVE = "1m"  # the scroll context stays alive for 1 minute between requests
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled server would hang the cell forever

# Fields requested from the server and copied into each output record.
SOURCE_FIELDS = [
    "steamId",
    "tags",
    "genres",
    "features",
    "languages",
    "developers",
    "publishers",
]

url = f"{ES_BASE_URL}/games_reviews/_search?scroll={SCROLL_KEEP_ALIVE}"
scroll_url = f"{ES_BASE_URL}/_search/scroll"
headers = {"Content-Type": "application/json"}


def _get_checked(endpoint, payload):
    """Send a GET request with a JSON body and return the response.

    Raises requests.HTTPError on a 4xx/5xx status so that a failed query is
    reported as such instead of as a KeyError on a missing "hits" key later.
    """
    checked_response = requests.get(
        endpoint, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
    )
    checked_response.raise_for_status()
    return checked_response


def _hits_to_records(page_hits):
    """Turn one page of hits into flat dicts holding only SOURCE_FIELDS.

    A field that is absent from a document's `_source` becomes None.
    """
    return [
        {field: hit["_source"].get(field) for field in SOURCE_FIELDS}
        for hit in page_hits
    ]


# Initial scroll query: restrict the returned fields and match every document.
data = {
    "_source": SOURCE_FIELDS,  # only return these fields
    "query": {"match_all": {}},  # match all documents
    "size": 1000,  # 1000 documents per page
}

response = _get_checked(url, data)
response_json = response.json()

# Hits and scroll id of the first page.
hits = response_json["hits"]["hits"]
scroll_id = response_json["_scroll_id"]

# Accumulates one record per document across all pages.
data_list = _hits_to_records(hits)

# Keep asking for the next page until the server returns an empty one.
while hits:
    scroll_data = {
        "scroll": SCROLL_KEEP_ALIVE,
        "scroll_id": scroll_id,  # continue from the id returned by the previous request
    }

    scroll_response = _get_checked(scroll_url, scroll_data)
    scroll_response_json = scroll_response.json()

    hits = scroll_response_json["hits"]["hits"]
    scroll_id = scroll_response_json["_scroll_id"]

    data_list.extend(_hits_to_records(hits))

# Passing `columns` keeps the expected columns even when no document matched,
# so later cells that index df["steamId"] do not fail on an empty result.
df = pd.DataFrame(data_list, columns=SOURCE_FIELDS)

# Release the server-side scroll context now instead of waiting for it to expire.
# The status is deliberately not checked: the data is already loaded and the
# context times out on its own after SCROLL_KEEP_ALIVE.
requests.delete(
    scroll_url,
    headers=headers,
    json={"scroll_id": scroll_id},
    timeout=REQUEST_TIMEOUT,
)

## 整理儲存格資料

In [4]:
def _cell_to_text(cell):
    """Flatten one cell value into a comma-separated string.

    - list / tuple: items are joined with "," (each item converted with str()).
    - missing value (None / NaN): returns "" instead of raising TypeError.
    - any other scalar (e.g. a bare string): used as a single item, so a plain
      string is not split into individual characters by str.join.

    Any "[" / "]" characters are removed, then surrounding whitespace and
    double quotes are stripped.
    """
    if isinstance(cell, (list, tuple)):
        text = ",".join(str(item) for item in cell)
    elif cell is None or (isinstance(cell, float) and pd.isna(cell)):
        return ""
    else:
        text = str(cell)
    return text.replace("[", "").replace("]", "").strip().strip('"')


# Every column after the first (steamId) is flattened to text.
feature = df.columns[1:]

df_combined = pd.DataFrame({"steamId": df["steamId"]})

for column in feature:
    # Convert each cell of this column into a plain comma-separated string.
    df_re = df[column].apply(_cell_to_text)
    df_combined[column] = df_re

df_combined

Unnamed: 0,steamId,tags,genres,features,languages,developers,publishers
0,240,"Shooter,FPS,Action,Multiplayer,Team-Based,Firs...",Action,"Cross-Platform Multiplayer,Steam Achievements,...","English,French,German,Italian,Japanese,Korean,...",Valve,Valve
1,80,"Action,FPS,Shooter,Multiplayer,First-Person,Si...",Action,"Single-player,Valve Anti-Cheat enabled,Family ...","English,French,German,Italian,Spanish - Spain,...",Valve,Valve
2,262280,"Strategy,RPG,Management,Fantasy,Dungeon Crawle...","RPG,Simulation,Strategy","Single-player,Cross-Platform Multiplayer,Steam...","English,German,French,Italian,Spanish - Spain,...",Realmforge Studios,Kalypso Media Digital
3,1179680,"Free to Play,Sandbox,Massively Multiplayer,RPG...","Adventure,Free To Play,Indie,Massively Multipl...","MMO,Online PvP,Online Co-op,Cross-Platform Mul...",English,Code Club AB,Code Club AB
4,688130,"Psychological Horror,Precision Platformer,Diff...","Action,Adventure,Casual,Indie","Single-player,Online PvP,Online Co-op,Steam Ac...","English,French,Italian,German,Spanish - Spain,...",Hendrik Felix Pohl,Hendrik Felix Pohl
...,...,...,...,...,...,...,...
10331,2195410,"Early Access,Atmospheric,Singleplayer,Multipla...","Indie,Strategy,Early Access","Single-player,Online PvP,Online Co-op,Steam Ac...","English,French,German,Spanish - Spain,Japanese...",Chimera Entertainment,"Chimera Entertainment,H2 Interactive (Korea an..."
10332,2293680,"Survival,Crafting,Adventure,Open World Surviva...","Action,Adventure,Indie,Massively Multiplayer,R...","Single-player,MMO,Online PvP,Online Co-op,Fami...",English,Bloodbath Studios,Bloodbath Studios
10333,262100,"Action,Casual,Indie,Pixel Graphics,Destruction...","Action,Casual,Indie","Single-player,Online PvP,Shared/Split Screen P...",English,Transhuman Design,Transhuman Design
10334,1277870,"Competitive,Action,Casual,Physics,Colorful,Cut...","Action,Casual,Sports","Single-player,Online PvP,Shared/Split Screen P...","English,Japanese,Simplified Chinese",VV-LABO,Phoenixx Inc.


## 取出特定資料

In [11]:
# Read the JSON Lines file (one JSON record per line) into a DataFrame.
top_100_path = './top_100_followers.json'
top_100 = pd.read_json(top_100_path, lines=True)

In [12]:
# Show the loaded frame as this cell's output.
top_100

Unnamed: 0,steamId,name,price,reviews,reviewsSteam,followers,avgPlaytime,reviewScore,releaseDate,earlyAccess,wishlists,copiesSold,revenue,players,owners,steamPercent,itemType,totalRevenue,dlc
0,218620,PAYDAY 2,9.99,643508,431157,8278969,130.894073,90,1376366400000,False,634700,17383397,7.732352e+07,25944969,35264283,0.670010,game,1.230356e+08,2
1,578080,PUBG: BATTLEGROUNDS,0.00,2437472,2437472,3290838,292.369584,58,1513832400000,False,1345300,114183189,4.396945e+09,114183189,138872888,1.000000,game,,0
2,271590,Grand Theft Auto V,0.00,1748928,1720183,3227853,190.294729,87,1428897600000,False,3975400,43411442,5.387370e+08,43411442,48914193,0.983564,game,,1
3,381210,Dead by Daylight,19.99,706325,541459,1542924,163.016052,80,1465876800000,False,1694000,12441456,1.220329e+08,16229689,18961263,0.766586,game,4.576770e+08,2
4,359550,Tom Clancy's Rainbow Six® Siege,19.99,1238727,1124284,1242981,217.958171,85,1448946000000,False,2335300,21924953,2.118792e+08,24156736,27447527,0.907612,game,4.929578e+08,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,349040,NARUTO SHIPPUDEN: Ultimate Ninja STORM 4,19.99,81112,68208,214755,21.561872,91,1454562000000,False,694100,3241227,3.265382e+07,3854422,4711860,0.840911,game,5.560984e+07,2
96,2406770,Bodycam,33.32,22123,22123,213173,9.698743,74,1717821426102,True,2674100,607363,1.705330e+07,607363,607363,1.000000,game,,0
97,376210,The Isle,19.99,77157,77157,210768,125.833854,82,1448946000000,True,696300,1540309,2.061464e+07,1540309,1773984,1.000000,game,,0
98,10,Counter-Strike,9.99,232758,152877,210767,184.912729,97,973054800000,False,268400,7011176,2.944835e+07,10674642,16122482,0.656807,game,,0


In [36]:
# Keep only the steamId column; it is used below as the membership list for filtering.
top_100_id = top_100.loc[:, "steamId"]

In [37]:
# Show the selected steamId Series as this cell's output.
top_100_id

0      218620
1      578080
2      271590
3      381210
4      359550
       ...   
95     349040
96    2406770
97     376210
98         10
99    2139460
Name: steamId, Length: 100, dtype: int64

In [38]:
# Cast steamId to an integer dtype so it can be compared with the integer
# ids in top_100_id (the recorded output shows that Series as int64).
steam_id_as_int = df_combined["steamId"].astype(int)
df_combined["steamId"] = steam_id_as_int

In [40]:
# Keep the rows whose steamId appears in top_100_id, then renumber them from 0.
in_top_100 = df_combined["steamId"].isin(top_100_id)
top_100_B = df_combined[in_top_100].reset_index(drop=True)

Unnamed: 0,steamId,tags,genres,features,languages,developers,publishers
0,240,"Shooter,FPS,Action,Multiplayer,Team-Based,Firs...",Action,"Cross-Platform Multiplayer,Steam Achievements,...","English,French,German,Italian,Japanese,Korean,...",Valve,Valve
1,1245620,"Souls-like,Open World,Dark Fantasy,RPG,Difficu...","Action,RPG","Single-player,Online PvP,Online Co-op,Steam Ac...","English,French,Italian,German,Spanish - Spain,...","FromSoftware, Inc.","FromSoftware, Inc.,Bandai Namco Entertainment"
2,427520,"Automation,Base Building,Resource Management,S...","Casual,Indie,Simulation,Strategy","Single-player,Online Co-op,LAN Co-op,Cross-Pla...","English,French,Italian,German,Spanish - Spain,...",Wube Software LTD.,Wube Software LTD.
3,1149460,"Survival,Open World Survival Craft,Base Buildi...","Action,Adventure,Simulation","Single-player,Online Co-op,Family Sharing","English,French,German,Portuguese - Brazil,Russ...",RocketWerkz,RocketWerkz
4,105600,"Open World Survival Craft,Sandbox,Survival,2D,...","Action,Adventure,Indie,RPG","Single-player,Online PvP,Online Co-op,Steam Ac...","English,French,Italian,German,Spanish - Spain,...",Re-Logic,Re-Logic
...,...,...,...,...,...,...,...
95,223750,"Simulation,Flight,Free to Play,Military,Multip...","Simulation,Free To Play","Single-player,Online PvP,LAN PvP,Online Co-op,...","English,German,Spanish - Spain,Russian,Czech,F...",Eagle Dynamics SA,Eagle Dynamics SA
96,1174180,"Open World,Story Rich,Western,Adventure,Multip...","Action,Adventure","Single-player,Online PvP,Online Co-op,Steam Ac...","English,French,Italian,German,Spanish - Spain,...",Rockstar Games,Rockstar Games
97,813780,"Strategy,RTS,City Builder,Multiplayer,Base Bui...",Strategy,"Single-player,Online PvP,LAN PvP,Online Co-op,...","English,French,Italian,German,Japanese,Korean,...","Forgotten Empires,Tantalus Media,Wicked Witch,...",Xbox Game Studios
98,1142710,"Strategy,Turn-Based Strategy,Grand Strategy,RT...","Action,Strategy","Single-player,Online PvP,LAN PvP,Online Co-op,...","English,French,Italian,German,Spanish - Spain,...","CREATIVE ASSEMBLY,Feral Interactive","SEGA,Feral Interactive"


In [None]:
# Write the filtered frame to disk. The original call passed an empty path,
# which cannot be opened for writing.
# NOTE(review): the output file name is an assumption -- adjust as needed.
# JSON Lines (orient="records", lines=True) mirrors the format that the input
# file is read with (pd.read_json(..., lines=True)).
top_100_B.to_json("./top_100_B.json", orient="records", lines=True)