In [1]:
import requests
import uuid
from urllib.parse import urljoin, urlencode, urlparse, parse_qs, urlunparse
import time
from itertools import islice, tee
import json
import ast
import pandas as pd

In [2]:
BASE_URL = "https://www.taptap.cn/webapiv2/"
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
)

PLATFORM = {
    "ios": "iOS",
    "android": "Android"
}

# Supporting functions

In [3]:
def beautify_json_string(json_str: str) -> str:
    try:
        obj = json.loads(json_str)
        pretty = json.dumps(obj, indent=2, ensure_ascii=False)
        return pretty
    except json.JSONDecodeError as e:
        return f"Invalid JSON: {e}"

In [4]:
def beautify_dict_string(input_str: str) -> str:
    try:
        obj = ast.literal_eval(input_str)  # Convert Python-style dict string to actual dict
        pretty = json.dumps(obj, indent=2, ensure_ascii=False)
        return pretty
    except (ValueError, SyntaxError) as e:
        return f"Invalid input: {e}"

In [5]:
def categorize_ratings_by_value(ratings: list) -> dict:
    result = {'up': [], 'down': []}

    for rating in ratings:
        value = rating.get('value')
        rating_type = rating.get('type')
        if value in result:
            result[value].append(rating_type)

    return result

In [6]:
def convert_and_stream_reviews(raw_reviews, review_count, out_file="concise_reviews.jsonl"):
    with open(out_file, "w", encoding="utf-8") as f:
        for index, review in enumerate(raw_reviews, start=1):
            print(f"Processing review number {index}/{review_count}")
            r = review.get('review', {})
            a = review.get('author', {}).get('user', {})
            app = review.get('app', {})
            stat = review.get('stat', {})

            concise_review = {
                'app_id': app.get('id'),
                'app_title': app.get('title'),
                'review_id': r.get('id'),
                'review_publish_time': review.get('publish_time'),
                'review_score': r.get('score'),
                'review_author_user_id': a.get('id'),
                'review_author_user_name': a.get('name'),
                'device': review.get('device'),
                'review_played_spent': r.get('played_spent'),
                'up_rating_aspects': categorize_ratings_by_value(r.get('ratings', []))['up'],
                'down_rating_aspects': categorize_ratings_by_value(r.get('ratings', []))['down'],
                'review_content_raw_text': r.get('contents', {}).get('raw_text'),
                'comments': stat.get('comments'),
                'review_up_votes': stat.get('ups'),
                'review_down_votes': stat.get('downs')
            }

            f.write(json.dumps(concise_review, ensure_ascii=False) + "\n")

# Main Functions and Classes

In [7]:
class TapTapClient:
    def __init__(self):
        self.session = requests.Session()
        self.session.headers.update({
            "user-agent": USER_AGENT
        })

    def _build_url(self, path: str, params: dict, platform: str = "android") -> str:
        full_url = urljoin(BASE_URL, path)
        parsed = urlparse(full_url)
        query = parse_qs(parsed.query)

        # Merge params into query
        for k, v in params.items():
            query[k] = v

        # Handle platform override
        platform = platform.lower()
        device_platform = PLATFORM.get(platform, "Android")
        query["X-UA"] = (
            f"V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&"
            f"DS={device_platform}&UID={uuid.uuid4()}&OS=Mac+OS&OSV=10.15.7&DT=PC"
        )

        encoded_query = urlencode(query, doseq=True)
        final_url = urlunparse(parsed._replace(query=encoded_query))
        return final_url

    def get(self, path: str, params: dict = None, platform: str = "android"):
        if params is None:
            params = {}

        url = self._build_url(path, params, platform)
        response = self.session.get(url)
        response.raise_for_status()

        data = response.json()
        if not data.get("success", False):
            raise Exception("Request failed")

        return data.get("data", {})

    def list(self, path: str, params: dict = None, platform: str = "android"):
        if params is None:
            params = {}
        params.setdefault("from", 0)
        params.setdefault("limit", 10)

        while True:
            data = self.get(path, params, platform)
            total = data.get("total", 0)
            items = data.get("list", [])
            next_page = data.get("next_page")

            for item in items:
                yield item

            if not next_page or params["from"] + len(items) >= total:
                break

            params["from"] += len(items)
            time.sleep(1)

    def get_app(self, app_id, platform: str = "android", **params):
        params["id"] = app_id
        return self.get("app/v4/detail", params, platform)

    def list_apps(self, type_name="reserve", platform: str = "android", **params):
        params.setdefault("type_name", type_name)
        for row in self.list("app-top/v2/hits", params, platform):
            if not row.get("is_add") and row.get("type") == "app":
                yield row["app"]

    def list_reviews(self, app_id, sort="new", platform: str = "android", **params):
        params.update({
            "app_id": app_id,
            "sort": sort
        })
        for row in self.list("review/v2/list-by-app", params, platform):
            if row.get("type") == "moment":
                yield row["moment"]

# Parameters

In [8]:
client = TapTapClient()

In [11]:
app_id = 629215

# Execute: Get Individual App Info

In [12]:
# Get app details
app_data_android = client.get_app(app_id)

In [13]:
app_data_android

{'id': 629215,
 'identifier': 'com.ycf.herosland.tt',
 'itunes_id': '6480160896',
 'title': '英勇之地',
 'title_labels': [],
 'icon': {'url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg/appicon?t=1',
  'medium_url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg/appicon_m?t=1',
  'small_url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg/appicon_s?t=1',
  'original_url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg',
  'original_format': 'jpeg',
  'width': 270,
  'height': 270,
  'color': '0x907332',
  'original_size': 209058},
 'style': 0,
 'update_time': 1749085200,
 'hidden_button': False,
 'has_moment_rec': False,
 'is_deny_minors': False,
 'is_exclusive': False,
 'app_videos': [{'type': 'app_detail',
   'id': 4620609,
   'thumbnail': {'url': 'https://img.tapimg.com/market/images/898774d8fb54c3bedecbd7d830cf1534.jpg?imageView2/0/w/1080/h/608/format/jpg/interlace/1/ignor

In [14]:
app_data_ios = client.get_app(app_id, platform = "ios")

In [15]:
app_data_ios

{'id': 629215,
 'identifier': 'com.ycf.herosland.tt',
 'itunes_id': '6480160896',
 'title': '英勇之地',
 'title_labels': [],
 'icon': {'url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg/appicon?t=1',
  'medium_url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg/appicon_m?t=1',
  'small_url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg/appicon_s?t=1',
  'original_url': 'https://img.tapimg.com/market/images/c8ec67020a90a75c0b14f18380a07a44.jpg',
  'original_format': 'jpeg',
  'width': 270,
  'height': 270,
  'color': '0x907332',
  'original_size': 209058},
 'style': 0,
 'update_time': 1749085200,
 'hidden_button': False,
 'has_moment_rec': False,
 'is_deny_minors': False,
 'is_exclusive': False,
 'app_videos': [{'type': 'app_detail',
   'id': 4620609,
   'thumbnail': {'url': 'https://img.tapimg.com/market/images/77c9cd809b5ba5a7fa27369323d1eae1.jpg?imageView2/0/w/1080/h/608/format/jpg/interlace/1/ignor

# Execute: Get Reviews of an App

In [41]:
# List reviews for a game Android
reviews = client.list_reviews(app_id, platform = "ios")

In [42]:
preview = list(islice(reviews, 20))  # First items in the list only

In [46]:
preview

[{'id_str': '680028812808816218',
  'created_time': 1749184303,
  'edited_time': 1749184303,
  'publish_time': 1749184303,
  'commented_time': 1749184303,
  'author': {'user': {'id': 720601731,
    'name': 'li    ji',
    'avatar': 'https://img3.tapimg.com/avatars/etag/4da0c5697b3309d0df8b819000c0a5df.jpg?imageMogr2/auto-orient/strip/thumbnail/!270x270r/gravity/Center/crop/270x270/format/jpg/interlace/1/quality/80',
    'medium_avatar': 'https://img3.tapimg.com/avatars/etag/4da0c5697b3309d0df8b819000c0a5df.jpg?imageMogr2/auto-orient/strip/thumbnail/!180x180r/gravity/Center/crop/180x180/format/jpg/interlace/1/quality/40',
    'avatar_pendant': ''}},
  'device': '荣耀9X Pro',
  'can_show_history': True,
  'review': {'id': 45186772,
   'score': 5,
   'contents': {'text': '可以', 'raw_text': '可以'},
   'stage': 1,
   'stage_label': '玩过',
   'hidden_spent': True,
   'source': 1},
  'stat': {},
  'app': {'id': 629215,
   'identifier': 'com.ycf.herosland.tt',
   'itunes_id': '6480160896',
   'titl

In [35]:
# For checking if a review Id is contained in the pulled data

# saved_review = dict()

# for review in reviews:
#     print(review['review']['id'])
#     if review['review']['id'] == 42401154:
#         print(review)
#         saved_review = review
#         break

In [63]:
saved_review

{'id_str': '584009292084938563',
 'created_time': 1726291466,
 'edited_time': 1726292583,
 'publish_time': 1726291466,
 'commented_time': 1727354428,
 'edited': True,
 'author': {'user': {'id': 392835410,
   'name': '宅草',
   'avatar': 'https://img3.tapimg.com/third_avatars/5c2f9a71aeee1677e93651a2a99181c8.jpg?imageMogr2/auto-orient/strip/thumbnail/!270x270r/gravity/Center/crop/270x270/format/jpg/interlace/1/quality/80',
   'medium_avatar': 'https://img3.tapimg.com/third_avatars/5c2f9a71aeee1677e93651a2a99181c8.jpg?imageMogr2/auto-orient/strip/thumbnail/!180x180r/gravity/Center/crop/180x180/format/jpg/interlace/1/quality/40',
   'avatar_pendant': ''}},
 'device': '华为 Nova 8',
 'can_show_history': True,
 'review': {'id': 42401154,
  'score': 2,
  'played_spent': 1055880,
  'contents': {'text': '游戏还是挺不错的，我也是梦幻老玩家，对养成回合制的游戏还是挺有兴趣的，玩的时间也可看得出来。说下缺点和建议吧吧，虽然现在的游戏各种变动只会往金主那边靠，平民玩家的建议也都石沉大海。<br />1.运营对游戏的改动大，还经常进行改动，一个职业一个宠可能一次改动差别就天差地别，可能会导致一个人一个月养成的东西瞬间吃灰。<br />2.日日常（师门，抓鬼，海事，六道，护送，宗门竞技，帮派跑商）<

In [47]:
concise_reviews = convert_and_stream_reviews(reviews, 1808, out_file="heroes_land_concise_reviews_250606_ios.jsonl")

Processing review number 1/1808
Processing review number 2/1808
Processing review number 3/1808
Processing review number 4/1808
Processing review number 5/1808
Processing review number 6/1808
Processing review number 7/1808
Processing review number 8/1808
Processing review number 9/1808
Processing review number 10/1808
Processing review number 11/1808
Processing review number 12/1808
Processing review number 13/1808
Processing review number 14/1808
Processing review number 15/1808
Processing review number 16/1808
Processing review number 17/1808
Processing review number 18/1808
Processing review number 19/1808
Processing review number 20/1808
Processing review number 21/1808
Processing review number 22/1808
Processing review number 23/1808
Processing review number 24/1808
Processing review number 25/1808
Processing review number 26/1808
Processing review number 27/1808
Processing review number 28/1808
Processing review number 29/1808
Processing review number 30/1808
Processing review n

In [39]:
df_reviews_android = pd.read_json("heroes_land_concise_reviews_250606.jsonl", lines=True)

In [49]:
df_reviews_ios = pd.read_json("heroes_land_concise_reviews_250606_ios.jsonl", lines=True)

In [55]:
df_combined = pd.concat([df_reviews_android, df_reviews_ios], ignore_index=True)

In [56]:
df_combined

Unnamed: 0,app_id,app_title,review_id,review_publish_time,review_score,review_author_user_id,review_author_user_name,device,review_played_spent,up_rating_aspects,down_rating_aspects,review_content_raw_text,comments,review_up_votes,review_down_votes
0,629215,英勇之地,45183843,2025-06-06 00:18:36,1,420559404,另一个伊甸,华为 nova 6 5G,,[visual_music],"[gameplay, operation]",什么时候修复什么时候改，一早上起来游戏登不上去，搞了一个小时，删了下，下了删，加载本地资源加...,2.0,2.0,
1,629215,英勇之地,45183823,2025-06-06 00:15:43,1,481592221,nxjxkx,一加 Ace 2 pro,149580.0,[],"[gameplay, operation, visual_music]",什么破游戏，大早上更新完进不去了,3.0,1.0,
2,629215,英勇之地,45183808,2025-06-06 00:13:30,1,542516616,宝贝,小米Mi 10S,,[],"[gameplay, operation, visual_music]",真够垃圾，一天进不去？圈完前就要跑路？游戏都不知道修复下，就会出充值活动,1.0,,
3,629215,英勇之地,45183807,2025-06-06 00:13:17,1,720636863,*nkmn,小米24117RK2CC,,[],"[gameplay, operation, visual_music]",游戏客服效率奇慢无比，只能消息沟通回复一句需要一天半的时间，运行bug多，会突然卡屏，明明网...,1.0,1.0,
4,629215,英勇之地,45183786,2025-06-06 00:09:33,1,690377480,User690377480,Redmi K60,,[],[],一到周末卡得狗一样，游戏都进不去,1.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7532,629215,英勇之地,40322368,2024-04-25 03:27:50,5,541440613,莫名其妙比你帅,Huawei P50 Pro,,[],[],端游1000小时玩家，说良心话，游戏好玩上头，但是商家圈钱套路太明显且下手特别狠，游戏尽头武...,3.0,8.0,
7533,629215,英勇之地,40322093,2024-04-25 03:09:21,3,33207624,不识君,红米 K70,,[],[],端游感觉一般般，手游期待一下吧😏,1.0,2.0,2.0
7534,629215,英勇之地,40321528,2024-04-25 02:25:33,5,418698653,User9527,iPhone 14 Pro,,[],[],居然出手游了！！！？,1.0,8.0,2.0
7535,629215,英勇之地,40321446,2024-04-25 02:19:21,5,484340668,白兰地,Vivo V2072A,,[],[],看画风挺喜欢，期待一波,1.0,13.0,1.0


In [57]:
df_combined.to_csv("output/heroes_land_concise_reviews_250606.csv", index=False, encoding="utf-8-sig")

# Execute: Get Top App List

In [14]:
# List top apps Android
top_apps_android = client.list_apps()

In [15]:
for app in top_apps_android:
    print(app["title"])

异人之下
代号：界
三千幻世
掌门下山
黑色信标
胜利女神：新的希望
杖剑传说
银与绯
Project Rene-模拟人生
洛克王国：世界
英勇之地
盗墓笔记：启程
暴吵萌厨
梦的第七章
渔帆暗涌
七日世界
赛尔号巅峰之战
龙魂旅人
口袋吉伊卡哇 (Chiikawa Pocket)
望月
怪物之家2：勋章
荒野起源
我独自升级 Arise
唱舞星计划
骗子酒馆
异环
华夏千秋
凝渊
二重螺旋
蜡笔小新之小帮手大作战
代号：诡秘
魔法少女まどか☆マギカ Magia Exedra
鹅鸭杀
星痕共鸣
此间山海（TapTap测试版）
落日山丘
无限大
镭明闪击
百相世界
明日方舟：终末地
宝可梦 冠军
逆战：未来
群星纪元
哀鸿：城破十日记
代号：芙娅之魂
火山的女儿
可口的咖啡
腐蚀(Rust)手游
从军
未来之役
Trainee Death Simulator
斗罗大陆：猎魂世界
伍六七：暗影交锋
代号：撤离（TapTap测试版）
远光84
幻想少女公会
洛伊的移动要塞
方舟：生存进化
波斯王子：失落的王冠
崩溃大陆2
潜水员戴夫
代号：JUMP
Notanote（TapTap测试版）
白日梦想屋
苍蓝避风港
地狱之吻
蓝色星原：旅谣
踏风行
怪物乐土
重生之最强输出
卡拉彼丘
对决！剑之川
纸上谈亲
米姆米姆哈
代号:速降(Descenders)
奥特曼：超时空英雄
杀青
Mixlody
不良英雄谭
NBA 2K25梦幻球队
火影忍者：木叶高手
桃源记2
菜鸡梦想家
山海进化录
斗罗大陆：诛邪传说
饥困荒野
山海仙路
辉烬Embers
我在末日囤物资
花花与幕间剧
幻兽帕鲁手游
帝国游戏
烣境
Arcaea
边狱巴士
群星低语-Whispers from the Star
星际战甲Warframe手游
NOeSIS_诉说谎言的记忆之物语
百面千相
数码宝贝：源码
萤火夜话
你来嘛英雄
PuffPals: Island Skies
Astropulse
失落城堡2（TapTap测试版）
境·界 刀鸣
星之破晓
荒原曙光
火影忍者：究极忍者风暴
使命召唤®：战争地带™手游
虚拟化学实验室
冰冬冬小镇
古镇闲居
咒印链接
蛙蛙豹豹的树屋
筑城与探险
彩虹六号
现代战舰
TIRBE NINE：战极死游
Break My Case
异世界∞异世界
武娘
冰与火之舞
源序空间
欺

In [18]:
# List top apps iOS
top_apps_ios = client.list_apps(platform='ios')

In [19]:
for app in top_apps_ios:
    print(app["title"])

渔帆暗涌
Project Rene-模拟人生
口袋吉伊卡哇 (Chiikawa Pocket)
异人之下
从军
洛克王国：世界
掌门下山
逆战：未来
骗子酒馆
潜水员戴夫
英勇之地
代号：界
可口的咖啡
三千幻世
胜利女神：新的希望
腐蚀(Rust)手游
战地1939
方舟：生存进化
菜鸡梦想家
黑色信标
龙魂旅人
NBA 2K25梦幻球队
未来之役
时间对称性破缺
暴吵萌厨
代号:速降(Descenders)
异环
筑梦事务所
落日山丘
米姆米姆哈
冰冬冬小镇
黎明飞驰
伍六七：暗影交锋
七日世界
我独自升级 Arise
白日梦想屋
古镇闲居
望月
无限大
悠星大陆
软泥模拟器
NB 实验室
荒原曙光
星际战甲Warframe手游
梦的第七章
宝可梦 冠军
使命召唤:战区
失落城堡2（TapTap测试版）
PuffPals: Island Skies
最强追逐
火山的女儿
崩溃大陆2
治愈小镇
魔法少女まどか☆マギカ Magia Exedra
迷城陆区
Woodo
火影忍者：究极忍者风暴
露比的打工日记
咒印链接
桃源记2
Phigrim（TapTap测试版）
捣蛋鹅
地下城：我独自爆装
鹅鸭杀
山海奇旅
好想躺平啊，但是
老家奇妙日记
死亡之夜
银与绯
奥特曼：超时空英雄
魔法：精灵世界
对决！剑之川
代号：JUMP
明日方舟：终末地
地狱之吻
战地手游
饥困荒野
幻兽帕鲁手游
龙眠的夏天
黑神话像素版
蛙蛙豹豹的树屋
拾光小院
恐龙岛：沙盒进化
杀青
阿比斯少年冒险团
卡拉彼丘
虚拟化学实验室
花花与幕间剧
搬家模拟器
模拟农场20
洛伊的移动要塞
拼装小窝
三国志幻想大陆2：枭之歌
国潮沙威玛
女武神战纪
Townscaper
小骨：英雄杀手
启示录
阿蛋等什么（while waiting）
Hades
修仙时代
怪物乐土
代号：撤离（TapTap测试版）
代号肉鸽
月影别墅
彩虹六号
Notanote（TapTap测试版）
苍蓝避风港
奇异园艺
老鼠大冒险
杖剑传说
田舍生活
萌宅物语
妖之乡
中国式相亲2
代号：诡秘
侏罗纪岛
我的可爱村
NOeSIS_诉说谎言的记忆之物语
宝可梦TCG口袋版
长生：天机降世
冰与火之舞
董事长模拟器
小小的岛
魔王终局
手办模特
残秽的我们2
荒野起源
冲突2·闪电战
心岛
江湖英雄志
护核纪元
咒术回战 幻影夜行
蓝

In [114]:
# Manually fetch the first page to get total count Android
data = client.get("review/v2/list-by-app", {"app_id": 209601}, platform="android")
print(data["total"])

6481


In [33]:
# Manually fetch the first page to get total count iOS
data = client.get("review/v2/list-by-app", {"app_id": 209601}, platform="ios")
print(data["total"])

6481
