In [2]:
import os
import pandas as pd
import json
import shutil
import numpy as np

In [4]:
import json
from origin_academic.crazy_utils import request_gpt_model_in_new_thread

# Model options handed to request_gpt_model_in_new_thread on every request.
llm_kwargs = dict(
    llm_model="qwen-max",
    max_length=6000,
    temperature=0.9,
    top_p=0.99,
)

def load_data(path, used_fp):
    """Map every used sample id to its JSON file inside *path*.

    Args:
        path: Directory whose entries are enumerated with ``os.listdir``.
        used_fp: CSV file with an ``md5`` column listing the ids to keep.

    Returns:
        Dict from entry name with ``.json`` removed to
        ``os.path.join(path, entry)``, restricted to names that occur in the
        ``md5`` column.
    """
    entries = os.listdir(path)
    wanted = set(pd.read_csv(used_fp)['md5'].to_list())

    selected = {}
    for entry in entries:
        sample_id = entry.split('/')[-1].replace('.json', '')
        if sample_id not in wanted:
            continue
        selected[sample_id] = os.path.join(path, entry)
    return selected

def analyze_activity(activity, src_activities_desc=""):
    """Ask the LLM for a one-sentence summary of what an activity does.

    Two requests are issued through ``request_gpt_model_in_new_thread``:
    the first asks for a structured analysis of the activity (purpose,
    layout structure, relation to predecessor activities, interaction flow);
    the second feeds that analysis back and asks for a one-sentence summary.

    Args:
        activity: Mapping providing at least the ``name`` and ``layouts`` keys.
        src_activities_desc: Description of the predecessor activities; an
            empty string means there are none.

    Returns:
        The result of the second (summarising) request.
    """
    chatbot = []

    # Rendered once so the prompt and the follow-up history carry exactly the
    # same predecessor context.
    src_context = f'前置活动描述：{src_activities_desc}' if src_activities_desc else ''

    analysis_prompt = f"""
    分析以下活动并提供见解：

    活动名称：{activity['name']}
    布局信息：{activity['layouts']}

    {src_context}

    请提供以下信息：
    1. 这个活动可能的用途
    2. 布局的可能结构
    3. 如何与前置活动（如果有）进行连接和交互
    4. 可能的用户交互流程
    """

    analysis_result = request_gpt_model_in_new_thread(
        analysis_prompt,
        llm_kwargs,
        chatbot,
        history=[],
        sys_prompt="你是一位经验丰富的移动应用程序分析师，专门分析应用程序结构和流程。",
        key_index=0
    )

    # The original passed a plain (non-f) string here, so the model received
    # the literal template text instead of the predecessor description.
    # NOTE(review): the history shape expected by the helper is not visible
    # in this file -- confirm a single-entry list is honoured.
    summary_history = [src_context] if src_context else []
    analysis_result2 = request_gpt_model_in_new_thread(
        analysis_result,
        llm_kwargs,
        chatbot,
        history=summary_history,
        sys_prompt="你是一位经验丰富的阅读理解大师，你阅读上面activity描述，用一句话概括activity功能. 例如：“splashActivity页面功能是启动这个健康APP”",
        key_index=0
    )

    return analysis_result2

def ana_json(json_data, analyze_fn=None):
    """Analyse every activity of an app, predecessors first.

    For each activity the descriptions of the activities that transition
    into it are produced first and joined with ``"; "``; that text is passed
    along when the activity itself is analysed.

    Args:
        json_data: Mapping with an ``activities`` list (items carry ``name``)
            and a ``transitions`` list (items carry ``scr`` and ``dest``).
        analyze_fn: Optional callable ``(activity, src_desc) -> str`` used to
            describe one activity. Defaults to ``analyze_activity``.

    Returns:
        Dict mapping activity name to its description.
    """
    if analyze_fn is None:
        analyze_fn = analyze_activity

    # Index activities by name once; the first entry wins on duplicate names,
    # matching a first-match linear search.
    activities_by_name = {}
    for activity in json_data['activities']:
        activities_by_name.setdefault(activity['name'], activity)

    # Predecessor names per activity, de-duplicated in first-seen order.
    # NOTE(review): the source key is spelled 'scr' here -- confirm it matches
    # the JSON schema.
    activity_sources = {name: [] for name in activities_by_name}
    for transition in json_data['transitions']:
        dest = transition['dest']
        if dest not in activity_sources:
            continue
        src = transition['scr']
        if src not in activity_sources:
            continue
        if src not in activity_sources[dest]:
            activity_sources[dest].append(src)

    activity_insights = {}
    # Activities whose analysis has started but not finished; used to break
    # cycles in the transition graph (e.g. A -> B -> A).
    in_progress = set()

    def analyze_activity_recursive(activity_name):
        if activity_name in activity_insights:
            return activity_insights[activity_name]

        in_progress.add(activity_name)
        # A predecessor that is still being analysed would recurse forever,
        # so it is left out of this activity's context.
        src_descs = [
            analyze_activity_recursive(src)
            for src in activity_sources[activity_name]
            if src not in in_progress
        ]
        src_desc = "; ".join(src_descs)

        insight = analyze_fn(activities_by_name[activity_name], src_desc)
        activity_insights[activity_name] = insight
        in_progress.discard(activity_name)
        print(f"已分析 {activity_name}:")
        print(insight)
        print("\n" + "="*50 + "\n")

        return insight

    # Analyse all activities.
    for name in activities_by_name:
        analyze_activity_recursive(name)

    return activity_insights


def main():
    """Load the used scamware samples and analyse each one's activity JSON.

    The sample list comes from ``load_data``; each selected JSON file is
    parsed and handed to ``ana_json``.
    """
    d_dir = "/mnt/maldetect_NAS/aibot/oppo/oppo"
    jfp = os.path.join(d_dir, "native_stat_json_res")
    ufp = os.path.join(d_dir, "actual_used_scamware_native_f_SeTG.csv")
    ana_scam_j = load_data(jfp, ufp)
    # Use each sample's own path: the loop previously reloaded one hard-coded
    # md5 on every iteration, re-analysing the same file once per sample.
    for p in ana_scam_j.values():
        with open(p, mode="r", encoding="utf-8") as f:
            json_data = json.load(f)
        ana_json(json_data)


if __name__ == "__main__":
    main()

已分析 funlife.stepcounter.real.cash.free.shop.ShopActivity:
"ShopActivity是一个允许用户使用步数、喝水任务和日常打卡积累的奖励在内置商店兑换商品或服务的功能模块，通过各种互动界面和用户账户集成，促进用户的参与


已分析 funlife.stepcounter.real.cash.free.shop.step.TargetActivity:
部分。


已分析 funlife.stepcounter.real.cash.free.activity.splash.SplashLauncherActivity:
过渡过程。


已分析 funlife.stepcounter.real.cash.free.activity.splash.LaunchTransferActivity:
实际应用程序的需求


已分析 funlife.stepcounter.real.cash.free.activity.splash.SplashActivity:
`SplashActivity` 功能概括：启动页面，展示品牌并初始化必要资源，为用户进入步数计




# 草稿



sudo mount -t nfs 10.177.35.161:/volume1/maldetect_nas /home/congxy/nas_tmp -o proto=tcp -o nolock



In [None]:
# md5 ids of the scam samples to inspect; the trailing notes are translated
# app names.
ana_scam = [
    "1b9edaa86ddf0cfec866463e7201b5f3",  # one-tap step counter
    "46a0f7212a6eb37671c6a6a2b9038ddf",  # golden-bull step counter
    "d05cf88e935571e591e6b90761168d90",  # quick-borrow installment loan
    "41170871f305d905fba7513fdc4d7e77",  # "Weijie" credit loan
    "6b58cfa6a670f8dfa4aa1fa050d6a4b8",  # "E" installment loan
]
ana_scam = set(ana_scam)

p = "/home/aibot/workspace/SquiDroidAgent/data/hit_public_data/hit_app_static_filesuseful_json"
# Sample id -> path of its JSON file anywhere below p.
ana_scam_j = {}
for root, dirs, files in os.walk(p):
    for f in files:
        rf = f.replace('.json','')
        if rf in ana_scam:
            print(rf)
            # Join with the directory currently being walked: joining with
            # the top-level p gives a wrong path for files in subdirectories.
            ana_scam_j[rf] = os.path.join(root, f)
ana_scam_j

# 开发日志

1. 需要对名称被混淆的 apk 的分析结果另行处理，重点获取 value 和 layout 信息，以推测页面功能