In [None]:
import os
import pandas as pd
import pickle
from androguard.misc import AnalyzeAPK
from multiprocessing import Pool

def extract_api_calls(apk_path, max_calls=1000):
    """Collect the distinct methods referenced by the code of one APK.

    Every cross-reference target reported by ``get_xref_to()`` is rendered as
    ``<class_name>-><name><descriptor>`` and de-duplicated.

    Args:
        apk_path: Path of the APK file handed to ``AnalyzeAPK``.
        max_calls: Maximum number of entries to return (default 1000).
            ``None`` disables the cap.

    Returns:
        A ``(apk_path, calls)`` tuple. ``calls`` is a lexicographically
        sorted list of unique call strings, truncated to ``max_calls``
        entries. If the analysis raises, the error is printed and ``calls``
        is an empty list.
    """
    try:
        _, _, dx = AnalyzeAPK(apk_path)
        api_calls = {
            f"{call.class_name!s}->{call.name!s}{call.descriptor!s}"
            for method in dx.get_methods()
            for _, call, _ in method.get_xref_to()
        }
    except Exception as e:
        # Deliberately broad: this runs once per APK inside a batch, and one
        # unreadable or malformed file must not abort the remaining ones.
        print(f"Failed to analyze {apk_path}: {e}")
        return apk_path, []
    # Sort before truncating: slicing a set directly keeps an arbitrary,
    # hash-order-dependent subset that is not reproducible between runs.
    return apk_path, sorted(api_calls)[:max_calls]

def process_apks(apk_dir):
    """Run ``extract_api_calls`` over every ``.apk`` entry of a directory.

    Args:
        apk_dir: Directory whose immediate entries are scanned. Only names
            ending in ``.apk`` are processed; subdirectories are not
            descended into.

    Returns:
        A list with one ``extract_api_calls`` result per APK, ordered by
        path. Empty when the directory contains no ``.apk`` entries.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``apk_dir`` cannot be
            listed.
    """
    # os.listdir yields entries in arbitrary order; sorting makes the row
    # order of the result reproducible (pool.map preserves input order).
    apk_files = sorted(
        os.path.join(apk_dir, name)
        for name in os.listdir(apk_dir)
        if name.endswith('.apk')
    )
    if not apk_files:
        # Nothing to analyze: skip starting worker processes altogether.
        return []
    with Pool() as pool:
        return pool.map(extract_api_calls, apk_files)

def save_to_pickle(data, output_file):
    """Serialize ``data`` into ``output_file`` using pickle.

    Args:
        data: Any picklable object.
        output_file: Destination path; opened in binary write mode, so an
            existing file is overwritten.
    """
    with open(output_file, 'wb') as sink:
        pickler = pickle.Pickler(sink)
        pickler.dump(data)

def main(apk_dir, output_file):
    """Analyze a directory of APKs and pickle the results as a DataFrame.

    The results of ``process_apks(apk_dir)`` are loaded into a two-column
    DataFrame (``file_name``, ``api_call_sequence``), written to
    ``output_file`` via ``save_to_pickle``, and a confirmation is printed.

    Args:
        apk_dir: Directory passed to ``process_apks``.
        output_file: Path of the pickle file to write.
    """
    column_names = ['file_name', 'api_call_sequence']
    table = pd.DataFrame(process_apks(apk_dir), columns=column_names)
    save_to_pickle(table, output_file)
    print(f"Saved data to {output_file}")


# Input directory and output file for the run; the directory value looks like
# a placeholder and presumably has to be edited before running.
apk_directory = "path/to/apk/directory"
output_pickle_file = "api_calls_sequences.pkl"

# Guard the entry point: worker processes started with the "spawn" method
# re-import the main module, and an unguarded call would make every worker
# try to run the whole pipeline again.
if __name__ == "__main__":
    main(apk_directory, output_pickle_file)