In [1]:
import os
import json
import pandas as pd
from typing import List, Dict, Any
import re
import openpyxl
import sys

sys.path.append('../scan_detector/')

import util


In [21]:

# JSON file loaded further down into `ios_to_android`; its top-level keys are used as iOS app IDs.
matching_file = "../../data/dataset/all_matching.json"
# Plain-text ID lists, read line by line via get_dataset_ids_from_file.
ios_top_apps = "../../data/dataset/ios/ios_popular.txt"
ios_random_apps = "../../data/dataset/ios/ios_random.txt"


# Newline-delimited JSON (one document per line), read via load_file.
all_permission =  "../../data/plist_results/2024_01_20_all.njson"



In [3]:
def load_file(path):
    """
    Loads a newline-delimited JSON file.

    Args:
        path (str): The path of the file; every non-blank line must hold one JSON document.

    Returns:
        list: The decoded documents, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    with open(path, "r") as f:
        # Iterate the handle directly so the file is streamed instead of
        # being materialised as a list of lines first.
        for line in f:
            # Blank / whitespace-only lines carry no record; json.loads would
            # raise on them, so skip them explicitly.
            if not line.strip():
                continue
            records.append(json.loads(line))
    return records

In [4]:
def load_mapping_file(file):
    """
    Reads a JSON file and collects the "app_id" of each of its entries.

    Args:
        file (str): The path of the JSON file; its top-level value is iterated
            and every element is indexed with "app_id".

    Returns:
        list: The "app_id" values, in file order.
    """
    with open(file, "r") as handle:
        entries = json.load(handle)
    return [entry["app_id"] for entry in entries]

In [5]:
def get_all_per_app(app):
    """
    Merges the elements of every value of an app record into one set.

    Args:
        app (dict): Mapping whose values are all iterable; the keys are ignored.

    Returns:
        set: The union of the elements of all values.
    """
    # NOTE(review): a string value contributes its individual characters, not
    # the string as a whole — confirm that is acceptable for scalar fields.
    return set().union(*app.values())

In [56]:
def contain_app(all_apps, app_path):
    """
    Tells whether any known app occurs in a path, followed by an underscore.

    Args:
        all_apps: Iterable of app identifiers.
        app_path (str): The path that is searched.

    Returns:
        bool: True if "<app>_" is a substring of `app_path` for at least one app.
    """
    return any(f"{candidate}_" in app_path for candidate in all_apps)


def get_ios_app_id(app_path: str) -> str:
    """
    Derives the iOS app ID from the file name of an app path.

    Args:
        app_path (str): The path of the iOS app.

    Returns:
        str: The part of the base name in front of the first "_..." + ".ipa"
        suffix, or the whole base name when there is no such suffix.
    """
    file_name = os.path.basename(app_path)
    suffix = re.search(r"_.*\.ipa", file_name)
    if suffix is None:
        return file_name
    return file_name[: suffix.start()]


def analyze_dataset(dataset, dataset_ids):
    """
    Groups the apps of a dataset by the local-network keys they declare.

    Only the first record of each app ID is considered, and only if that ID is
    part of `dataset_ids`.

    Args:
        dataset: Iterable of app records (dicts). Each record needs an "app"
            entry holding the app path; the record is passed to
            get_all_per_app to obtain its declared keys.
        dataset_ids: Iterable of the (hashable) app IDs that belong to the dataset.

    Returns:
        dict: Maps a category to the set of app IDs in it. Categories are
        "both", "NSLocalNetworkUsageDescription" (only that key) and
        "NSBonjourServices" (only that key). A category without members is
        absent from the result.
    """
    # Membership is checked once per record; a set keeps that check constant
    # time even when the caller hands in a list.
    wanted_ids = set(dataset_ids)
    seen_ids = set()
    result = {}
    for app in dataset:
        app_id = get_ios_app_id(app["app"])
        # Skip repeated records and apps outside the dataset before doing any
        # further work on the record.
        if app_id in seen_ids or app_id not in wanted_ids:
            continue
        seen_ids.add(app_id)

        declared = get_all_per_app(app)
        has_usage_description = "NSLocalNetworkUsageDescription" in declared
        has_bonjour = "NSBonjourServices" in declared
        if has_usage_description and has_bonjour:
            category = "both"
        elif has_usage_description:
            category = "NSLocalNetworkUsageDescription"
        elif has_bonjour:
            category = "NSBonjourServices"
        else:
            # Declares neither key: not part of any category.
            continue
        result.setdefault(category, set()).add(app_id)
    return result


def set_to_lens(result):
    """
    Replaces every value of a mapping by its length.

    Args:
        result (dict): Mapping whose values support len().

    Returns:
        dict: A new mapping with the same keys and the value lengths.
    """
    return {category: len(members) for category, members in result.items()}

In [7]:
def get_dataset_ids_from_file(path: str) -> List[str]:
    """
    Retrieves the iOS app IDs of a dataset from the given file path.

    The file is expected to hold one app ID per line. Surrounding whitespace
    is stripped, and blank lines are skipped so that they neither show up as
    an empty ID nor inflate the size of the returned list.

    Args:
        path (str): The path of the dataset.

    Returns:
        List[str]: The list of iOS app IDs, in file order.
    """
    with open(path, "r") as f:
        stripped_lines = (line.strip() for line in f)
        return [app_id for app_id in stripped_lines if app_id]


def extract_column_a(file_path: str, sheet_name) -> list:
    """
    Reads the first cell value of every row of one worksheet of an xlsx file.

    Args:
        file_path (str): The path of the xlsx file.
        sheet_name: The name of the worksheet to read.

    Returns:
        list: The values of column A, one per row, in sheet order.
    """
    sheet = openpyxl.load_workbook(file_path)[sheet_name]
    return [cells[0] for cells in sheet.iter_rows(values_only=True)]


def not_in_ids(dataset, idlist):
    """
    Prints every element of `dataset` that is not contained in `idlist`.

    Args:
        dataset: Iterable of elements to check.
        idlist: Container that is queried with `in` for each element.
    """
    # The generator stays lazy, so every element is printed as soon as it is
    # found to be absent.
    absent = (entry for entry in dataset if entry not in idlist)
    for entry in absent:
        print(entry)

In [9]:
def remove_rows_if_id_not_matches(file_path: str, sheet_name, ids, all_apps):
    """
    Collects the first worksheet row of every wanted app ID into a DataFrame.

    Rows whose first cell (stringified and stripped) is not in `ids`, or that
    repeat an ID which was already collected, are deleted from the in-memory
    worksheet. The workbook is not written back to disk. IDs that occur in
    only one of `all_apps` and the worksheet are printed, one per line.

    Args:
        file_path (str): The path of the xlsx file.
        sheet_name: The name of the worksheet to read.
        ids: Container of the app IDs (strings) whose rows are kept.
        all_apps (dict): Collections of app IDs keyed by category. The "both"
            entry and the "hasNSLocalNetworkUsageDescription" entry (or, when
            that key is absent, "NSLocalNetworkUsageDescription") are compared
            with the worksheet. A missing category counts as empty.

    Returns:
        pd.DataFrame: One row per kept worksheet row, with the columns
        "App ID", "German", "English", "Translate" and "Code".
    """
    workbook = openpyxl.load_workbook(file_path)
    sheet = workbook[sheet_name]

    ids_in_file = set()
    rows_to_remove = []
    dataset = []
    # openpyxl addresses rows starting at 1, so the counter has to as well.
    for row_index, row in enumerate(sheet.iter_rows(), start=1):
        row_id = str(row[0].value).strip()
        if row_id in ids and row_id not in ids_in_file:
            dataset.append([cell.value for cell in row])
            ids_in_file.add(row_id)
        else:
            rows_to_remove.append(row_index)

    # Delete bottom-up: removing a row shifts every row below it, which would
    # invalidate the remaining indices if we went top-down.
    for row_index in reversed(rows_to_remove):
        sheet.delete_rows(row_index)

    # NOTE(review): analyze_dataset in this notebook emits the key
    # "NSLocalNetworkUsageDescription" (without the "has" prefix) and leaves
    # out empty categories; both spellings are accepted here — confirm which
    # one callers actually pass.
    usage_only = all_apps.get(
        "hasNSLocalNetworkUsageDescription",
        all_apps.get("NSLocalNetworkUsageDescription", ()),
    )
    expected_ids = set(all_apps.get("both", ())) | set(usage_only)

    # Expected by the analysis but without a row in the worksheet.
    for app in sorted(expected_ids - ids_in_file, key=str):
        print(app)
    # Present in the worksheet but not expected by the analysis.
    for app in sorted(ids_in_file - expected_ids, key=str):
        print(app)

    #workbook.save(file_path)
    return pd.DataFrame(dataset, columns=["App ID", "German", "English", "Translate", "Code"])

In [11]:
# Load the matching file. Its top-level keys are used below as the iOS app IDs
# of the matching dataset (presumably it maps iOS apps to their Android
# counterparts — TODO confirm).
with open(matching_file, "r") as f:
    ios_to_android = json.load(f)

In [14]:
# App-ID lists of the three datasets. The first two are lists read from text
# files; the third is a dict keys view of the loaded matching file.
ios_random_ids = get_dataset_ids_from_file(ios_random_apps)
ios_top_ids = get_dataset_ids_from_file(ios_top_apps)
ios_matching_ids = ios_to_android.keys()


In [42]:
# All records of the newline-delimited JSON results file, one dict per line.
total = load_file(all_permission)


In [60]:
# Per-category app counts for the IDs listed in the ios_random_apps file.
random_result = set_to_lens(analyze_dataset(total, ios_random_ids))

In [61]:
# Per-category app counts for the IDs listed in the ios_top_apps file.
top_result = set_to_lens(analyze_dataset(total, ios_top_ids))

In [62]:
# Per-category app counts for the IDs that are keys of the matching file.
matching_result = set_to_lens(analyze_dataset(total, ios_matching_ids))

In [65]:
def print_results(result, dataset, size = None):
    """
    Prints the local-network statistics of one dataset.

    Six lines are printed: the number of apps with
    NSLocalNetworkUsageDescription and its share of `size`, the number of apps
    with NSBonjourServices and its share of `size`, and the number of apps with
    only NSBonjourServices and its share of all NSBonjourServices apps. A share
    whose denominator is zero is printed as "n/a".

    Args:
        result (dict): App counts keyed by "both",
            "NSLocalNetworkUsageDescription" and "NSBonjourServices"; a
            missing key counts as 0.
        dataset: Sized collection of the dataset's app IDs; only its length is
            used, and only when `size` is None.
        size (int, optional): Denominator for the dataset-wide shares.
            Defaults to len(dataset).
    """
    def _percentage(part, whole):
        # A zero denominator has no meaningful share; say so instead of raising.
        return f"{part / whole:.2%}" if whole else "n/a"

    if size is None:
        size = len(dataset)

    # analyze_dataset leaves out categories that have no members, so every
    # lookup needs a default.
    both = result.get("both", 0)
    only_usage_description = result.get("NSLocalNetworkUsageDescription", 0)
    only_bonjour = result.get("NSBonjourServices", 0)

    usage_description = both + only_usage_description
    bonjour = both + only_bonjour

    print(usage_description)
    print(_percentage(usage_description, size))
    print(bonjour)
    print(_percentage(bonjour, size))
    print(only_bonjour)
    print(_percentage(only_bonjour, bonjour))

    
    
    

In [None]:
# Shares are relative to the number of IDs read from the ios_random_apps file.
print_results(random_result, ios_random_ids)

In [None]:
# Shares are relative to the number of IDs read from the ios_top_apps file.
print_results(top_result, ios_top_ids)

In [None]:
# NOTE(review): 10862 replaces len(ios_matching_ids) as the denominator; where
# the number comes from is not shown in this notebook — confirm it and move it
# into a named constant next to the path definitions.
print_results(matching_result, ios_matching_ids, size =10862)

In [None]:
# Show the raw per-category counts of the matching dataset.
matching_result

In [None]:
# Show the raw per-category counts of the top-apps dataset.
top_result