In [1]:
# Get all the apks' VirusTotal report, saved as `vt_reports.json`
#
# Reports are stored one JSON document per line, which lets an interrupted
# crawl resume: hashes that already have a stored report are skipped.
# The VirusTotal API key is read from the VT_API_KEY environment variable
# instead of being committed to the notebook.

import json
import logging
import os
import time

import pandas as pd
import requests

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
logging.info("Init")

HASH_WORKBOOK = 'Mobile_Apps.xlsx'
REPORTS_FILE = 'vt_reports.json'
VT_FILE_URL = "https://www.virustotal.com/api/v3/files/{sha256}"
VT_REQUEST_INTERVAL_SECONDS = 20  # pause between requests to stay under the API quota
VT_REQUEST_TIMEOUT_SECONDS = 60   # never let a stalled connection hang the crawl


def load_crawled_hashes(path):
    """Return the set of SHA-256 values that already have a report in `path`.

    Each line of `path` is expected to be one JSON report. Blank lines and
    lines that are not a well-formed report (invalid JSON, or no
    data.attributes.sha256 entry) are skipped with a warning, so one bad line
    neither aborts the scan nor hides the valid reports that follow it.
    A missing file yields an empty set.
    """
    crawled = set()
    try:
        with open(path, 'r', encoding='utf-8') as f:
            lines = f.read().splitlines()
    except FileNotFoundError:
        logging.info(f"{path} does not exist yet; starting from scratch.")
        return crawled

    for line_number, line in enumerate(lines, start=1):
        if not line.strip():
            continue
        try:
            crawled.add(json.loads(line)['data']['attributes']['sha256'])
        except (ValueError, KeyError, TypeError) as e:
            logging.warning(f"Skipping line {line_number} of {path}: {type(e)}: {e}")
    return crawled


df = pd.read_excel(HASH_WORKBOOK, sheet_name='Hash')
# Normalise so the values compare equal to the lowercase `sha256` stored in the
# reports; empty cells are dropped rather than being crawled as "nan".
hash_all = df['SHA-256'].dropna().astype(str).str.strip().str.lower().to_list()

hash_done = load_crawled_hashes(REPORTS_FILE)
hash_todo = set(hash_all) - hash_done
logging.info(f"Among {len(hash_all)} hash values, {len(hash_todo)} VT reports are left to be crawled.")


headers = {
    "accept": "application/json",
    "x-apikey": os.environ.get("VT_API_KEY", ""),
}

vt_count = 0
if hash_todo:
    # The key is only required when there is something left to download.
    if not headers["x-apikey"]:
        raise RuntimeError("Set the VT_API_KEY environment variable before crawling VirusTotal.")

    with open(REPORTS_FILE, 'a', encoding='utf-8') as f:
        for sha256 in sorted(hash_todo):
            try:
                response = requests.get(
                    VT_FILE_URL.format(sha256=sha256),
                    headers=headers,
                    timeout=VT_REQUEST_TIMEOUT_SECONDS,
                )
                # Error bodies (404 unknown file, 429 quota exceeded, ...) must
                # not be stored: they would break every later read of the file.
                response.raise_for_status()
                report = response.json()
                report['data']['attributes']['sha256']  # raises if the payload is not a file report
                f.write(json.dumps(report) + '\n')
                f.flush()  # keep progress on disk if the crawl is interrupted
                vt_count += 1
                if vt_count % 10 == 0:
                    logging.info(f"Done crawling {vt_count} reports.")
            except (requests.RequestException, ValueError, KeyError, TypeError) as e:
                logging.warning(f"Got an exception {type(e)}: {e}")
            finally:
                # Throttle after failures too, otherwise errors turn into rapid-fire requests.
                time.sleep(VT_REQUEST_INTERVAL_SECONDS)

hash_done = load_crawled_hashes(REPORTS_FILE)
logging.info(f"Done. {len(hash_done)} VT reports are crawled successfully.")


2023-03-12 15:12:52,311 INFO Init
2023-03-12 15:12:53,018 INFO Among 200 hash values, 0 VT reports are left to be crawled.
2023-03-12 15:12:53,108 INFO Done. 200 VT reports are crawled successfully.


In [2]:
# Count the number of apks by malicious level
#
# An apk counts as malicious when at least one security vendor flags it, or
# when the Zenbox android sandbox verdict carries a confidence of at least
# SANDBOX_MALICIOUS_THRESHOLD. The per-level sets are cumulative: an apk is
# added to every level its score reaches.

import json
import logging
from collections import defaultdict

vt_file = 'vt_reports.json'
SANDBOX_NAME = 'Zenbox android'
SANDBOX_MALICIOUS_THRESHOLD = 50

reported_apks = set()
apk_vt_reports = {}
malicious_apks = set()

vendor_malicious_apks = set()
vendor_malicious_level_to_apks = defaultdict(set)
vendor_malicious_levels = [1, 5, 10, 15, 20]

sandbox_malicious_apks = set()
sandbox_malicious_level_to_apks = defaultdict(set)
sandbox_malicious_levels = [50, 60, 70, 80, 90]

with open(vt_file, 'r', encoding='utf-8') as f:
    lines = f.read().splitlines()

for line_number, line in enumerate(lines, start=1):
    if not line.strip():
        continue
    report = json.loads(line)
    if 'data' not in report:
        # e.g. an API error body that was stored instead of a file report
        logging.warning("Skipping line %d of %s: not a file report", line_number, vt_file)
        continue

    apk_id = report['data']['id']
    attributes = report['data']['attributes']
    reported_apks.add(apk_id)
    apk_vt_reports[apk_id] = report

    vendor_malicious_count = attributes['last_analysis_stats']['malicious']
    if vendor_malicious_count > 0:
        malicious_apks.add(apk_id)
        vendor_malicious_apks.add(apk_id)
        for vendor_malicious_level in vendor_malicious_levels:
            if vendor_malicious_count >= vendor_malicious_level:
                vendor_malicious_level_to_apks[vendor_malicious_level].add(apk_id)

    # Sandbox data is optional: a report may have no sandbox verdicts at all,
    # no verdict from this sandbox, or a verdict without a confidence value.
    sandbox_verdicts = attributes.get('sandbox_verdicts')
    verdict = sandbox_verdicts.get(SANDBOX_NAME) if isinstance(sandbox_verdicts, dict) else None
    sandbox_malicious_confidence = verdict.get('confidence') if isinstance(verdict, dict) else None
    if not isinstance(sandbox_malicious_confidence, (int, float)):
        continue

    # NOTE(review): only `confidence` is inspected, not the verdict's category;
    # confirm that a high-confidence verdict here always means "malicious".
    if sandbox_malicious_confidence >= SANDBOX_MALICIOUS_THRESHOLD:
        malicious_apks.add(apk_id)
        sandbox_malicious_apks.add(apk_id)
        for sandbox_malicious_level in sandbox_malicious_levels:
            if sandbox_malicious_confidence >= sandbox_malicious_level:
                sandbox_malicious_level_to_apks[sandbox_malicious_level].add(apk_id)

logging.info(f"Among {len(reported_apks)} apks, {len(malicious_apks)} detected as malicious.")
logging.info(f"And {len(vendor_malicious_apks)} according to Security Vendors, {len(sandbox_malicious_apks)} according to Sandbox.")

for vendor_malicious_level in vendor_malicious_levels:
    logging.info("Security Vendor Malicious >= %d: %d apks", vendor_malicious_level, len(vendor_malicious_level_to_apks[vendor_malicious_level]))
for sandbox_malicious_level in sandbox_malicious_levels:
    logging.info("Sandbox Malicious Confidence >= %d: %d apks", sandbox_malicious_level, len(sandbox_malicious_level_to_apks[sandbox_malicious_level]))

2023-03-12 15:12:53,328 INFO Among 200 apks, 98 detected as malicious.
2023-03-12 15:12:53,329 INFO And 83 according to Security Vendors, 26 according to Sandbox.
2023-03-12 15:12:53,330 INFO Security Vendor Malicious >= 1: 83 apks
2023-03-12 15:12:53,331 INFO Security Vendor Malicious >= 5: 10 apks
2023-03-12 15:12:53,331 INFO Security Vendor Malicious >= 10: 4 apks
2023-03-12 15:12:53,332 INFO Security Vendor Malicious >= 15: 1 apks
2023-03-12 15:12:53,332 INFO Security Vendor Malicious >= 20: 0 apks
2023-03-12 15:12:53,333 INFO Sandbox Malicious Confidence >= 50: 26 apks
2023-03-12 15:12:53,334 INFO Sandbox Malicious Confidence >= 60: 26 apks
2023-03-12 15:12:53,335 INFO Sandbox Malicious Confidence >= 70: 8 apks
2023-03-12 15:12:53,335 INFO Sandbox Malicious Confidence >= 80: 0 apks
2023-03-12 15:12:53,336 INFO Sandbox Malicious Confidence >= 90: 0 apks


# Security Vendors' Analysis

In [3]:
# Statistics of security vendors
#
# Builds a table of "[vendor] detection name" labels and the number of
# vendor-flagged apks each label was reported for, most frequent first.
# Relies on `vendor_malicious_apks` and `apk_vt_reports` from the previous cell.

import pandas as pd
from collections import defaultdict

TOP_N = 20  # number of rows shown in the summary table

# label -> set of apk ids it was reported for (a set, so each apk counts once)
vendor_analyses = defaultdict(set)

for apk_id in vendor_malicious_apks:
    vendors = apk_vt_reports[apk_id]['data']['attributes']['last_analysis_results']
    for vendor, verdict in vendors.items():
        # Entries whose result is None carry no detection name and are skipped.
        if verdict['result'] is not None:
            analysis = f"[{vendor}] {verdict['result']}"
            vendor_analyses[analysis].add(apk_id)

# sorted() is stable, so labels with equal counts keep their first-seen order.
vendor_analyses_sorted = sorted(vendor_analyses.items(), key=lambda item: len(item[1]), reverse=True)
print(f"There are {len(vendor_analyses)} different security vendor analyses observed.\nAnd the {TOP_N} most frequent are listed below.")

pd.set_option('display.max_colwidth', 100)
df = pd.DataFrame(
    [(analysis, len(apk_ids)) for analysis, apk_ids in vendor_analyses_sorted],
    columns=['vendor_analysis', 'occurence'],
)
df.head(TOP_N)

There are 73 different security vendor analyses observed.
And the 20 most frequent are listed below.


Unnamed: 0,vendor_analysis,occurence
0,[Avast-Mobile] Android:Evo-gen [Trj],32
1,[Fortinet] Riskware/PackagingUntrustworthyJiagu!Android,12
2,[Tencent] a.fraud.SCMgeneric,11
3,[Sophos] Android Packed App (PUA),9
4,[Ikarus] PUA.AndroidOS.Jiagu,8
5,[ESET-NOD32] a variant of Android/Packed.Jiagu.D potentially unsafe,8
6,[Avira] ANDROID/Malformed.ZIP.Gen,8
7,[Ikarus] PUA.AndroidOS.DataCollector,7
8,[Google] Detected,7
9,[Cynet] Malicious (score: 99),7


## [Avast-Mobile] Android:Evo-gen [Trj]	

Android:Evo-gen [Trj] is a heuristic detection designed to generically detect a Trojan Horse.

Typical behavior for Trojans like Android:Evo-gen [Trj] is one or more of the following:
- Download and install other malware.
- Use your computer for click fraud.
- Record your keystrokes and the sites you visit.
- Send information about your PC, including usernames and browsing history, to a remote malicious hacker.
- Give remote access to your PC.
- Advertising banners are injected into the web pages that you are visiting.
- Random web page text is turned into hyperlinks.
- Browser popups appear which recommend fake updates or other software.

## [Fortinet] Riskware/PackagingUntrustworthyJiagu!Android

[Fortinet] Riskware/PackagingUntrustworthyJiagu!Android refers to a type of software which may not necessarily be malware, but could pose some security risks or vulnerabilities. The APK file may have been packaged using an untrustworthy method and "Jiagu" refers to the specific APK hardening tool being used.

## [Tencent] a.fraud.SCMgeneric	

a.fraud.SCMgeneric is a detection name for a malicious code by Tencent antivirus software. 

This code refers to a type of malicious software that is commonly used for fraudulent activities such as sending spam messages, making fraudulent calls, displaying unwanted ads, and so on. It can install on the user's device without their knowledge and potentially compromise their privacy and security. This malware may collect personal information, intercept messages, monitor user contacts and location, among other activities, resulting in user privacy being breached.

## [Sophos] Android Packed App (PUA)	

Some apps, while not strictly defined as malware, exhibit sketchy behaviors that may also threaten user privacy and security, which we call potentially unwanted apps (PUA).

Many PUAs contain adware, collect user data unnecessarily, or deceive users with phony malware pop-ups and other scammy behavior.

## [ESET-NOD32] a variant of Android/Packed.Jiagu.D potentially unsafe

Android/Packed.Jiagu.D refers to a specific type of APK hardening tool called Jiagu, which is commonly used to protect APK files from being reverse engineered and modified. However, this tool can also be used by malicious actors to hide or obfuscate malicious code.

Same as [Fortinet] Riskware/PackagingUntrustworthyJiagu!Android

## [Avira] ANDROID/Malformed.ZIP.Gen

Malformed.ZIP suggests that the file is a ZIP archive that has been created or modified in an incorrect or abnormal way. So the file may contain some security risks or vulnerabilities, such as hidden or obfuscated malicious code or other harmful content.

## [Ikarus] PUA.AndroidOS.Jiagu

Jiagu is a type of APK hardening tool that can be used to protect APK files from being reverse-engineered or modified. However, it can also be used by malicious actors to hide or obfuscate malicious code.

Same as [Fortinet] Riskware/PackagingUntrustworthyJiagu!Android and [ESET-NOD32] a variant of Android/Packed.Jiagu.D potentially unsafe

## [Ikarus] PUA.AndroidOS.DataCollector

DataCollector suggests that the application may collect or transmit data from the device without the user's knowledge or consent. This could include personal or sensitive information such as location data, contacts, browsing history, or other data that could be used for malicious purposes.

## [Ikarus] Trojan-Dropper.AndroidOS.Agent

Android/Trojan.Dropper is a malicious app that contains additional malicious app(s) within its payload. The Android/Trojan.Dropper will install the additional malicious app(s) onto an infected mobile device.

On the Android OS, the malicious app(s) to be dropped are most often contained within the Android/Trojan.Dropper’s Assets Directory. The Assets Directory is an optional directory that can be added to an APK to store raw asset files. In the case of a Mobile Trojan Dropper, it contains one or more malicious APKs to be dropped and installed.

## [Avast-Mobile] APK:CRepMalware [PUP]	

The term "PUP" indicates that the program may not necessarily be harmful, but could still pose some security risks or potentially unwanted behavior.

PUPs use the mobile platform and trusting nature of users to install an app which might have cool functionality but comes bundled with unwanted features such as draining the battery, leaking data, and aggressive advertising. These apps aren't necessarily malicious but users might want to reconsider installing due to performance hits or bad reputation.

## [SymantecMobileInsight] AdLibrary:Generisk	

The app may contain information-stealing ad libraries from third-party advertising networks.
