In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
### all sensors in the Android sensor list
# The position of a name in this list is its column offset in the sensor tables,
# so the order must stay stable (trailing comment = list index).
All_sensors = [
    'Accelerometer',                # 0
    'Accelerometer_uncalibrated',   # 1
    'Ambient_temperature',          # 2
    'Game_rotation_vector',         # 3
    'Geomagnetic_rotation_vector',  # 4
    'Glance_gesture',               # 5
    'Gravity',                      # 6
    'Gyroscope',                    # 7
    'Gyroscope_uncalibrated',       # 8
    'Heart_rate',                   # 9
    'Hinge_angle',                  # 10
    'Light',                        # 11
    'Linear_acceleration',          # 12
    'Magnetic_field_sensor',        # 13
    'Magnetic_field_uncalibrated',  # 14
    'Orientation',                  # 15
    'Relative_humidity',            # 16
    'Pick_up_gesture',              # 17
    'Proximity',                    # 18
    'Pressure',                     # 19
    'Rotation_vector',              # 20
    'Significant_motion',           # 21
    'Step_counter',                 # 22
    'Step_detector',                # 23
    'Tilt_detector',                # 24
    'Wake_up_gesture',              # 25
]

# Map a sensor-related uses-permission (UP) / uses-feature (UF) name to the index of
# its sensor in All_sensors (0 is Accelerometer, 1 is Accelerometer_uncalibrated, ...).
# Indices are looked up by sensor name so each entry states which sensor it targets.
UP_UF_map = {
    tag: All_sensors.index(sensor_name)
    for tag, sensor_name in (
        # UP
        ('ACCESS_BT_HEART_RATE_MONITOR', 'Heart_rate'),
        ('accelerometer', 'Accelerometer'),
        ('proximity', 'Proximity'),
        # UF
        ('android.hardware.sensor.accelerometer', 'Accelerometer'),
        ('android.hardware.sensor.compass', 'Magnetic_field_sensor'),
        ('android.hardware.sensor.gyroscope', 'Gyroscope'),
        ('android.hardware.sensor.proximity', 'Proximity'),
    )
}


In [3]:
def _manifest_attr(line, attr):
    """Return the double-quoted value of attribute `attr` found on `line`.

    Returns None when `attr="` does not occur on the line or the closing quote
    is missing, so callers can skip lines they cannot interpret.
    """
    _, marker, tail = line.partition(attr + '="')
    if not marker:
        return None
    value, closing_quote, _ = tail.partition('"')
    return value if closing_quote else None


def FindUP_UF(path):
    """Collect the uses-permission / uses-feature names declared in a manifest file.

    The file is scanned line by line (it is not parsed as XML), so an element is
    only recognised when its tag and its android:name attribute sit on the same
    line, e.g. <uses-permission android:name="android.permission.GET_TASKS"/>.
    Attributes are looked up by name, so their order on the line does not matter.

    INPUT: path of the AndroidManifest.xml to scan
    OUTPUT: tuple (UP, UF_T, UF_F) of sets
        UP   - last dot-separated segment of each uses-permission name
        UF_T - uses-feature names whose android:required is "true" or absent
        UF_F - uses-feature names with any other android:required value

    Side effect: every name is also added to the notebook-level sets allUP / allUF;
    they are created on first use when no cell has defined them yet.
    """
    # Reuse the notebook-wide aggregates when present instead of failing with a NameError.
    seen_permissions = globals().setdefault('allUP', set())
    seen_features = globals().setdefault('allUF', set())

    UP=set()
    UF_T=set()
    UF_F=set()
    # Manifests are UTF-8 text; undecodable bytes are replaced rather than aborting the
    # scan, since only the attribute values are of interest.
    with open(path,'r',encoding='utf-8',errors='replace') as f:
        for line in f:
            if 'uses-permission' in line:
                full_name = _manifest_attr(line, 'android:name')
                if full_name is None:
                    continue
                # keep only the trailing segment: android.permission.GET_TASKS -> GET_TASKS
                name = full_name.split('.')[-1]
                UP.add(name)
                seen_permissions.add(name)
            elif 'uses-feature' in line:
                name = _manifest_attr(line, 'android:name')
                if name is None:
                    # e.g. a uses-feature carrying only android:glEsVersion
                    continue
                seen_features.add(name)
                # a feature without an explicit android:required counts as required
                required = _manifest_attr(line, 'android:required')
                if required is None or required == 'true':
                    UF_T.add(name)
                else:
                    UF_F.add(name)

    return UP, UF_T, UF_F

In [4]:
def dct_to_df(dic, repackaged=True):
    """Turn a per-APK UP/UF mapping into a 0/1 sensor-usage table.

    INPUT: dictionary of apk and the UP/UF usage, {SHA256:{UP/UF1, UP/UF2, ...}};
           `repackaged` is copied into the Repackaged column of every row
    OUTPUT: dataframe with columns SHA256, Repackaged, Sensor1, Sensor2, ...
            (one sensor column per entry of All_sensors, in list order)

    Side effect: the global `counter` is incremented once for every apk that has
    at least one sensor column set.
    """
    global counter
    sensor_count = len(All_sensors)

    records = []
    for sha256, entries in dic.items():
        # every sensor starts as unused
        flags = [0] * sensor_count

        # switch on the column of each UP/UF that maps to a sensor; others are ignored
        for sensor_idx in (UP_UF_map[entry] for entry in entries if entry in UP_UF_map):
            flags[sensor_idx] = 1

        # flags only ever hold 0 or 1, so any() is true exactly when one is set
        if any(flags):
            counter += 1
        records.append([sha256, repackaged] + flags)

    return pd.DataFrame(records, columns=['SHA256', 'Repackaged'] + All_sensors)

In [6]:
### Batch of repackaging pairs handled by this run
# The slice bounds are defined once and also label the output CSVs, so the data
# and the file names cannot drift apart when the batch is changed.
# NOTE(review): indices are 0-based positions in repackaging_pairs.txt (stop is
# exclusive); confirm this is the intended batch rather than 1-based line numbers.
BATCH_START, BATCH_STOP = 9001, 12001
batch_tag = '[{}-{}]'.format(BATCH_START, BATCH_STOP-1)

# Aggregates filled by FindUP_UF as a side effect: every UP / UF name seen in this batch
allUP=set()
allUF=set()
# {apk: set of names}; suffix _o = original apk (first field of a pair), _r = repackaged apk
UP_dct_o, UP_dct_r={},{}
UF_T_dct_o, UF_T_dct_r={},{}
UF_F_dct_o, UF_F_dct_r={},{}

### Create dictionary of apk and its UP/UF, {apk:{up1,up2...}}
# read inside a `with` block so the file handle is closed right away
with open('repackaging_pairs.txt','r') as pairs_file:
    allfiles=pairs_file.readlines()
targetfiles = allfiles[BATCH_START:BATCH_STOP]
for line in targetfiles:
    line=line.strip()
    if not line:
        # tolerate blank lines (e.g. a trailing newline at the end of the list)
        continue
    o,r=line.split(',')
    path_o=os.path.join('APK_decompile',o,'resources/AndroidManifest.xml')
    path_r=os.path.join('APK_decompile',r,'resources/AndroidManifest.xml')
    UP_dct_o[o], UF_T_dct_o[o], UF_F_dct_o[o]=FindUP_UF(path_o)
    UP_dct_r[r], UF_T_dct_r[r], UF_F_dct_r[r]=FindUP_UF(path_r)
print('Dictionaries completed!')

### Convert the dictionary to dataframe and to CSV
# dct_to_df increments the global `counter`, so it is reset before each category and
# afterwards holds the number of apks (original + repackaged) with a sensor flag set.
counter=0
UP_o=dct_to_df(UP_dct_o,False)
UP_r=dct_to_df(UP_dct_r)
print('UP completed!')
print('=> number of apks with sensor-related UP is {}'.format(counter))

counter=0
UF_T_o=dct_to_df(UF_T_dct_o,False)
UF_T_r=dct_to_df(UF_T_dct_r)
print('UF required=T completed!')
print('=> number of apks with sensor-related UF(required=T) is {}'.format(counter))

counter=0
UF_F_o=dct_to_df(UF_F_dct_o,False)
UF_F_r=dct_to_df(UF_F_dct_r)
print('UF required=F completed!')
print('=> number of apks with sensor-related UF(required=F) is {}'.format(counter))

### Combine repackaged and original into a single dataframe
UP=pd.concat([UP_o,UP_r])
UF_T=pd.concat([UF_T_o,UF_T_r])
UF_F=pd.concat([UF_F_o,UF_F_r])

UP.to_csv(batch_tag+'Uses-permission.csv',index=False)
UF_T.to_csv(batch_tag+'Uses-feature_required-True.csv',index=False)
# NOTE(review): 'require-False' (without the 'd') is kept unchanged in case other
# code already reads this file name.
UF_F.to_csv(batch_tag+'Uses-feature_require-False.csv',index=False)
print('CSVs completed!')
    

Dictionaries completed!
UP completed!
=> number of apks with sensor-related UP is 4
UF required=T completed!
=> number of apks with sensor-related UF(required=T) is 96
UF required=F completed!
=> number of apks with sensor-related UF(required=F) is 0
CSVs completed!
