# 1. 用 pickle 把資料讀進來並轉換成 DataFrame 

In [7]:
import pickle
import pandas as pd

# Load Data With Pickle
with open('Apk.csv', 'rb') as f:
    new_dict = pickle.load(f)

# transform to DataFrame
df = pd.DataFrame(new_dict)

<bound method Series.unique of 0       [android.permission.ACCESS_FINE_LOCATION, andr...
1       [com.android.launcher.permission.INSTALL_SHORT...
2       [android.permission.READ_CONTACTS, android.per...
3       [com.android.launcher.permission.INSTALL_SHORT...
4       [android.permission.ACCESS_FINE_LOCATION, andr...
                              ...                        
2273    [android.permission.RECEIVE_SMS, com.software....
2274    [android.permission.RECEIVE_SMS, com.android.l...
2275    [android.permission.RECEIVE_SMS, com.android.l...
2276    [android.permission.RECEIVE_SMS, com.software....
2277    [android.permission.MODIFY_AUDIO_SETTINGS, and...
Name: Permission, Length: 2278, dtype: object>

# 2. 資料前處理 

## 2.1 將 apk 所需權限整理成 list (distinct_permission)

In [12]:
distinct_permission = []
for listA in df['Permission']:
    for item in listA:
        # 保證權限不重複
        if(item not in distinct_permission):
            # 將權限放入 list
            distinct_permission.append(item)

## 2.2 對 Permission 使用 One hot encoding

In [27]:
# 複製一個新的 DataFrame dff
dff = df.copy()

# One hot encoding 的初始化，將所有權限獨立成新的屬性並預設為0
for j in distinct_permission:
    dff[j] = 0

# 實際上做 One hot encoding 的地方，把該 apk 需要的 Permission 屬性欄位改成1
for i in range(len(dff)):
    for j in distinct_permission:
        if j in dff['Permission'][i]:
            dff[j][i] = 1


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dff[j][i] = 1


## 2.3 捨棄多餘欄位

In [35]:
# 刪除 Permission 和 apk
dff = dff.drop(['Permission','apk'],axis=1)

In [36]:
dff

Unnamed: 0,Class,android.permission.ACCESS_FINE_LOCATION,android.permission.SYSTEM_ALERT_WINDOW,android.permission.GET_TASKS,android.permission.RESTART_PACKAGES,android.permission.VIBRATE,android.permission.ACCESS_WIFI_STATE,android.permission.ACCESS_COARSE_LOCATION,android.permission.ACCESS_NETWORK_STATE,android.permission.READ_PHONE_STATE,...,org.google.app.permission.C2D_MESSAGE,com.googleapps.ru.permission.C2D_MESSAGE,android.permission.WRITE_GSERVICES,android.permission.SIGNAL_PERSISTENT_PROCESSES,com.app.android.permission.C2D_MESSAGE,android.permission.FACTORY_TEST,com.googleapps.org.permission.C2D_MESSAGE,android.permission.BIND_REMOTEVIEWS,android.permission.SET_ALWAYS_FINISH,android.permission.READ_INPUT_STATE
0,Adware,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,Adware,1,1,1,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2,Adware,0,0,1,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
3,Adware,1,1,1,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
4,Adware,1,0,0,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2273,SMS,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2274,SMS,1,1,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2275,SMS,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2276,SMS,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


## 2.4 查看類別的種類和個數並做人工 label encoding 

In [38]:
# 查看類別和數量
dff.groupby(['Class']).size()

Class
Adware     498
Banking    608
SMS        607
benign     565
dtype: int64

In [50]:
# 認定 Adware 為惡意軟體編為 1，其餘類別則編為 0
def changeClass(dff):
    if dff.Class == 'Adware':
        return 1
    else:
        return 0

dff['Class'] = dff.apply(changeClass,axis=1)

In [51]:
dff

Unnamed: 0,Class,android.permission.ACCESS_FINE_LOCATION,android.permission.SYSTEM_ALERT_WINDOW,android.permission.GET_TASKS,android.permission.RESTART_PACKAGES,android.permission.VIBRATE,android.permission.ACCESS_WIFI_STATE,android.permission.ACCESS_COARSE_LOCATION,android.permission.ACCESS_NETWORK_STATE,android.permission.READ_PHONE_STATE,...,org.google.app.permission.C2D_MESSAGE,com.googleapps.ru.permission.C2D_MESSAGE,android.permission.WRITE_GSERVICES,android.permission.SIGNAL_PERSISTENT_PROCESSES,com.app.android.permission.C2D_MESSAGE,android.permission.FACTORY_TEST,com.googleapps.org.permission.C2D_MESSAGE,android.permission.BIND_REMOTEVIEWS,android.permission.SET_ALWAYS_FINISH,android.permission.READ_INPUT_STATE
0,1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,1,1,1,1,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,1,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
3,1,1,1,1,1,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
4,1,1,0,0,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2273,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2274,0,1,1,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
2275,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2276,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


# 3. 初步特徵篩選

## 3.1 整理出各種 Permission 被多少 apk 需要

In [65]:
dict_feature = {}
for i in dff.drop('Class',axis=1).columns:
    dict_feature[i] = dff.groupby(i).size()[1]


In [72]:
dict_feature

{'android.permission.ACCESS_FINE_LOCATION': 532,
 'android.permission.SYSTEM_ALERT_WINDOW': 554,
 'android.permission.GET_TASKS': 729,
 'android.permission.RESTART_PACKAGES': 254,
 'android.permission.VIBRATE': 678,
 'android.permission.ACCESS_WIFI_STATE': 996,
 'android.permission.ACCESS_COARSE_LOCATION': 534,
 'android.permission.ACCESS_NETWORK_STATE': 1694,
 'android.permission.READ_PHONE_STATE': 1834,
 'android.permission.WRITE_EXTERNAL_STORAGE': 1863,
 'android.permission.INTERNET': 2235,
 'android.permission.WAKE_LOCK': 1199,
 'com.android.launcher.permission.INSTALL_SHORTCUT': 534,
 'android.permission.INSTALL_PACKAGES': 164,
 'com.android.launcher.permission.UNINSTALL_SHORTCUT': 116,
 'android.permission.MOUNT_UNMOUNT_FILESYSTEMS': 261,
 'android.permission.READ_SMS': 852,
 'android.permission.READ_LOGS': 269,
 'android.permission.READ_CONTACTS': 618,
 'android.permission.CALL_PHONE': 488,
 'android.permission.CHANGE_WIFI_STATE': 489,
 'android.permission.RECEIVE_BOOT_COMPLETED

## 3.2 印出 48 個 apk 以上都需要的權限 (select general feature)

In [71]:
for i in dict_feature.keys():
    if dict_feature.get(i) >= 48:
        print(i)

android.permission.ACCESS_FINE_LOCATION
android.permission.SYSTEM_ALERT_WINDOW
android.permission.GET_TASKS
android.permission.RESTART_PACKAGES
android.permission.VIBRATE
android.permission.ACCESS_WIFI_STATE
android.permission.ACCESS_COARSE_LOCATION
android.permission.ACCESS_NETWORK_STATE
android.permission.READ_PHONE_STATE
android.permission.WRITE_EXTERNAL_STORAGE
android.permission.INTERNET
android.permission.WAKE_LOCK
com.android.launcher.permission.INSTALL_SHORTCUT
android.permission.INSTALL_PACKAGES
com.android.launcher.permission.UNINSTALL_SHORTCUT
android.permission.MOUNT_UNMOUNT_FILESYSTEMS
android.permission.READ_SMS
android.permission.READ_LOGS
android.permission.READ_CONTACTS
android.permission.CALL_PHONE
android.permission.CHANGE_WIFI_STATE
android.permission.RECEIVE_BOOT_COMPLETED
android.permission.SET_WALLPAPER
android.permission.READ_EXTERNAL_STORAGE
android.permission.WRITE_SETTINGS
android.permission.DISABLE_KEYGUARD
android.permission.PROCESS_OUTGOING_CALLS
android.p

## 3.3 刪除未達到閾值的屬性欄位

In [74]:
deleted_feature = []
for i in dict_feature.keys():
    if dict_feature.get(i) < 48:
        deleted_feature.append(i)

dff = dff.drop(deleted_feature,axis=1)

# 4. 存成 CSV 檔

In [76]:
dff.to_csv('Apk1.csv',index=False)

# 5. 生成 CNN 圖片集 

In [103]:
import numpy as np
from PIL import Image
data = dff.drop(['Class'],axis=1).to_numpy()
count = 0
for x in data:
    dd = np.array(x,dtype=np.uint8).reshape(1,64)
    image = Image.fromarray(dd)
    image.save('img/permission'+str(count)+'.png')
    count += 1

# 6. 生成圖片集對應的 csv 檔 

In [108]:
class_list = dff['Class'].to_numpy()
name_list = []
for x in range(2278):
    name_list.append('permission'+str(x)+'.png')
dict_img = {'Apk': name_list ,'Class': class_list }
img_collection = pd.DataFrame(dict_img)
img_collection

Unnamed: 0,Apk,Class
0,permission0.png,1
1,permission1.png,1
2,permission2.png,1
3,permission3.png,1
4,permission4.png,1
...,...,...
2273,permission2273.png,0
2274,permission2274.png,0
2275,permission2275.png,0
2276,permission2276.png,0


In [111]:
img_collection.to_csv('Apk_Image2.csv',index=False)