## **Import dependencies**

In [8]:
import pandas as pd    
import numpy as np
import matplotlib.pyplot as plt
from random import randint
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import classification_report
from keras import Sequential
from keras import layers
from os import listdir
from create_nn import build_tuned_nn
# from get_data_from_androzoo import _extracted_from_check_permissions_in_manifest_
from lxml import etree

### **Define list of permissions**

In [3]:
# Android permission names, alphabetically ordered and laid out by prefix.
# The visible cells do not reference this list; presumably it mirrors the
# permission feature columns of the dataset - TODO confirm.
# NOTE(review): 'DISABLE_KEY_GUARD' is spelled 'DISABLE_KEYGUARD' in the Android
# manifest reference - confirm it matches the dataset's column names.
permissions = [
    # A*
    'ACCESS_CHECKIN_PROPERTIES', 'ACCESS_COARSE_LOCATION', 'ACCESS_FINE_LOCATION',
    'ACCESS_NOTIFICATIONS', 'ACCESS_WIFI_STATE', 'ADD_VOICEMAIL', 'ANSWER_PHONE_CALLS',
    # B*
    'BLUETOOTH_MAP', 'BODY_SENSORS', 'BROADCAST_WAP_PUSH',
    # C*
    'CALL_LOG', 'CALL_PHONE', 'CAMERA', 'CAPTURE_VIDEO_OUTPUT',
    'CHANGE_NETWORK_STATE', 'CHANGE_WIFI_STATE',
    # D* .. P*
    'DISABLE_KEY_GUARD',
    'GET_ACCOUNTS', 'GET_TASKS',
    'INSTALL_PACKAGES', 'INTERNET',
    'MANAGE_DOCUMENTS',
    'PERSISTENT_ACTIVITY', 'PROCESS_OUTGOING_CALLS',
    # READ_*
    'READ_CALENDAR', 'READ_CALL_LOG', 'READ_CONTACTS', 'READ_EXTERNAL_STORAGE',
    'READ_HISTORY_BOOKMARKS', 'READ_LOGS', 'READ_PHONE_NUMBERS', 'READ_PHONE_STATE',
    'READ_SMS', 'READ_SYNC_SETTINGS',
    # REC* / RES*
    'RECEIVE_BOOT_COMPLETED', 'RECEIVE_MMS', 'RECEIVE_SMS', 'RECEIVE_WAP_PUSH',
    'RECORD_AUDIO', 'RECOVERY', 'RESTART_PACKAGES',
    # S* / U*
    'SEND_SMS', 'SET_ALWAYS_FINISH', 'SET_WALLPAPER', 'SYSTEM_ALERT_WINDOW',
    'USE_SIP',
    # WRITE_*
    'WRITE_APN_SETTINGS', 'WRITE_CALENDAR', 'WRITE_CALL_LOG', 'WRITE_CONTACTS',
    'WRITE_EXTERNAL_STORAGE', 'WRITE_SETTINGS',
]


### **Read a dataframe**

In [4]:
# Load the merged dataset; the SHA256 column becomes the row index.
dataset_path = './csv_files/merged_df_with_dates.csv'
df = pd.read_csv(dataset_path, index_col='SHA256')

# Attack variant selected for this run. Alternatives that can be assigned instead:
#   'random_number', 'gaussian_signal_noise', 'signal_noise', 'function_from_article'
attack_type = 'no_attack'

#### Split dataset by date to simulate new malware to detect

In [5]:
# Parse the three date columns into datetimes so they can be compared
# against a cutoff date when splitting the data.
date_columns = ['vt_scan_date', 'dex_date', 'added']
df[date_columns] = df[date_columns].apply(pd.to_datetime)

In [6]:
# Temporal split: samples scanned before the cutoff are used for training,
# samples scanned on/after it for testing.
split_date = '2021-10-01'
scanned_before_split = df['vt_scan_date'] < split_date
# Kept as a separate comparison (not ~scanned_before_split) so that rows with a
# missing scan date fall into neither subset.
scanned_from_split = df['vt_scan_date'] >= split_date

train_rows = df[scanned_before_split]
test_rows = df[scanned_from_split]

# Target column.
y_train = train_rows['is_malware']
y_test = test_rows['is_malware']

# Features: everything except the target, restricted to numeric columns
# (this drops the datetime columns among others).
X_train = train_rows.drop(columns=['is_malware']).select_dtypes(include=['number'])
X_test = test_rows.drop(columns=['is_malware']).select_dtypes(include=['number'])



### Apply the selected attack to the training data

In [7]:
# Modify the training features according to `attack_type`. Only X_train is
# touched; any value without a branch below (e.g. 'no_attack') leaves it unchanged
# in this cell.
if attack_type == 'random_number':
    from backdoor_attacks import add_random_binary
    down, up = 1, 20  # inclusive bounds for randint
    # `axis` and `result_type` are arguments of DataFrame.apply, not of
    # add_random_binary: the helper is called once per row with a fresh random
    # integer, and the result is broadcast back onto the original columns.
    X_train = X_train.apply(
        lambda row: add_random_binary(row, randint(down, up)),
        axis=1,
        result_type='broadcast',
    )

elif attack_type == 'gaussian_signal_noise':
    from backdoor_attacks import add_noise, gaussian_noise
    # Row-wise: add_noise receives each row together with the noise function.
    X_train = X_train.apply(lambda row: add_noise(row, gaussian_noise), axis=1)

elif attack_type == 'signal_noise':
    from backdoor_attacks import add_noise, uniform_noise
    # Row-wise: add_noise receives each row together with the noise function.
    X_train = X_train.apply(lambda row: add_noise(row, uniform_noise), axis=1)

#### Build, train and evaluate the neural network

In [14]:
# Build the network via the project helper; the helper receives the training data.
nn = build_tuned_nn(X_train, y_train)

# Train on the (possibly attacked) training set without progress output.
nn.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    verbose=0,
)

# Round the raw model outputs to the nearest integer to obtain class labels,
# then collect per-class metrics as a dictionary.
raw_predictions = nn.predict(X_test)
y_pred = np.round(raw_predictions)
report = classification_report(y_test, y_pred, output_dict=True)


Trial 5 Complete [00h 00m 21s]
val_accuracy: 0.9321895241737366

Best val_accuracy So Far: 0.9509803652763367
Total elapsed time: 00h 01m 35s
INFO:tensorflow:Oracle triggered exit


In [15]:
report

{'0': {'precision': 0.9431535269709543,
  'recall': 0.9688832054560955,
  'f1-score': 0.9558452481076536,
  'support': 2346},
 '1': {'precision': 0.9317118802619271,
  'recall': 0.8790820829655781,
  'f1-score': 0.9046321525885559,
  'support': 1133},
 'accuracy': 0.9396378269617707,
 'macro avg': {'precision': 0.9374327036164407,
  'recall': 0.9239826442108368,
  'f1-score': 0.9302387003481047,
  'support': 3479},
 'weighted avg': {'precision': 0.9394273453896587,
  'recall': 0.9396378269617707,
  'f1-score': 0.9391667665833253,
  'support': 3479}}