In [1]:
import os
# Move to the project root so the relative 'data/...' paths used below resolve.
# NOTE(review): this is a hard-coded, user-specific absolute path; the notebook
# will only run as-is on a machine that has this exact directory.
os.chdir('/home/rcgonzal/DSC180Malware/m2v-adversarial-hindroid/')

import pandas as pd
import numpy as np

from src.model.model import M2VDroid
from src.model.hindroid import Hindroid
from src.data.hindroid_etl import make_models
from src.analysis.analysis import create_performance_table
from src.utils import find_apps
from scipy import sparse

%load_ext autoreload
%autoreload 2

In [2]:
# Instantiate the project's Hindroid class on the train-set output directory.
# NOTE(review): what the constructor reads from this folder is defined in
# src.model.hindroid and is not visible here.
hindroid = Hindroid('data/out/train-set/')

In [3]:
# Folder holding the saved attack artifacts for this run.
attack_folder = os.path.join('data', 'out', 'all-apps', 'hindroid-train-set-ABAT')
advxs_path = os.path.join(attack_folder, 'advxs.npz')
inputs_path = os.path.join(attack_folder, 'inputs.npz')

# Open both archives together, then read the adversarial examples and the
# inputs they were generated from as scipy sparse matrices.
with open(advxs_path, 'rb') as advxs_file, \
        open(inputs_path, 'rb') as inputs_file:
    adv_examples = sparse.load_npz(advxs_file)
    input_examples = sparse.load_npz(inputs_file)
    
# change in apis by app
# np.mean(np.sum(np.round(adv_examples.todense()) != input_examples.todense(), axis=1))

# change in proportion of all apis
# np.mean(adv_examples.todense() != input_examples.todense(), axis=1)

In [4]:
# Mean number of APIs changed per app.
# Each row is densified on its own and the rounded adversarial row is compared
# element-wise with the matching input row; the count of differing entries is
# collected per row.
changed_apis = np.array([
    np.sum(np.round(adv_examples[app_idx].todense()) != input_examples[app_idx].todense())
    for app_idx in range(adv_examples.shape[0])
])
changed_apis.mean()

27283.778

In [5]:
# Mean fraction of all API columns that changed per app.
np.mean(changed_apis / adv_examples.shape[1])

0.010759847663547347

In [6]:
# Predictions on the unperturbed input examples; the recorded output shows one
# column per kernel (AAT, ABAT, APAT, ABPBTAT, APBPTAT).
# NOTE(review): the name is misspelled ("orignal") but later cells reference it,
# so it is left unchanged here.
orignal_predictions = hindroid.batch_predict(input_examples)
orignal_predictions

Predicting AAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting AAT, batch: 100%|██████████| 5/5 [00:04<00:00,  1.21it/s]
Predicting ABAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABAT, batch: 100%|██████████| 5/5 [02:01<00:00, 24.31s/it]
Predicting APAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APAT, batch: 100%|██████████| 5/5 [00:14<00:00,  2.91s/it]
Predicting ABPBTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABPBTAT, batch: 100%|██████████| 5/5 [22:02<00:00, 264.52s/it]
Predicting APBPTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APBPTAT, batch: 100%|██████████| 5/5 [14:06<00:00, 169.21s/it]


Unnamed: 0,AAT,ABAT,APAT,ABPBTAT,APBPTAT
0,1,1,1,1,1
1,1,1,1,1,1
2,1,1,1,1,1
3,1,1,1,1,1
4,1,1,1,1,1
...,...,...,...,...,...
495,1,1,1,1,1
496,1,1,1,1,1
497,1,1,1,1,1
498,1,1,1,1,1


In [7]:
# Predictions on the adversarial examples, using the same batch_predict call as
# for the original inputs so the two tables can be compared column by column.
adv_predictions = hindroid.batch_predict(adv_examples)
adv_predictions

Predicting AAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting AAT, batch: 100%|██████████| 5/5 [00:05<00:00,  1.12s/it]
Predicting ABAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABAT, batch: 100%|██████████| 5/5 [01:44<00:00, 20.82s/it]
Predicting APAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APAT, batch: 100%|██████████| 5/5 [00:44<00:00,  8.98s/it]
Predicting ABPBTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting ABPBTAT, batch: 100%|██████████| 5/5 [18:26<00:00, 221.29s/it]
Predicting APBPTAT, batch:   0%|          | 0/5 [00:00<?, ?it/s]




Predicting APBPTAT, batch: 100%|██████████| 5/5 [15:52<00:00, 190.44s/it]


Unnamed: 0,AAT,ABAT,APAT,ABPBTAT,APBPTAT
0,1,1,1,1,0
1,0,1,0,1,0
2,0,1,0,1,0
3,0,1,0,1,0
4,0,1,0,1,0
...,...,...,...,...,...
495,0,1,0,1,0
496,1,1,0,1,0
497,1,1,1,1,0
498,0,1,0,1,0


In [8]:
# Persist both prediction tables next to the attack artifacts (row index omitted).
orignal_predictions.to_csv(os.path.join(attack_folder, 'original_predictions.csv'), index=False)
# Fixed: the file name previously ended in a doubled '.csv.csv' extension.
# NOTE(review): anything that reads the old 'adv_predictions.csv.csv' name must
# be pointed at 'adv_predictions.csv'.
adv_predictions.to_csv(os.path.join(attack_folder, 'adv_predictions.csv'), index=False)

In [9]:
# Fraction of examples whose prediction differs between the original and the
# adversarial input, computed separately for each prediction column.
(orignal_predictions != adv_predictions).mean()

AAT        0.450
ABAT       0.126
APAT       0.588
ABPBTAT    0.124
APBPTAT    0.868
dtype: float64

In [10]:
# Attack success table: for each prediction column, the fraction of examples
# whose prediction changed, split by the original AAT prediction, plus an
# overall 'Total' row and a 'Support' column with the group sizes.
label_names = {1: 'Malware', 0: 'Benign'}

# True where the adversarial example changed a model's prediction.
flipped = orignal_predictions != adv_predictions

# `comparison` keeps its original shape: the boolean columns plus the label column.
comparison = flipped.copy()
comparison['Original AAT Label'] = orignal_predictions.AAT.map(label_names)

out = comparison.groupby('Original AAT Label').mean()
# Average only the boolean model columns. Calling .mean() on `comparison` would
# include the string label column, which older pandas silently dropped but
# pandas >= 2.0 rejects with a TypeError.
out.loc['Total'] = flipped.mean()

# Group sizes, keyed by the same display labels so they align with `out`'s index.
counts = orignal_predictions.AAT.value_counts()
counts.index = counts.index.map(label_names)
counts['Total'] = orignal_predictions.shape[0]
out['Support'] = counts
out  # to export: out.to_csv('reports/assets/attack_success.csv')

Unnamed: 0_level_0,AAT,ABAT,APAT,ABPBTAT,APBPTAT,Support
Original AAT Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Benign,0.241935,0.903226,0.177419,0.951613,0.064516,62
Malware,0.479452,0.015982,0.646119,0.006849,0.981735,438
Total,0.45,0.126,0.588,0.124,0.868,500


In [14]:
# Number of examples per original AAT prediction (1 / 0).
# NOTE(review): this cell's execution count (14) is out of sequence, and the
# recorded counts (445 / 55) do not match the Support column recorded for the
# summary table above (438 / 62); re-run from a fresh kernel to confirm which
# figures are current.
orignal_predictions.AAT.value_counts()

1    445
0     55
Name: AAT, dtype: int64