In [1]:
import numpy as np
import csv
from sklearn.model_selection import train_test_split
import pandas as pd
import os
from core import constants, data_utils, model_utils
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score
import joblib
import torch
import random
from core import utils

In [2]:
from mimicus.featureedit_p3 import FeatureEdit
import random
def mimicry(wolf_path, targets, classifier, 
            processor=None, verbose=False, trials=30):
    '''
    For every malicious file, mimic random benign files 'trials' times and 
    classify the result using 'classifier' to find the best mimicry 
    sample. 
    '''
    wolf = FeatureEdit(wolf_path)
    best_ben_path = ''
    mimic_paths = set()
    mimic_scores = []
    best_mimic_score, best_mimic_path = 1.1, ''
    wolf_feats = wolf.retrieve_feature_vector_numpy()
    if processor == 'binarized':
        wolf_feats[wolf_feats!=0] = 1       
    elif processor:
        wolf_feats = processor.process(wolf_feats)
    if isinstance(classifier, RandomForestClassifier):
        wolf_score = classifier.predict_proba(wolf_feats)[0, 1]
    if hasattr(classifier, 'indicator'):
        y_cent, x_density = model.inference_batch_wise(torch.Tensor(wolf_feats))
        wolf_score = y_cent[0,1]#
    else:
        wolf_score = classifier.predict(wolf_feats).numpy()[0]
    if verbose:
        print('  Modifying {path} [{score}]:\n'
                         .format(path=wolf_path, score=wolf_score))
    for rand_i in random.sample(range(len(targets)), trials):
        target = targets[rand_i]
        mimic = wolf.modify_file(target.copy())
        mimic_feats = mimic['feats']
        if processor == 'binarized':
            mimic_feats[mimic_feats!=0] = 1       
        elif processor:
            mimic_feats = processor.process(mimic_feats)
        if isinstance(classifier, RandomForestClassifier):
            mimic_score = classifier.predict_proba(mimic_feats)[0, 1]
        if hasattr(classifier, 'indicator'):
            y_cent, x_density = model.inference_batch_wise(torch.Tensor(mimic_feats))
            y_pred = np.argmax(y_cent, axis=-1)
            mimic_score = y_cent[0,1]#
            if mimic_score < 0.5:
                flag = model.indicator(x_density, y_pred)[0]
                if not flag:
                    mimic_score = 1
        else:
            mimic_score = classifier.predict(mimic_feats).numpy()[0]      
            #mimic_score = classifier.decision_function(mimic_feats)[0, 0]
        if verbose:
            print('    ..trying: [{score}]\n'
                             .format(score=mimic_score))
        if mimic_score < best_mimic_score:
            best_mimic_score = mimic_score
            best_mimic_path = mimic['path']
        mimic_paths.add(mimic['path'])
        mimic_scores.append(mimic_score)
        if best_mimic_score<0.5:
            print('     attack succeed, stop.')
            break
    if verbose:
        print('  BEST: [{score}]\n'
                         .format(score=best_mimic_score))
        print('  WRITING best to: {}\n\n'.format(best_mimic_path))
    # Remove all but the best mimic file
    for mimic_path in mimic_paths:
        if mimic_path != best_mimic_path:
            os.remove(mimic_path)
    return best_mimic_path, mimic_scores

### Saving modifiable features' indicies for training PAD

In [2]:
x_train,y_train,x_test,y_test= data_utils.load_pdf_dataset()

api = []
manipulation = []
for i in feature_names:
    if 'count_' in i:
        manipulation.append(0)
    else:
        manipulation.append(1)
    api.append(0)

len(manipulation),sum(manipulation)
joblib.dump([api,manipulation],'materials/pdf_features.pkl')

## RF-PDF

In [None]:
if not os.path.exists(f'models/pdfs/rf_pdf.pkl'):
    rf = model_utils.train_model('rf','pdf',x_train,y_train,x_test,y_test)
    model_utils.save_model('rf',rf,'models/pdf/',f'rf_pdf')
else:
    rf = model_utils.load_model('rf','pdf','models/pdf/',f'rf_pdf',x_train.shape[1])
r=rf.predict(x_test)>0.5
print(classification_report(r,y_test,digits=5))

In [None]:
shap_values_df = model_utils.explain_model(
        data_id='pdf',
        model_id='rf',
        model=rf,
        x_exp=x_train,
        x_back=x_train,
        knowledge='rf_pdf',
        n_samples=100,
        load=True,
        save=True
    )

### Load the paths to pdfs

In [7]:
x_train_filename = np.load(os.path.join(constants.SAVE_FILES_DIR, 'x_train_filename.npy'),allow_pickle=True)
x_test_filename = np.load(os.path.join(constants.SAVE_FILES_DIR, 'x_test_filename.npy'),allow_pickle=True)

In [None]:
# test mimicry
wolf=FeatureEdit(x_train_filename[y_train==1][0])
wolf_feats = wolf.retrieve_feature_vector_numpy()
rf.predict_proba(wolf_feats)[0,1]
mimic = wolf.modify_file(x_test[y_test==0][0])
mimic_features = mimic['feats']
mimic_score = rf.predict_proba(mimic_features)[0, 1]
mimic_score

#### Mimicry attacks on RF-PDF

In [None]:
r = rf.predict(x_test)
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],rf,\
                processor=None, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_rf.pkl')

In [18]:
[paths,scores] = joblib.load('results_rf.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

0.74 1.0 1.0


### NN-PDF

In [None]:
if not os.path.exists(f'models/pdf/nn_pdf.pkl'):
    x_train_,x_val,y_train_,y_val = train_test_split(x_train,y_train,test_size=0.05,random_state=3)
    nn = model_utils.train_model('nn','pdf',x_train_,y_train_,x_val,y_val,100)
    model_utils.save_model('nn',nn,'models/pdf/',f'nn_pdf')
else:
    nn = model_utils.load_model('nn','pdf','models/pdf/',f'nn_pdf',x_train.shape[1])
r=nn.predict(x_test).numpy()>0.5
print(classification_report(r,y_test,digits=5))

#### Mimicry on NN-PDF

In [None]:
r=nn.predict(x_test).numpy()>0.5
print(classification_report(r,y_test,digits=5))
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],nn,\
                processor=None, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_nn.pkl')

In [24]:
[paths,scores] = joblib.load('results_nn.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

0.575 0.995 1.0


### NN-PDF-Bundle

#### Traing models

In [None]:
for i in [1,4,8,16]:
    x_train_cb, y_train, x_test_cb, y_test, processor = data_utils.load_compressed_pdf('pdf',ratio=i)
    if not os.path.exists(f'models/pdf/nn_pdf_bundle_{i}.pkl'):
        x_train_,x_val,y_train_,y_val = train_test_split(x_train_cb,y_train,test_size=0.05,random_state=3)
        nn_bundle = model_utils.train_model('nn','pdf',x_train_,y_train_,x_val,y_val,100)
        model_utils.save_model('nn',nn_bundle,'models/pdf/',f'nn_pdf_bundle_{i}')
    else:
        nn_bundle = model_utils.load_model('nn','pdf','models/pdf/',f'nn_pdf_bundle_{i}',x_train_cb.shape[1])
    model_utils.evaluate_model(nn_bundle,x_test_cb,y_test)
    #r=nn_bundle.predict(x_test_cb)>0.5
    #print(classification_report(r,y_test,digits=5))

In [None]:
x_train_cb, y_train, x_test_cb, y_test, processor = data_utils.load_compressed_pdf('pdf',ratio=16)
if not os.path.exists(f'models/pdf/nn_pdf_bundle.pkl'):
    x_train_,x_val,y_train_,y_val = train_test_split(x_train_cb,y_train,test_size=0.05,random_state=3)
    nn_bundle = model_utils.train_model('nn','pdf',x_train_,y_train_,x_val,y_val,100)
    model_utils.save_model('nn',nn_bundle,'models/pdf/',f'nn_pdf_bundle')
else:
    nn_bundle = model_utils.load_model('nn','pdf','models/pdf/',f'nn_pdf_bundle',x_train_cb.shape[1])
model_utils.evaluate_model(nn_bundle,x_test_cb,y_test)

#### Mimicry on NN-PDF-Bundle

In [None]:
r=nn_bundle.predict(x_test_cb).numpy()>0.5
print(classification_report(r,y_test,digits=5))
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],nn_bundle,\
                processor=processor, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_nn_bundle.pkl')

In [32]:
[paths,scores] = joblib.load('results_nn_bundle.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

0.28 0.865 0.98


### NN-PDF-Density

In [38]:
if os.path.exists(f'models/pdf/nn_pdf_density0.pkl'):
    x_train_,x_val,y_train_,y_val = train_test_split(x_train_cb,y_train,test_size=0.05,random_state=3)
    nn_density = model_utils.train_model('nn','pdf',x_train_,y_train_,x_val,y_val,100,'density0')
    model_utils.save_model('nn',nn_density,'models/pdf/',f'nn_pdf_density0')
else:
    nn_density = model_utils.load_model('nn','pdf','models/pdf/',f'nn_pdf_density0',x_train_cb.shape[1])
model_utils.evaluate_model(nn_density,x_test_cb,y_test)
#joblib.dump([x_train_,x_val,x_test_cb,y_train_,y_val,y_test], "pdf_dataset.pkl")

2024/07/01 22:12:13 utils.py[line:306] INFO: training on cuda
2024/07/01 22:12:13 utils.py[line:319] INFO: Density-based robust training: 0.0
  0%|          | 0/100 [00:00<?, ?it/s]2024/07/01 22:12:13 utils.py[line:413] INFO: epoch 1, loss 0.0878, test loss 0.0915, best test loss 0.0915, train acc 0.97210, test auc 0.99611, best auc 0.99611, f1 0.98827, best f1 0.98827, time 0.2 sec
  1%|          | 1/100 [00:00<00:17,  5.68it/s]

Available indicies: 95.0


2024/07/01 22:12:13 utils.py[line:413] INFO: epoch 2, loss 0.0180, test loss 0.0205, best test loss 0.0205, train acc 0.99493, test auc 0.99983, best auc 0.99983, f1 0.99266, best f1 0.99266, time 0.1 sec
  2%|▏         | 2/100 [00:00<00:15,  6.24it/s]2024/07/01 22:12:13 utils.py[line:413] INFO: epoch 3, loss 0.0092, test loss 0.0118, best test loss 0.0118, train acc 0.99738, test auc 0.99999, best auc 0.99999, f1 0.99266, best f1 0.99266, time 0.1 sec
  3%|▎         | 3/100 [00:00<00:15,  6.42it/s]2024/07/01 22:12:13 utils.py[line:413] INFO: epoch 4, loss 0.0101, test loss 0.0109, best test loss 0.0109, train acc 0.99694, test auc 0.99998, best auc 0.99998, f1 0.99413, best f1 0.99413, time 0.1 sec
  4%|▍         | 4/100 [00:00<00:14,  6.61it/s]2024/07/01 22:12:13 utils.py[line:413] INFO: epoch 5, loss 0.0086, test loss 0.0051, best test loss 0.0051, train acc 0.99773, test auc 0.99999, best auc 0.99999, f1 0.99708, best f1 0.99708, time 0.1 sec
  5%|▌         | 5/100 [00:00<00:14,  6

 34%|███▍      | 34/100 [00:04<00:09,  7.20it/s]2024/07/01 22:12:18 utils.py[line:413] INFO: epoch 35, loss 0.0019, test loss 0.0150, best test loss 0.0015, train acc 0.99943, test auc 0.99991, best auc 1.00000, f1 0.99561, best f1 0.99854, time 0.1 sec
 35%|███▌      | 35/100 [00:04<00:08,  7.24it/s]2024/07/01 22:12:18 utils.py[line:413] INFO: epoch 36, loss 0.0049, test loss 0.0133, best test loss 0.0015, train acc 0.99873, test auc 0.99996, best auc 1.00000, f1 0.99708, best f1 0.99854, time 0.1 sec
 36%|███▌      | 36/100 [00:05<00:08,  7.26it/s]2024/07/01 22:12:18 utils.py[line:413] INFO: epoch 37, loss 0.0021, test loss 0.0175, best test loss 0.0015, train acc 0.99917, test auc 0.99947, best auc 1.00000, f1 0.99561, best f1 0.99854, time 0.1 sec
 37%|███▋      | 37/100 [00:05<00:08,  7.27it/s]2024/07/01 22:12:18 utils.py[line:413] INFO: epoch 38, loss 0.0032, test loss 0.0157, best test loss 0.0015, train acc 0.99908, test auc 0.99982, best auc 1.00000, f1 0.99708, best f1 0.9985

 67%|██████▋   | 67/100 [00:09<00:04,  7.07it/s]2024/07/01 22:12:22 utils.py[line:413] INFO: epoch 68, loss 0.0014, test loss 0.0121, best test loss 0.0045, train acc 0.99956, test auc 0.99997, best auc 0.99998, f1 0.99708, best f1 0.99854, time 0.1 sec
 68%|██████▊   | 68/100 [00:09<00:04,  7.11it/s]2024/07/01 22:12:22 utils.py[line:413] INFO: epoch 69, loss 0.0027, test loss 0.0142, best test loss 0.0045, train acc 0.99917, test auc 0.99990, best auc 0.99998, f1 0.99708, best f1 0.99854, time 0.1 sec
 69%|██████▉   | 69/100 [00:09<00:04,  7.10it/s]2024/07/01 22:12:22 utils.py[line:413] INFO: epoch 70, loss 0.0013, test loss 0.0081, best test loss 0.0045, train acc 0.99961, test auc 0.99997, best auc 0.99998, f1 0.99708, best f1 0.99854, time 0.1 sec
 70%|███████   | 70/100 [00:09<00:04,  7.08it/s]2024/07/01 22:12:23 utils.py[line:413] INFO: epoch 71, loss 0.0009, test loss 0.0100, best test loss 0.0045, train acc 0.99965, test auc 0.99998, best auc 0.99998, f1 0.99708, best f1 0.9985

100%|██████████| 100/100 [00:14<00:00,  7.04it/s]
2024/07/01 22:12:27 model_utils.py[line:261] INFO: roc_auc_score:0.9999754443264772, f1:0.9986320109439124 , false positive rate:0.0, false negative rage:0.00273224043715847


              precision    recall  f1-score   support

           0  0.9967177 1.0000000 0.9983562      3644
           1  1.0000000 0.9972678 0.9986320      4392

    accuracy                      0.9985067      8036
   macro avg  0.9983589 0.9986339 0.9984941      8036
weighted avg  0.9985116 0.9985067 0.9985069      8036



0.9986320109439124

#### Mimicry on NN-PDF-Density

In [None]:
r=nn_density.predict(x_test_cb).numpy()>0.5
print(classification_report(r,y_test,digits=5))
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],nn_density,\
                processor=processor, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_nn_density0.pkl')

In [37]:
[paths,scores] = joblib.load('results_nn_density0.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

0.445 0.955 1.0


In [None]:
#0.445 0.955 1.0

## LTNN

In [14]:
if not os.path.exists(f'models/pdf/ltnn_pdf.pkl'):
    x_train_,x_val,y_train_,y_val = train_test_split(x_train,y_train,test_size=0.05,random_state=3)
    nn = model_utils.train_model('ltnn','pdf',x_train_,y_train_,x_val,y_val,100)
    model_utils.save_model('nn',nn,'models/pdf/',f'ltnn_pdf')
else:
    nn = model_utils.load_model('ltnn','pdf','models/pdf/',f'ltnn_pdf',x_train.shape[1])
r=nn.predict(x_test).numpy()>0.5
print(classification_report(r,y_test,digits=5))

              precision    recall  f1-score   support

       False    0.99835   0.99890   0.99863      3642
        True    0.99909   0.99863   0.99886      4394

    accuracy                        0.99876      8036
   macro avg    0.99872   0.99877   0.99874      8036
weighted avg    0.99876   0.99876   0.99876      8036



In [None]:
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],nn,\
                processor=None, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_ltnn.pkl')

In [26]:
[paths,scores] = joblib.load('results_ltnn.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

0.475 0.91 0.97


## BinarizedNN

In [16]:
x_train_b = x_train.copy()
x_test_b = x_test.copy()

In [17]:
x_train_b[x_train_b!=0] = 1
x_test_b[x_test_b!=0] = 1

In [None]:
if not os.path.exists(f'models/pdf/binarized_nn_pdf.pkl'):
    x_train_,x_val,y_train_,y_val = train_test_split(x_train_b,y_train,test_size=0.05,random_state=3)
    nn = model_utils.train_model('nn','pdf',x_train_,y_train_,x_val,y_val,100)
    model_utils.save_model('nn',nn,'models/pdf/',f'binarized_nn_pdf')
else:
    nn = model_utils.load_model('nn','pdf','models/pdf/',f'binarized_nn_pdf',x_train.shape[1])
r=nn.predict(x_test_b).numpy()>0.5
print(classification_report(r,y_test,digits=5))

In [None]:
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test_b[(y_test==0)&(r==0)],nn,\
                processor='binarized', verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_binarizednn.pkl')

In [44]:
[paths,scores] = joblib.load('results_binarizednn.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

0.13 0.645 0.915


### Hist

In [None]:
x_train_hist, y_train, x_test_hist, y_test, processor = data_utils.load_compressed_pdf('pdf',16,'histogram')

In [None]:
if not os.path.exists(f'models/pdf/histogram_nn_pdf.pkl'):
    x_train_,x_val,y_train_,y_val = train_test_split(x_train_hist,y_train,test_size=0.05,random_state=3)
    nn = model_utils.train_model('nn','pdf',x_train_,y_train_,x_val,y_val,100)
    model_utils.save_model('nn',nn,'models/pdf/',f'histogram_nn_pdf')
else:
    nn = model_utils.load_model('nn','pdf','models/pdf/',f'histogram_nn_pdf',x_train.shape[1])
r=nn.predict(x_test_hist).numpy()>0.5
print(classification_report(r,y_test,digits=5))

In [None]:
r=nn.predict(x_test_hist).numpy()>0.5
print(classification_report(r,y_test,digits=5))
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],nn,\
                processor=processor, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_histogramnn.pkl')

In [30]:
[paths,scores] = joblib.load('results_histogramnn.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

0.32 0.95 0.99


### SC

In [None]:
x_train_sc, y_train, x_test_sc, y_test, processor = data_utils.load_compressed_pdf('pdf',16,False)

In [None]:
if not os.path.exists(f'models/pdf/sc_nn_pdf.pkl'):
    x_train_,x_val,y_train_,y_val = train_test_split(x_train_sc,y_train,test_size=0.05,random_state=3)
    nn = model_utils.train_model('nn','pdf',x_train_,y_train_,x_val,y_val,100)
    model_utils.save_model('nn',nn,'models/pdf/',f'sc_nn_pdf')
else:
    nn = model_utils.load_model('nn','pdf','models/pdf/',f'sc_nn_pdf',x_train.shape[1])
r=nn.predict(x_test_sc).numpy()>0.5
print(classification_report(r,y_test,digits=5))

In [None]:
r=nn.predict(x_test_sc).numpy()>0.5
print(classification_report(r,y_test,digits=5))
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],nn,\
                processor=processor, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_scnn.pkl')

In [None]:
[paths,scores] = joblib.load('results_scnn.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

## PAD on PDF

In [55]:
from pad.core.defense import AMalwareDetectionPAD
from pad.core.defense import AdvMalwareDetectorICNN
from pad.core.defense import MalwareDetectionDNN

In [90]:
#name = '20240701-230056'#'scnn+pad
name = '20240701-230325'#'scbnn-db+pad
args = {'dense_hidden_units':[1024,512,256],
        'dropout':0.6,
        'alpha_':0.2,
        'smooth':False,
        'proc_number':10,
       }
model = MalwareDetectionDNN(135,
                            2,
                            device='cpu',
                            name=name,
                            **args
                            )
model = AdvMalwareDetectorICNN(model,
                            input_size=135,
                            n_classes=2,
                            device='cpu',
                            name=name,
                            **args
                            )
max_adv_training_model = AMalwareDetectionPAD(model, None, None)
max_adv_training_model.load()
print(f'Load {name} pad')

2024-07-04 13:20:33,615 md_dnn.py[line:63] INFO: MalwareDetectionDNN(
  (nn_model_layer_0): Linear(in_features=135, out_features=1024, bias=True)
  (nn_model_layer_1): Linear(in_features=1024, out_features=512, bias=True)
  (nn_model_layer_2): Linear(in_features=512, out_features=256, bias=True)
  (nn_model_layer_3): Linear(in_features=256, out_features=2, bias=True)
)
2024/07/04 13:20:33 md_dnn.py[line:63] INFO: MalwareDetectionDNN(
  (nn_model_layer_0): Linear(in_features=135, out_features=1024, bias=True)
  (nn_model_layer_1): Linear(in_features=1024, out_features=512, bias=True)
  (nn_model_layer_2): Linear(in_features=512, out_features=256, bias=True)
  (nn_model_layer_3): Linear(in_features=256, out_features=2, bias=True)
)
2024-07-04 13:20:33,642 amd_icnn.py[line:88] INFO: AdvMalwareDetectorICNN(
  (md_nn_model): MalwareDetectionDNN(
    (nn_model_layer_0): Linear(in_features=135, out_features=1024, bias=True)
    (nn_model_layer_1): Linear(in_features=1024, out_features=512, bi

/home/ian/workspace1/Density-Boosts-Robustness/pad4amd/save/drebin/amd_pad_ma_20240701-230325/model.pth
Load 20240701-230325 pad


In [None]:
#x_train_bundle,x_val_bundle,x_test_bundle,y_train_,y_val,y_test = joblib.load("pdf_dataset.pkl")

In [58]:
y_cent, y_prob, y_true = model.inference(utils.data_iter(256,x_test_bundle, y_test, False))
r = y_cent.argmax(axis=1)
print(classification_report(y_test,r,digits=5))
indicator = model.indicator(y_prob)
print(classification_report(y_test[indicator],r[indicator],digits=5))

#### Mimicry on PDF-Density-PAD

In [None]:
y_cent, y_prob, y_true = model.inference(utils.data_iter(256,x_test_bundle, y_test, False))
r = y_cent.argmax(axis=1).numpy()
print(classification_report(y_test,r,digits=5))
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],model,\
                processor=processor, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_nn_pdf_dbpad.pkl')

In [92]:
[paths,scores] = joblib.load('results_nn_pdf_pad.pkl')
s1 = 0
s10 = 0
s30 = 0
for i in scores:
    if len(i)==1:
        s1 += 1
    if len(i)<=10:
        s10 += 1
    if len(i)<30 or i[-1]<0.5:
        s30 += 1
print(s1/200,s10/200,s30/200)

### evaluation 2

In [None]:
y_cent, y_prob, y_true = model.inference(utils.data_iter(256,x_test_bundle, y_test, False))
r = y_cent.argmax(axis=1)
print(classification_report(y_test,r,digits=5))
indicator = model.indicator(y_prob)
print(classification_report(y_test[indicator],r[indicator],digits=5))

In [None]:
y_cent, y_prob, y_true = model.inference(utils.data_iter(256,x_test_bundle, y_test, False))
r = y_cent.argmax(axis=1).numpy()
print(classification_report(y_test,r,digits=5))
paths = []
scores = []
for i in range(200):
    mimic_path, mimic_scores = \
        mimicry(x_test_filename[(y_test==1)&(r==1)][i],x_test[(y_test==0)&(r==0)],model,\
                processor=processor, verbose=True, trials=30)
    paths.append(mimic_path)
    scores.append(mimic_scores)
joblib.dump([paths,scores],'results_nn_pdf_pad.pkl')

In [None]:
print(classification_report(r,y_test,digits=5))