In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torch.utils.tensorboard import SummaryWriter

import numpy as np
import pandas as pd
import hashlib
import glob
import time
import re
import os

from tqdm import tqdm
from datetime import datetime
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score

class Net(nn.Module):
    def __init__(self, sequenceSize, embeddingDim, vocabularySize, filterWidth, filterNumber):
        super(Net, self).__init__()
        self.sequenceSize   = sequenceSize
        self.embeddingDim   = embeddingDim
        self.vocabularySize = vocabularySize
        self.filterWidth    = filterWidth
        self.filterNumber   = filterNumber 
        
        self.embedding = nn.Embedding(self.vocabularySize, self.embeddingDim)
        self.conv = nn.Sequential(
                            nn.Conv2d(1, self.filterNumber, (self.filterWidth, self.embeddingDim)),
                            nn.BatchNorm2d(self.filterNumber),
                            nn.ReLU()
                        )
        
        self.fc = nn.Sequential(
                        nn.Linear(self.filterNumber , 512),
                        nn.BatchNorm1d(512),
                        nn.ReLU(),
            
                        nn.Linear(512, 256),
                        nn.BatchNorm1d(256),
                        nn.ReLU(),
                        
                        nn.Linear(256, 1),
                        nn.Sigmoid()
                    )

    def forward(self, x):
        x = self.embedding(x)
        #print(x.size())
        
        x = self.conv(x)
        #print(x.size())
        
        x = x.max(dim=2)[0]
        #print(x.size())

        x = x.view(-1,  self.filterNumber)
        x = self.fc(x)
        return x

class SampleDataset(Dataset):
    def __init__(self, filePathList, labels, sequenceSize, featureName):
        self.filePathList = filePathList
        self.labels = labels
        self.sequenceSize = sequenceSize
        self.featureName = featureName
        
    def __len__(self):
        return len(self.filePathList)

    def __getitem__(self, idx):
        df = pd.read_parquet(self.filePathList[idx])
        seed = int(round(time.time()%1, 6) * 1000000)
        x = np.concatenate(df.iloc[np.random.RandomState(seed).permutation(len(df))][self.featureName].values)

        if len(x) > self.sequenceSize:
            x = x[:self.sequenceSize]
        else:
            x = np.concatenate((x, np.zeros([self.sequenceSize - len(x)])))
            
        sample = torch.from_numpy(x)
        return (sample.long(), self.labels[idx], self.filePathList[idx])

def computerMetrics(epoch, tlabels, tpredicted, tloss, 
                           vlabels, vpredicted, vloss):
    
    message = '[{:04d}] '.format(epoch)

    tf1score   = f1_score(tlabels, tpredicted)
    message  += 'TF1: {:2.4f}, '.format(tf1score*100)
    message  += 'Tloss: {:2.8f}, '.format(tloss)
    
    vf1score   = f1_score(vlabels, vpredicted)
    message  += 'VF1: {:2.4f}, '.format(vf1score*100)
    message  += 'VLoss: {:2.8f},'.format(vloss)    

    return epoch, tf1score, tloss, vf1score, vloss, message

def train(model, optimizer, dataLoader, device):
    running_loss  = 0.0  
    label_lst     = list()
    predicted_lst = list()

    model.train()
    for inputs, labels, _ in dataLoader:
        
        #
        inputs = inputs.unsqueeze(1).to(device)
        labels = labels.to(device)

        #
        optimizer.zero_grad()

        #
        outputs = model(inputs)
        predicted = (outputs > 0.5).squeeze().long()
        loss = F.binary_cross_entropy(outputs.squeeze(), labels.float())

        #
        loss.backward()
        optimizer.step()

        #
        label_lst.append(labels.cpu().numpy())
        predicted_lst.append(predicted.cpu().numpy())        
        running_loss += loss.item() 

    labels    = np.concatenate(label_lst)
    predicted = np.concatenate(predicted_lst)
    loss      = running_loss / len(predicted)
    
    return labels, predicted, loss

def assess(model, dataLoader, device):
    running_loss  = 0.0  
    label_lst     = list()
    predicted_lst = list()
    proba_lst     = list()
    path_lst      = list()

    with torch.no_grad():
        model.eval()
        for inputs, labels, paths in dataLoader:
            #
            inputs = inputs.unsqueeze(1).to(device)
            labels = labels.to(device)

            #
            outputs = model(inputs)
            predicted = (outputs > 0.5).squeeze().long()
            loss = F.binary_cross_entropy(outputs.squeeze(), labels.float())

            #
            #if len(inputs) > 1:
            label_lst.append(labels.cpu().numpy())
            predicted_lst.append(predicted.cpu().numpy())
            proba_lst.append(outputs.squeeze().cpu().numpy())
            path_lst.append(paths)
            running_loss += loss.item() 
    
    labels    = np.concatenate(label_lst)
    predicted = np.concatenate(predicted_lst)
    proba     = np.concatenate(proba_lst)
    paths     = np.concatenate(path_lst)
    loss      = running_loss / len(predicted)
    
    return labels, predicted, loss, proba, paths

In [2]:
#
ws = 'ws066'
featureName  = 'functionMethodCallsArgs'

#
lr             = 1e-3
batchSize      = 32
weightDecay    = 9e-6
sequenceSize   = 20000
embeddingDim   = 128
filterWidth    = 5
filterNumber   = 1024
vocabularySize = 10000

#
gpuDevice   = "cuda:2"
epochNum    = 100
numWorkers  = 16

In [3]:
# prepare dataset
trainPercentage = 0.7
validPercentage = 0.8
testPercentage  = 1.0

malware_rootDir = 'output/zoomalware/'
benign_rootDir  = 'output/zoobenign/'

malware_df  = pd.DataFrame([(fileName, 1) for fileName in glob.glob(malware_rootDir + '*')], columns=['filePath', 'label']).sample(5000, random_state=54)
benign_df   = pd.DataFrame([(fileName, 0) for fileName in glob.glob(benign_rootDir + '*')], columns=['filePath', 'label'])
benign_df   = benign_df.sample(len(malware_df), random_state=54)
dataset_df  = pd.concat([malware_df, benign_df])
datasetSize = len(dataset_df)
dataset_df.label.value_counts()

rand_idx = np.random.RandomState(seed=54).permutation(datasetSize)
train_df = dataset_df.iloc[rand_idx[:int(trainPercentage * datasetSize)]]
valid_df = dataset_df.iloc[rand_idx[int(trainPercentage * datasetSize):int(validPercentage * datasetSize)]]
test_df  = dataset_df.iloc[rand_idx[int(validPercentage * datasetSize):]]

print(len(train_df))
print(len(valid_df))
print(len(test_df))

trainDataset = SampleDataset(train_df.filePath.values, train_df.label.values, sequenceSize, featureName)
trainLoader = DataLoader(trainDataset, batch_size=batchSize, shuffle=True, num_workers=numWorkers)

validDataset = SampleDataset(valid_df.filePath.values, valid_df.label.values, sequenceSize, featureName)
validLoader = DataLoader(validDataset, batch_size=batchSize, shuffle=False, num_workers=numWorkers)

testDataset  = SampleDataset(test_df.filePath.values, test_df.label.values, sequenceSize, featureName)
testLoader  = DataLoader(testDataset,  batch_size=batchSize, shuffle=False, num_workers=numWorkers)

7000
1000
2000


In [4]:
model  = Net(sequenceSize, embeddingDim, vocabularySize, filterWidth, filterNumber)
device = torch.device(gpuDevice)
model  = model.to(device)

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weightDecay)
scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True, patience=5, factor=0.7)

In [5]:
modelID = f' ws={ws}_filterWidth={filterWidth}_filterNumber={filterNumber}_feature={featureName}_seqSize={sequenceSize}_embDim={embeddingDim}_vocabSize={vocabularySize}_batchSize={batchSize}_lr={lr}_Decay={weightDecay}'
print(modelID)
trainComment = f' ws={ws} Train filterWidth={filterWidth} filterNumber={filterNumber} feature={featureName} seqSize={sequenceSize} embDim={embeddingDim} vocabSize={vocabularySize} batchSize={batchSize} lr={lr} Decay={weightDecay}'
validComment = f' ws={ws} Valid filterWidth={filterWidth} filterNumber={filterNumber} feature={featureName} seqSize={sequenceSize} embDim={embeddingDim} vocabSize={vocabularySize} batchSize={batchSize} lr={lr} Decay={weightDecay}'

tb = SummaryWriter(comment=trainComment)
vb = SummaryWriter(comment=validComment)
outputlogFilePath = f'./traces/{ws}/logs'
outputtracesPath  = f'./traces/{ws}'
os.mkdir(outputtracesPath)

result_lst = list()

print('----------')
for epoch in range(epochNum):

    tlabel, tpredicted, tloss = train(model, optimizer, trainLoader, device)
    vlabel, vpredicted, vloss, vproba, _ = assess(model, validLoader, device)

    metrics = computerMetrics(epoch, tlabel, tpredicted, tloss,
                                     vlabel, vpredicted, vloss)
    epoch, tf1score, tloss, vf1score, vloss, message = metrics

    with open(outputlogFilePath, 'a') as writer:
        writer.write(message + '\n')
    print(message)

    tb.add_scalar("Loss",  tloss,    epoch)
    tb.add_scalar("F1",    tf1score, epoch)
    vb.add_scalar("Loss",  vloss,    epoch)
    vb.add_scalar("F1",    vf1score, epoch)

    modelOutputPath = f'{outputtracesPath}/model_{epoch:03d}.pth'
    torch.save(model.state_dict(), modelOutputPath)
    result_lst.append((epoch, modelOutputPath, vlabel, vpredicted, vf1score, vloss, tf1score, tloss))
    
    scheduler.step(tloss)
    
df = pd.DataFrame(result_lst, 
                   columns=['epoch', 'path', 'labels', 'predicted', 'vf1score', 'vloss', 'tf1score', 'tloss'])
df.to_parquet(f'{outputtracesPath}/results.parquet')

print('----------')

 ws=ws066_filterWidth=5_filterNumber=1024_feature=functionMethodCallsArgs_seqSize=20000_embDim=128_vocabSize=10000_batchSize=32_lr=0.001_Decay=9e-06
----------
[0000] TF1: 69.4254, Tloss: 0.01730343, VF1: 65.2432, VLoss: 0.01791538,
[0001] TF1: 73.3731, Tloss: 0.01596766, VF1: 62.4204, VLoss: 0.01907901,
[0002] TF1: 75.3534, Tloss: 0.01530542, VF1: 72.1480, VLoss: 0.01715359,
[0003] TF1: 76.9137, Tloss: 0.01476104, VF1: 49.4815, VLoss: 0.02355767,
[0004] TF1: 76.7639, Tloss: 0.01452832, VF1: 74.3636, VLoss: 0.01734054,
[0005] TF1: 78.0812, Tloss: 0.01403550, VF1: 47.4627, VLoss: 0.02271966,
[0006] TF1: 77.9542, Tloss: 0.01386800, VF1: 66.8311, VLoss: 0.01808810,
[0007] TF1: 78.2502, Tloss: 0.01369737, VF1: 61.5385, VLoss: 0.01973335,
[0008] TF1: 78.9924, Tloss: 0.01353167, VF1: 74.1453, VLoss: 0.01697017,
[0009] TF1: 78.9755, Tloss: 0.01327195, VF1: 72.4479, VLoss: 0.01688258,
[0010] TF1: 79.5527, Tloss: 0.01322969, VF1: 74.7720, VLoss: 0.01629944,
[0011] TF1: 80.3019, Tloss: 0.0128526

In [8]:
df = pd.DataFrame(result_lst, 
                   columns=['epoch', 'path', 'labels', 'predicted', 'vf1score', 'vloss', 'tf1score', 'tloss'])
df.to_parquet(f'{outputtracesPath}/results.parquet')

In [9]:
df.sort_values(by=['vloss', 'tloss', 'vf1score'], inplace=True)

In [10]:
df.head()

Unnamed: 0,epoch,path,labels,predicted,vf1score,vloss,tf1score,tloss
19,19,./traces/ws039/model_019.pth,"[1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, ...","[1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, ...",0.965803,0.002939,0.996954,0.000704
22,22,./traces/ws039/model_022.pth,"[1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, ...","[1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, ...",0.961094,0.003038,0.996954,0.00063
20,20,./traces/ws039/model_020.pth,"[1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, ...","[1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, ...",0.959916,0.003471,0.994924,0.000791
24,24,./traces/ws039/model_024.pth,"[1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, ...","[1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, ...",0.959752,0.003632,0.990844,0.00097
23,23,./traces/ws039/model_023.pth,"[1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, ...","[1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, ...",0.959184,0.003704,0.993902,0.000875


### Validation

In [11]:
numberModels    = 6
numberFragments = 1

probaUpperBorn = 0.8
probaLowerBorn = 0.2

In [12]:
modelPathList   = df.path.iloc[:numberModels].values
vmodelResultList = []

for modelPath in modelPathList:
    for fragment in range(numberFragments):
        print(f'ModelPath={modelPath} Fragment={fragment:02d}')
        mdl = Net(sequenceSize, embeddingDim, vocabularySize, filterWidth, filterNumber).to(device)
        mdl.load_state_dict(torch.load(modelPath))
        mdl.eval()
        modelResult = assess(mdl, validLoader, device)
        print(f'score={f1_score(modelResult[0], modelResult[1])}')
        vmodelResultList.append((modelPath,) + modelResult)

ModelPath=./traces/ws039/model_019.pth Fragment=00
score=0.9649484536082474
ModelPath=./traces/ws039/model_022.pth Fragment=00
score=0.956887486855941
ModelPath=./traces/ws039/model_020.pth Fragment=00
score=0.9548793284365162
ModelPath=./traces/ws039/model_024.pth Fragment=00
score=0.959917780061665
ModelPath=./traces/ws039/model_023.pth Fragment=00
score=0.9543147208121827
ModelPath=./traces/ws039/model_028.pth Fragment=00
score=0.9481641468682505


In [13]:
vresult_df = pd.DataFrame(vmodelResultList, columns=['name', 'Truth', 'Predicted', 'loss', 'Proba', 'Path'])
vresults   = np.vstack(vresult_df.Proba.values)

In [14]:
truth       = vresult_df.Truth.iloc[0]
paths       = vresult_df.Path.iloc[0]
result_mean = vresults.mean(axis=0)
result_std  = vresults.std(axis=0)
predicted   = (result_mean > 0.5).astype('int')
print(len(truth))
print(f1_score(truth, predicted))

1000
0.9624217118997912


In [15]:
vtruth       = truth[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]
vpaths       = paths[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]
vresult_prob = result_mean[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]
vresult_std  = result_std[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]

vpredicted    = (vresult_prob > 0.5).astype('int')
print(len(vtruth))
print(f1_score(vtruth, vpredicted))

940
0.984478935698448


In [16]:
etruth       = truth[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
epaths       = paths[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
eresult_prob = result_mean[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
eresult_std  = result_std[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
epredicted    = (eresult_prob > 0.5).astype('int')
print(len(etruth))
print(f1_score(etruth, epredicted))

60
0.6071428571428571


In [17]:
errorDataset  = SampleDataset(epaths, etruth, sequenceSize, featureName)
errorLoader  = DataLoader(errorDataset,  batch_size=batchSize, shuffle=False, num_workers=numWorkers)

modelPathList   = df.path.iloc[:numberModels].values
emodelResultList = []

for modelPath in modelPathList:
    for fragment in range(numberFragments):
        print(f'ModelPath={modelPath} Fragment={fragment:02d}')
        mdl = Net(sequenceSize, embeddingDim, vocabularySize, filterWidth, filterNumber).to(device)
        mdl.load_state_dict(torch.load(modelPath))
        mdl.eval()
        modelResult = assess(mdl, errorLoader, device)
        print(f'score={f1_score(modelResult[0], modelResult[1])}')
        emodelResultList.append((modelPath,) + modelResult)

ModelPath=./traces/ws039/model_019.pth Fragment=00
score=0.6551724137931035
ModelPath=./traces/ws039/model_022.pth Fragment=00
score=0.509090909090909
ModelPath=./traces/ws039/model_020.pth Fragment=00
score=0.4313725490196078
ModelPath=./traces/ws039/model_024.pth Fragment=00
score=0.6567164179104477
ModelPath=./traces/ws039/model_023.pth Fragment=00
score=0.6575342465753424
ModelPath=./traces/ws039/model_028.pth Fragment=00
score=0.46153846153846156


In [18]:
eresult_df = pd.DataFrame(emodelResultList, columns=['name', 'Truth', 'Predicted', 'loss', 'Proba', 'Path'])
eresults   = np.vstack(eresult_df.Proba.values)

eresult_mean = eresults.mean(axis=0)
eresult_std  = eresults.std(axis=0)
epredicted   = (eresult_mean > 0.5).astype('int')
print(len(etruth))
print(f1_score(etruth, epredicted))

60
0.6666666666666666


In [19]:
ftruth     = np.concatenate([vtruth, etruth])
fpredicted = np.concatenate([vpredicted, epredicted])
print(f1_score(ftruth, fpredicted))

0.9655891553701773


### Testing

In [20]:
modelPathList   = df.path.iloc[:numberModels].values
tmodelResultList = []

for modelPath in modelPathList:
    for fragment in range(numberFragments):
        print(f'ModelPath={modelPath} Fragment={fragment:02d}')
        mdl = Net(sequenceSize, embeddingDim, vocabularySize, filterWidth, filterNumber).to(device)
        mdl.load_state_dict(torch.load(modelPath))
        mdl.eval()
        modelResult = assess(mdl, testLoader, device)
        print(f'score={f1_score(modelResult[0], modelResult[1])}')
        tmodelResultList.append((modelPath,) + modelResult)

ModelPath=./traces/ws039/model_019.pth Fragment=00
score=0.9575384615384614
ModelPath=./traces/ws039/model_022.pth Fragment=00
score=0.9596523898199876
ModelPath=./traces/ws039/model_020.pth Fragment=00
score=0.9551050864320357
ModelPath=./traces/ws039/model_024.pth Fragment=00
score=0.9538612164973688
ModelPath=./traces/ws039/model_023.pth Fragment=00
score=0.95767131594906
ModelPath=./traces/ws039/model_028.pth Fragment=00
score=0.9441533546325878


In [21]:
tresult_df = pd.DataFrame(tmodelResultList, columns=['name', 'Truth', 'Predicted', 'loss', 'Proba', 'Path'])
tresults   = np.vstack(tresult_df.Proba.values)

In [22]:
tresult_df.head()

Unnamed: 0,name,Truth,Predicted,loss,Proba,Path
0,./traces/ws039/model_019.pth,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...","[1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...",0.004127,"[0.9849166, 0.99896944, 0.00015135479, 0.97967...",[output/maldozer/6f917ddde266b6081f3eea7ce978f...
1,./traces/ws039/model_022.pth,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...","[1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, ...",0.00398,"[0.98783445, 0.95781535, 0.0010909116, 0.91214...",[output/maldozer/6f917ddde266b6081f3eea7ce978f...
2,./traces/ws039/model_020.pth,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...","[1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...",0.00419,"[0.9791101, 0.9973315, 0.001930463, 0.9634221,...",[output/maldozer/6f917ddde266b6081f3eea7ce978f...
3,./traces/ws039/model_024.pth,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...","[1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...",0.004395,"[0.9846147, 0.9877671, 0.0067612054, 0.9864375...",[output/maldozer/6f917ddde266b6081f3eea7ce978f...
4,./traces/ws039/model_023.pth,"[1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...","[1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, ...",0.004062,"[0.9944318, 0.9982674, 0.0014715098, 0.9790892...",[output/maldozer/6f917ddde266b6081f3eea7ce978f...


In [23]:
truth       = tresult_df.Truth.iloc[0]
paths       = tresult_df.Path.iloc[0]
result_mean = tresults.mean(axis=0)
result_std  = tresults.std(axis=0)
predicted   = (result_mean > 0.5).astype('int')
print(len(truth))
print(f1_score(truth, predicted))

8000
0.9641089108910891


In [24]:
vtruth       = truth[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]
vpaths       = paths[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]
vresult_prob = result_mean[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]
vresult_std  = result_std[(result_mean >= probaUpperBorn) | (result_mean <= probaLowerBorn)]

vpredicted    = (vresult_prob > 0.5).astype('int')
print(len(vtruth))
print(f1_score(vtruth, vpredicted))

7453
0.9823396627273935


In [25]:
etruth       = truth[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
epaths       = paths[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
eresult_prob = result_mean[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
eresult_std  = result_std[(result_mean < probaUpperBorn) & (result_mean > probaLowerBorn)]
epredicted    = (eresult_prob > 0.5).astype('int')
print(len(etruth))
print(f1_score(etruth, epredicted))

547
0.7140255009107468


In [26]:
errorDataset  = SampleDataset(epaths, etruth, sequenceSize, featureName)
errorLoader  = DataLoader(errorDataset,  batch_size=batchSize, shuffle=False, num_workers=numWorkers)

modelPathList   = df.path.iloc[:numberModels].values
emodelResultList = []

for modelPath in modelPathList:
    for fragment in range(numberFragments):
        print(f'ModelPath={modelPath} Fragment={fragment:02d}')
        mdl = Net(sequenceSize, embeddingDim, vocabularySize, filterWidth, filterNumber).to(device)
        mdl.load_state_dict(torch.load(modelPath))
        mdl.eval()
        modelResult = assess(mdl, errorLoader, device)
        print(f'score={f1_score(modelResult[0], modelResult[1])}')
        emodelResultList.append((modelPath,) + modelResult)

ModelPath=./traces/ws039/model_019.pth Fragment=00
score=0.6804123711340206
ModelPath=./traces/ws039/model_022.pth Fragment=00
score=0.6252354048964219
ModelPath=./traces/ws039/model_020.pth Fragment=00
score=0.6303939962476549
ModelPath=./traces/ws039/model_024.pth Fragment=00
score=0.6677215189873419
ModelPath=./traces/ws039/model_023.pth Fragment=00
score=0.6924219910846954
ModelPath=./traces/ws039/model_028.pth Fragment=00
score=0.4367245657568239


In [27]:
eresult_df = pd.DataFrame(emodelResultList, columns=['name', 'Truth', 'Predicted', 'loss', 'Proba', 'Path'])
eresults   = np.vstack(eresult_df.Proba.values)

eresult_mean = eresults.mean(axis=0)
eresult_std  = eresults.std(axis=0)
epredicted   = (eresult_mean > 0.5).astype('int')
print(len(etruth))
print(f1_score(etruth, epredicted))

547
0.7137809187279152


In [28]:
ftruth     = np.concatenate([vtruth, etruth])
fpredicted = np.concatenate([vpredicted, epredicted])
print(f1_score(ftruth, fpredicted))

0.9635667531184389
