In [None]:
# Load the required libraries
import secml_malware
import configparser
import magic
from secml.array import CArray
import os
import shutil
from secml_malware.models.malconv import MalConv
from secml_malware.models.c_classifier_end2end_malware import CClassifierEnd2EndMalware, End2EndModel

# Build the MalConv network, wrap it in the secml_malware end-to-end classifier
# and load the pretrained model. `net` is a notebook-wide global: PS_distance
# reads it to obtain the model's maximum input length.
net = MalConv()
net = CClassifierEnd2EndMalware(net)
net.load_pretrained_model()
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import euclidean_distances
from matplotlib import rcParams


In [None]:
# Parse the INI file that holds every path used by this notebook.
config = configparser.ConfigParser()
config.read('config.ini')

# Every setting below lives in the [eu_distance] section.
ed = 'eu_distance'

# Directory that receives the generated CSV reports.
output = config.get(ed, 'output')

# Directories passed to PS_distance as `adversarial_PE_dir`, one per attack.
full_dos, extend_dos, content_shift, fgs = (
    config.get(ed, key)
    for key in ('full_dos', 'extend_dos', 'content_shift', 'fgsm')
)

# Directories passed to PS_distance as `malicious_PE_dir` ("c_" prefix), one per attack.
c_full_dos, c_extend_dos, c_content_shift, c_fgs = (
    config.get(ed, key)
    for key in ('c_full_dos', 'c_extend_dos', 'c_content_shift', 'c_fgsm')
)

# CSV files passed to FS_distance as `adversarial_path` ("_f" suffix).
full_dos_f, extend_dos_f, content_shift_f, fgs_f = (
    config.get(ed, key)
    for key in ('full_dos_f', 'extend_dos_f', 'content_shift_f', 'fgsm_f')
)

# CSV files passed to FS_distance as `malicious_path`.
c_full_dos_f, c_extend_dos_f, c_content_shift_f, c_fgs_f = (
    config.get(ed, key)
    for key in ('c_full_dos_f', 'c_extend_dos_f', 'c_content_shift_f', 'c_fgsm_f')
)

In [None]:

def extract_dir_name(path):
    """Return the last component of ``path``, ignoring any trailing '/' characters."""
    return os.path.basename(path.rstrip('/'))
def PS_distance(malicious_PE_dir, adversarial_PE_dir):
    """Compute scaled raw-byte Euclidean distances between paired files.

    The entries of both directories are sorted by name and paired
    positionally. For every pair, both files are converted to fixed-length
    numeric arrays with ``End2EndModel.bytes_to_numpy``, min-max scaled with
    a single scaler fitted on the two arrays together, and compared with the
    Euclidean norm.

    This function relies on notebook globals that must exist before it is
    called:

    * ``net`` -- provides ``get_input_max_length()``.
    * ``scaler`` -- a scaler exposing ``fit``/``transform`` (re-fitted per pair).
    * ``raw_byte_distances_list`` -- each pair's distance is appended here.
    * ``files_name_list`` -- each pair's malicious file name is appended here.

    Args:
        malicious_PE_dir: Directory holding the original files.
        adversarial_PE_dir: Directory holding the corresponding adversarial files.

    Returns:
        The mean of ``raw_byte_distances_list`` after this call's distances
        have been appended (``nan`` if the list is empty).
    """
    import warnings

    malicious_files = sorted(os.listdir(malicious_PE_dir))
    adversarial_files = sorted(os.listdir(adversarial_PE_dir))

    # Pairing is purely positional, so a differing file count means some files
    # are dropped by zip() and the rest may be paired with the wrong partner.
    if len(malicious_files) != len(adversarial_files):
        warnings.warn(
            f"{malicious_PE_dir!r} has {len(malicious_files)} files but "
            f"{adversarial_PE_dir!r} has {len(adversarial_files)}; "
            "files are paired by sorted position and the pairing may be misaligned."
        )

    # Loop-invariant: query the model's input length once instead of twice per file.
    max_length = net.get_input_max_length()
    # 1048576 == 2**20; presumably MalConv's maximum input length -- TODO confirm.
    distance_normaliser = 1048576

    for malicious_file, adversarial_file in zip(malicious_files, adversarial_files):
        with open(os.path.join(malicious_PE_dir, malicious_file), "rb") as file_handle:
            malicious_file_code = file_handle.read()
        with open(os.path.join(adversarial_PE_dir, adversarial_file), "rb") as file_handle:
            adversarial_file_code = file_handle.read()

        # 256 and False are passed through unchanged (presumably the padding
        # value and the shift flag -- confirm against secml_malware).
        # reshape(-1, 1) gives the single-column 2-D layout the scaler expects.
        malicious_file_bytes = End2EndModel.bytes_to_numpy(
            malicious_file_code, max_length, 256, False
        ).reshape(-1, 1)
        adversarial_file_bytes = End2EndModel.bytes_to_numpy(
            adversarial_file_code, max_length, 256, False
        ).reshape(-1, 1)

        # Fit once on both files so that a given byte value maps to the same
        # scaled value in each array; fitting per file would scale them with
        # different min/max whenever their value ranges differ.
        scaler.fit(np.concatenate([malicious_file_bytes, adversarial_file_bytes]))
        malicious_scaled = scaler.transform(malicious_file_bytes)
        adversarial_scaled = scaler.transform(adversarial_file_bytes)

        euclidean_distance = np.linalg.norm(malicious_scaled - adversarial_scaled)
        raw_byte_distances_list.append(euclidean_distance / distance_normaliser)
        files_name_list.append(malicious_file)

    mean_raw_byte_distance = np.mean(raw_byte_distances_list)
    return mean_raw_byte_distance
def FS_distance(malicious_path, adversarial_path):
    """Compute scaled feature-space Euclidean distances between paired rows.

    Both CSV files are loaded, concatenated and scaled together with the
    notebook-global ``scaler`` so that the two sets share one scale. Row ``i``
    of the malicious file is then compared with row ``i`` of the adversarial
    file. If the files have different row counts, only the first
    ``min(rows)`` pairs are compared.

    Args:
        malicious_path: CSV file with the features of the original samples.
        adversarial_path: CSV file with the features of the adversarial samples.

    Returns:
        A tuple ``(feature_distance_list, mean_feature_distance)``: the
        per-pair distances as a Python list, and their mean.
    """
    # Presumably the length of the feature vector -- TODO confirm.
    feature_normaliser = 2381

    malicious_samples = pd.read_csv(malicious_path)
    adversarial_samples = pd.read_csv(adversarial_path)

    # Scale both sets with one fit, then split them back by row position.
    combined_scaled = np.asarray(
        scaler.fit_transform(pd.concat([malicious_samples, adversarial_samples]))
    )
    n_malicious = len(malicious_samples)
    malicious_scaled = combined_scaled[:n_malicious]
    adversarial_scaled = combined_scaled[n_malicious:]

    # Only the distance between row i and row i is needed, so compute it
    # row-wise instead of building the full pairwise matrix and reading its
    # diagonal.
    n_pairs = min(len(malicious_scaled), len(adversarial_scaled))
    pair_distances = np.linalg.norm(
        malicious_scaled[:n_pairs] - adversarial_scaled[:n_pairs], axis=1
    ) / feature_normaliser

    feature_distance_list = pair_distances.tolist()
    mean_feature_distance = np.mean(feature_distance_list)
    return feature_distance_list, mean_feature_distance
    

In [None]:
# One entry per attack, in the argument order expected below:
# (malicious_PE_dir, adversarial_PE_dir, malicious feature CSV, adversarial feature CSV).
path_pairs = [
    (c_full_dos, full_dos, c_full_dos_f, full_dos_f),
    (c_extend_dos, extend_dos, c_extend_dos_f, extend_dos_f),
    (c_content_shift, content_shift, c_content_shift_f, content_shift_f),
    (c_fgs, fgs, c_fgs_f, fgs_f),
]

for path1, path2, path3, path4 in path_pairs:
    # PS_distance and FS_distance read these names as globals, so they are
    # recreated for every attack before the functions are called.
    scaler = MinMaxScaler()
    files_name_list = []
    raw_byte_distances_list = []

    # The report is named after the last component of the adversarial directory.
    distance_file_name = extract_dir_name(path2)

    mean_raw_byte_distance = PS_distance(path1, path2)
    feature_distance_list, mean_feature_distance = FS_distance(path3, path4)

    # Summary row; its mean_* columns differ from the per-file columns, so the
    # concatenated frame carries both sets of columns.
    mean_df = pd.DataFrame({
        'File_Names': ['Mean Values'],
        'mean_Raw_byte_Distances': [mean_raw_byte_distance],
        'mean_Feature_Distances': [mean_feature_distance],
    })

    # Per-file rows followed by the summary row.
    df = pd.concat(
        [
            pd.DataFrame({
                'File_Names': files_name_list,
                'Raw_byte_Distances': raw_byte_distances_list,
                'Feature_Distances': feature_distance_list,
            }),
            mean_df,
        ],
        ignore_index=True,
    )

    # Write one CSV per attack into the configured output directory.
    csv_file_path = os.path.join(output, f"{distance_file_name}.csv")
    df.to_csv(csv_file_path, index=False)
    