## **Import dependencies**

In [1]:
import pandas as pd    
import numpy as np
import matplotlib.pyplot as plt
from random import randint
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.metrics import classification_report

### Create a dataframe

In [2]:
# Load the two feature tables. Each variable must read the file that matches
# its name; otherwise every sample receives the opposite label.
malware_df = pd.read_csv('output_malware.csv')
non_malware_df = pd.read_csv('output_non_malware.csv')
malware_df['is_malware'] = 1
non_malware_df['is_malware'] = 0
# ignore_index=True gives the combined frame a unique 0..n-1 index, so
# row labels and row positions coincide in later cells.
# NOTE(review): float16 saturates to inf above 65504 -- confirm all features fit.
df = pd.concat([malware_df, non_malware_df], ignore_index=True).astype('float16')
del malware_df, non_malware_df

# Attack applied to the features before training. Values that appear in this
# notebook: 'no_attack', 'random_number', 'gaussian_signal_noise',
# 'signal_noise', 'function_from_article'.
attack_type = 'no_attack'

In [6]:
# Feature matrix: every column except the label.
# DataFrame.drop is a method and must be called, not subscripted.
X = df.drop(columns=['is_malware'])
# Keep the label as an integer so that classification_report names the
# classes '0' and '1' rather than '0.0' and '1.0'.
y = df['is_malware'].astype('int8')

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

### Implement attack type

In [None]:
# Perturb the feature matrix row by row according to the selected attack.
# Only the three attack types below modify X here; any other value of
# attack_type (including 'no_attack') leaves X untouched.
if attack_type == 'random_number':
    from backdoor_attacks import add_random_binary
    down, up = 1, 20
    # axis=1 and result_type='broadcast' are arguments of DataFrame.apply, not of
    # add_random_binary: apply must hand over rows and keep the original shape.
    # NOTE(review): randint is unseeded, so the perturbation differs between runs.
    X = X.apply(
        lambda row: add_random_binary(row, randint(down, up)),
        axis=1,
        result_type='broadcast',
    )

elif attack_type == 'gaussian_signal_noise':
    from backdoor_attacks import add_noise, gaussian_noise
    X = X.apply(lambda row: add_noise(row, gaussian_noise), axis=1)

elif attack_type == 'signal_noise':
    from backdoor_attacks import add_noise, uniform_noise
    X = X.apply(lambda row: add_noise(row, uniform_noise), axis=1)

#### Function to create neural network

In [20]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report


def create_nn(input_shape):
    """Build and compile a small fully connected binary classifier.

    The network has three ReLU hidden layers (80, 60 and 40 units) followed by
    a single sigmoid output unit. It is compiled with the Adam optimizer,
    binary cross-entropy loss and the binary-accuracy metric.

    Parameters
    ----------
    input_shape : int or tuple of int
        Shape of one input sample. A bare integer ``n`` is treated as ``(n,)``
        so that callers can pass ``X.shape[1]`` directly.

    Returns
    -------
    The compiled Keras ``Sequential`` model.
    """
    # NOTE(review): `keras` and `layers` are not imported in any cell visible in
    # this notebook -- confirm an import cell exists, otherwise this raises NameError.

    # Keras expects an iterable shape; a plain integer fails inside the layer.
    if isinstance(input_shape, (int, np.integer)):
        input_shape = (int(input_shape),)

    # create a model
    model = keras.Sequential([
        layers.Dense(80, input_shape=input_shape, activation='relu'),
        layers.Dense(60, activation='relu'),
        layers.Dense(40, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ])
    # compile a model
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])
    return model

#### Function to run cross validation

In [18]:
def run_cv(classifier, params, name):
    """Evaluate one classifier with repeated stratified 2-fold cross-validation.

    Reads the notebook-level feature matrix ``X`` and label vector ``y``.

    Parameters
    ----------
    classifier : callable
        Factory called as ``classifier(**params)`` to build a fresh model for
        every fold.
    params : dict
        Keyword arguments forwarded to ``classifier``.
    name : str
        Value stored in the 'Method' field. The exact value 'Neural Network'
        selects the Keras-style ``fit`` call and thresholding of predictions.

    Returns
    -------
    list of dict
        One record per (fold, class) holding precision, recall, F1-score and
        support taken from ``classification_report``.
    """
    # Fold indices are positional. On a DataFrame, X[idx] would be a column
    # lookup, so work on plain arrays instead.
    features = np.asarray(X)
    # Integer labels make the report keys '0' and '1'; float labels would give
    # '0.0' and '1.0', which the isdigit() filter below silently discards.
    labels = np.asarray(y).astype(int)

    results = []
    rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=5, random_state=368)

    for fold_no, (train_idx, test_idx) in enumerate(rskf.split(features, labels)):
        model = classifier(**params)

        if name == 'Neural Network':
            model.fit(features[train_idx], labels[train_idx], epochs=10, batch_size=32, verbose=0)
            # The network ends in a sigmoid unit, so predict() yields probabilities
            # of shape (n, 1); convert them to hard 0/1 labels for the report.
            probabilities = np.asarray(model.predict(features[test_idx])).ravel()
            y_pred = (probabilities > 0.5).astype(int)
        else:
            model.fit(features[train_idx], labels[train_idx])
            y_pred = model.predict(features[test_idx])

        # Generate classification report
        report = classification_report(labels[test_idx], y_pred, output_dict=True)

        results.extend(
            {
                'Method': name,
                'Fold': fold_no,
                'Class': int(label),
                'Precision': metrics['precision'],
                'Recall': metrics['recall'],
                'F1-score': metrics['f1-score'],
                'Support': metrics['support'],
            }
            for label, metrics in report.items()
            # Keep only per-class rows; skip 'accuracy' and the averaged rows.
            if label.isdigit()
        )
    return results


### **Define classifiers**

In [19]:
# Each entry is (model factory, keyword arguments for the factory, display name).
# The dictionary keys must match the factory's parameter names exactly, because
# the factory is invoked as factory(**params).
classifiers = [
    # Keras expects the input shape as a tuple, hence the one-element tuple.
    (create_nn, {'input_shape': (X.shape[1],)}, 'Neural Network'),
    (RandomForestClassifier, {'n_estimators': 100, 'max_depth': 2}, 'Random Forest'),
    (SVC, {'kernel': 'linear', 'C': 1.0}, 'SVM'),
]

#### Run cross validation for each classifier & save the results to a DataFrame and a CSV file

In [None]:
# Flatten the per-fold records of every classifier into one list.
results = [
    record
    for clf, params, name in classifiers
    for record in run_cv(clf, params, name)
]
results_df = pd.DataFrame(results)
# The f prefix belongs outside the quotes; inside them the file would be
# literally named 'f{attack_type}_results.csv'.
# Append mode adds a new header row and index column on every run.
results_df.to_csv(f'{attack_type}_results.csv', mode='a+')

In [None]:
def calculate_attack_success_rate(predicted, with_trigger, target_class):
    """Return the fraction of trigger-carrying samples predicted as ``target_class``.

    Parameters
    ----------
    predicted : array-like
        Predicted class label for each sample; flattened before use.
    with_trigger : array-like
        Per-sample indicator aligned with ``predicted``; entries equal to 1
        mark samples that carry the trigger.
    target_class : int
        Class the attack tries to force.

    Returns
    -------
    float
        Share of triggered samples whose prediction equals ``target_class``,
        or NaN when no sample carries the trigger (the rate is undefined).
    """
    predictions = np.asarray(predicted).ravel()
    trigger_mask = np.asarray(with_trigger).ravel() == 1
    triggered_predictions = predictions[trigger_mask]

    if triggered_predictions.size == 0:
        return float('nan')

    # Count the matches directly: np.where returns a tuple of index arrays, so
    # taking its len() gives the number of dimensions, not the number of hits.
    hits = int(np.count_nonzero(triggered_predictions == target_class))
    return hits / triggered_predictions.size

In [None]:

from copy import deepcopy

def run_cv_backdoor(classifier, params, name, with_trigger, trigger_size, trigger_creation_function, immutable_positions, target_class=0):
    """Cross-validate a classifier on a copy of the data poisoned with a backdoor trigger.

    Reads the notebook-level ``X`` and ``y``, which are left unmodified. The
    trigger is applied to the selected samples and their labels are set to
    ``target_class``. Each fold reports the usual per-class metrics plus the
    attack success rate on the triggered samples of the test split.

    Parameters
    ----------
    classifier : callable
        Factory called as ``classifier(**params)`` to build a fresh model per fold.
    params : dict
        Keyword arguments forwarded to ``classifier``.
    name : str
        Value stored in the 'Method' field; 'Neural Network' selects the
        Keras-style ``fit`` call and thresholding of predictions.
    with_trigger : iterable of int
        Row positions of the samples to poison.
        NOTE(review): interpreted as positions because the poisoning loop
        iterates over it -- confirm callers do not pass a 0/1 mask.
    trigger_size : int
        Forwarded to ``trigger_creation_function`` and used to compute 'TAP'.
    trigger_creation_function : callable
        Called as ``f(number_of_features, trigger_size, immutable_positions)``.
    immutable_positions
        Forwarded unchanged to ``trigger_creation_function``.
    target_class : int, default 0
        Label assigned to poisoned samples and used for the success rate.

    Returns
    -------
    list of dict
        One record per (fold, class) with precision, recall, F1-score, support,
        'ASR' and 'TAP'.
    """
    # NOTE(review): apply_trigger is not defined in any cell visible here;
    # presumably it comes from backdoor_attacks -- confirm it is in scope.
    results = []

    clean_features = np.asarray(X)
    # Work on independent array copies so the notebook-level X and y stay clean.
    features = np.array(clean_features, copy=True)
    # Integer labels make the report keys '0'/'1', as the isdigit() filter expects.
    labels = np.array(y, copy=True).astype(int)

    number_of_features = features.shape[1]
    trigger = trigger_creation_function(number_of_features, trigger_size, immutable_positions)

    # Per-sample 0/1 indicator of poisoned rows, so it can be sliced by fold indices.
    trigger_mask = np.zeros(len(features), dtype=int)
    for position in with_trigger:
        # Pass a copy of the clean row in case apply_trigger modifies its argument.
        features[position] = apply_trigger(clean_features[position].copy(), trigger)
        labels[position] = target_class
        trigger_mask[position] = 1

    rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=5, random_state=368)

    # split() yields (train, test) pairs only; the trigger indicator for a fold
    # is obtained by indexing trigger_mask with the test indices.
    for fold_no, (train_idx, test_idx) in enumerate(rskf.split(features, labels)):
        model = classifier(**params)

        if name == 'Neural Network':
            model.fit(features[train_idx], labels[train_idx], epochs=10, batch_size=32, verbose=0)
            # The network outputs probabilities; convert to hard 0/1 labels.
            probabilities = np.asarray(model.predict(features[test_idx])).ravel()
            y_pred = (probabilities > 0.5).astype(int)
        else:
            model.fit(features[train_idx], labels[train_idx])
            y_pred = model.predict(features[test_idx])

        # Generate classification report
        report = classification_report(labels[test_idx], y_pred, output_dict=True)

        fold_trigger_mask = trigger_mask[test_idx]
        if fold_trigger_mask.any():
            asr = calculate_attack_success_rate(y_pred, fold_trigger_mask, target_class)
        else:
            # No poisoned sample landed in this test split: the rate is undefined.
            asr = float('nan')

        results.extend(
            {
                'Method': name,
                'Fold': fold_no,
                'Class': int(label),
                'Precision': metrics['precision'],
                'Recall': metrics['recall'],
                'F1-score': metrics['f1-score'],
                'Support': metrics['support'],
                'ASR': asr,
                'TAP': 100 * round(trigger_size / number_of_features, 3)
            }
            for label, metrics in report.items()
            # Keep only per-class rows; skip 'accuracy' and the averaged rows.
            if label.isdigit()
        )
    return results