## **Import dependencies**

In [4]:
from pathlib import Path
from random import randint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

from classifiers import create_nn, build_tuned_nn
from utils_backdoor import run_cv

### Create a dataframe

In [6]:
# Disabled cell: it built `df` by reading the malware and non-malware CSVs,
# labelling them with `is_malware` (1 / 0), concatenating them and casting
# everything to float16.
# NOTE(review): the ValueError recorded under this cell shows a hex hash string
# (likely the SHA256 column) reaching the float cast -- presumably why the cell
# was commented out; confirm before re-enabling.
# non_malware_df = pd.read_csv('csv_files/non_malware.csv')
# malware_df = pd.read_csv('csv_files/malware.csv')
# malware_df['is_malware'] = 1
# non_malware_df['is_malware'] = 0
# df = pd.concat([malware_df, non_malware_df]).astype('float16')
# del malware_df, non_malware_df
# attack_type = 'no_attack'
# attack_type = 'random_number'
# attack_type = 'gaussian_signal_noise'
# attack_type = 'signal_noise'
# attack_type = 'function_from_article'

ValueError: could not convert string to float: '02379AEE63FE852562189D92A9A7393282814DBA6D0FA0AD412565677C194FE5'

### Read a dataframe

In [7]:
# Load the dataset from CSV; the SHA256 column is used as the row index.
df = pd.read_csv('./csv_files/merged_df_with_dates.csv', index_col='SHA256')

# Select which attack the next cell applies to the features.
# Keep exactly one assignment uncommented.
attack_type = 'no_attack'
# attack_type = 'random_number'
# attack_type = 'gaussian_signal_noise'
# attack_type = 'signal_noise'
# attack_type = 'function_from_article'

# Split the label column from the remaining (feature) columns.
y = df['is_malware']
X = df.drop('is_malware', axis=1)


# kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Preview the first rows. A notebook only renders the value of the cell's last
# expression, so the preview has to come last to be displayed at all.
df.head()

### Apply the selected attack type to the features

In [8]:
# Perturb every sample (row) of X according to the selected `attack_type`.
# The helpers are imported inside their branch, so only the chosen attack's
# helpers are loaded from `backdoor_attacks`.
# NOTE(review): any value not matched below (e.g. 'no_attack') leaves X
# unchanged -- confirm that every selectable attack type has a branch here.
if attack_type == 'random_number':
    from backdoor_attacks import add_random_binary
    from random import randint
    down, up = 1, 20  # inclusive bounds of the random integer drawn per row
    # `axis` and `result_type` are arguments of DataFrame.apply, not of
    # add_random_binary: axis=1 hands the lambda one row per call (matching the
    # other branches) and result_type='broadcast' keeps X's original shape,
    # index and columns.
    X = X.apply(
        lambda row: add_random_binary(row, randint(down, up)),
        axis=1,
        result_type='broadcast',
    )

elif attack_type == 'gaussian_signal_noise':
    from backdoor_attacks import add_noise, gaussian_noise
    X = X.apply(lambda row: add_noise(row, gaussian_noise), axis=1)

elif attack_type == 'signal_noise':
    from backdoor_attacks import add_noise, uniform_noise
    X = X.apply(lambda row: add_noise(row, uniform_noise), axis=1)

### **Define classifiers**

In [9]:
# Registry of the models to evaluate. Every entry is a 3-tuple:
#   (estimator factory, keyword arguments for that factory, display name)
# NOTE(review): presumably run_cv builds each model as factory(**kwargs) -- confirm.
classifiers = [
    (
        create_nn,
        {'input_shape': X.shape[1]},  # second dimension of X, i.e. its column count
        'Neural Network',
    ),
    (
        RandomForestClassifier,
        {'n_estimators': 100, 'max_depth': 2},
        'Random Forest',
    ),
    (
        SVC,
        {'kernel': 'linear', 'C': 1.0},
        'SVM',
    ),
]

#### Run cross-validation for each classifier, collect the results in a DataFrame and append them to a CSV file

In [10]:
# Run cross-validation for every registered classifier and flatten the records
# produced by each run_cv call into a single list.
results = [
    result
    for clf, params, name in classifiers
    for result in run_cv(X, y, clf, params, name)
]
results_df = pd.DataFrame(results)

# Results accumulate across runs in one CSV per attack type. Write the column
# header only when the file is missing or empty; otherwise every re-run would
# insert another header row in the middle of the accumulated data.
results_path = Path(f'{attack_type}_results.csv')
write_header = not results_path.exists() or results_path.stat().st_size == 0
results_df.to_csv(results_path, mode='a', header=write_header)