# Allowing C++ Code Execution

Following this tutorial: https://www.wikihow.com/Run-CUDA-C-or-C%2B%2B-on-Jupyter-(Google-Colab)

In [0]:
# Disabled step: the shell commands below sit inside a triple-quoted string, so
# the cell's content is just that string literal.
# NOTE(review): this looks like a way of switching the cell off - confirm.
# As written, the commands would purge existing CUDA / NVIDIA packages through
# apt and dpkg and then refresh the package index.
'''
!apt-get --purge remove cuda nvidia* libnvidia-*
!dpkg -l | grep cuda- | awk '{print $2}' | xargs -n1 dpkg --purge
!apt-get remove cuda-*
!apt autoremove
!apt-get update
'''

In [0]:
# Disabled step: the shell commands below sit inside a triple-quoted string, so
# the cell's content is just that string literal.
# As written, the commands would download the CUDA 9.2 local-repo package for
# Ubuntu 16.04, register it with dpkg, add its signing key and install cuda-9.2.
'''
!wget https://developer.nvidia.com/compute/cuda/9.2/Prod/local_installers/cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64 -O cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!dpkg -i cuda-repo-ubuntu1604-9-2-local_9.2.88-1_amd64.deb
!apt-key add /var/cuda-repo-9-2-local/7fa2af80.pub
!apt-get update
!apt-get install cuda-9.2
'''

In [0]:
# Disabled check: uncomment to print the version of the installed nvcc compiler.
#!nvcc --version

In [0]:
#!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git

In [0]:
# Disabled: uncomment to load the `nvcc_plugin` IPython extension.
# NOTE(review): presumably this is what registers the %%cu cell magic used by
# the CUDA sample in this notebook - confirm.
#%load_ext nvcc_plugin

In [0]:
# Disabled sample: a CUDA C program written for the %%cu cell magic, kept inside
# a triple-quoted string so the cell only echoes the text instead of compiling it.
# The program copies a = 3 and b = 5 to the device, launches add<<<1,1>>> to
# store their sum, copies the result back and prints it.
'''
%%cu
#include <stdio.h>
#include <stdlib.h>
__global__ void add(int *a, int *b, int *c) {
*c = *a + *b;
}
int main() {
int a, b, c;
// host copies of variables a, b & c
int *d_a, *d_b, *d_c;
// device copies of variables a, b & c
int size = sizeof(int);
// Allocate space for device copies of a, b, c
cudaMalloc((void **)&d_a, size);
cudaMalloc((void **)&d_b, size);
cudaMalloc((void **)&d_c, size);
// Setup input values  
c = 0;
a = 3;
b = 5;
// Copy inputs to device
cudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice);
  cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice);
// Launch add() kernel on GPU
add<<<1,1>>>(d_a, d_b, d_c);
// Copy result back to host
cudaError err = cudaMemcpy(&c, d_c, size, cudaMemcpyDeviceToHost);
  if(err!=cudaSuccess) {
      printf("CUDA error copying to Host: %s\n", cudaGetErrorString(err));
  }
printf("result is %d\n",c);
// Cleanup
cudaFree(d_a);
cudaFree(d_b);
cudaFree(d_c);
return 0;
}
'''

'\n%%cu\n#include <stdio.h>\n#include <stdlib.h>\n__global__ void add(int *a, int *b, int *c) {\n*c = *a + *b;\n}\nint main() {\nint a, b, c;\n// host copies of variables a, b & c\nint *d_a, *d_b, *d_c;\n// device copies of variables a, b & c\nint size = sizeof(int);\n// Allocate space for device copies of a, b, c\ncudaMalloc((void **)&d_a, size);\ncudaMalloc((void **)&d_b, size);\ncudaMalloc((void **)&d_c, size);\n// Setup input values  \nc = 0;\na = 3;\nb = 5;\n// Copy inputs to device\ncudaMemcpy(d_a, &a, size, cudaMemcpyHostToDevice);\n  cudaMemcpy(d_b, &b, size, cudaMemcpyHostToDevice);\n// Launch add() kernel on GPU\nadd<<<1,1>>>(d_a, d_b, d_c);\n// Copy result back to host\ncudaError err = cudaMemcpy(&c, d_c, size, cudaMemcpyDeviceToHost);\n  if(err!=cudaSuccess) {\n      printf("CUDA error copying to Host: %s\n", cudaGetErrorString(err));\n  }\nprintf("result is %d\n",c);\n// Cleanup\ncudaFree(d_a);\ncudaFree(d_b);\ncudaFree(d_c);\nreturn 0;\n}\n'

# Adding in Dataset

In [0]:
# data analysis packages
import pandas as pd
import numpy as np

In [0]:
import os

from google.colab import drive

# Mount Google Drive inside the Colab filesystem, then switch the working
# directory to the dataset folder so the relative CSV paths used by the
# loading cells resolve.
drive.mount('/content/drive/')
os.chdir("/content/drive/My Drive/Dataset")

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [0]:
# Load the complete benign-class CSV from the working directory and show
# its (rows, columns) shape.
full_benign_path = './full_benign_df.csv'
full_benign_df = pd.read_csv(filepath_or_buffer=full_benign_path)

full_benign_df.shape

(555932, 117)

In [0]:
# Load the complete Mirai-class CSV from the working directory and show
# its (rows, columns) shape.
full_mirai_path = './full_mirai_df.csv'
full_mirai_df = pd.read_csv(filepath_or_buffer=full_mirai_path)

full_mirai_df.shape

In [0]:
# Load the complete BASHLITE-class CSV from the working directory and show
# its (rows, columns) shape.
full_bashlite_path = './full_bashlite_df.csv'
full_bashlite_df = pd.read_csv(filepath_or_buffer=full_bashlite_path)

full_bashlite_df.shape

# Sampling and preprocessing the dataset

In [0]:
#Sampling 1% of each of the three classes

sampled_benign_df = full_benign_df.sample(frac =.01)

sampled_bashlite_df = full_mirai_df.sample(frac =.01)

sampled_bashlite_df = full_bashlite_df.sample(frac =.01)

In [0]:
s_bash_df = sampled_bashlite_df.drop(columns=['Unnamed: 0'])
s_mir_df = sampled_bashlite_df.drop(columns=['Unnamed: 0'])
s_ben_df = sampled_benign_df.drop(columns=['Unnamed: 0'])

frames = [s_bash_df, s_mir_df, s_ben_df]
s_combined_df = pd.concat(frames)

In [0]:
# Shuffle every row of the combined frame and renumber the index from 0.
# The fixed random_state makes the row order reproducible on a fresh kernel;
# the original call was unseeded and produced a different order on every run.
s_rand_combined_df = (
    s_combined_df
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

In [0]:
# Display the (rows, columns) shape of the shuffled, combined sample.
s_rand_combined_df.shape

## For use with fewer labels

In [0]:
def _labelled_copy(sampled_df, label):
    """Return a new frame without 'Unnamed: 0' and with a leading 'label' column.

    Parameters
    ----------
    sampled_df : pandas.DataFrame
        One sampled class frame; it is not modified.
    label : int
        Class id written into every row of the new 'label' column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``sampled_df`` whose first column is 'label' and which no
        longer contains 'Unnamed: 0'.
    """
    # `drop` returns a new frame, so the insert below never touches the sampled
    # frame itself. The original cell aliased the sampled frames and inserted
    # in place, which made a second run fail ("already exists") and leaked the
    # 'label' column into the upstream frames.
    labelled = sampled_df.drop(columns=['Unnamed: 0'])
    labelled.insert(0, 'label', label)
    return labelled


# Class ids used for the label column: 0 = benign, 1 = Mirai, 2 = BASHLITE.
s_ben_df = _labelled_copy(sampled_benign_df, 0)
s_mir_df = _labelled_copy(sampled_mirai_df, 1)
s_bash_df = _labelled_copy(sampled_bashlite_df, 2)

frames = [s_bash_df, s_mir_df, s_ben_df]
s_combined_df = pd.concat(frames)

# Shuffle the stacked classes; the seed keeps the order reproducible.
s_rand_combined_df = s_combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

# NOTE(review): the purpose of this constant 'extra' column is not evident from
# the notebook - confirm it is still required by a downstream consumer.
s_rand_combined_df['extra'] = 0

s_rand_combined_df.head(10)

In [0]:
# NOTE(review): `X_train` is not assigned in any cell visible in this notebook,
# so this cell presumably relies on leftover kernel state - confirm or remove.
X_train

array([[1.17817349e+02, 4.44435452e+02, 4.21204966e+04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.65792043e+02, 2.29489487e+02, 5.50011204e+04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [9.52843044e+01, 5.04776729e+02, 2.18738375e+04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [1.00000000e+00, 6.00000000e+01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 6.00000000e+01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.99983393e+00, 4.49001661e+02, 3.99999997e+02, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

## For ProtoNN and Bonsai

In [0]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the shuffled rows for testing. The fixed random_state pins the
# split so the train/test files saved later can be regenerated identically; the
# original call was unseeded and produced a different split on every run.
s_train_combined, s_test_combined = train_test_split(
    s_rand_combined_df, test_size=0.3, random_state=42
)


In [0]:
# Convert the training split to a NumPy array (defaults spelled out) and
# display its dimensions.
s_train_combined_np = s_train_combined.to_numpy(dtype=None, copy=False)
s_train_combined_np.shape

(49438, 117)

In [0]:
# Convert the test split to a NumPy array (defaults spelled out) and
# display its dimensions.
s_test_combined_np = s_test_combined.to_numpy(dtype=None, copy=False)
s_test_combined_np.shape

(21188, 117)

In [0]:
# Write both splits to the working directory; np.save adds the ".npy" suffix
# because the given names do not carry one.
for stem, split_np in (('s_train_combined', s_train_combined_np),
                       ('s_test_combined', s_test_combined_np)):
    np.save(stem, split_np)

In [0]:
# Disabled CSV round-trip: the export is commented out and the re-import code is
# held in a string literal, so this cell only evaluates to that string.
#s_rand_combined_df.to_csv('s_rand_combined_df.csv')
'''
s_rand_combined_df_path = './s_rand_combined_df.csv'

s_rand_combined_df_df = pd.read_csv(s_rand_combined_df_path)

s_rand_combined_df_df
'''

# For SFSVC

## Training

In [0]:
# Pull the 'label' column out of the training split as an independent copy.
training_labels_df = s_train_combined.loc[:, 'label'].copy()

In [0]:
# Training labels as a 1-D NumPy array (defaults spelled out).
training_labels = training_labels_df.to_numpy(dtype=None, copy=False)

In [0]:
# Display the shape of the training-label array.
training_labels.shape

In [0]:
# Turn the label vector into a single row, shape (1, ntr), as the .mat layout
# written later expects. -1 lets NumPy infer ntr; the previous hard-coded 49438
# raised ValueError as soon as the sample size or split ratio changed.
t_labels = training_labels.reshape(1, -1)
t_labels.shape

(1, 49438)

In [0]:
# Cast the training labels to double precision (np.double is np.float64).
t_labels = t_labels.astype(np.float64)
t_labels.shape

(1, 49438)

In [0]:
# Feature frame for training: every column of the split except 'label'.
training_samples_df = s_train_combined.drop('label', axis=1)
#training_samples_df.head()

In [0]:
# Training features as a 2-D NumPy array (defaults spelled out); show its shape.
training_samples = training_samples_df.to_numpy(dtype=None, copy=False)
training_samples.shape

array([[1.00000000e+00, 6.00000000e+01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 6.00000000e+01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.09111394e+02, 4.98315280e+02, 2.44074638e+04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [1.35563697e+02, 4.90941067e+02, 3.23459767e+04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 6.00000000e+01, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.82202264e+02, 2.67696587e+02, 5.94157978e+04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])

In [0]:
# Swap the two axes of the 2-D training array so that rows become columns.
t_samples = training_samples.T
t_samples.shape

(116, 49438)

In [0]:
import scipy.io as sio

# Layout of the training .mat file:
#   [('Samples', (n, ntr), 'double'), ('Labels', (1, ntr), 'double')]
# for example:
#   [('Samples', (256, 7291), 'double'), ('Labels', (1, 7291), 'double')]
# where
#   n   = input vector size
#   m   = num classes
#   ntr = num vectors in training
#   nts = num vectors in testing
sio.savemat(
    file_name='botnet_train.mat',
    mdict={'Samples': t_samples, 'Labels': t_labels},
)

## Testing

In [0]:
# Pull the 'label' column out of the test split as an independent copy.
test_labels_df = s_test_combined.loc[:, 'label'].copy()

In [0]:
# Test labels as a 1-D NumPy array (defaults spelled out).
test_labels = test_labels_df.to_numpy(dtype=None, copy=False)

In [0]:
# Turn the label vector into a single row, shape (1, nts), as the .mat layout
# written later expects. -1 lets NumPy infer nts; the previous hard-coded 21188
# raised ValueError as soon as the sample size or split ratio changed.
te_labels = test_labels.reshape(1, -1)
te_labels.shape

(1, 21188)

In [0]:
# Cast the test labels to double precision (np.double is np.float64).
te_labels = te_labels.astype(np.float64)
te_labels.shape

(1, 21188)

In [0]:
# Feature frame for testing: every column of the split except 'label'.
test_samples_df = s_test_combined.drop('label', axis=1)

In [0]:
# Test features as a 2-D NumPy array (defaults spelled out); show its shape.
test_samples = test_samples_df.to_numpy(dtype=None, copy=False)
test_samples.shape

(21188, 116)

In [0]:
# Swap the two axes of the 2-D test array so that rows become columns.
te_samples = test_samples.T
te_samples.shape

(116, 21188)

In [0]:
# Write the test samples and labels to a MATLAB .mat file under the keys
# 'Samples' and 'Labels'.
sio.savemat(
    file_name='botnet_test.mat',
    mdict={'Samples': te_samples, 'Labels': te_labels},
)