# CyberBrain: Cybersecurity in BCI for Advanced Driver Assistance
## Milestone MS3: Framework to detect and measure the cyberattacks impact.
#### University of Murcia, Spain

In [46]:
import pickle5 as pickle
import mne
import numpy as np
import pandas as pd

## Data acquisition

In [121]:
!wget -O dataset/p300-umu https://univmurcia-my.sharepoint.com/:u:/g/personal/enriquetomas_um_es/EUqf9NlxBC9HudvMJ4FUvJMBcU4ngGDug5bobNka_p9FwQ?e=Rx1lc3

zsh:1: no matches found: https://univmurcia-my.sharepoint.com/:u:/g/personal/enriquetomas_um_es/EUqf9NlxBC9HudvMJ4FUvJMBcU4ngGDug5bobNka_p9FwQ?e=Rx1lc3


In [87]:
dataset="dataset/p300-umu"

In [88]:

with open(dataset, 'rb') as file:
    data = pickle.load(file)

## Data processing

In [89]:
subjects = {}

for i, d in enumerate(data):
    subjects[f'Subject {i}'] = d

In [90]:
subjects

{'Subject 0': <EpochsArray |  3982 events (all good), 0 - 0.585938 sec, baseline off, ~37.0 MB, data loaded,
  'neg': 3413
  'pos': 569>,
 'Subject 1': <EpochsArray |  3916 events (all good), 0 - 0.585938 sec, baseline off, ~36.4 MB, data loaded,
  'neg': 3362
  'pos': 554>,
 'Subject 2': <EpochsArray |  2053 events (all good), 0 - 0.585938 sec, baseline off, ~19.1 MB, data loaded,
  'neg': 1760
  'pos': 293>,
 'Subject 3': <EpochsArray |  6516 events (all good), 0 - 0.585938 sec, baseline off, ~60.5 MB, data loaded,
  'neg': 5589
  'pos': 927>,
 'Subject 4': <EpochsArray |  3396 events (all good), 0 - 0.585938 sec, baseline off, ~31.5 MB, data loaded,
  'neg': 2912
  'pos': 484>,
 'Subject 5': <EpochsArray |  3975 events (all good), 0 - 0.585938 sec, baseline off, ~36.9 MB, data loaded,
  'neg': 3404
  'pos': 571>,
 'Subject 6': <EpochsArray |  1163 events (all good), 0 - 0.585938 sec, baseline off, ~10.8 MB, data loaded,
  'neg': 871
  'pos': 292>,
 'Subject 7': <EpochsArray |  1174 

In [103]:
target = subjects['Subject 1']['pos']
nonTarget = subjects['Subject 1']['neg']

In [104]:
target_data = subjects['Subject 1']['pos'].get_data()
nontarget_data = subjects['Subject 1']['neg'].get_data()


In [105]:
nontarget_data = nontarget_data[:593][:][:]

In [106]:
X, Y = [], []

X = np.concatenate([target_data, nontarget_data])
Y = np.concatenate([np.ones(target_data.shape[0]), np.zeros(nontarget_data.shape[0])])

In [95]:
Y.shape

(1147,)

## Data classification

In [64]:
from sklearn.model_selection import train_test_split

# Split dataset in training and testing
X_train, X_test, y_train, y_test = train_test_split(X,Y, test_size=0.10, random_state=42,shuffle=True)

In [65]:
X_train.shape, X_test.shape

((1032, 16, 76), (115, 16, 76))

In [66]:
y_train.shape, y_test.shape

((1032,), (115,))

In [67]:
X.shape, Y.shape

((1147, 16, 76), (1147,))

In [96]:
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from mne.decoding import Vectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report

# Instancia validador cruzado
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Instancia clasificador
clf = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression(solver='liblinear', C=1, class_weight="balanced"))

# Proceso de validación cruzada
preds = np.empty(len(Y))
for train, test in cv.split(X, Y):
    clf.fit(X[train], Y[train]) # ajustar
    preds[test] = clf.predict(X[test]) #

# Info del proceso
target_names = ['NoTarget', 'Target']
report = classification_report(Y, preds, target_names=target_names)
print(report)

              precision    recall  f1-score   support

    NoTarget       0.78      0.77      0.78       593
      Target       0.76      0.76      0.76       554

    accuracy                           0.77      1147
   macro avg       0.77      0.77      0.77      1147
weighted avg       0.77      0.77      0.77      1147



In [144]:
from sklearn.model_selection import GridSearchCV

cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

param_grid = {'C': [0.1,1,10], 'gamma': [0.1,0.01],'kernel': ['rbf', 'linear']}
#clf = SVC(kernel='linear', C=1, class_weight = "balanced")
svc = SVC()
clf = GridSearchCV(svc, param_grid)

nsamples, nx, ny = X.shape
d2_train_dataset = X.reshape((nsamples,nx*ny))

# Proceso de validación cruzada
preds = np.empty(len(Y))
for train, test in cv.split(d2_train_dataset, Y):
    clf.fit(d2_train_dataset[train], Y[train]) # ajustar
    preds[test] = clf.predict(d2_train_dataset[test]) #

# Info del proceso
target_names = ['NoTarget', 'Target']
report = classification_report(Y, preds, target_names=target_names)
print(report)

KeyboardInterrupt: 

In [141]:
from sklearn.ensemble import RandomForestClassifier

cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

clf = RandomForestClassifier(max_depth=2, random_state=0)

nsamples, nx, ny = X.shape
d2_train_dataset = X.reshape((nsamples,nx*ny))

# Proceso de validación cruzada
preds = np.empty(len(Y))
for train, test in cv.split(d2_train_dataset, Y):
    clf.fit(d2_train_dataset[train], Y[train]) # ajustar
    preds[test] = clf.predict(d2_train_dataset[test]) #

# Info del proceso
target_names = ['NoTarget', 'Target']
report = classification_report(Y, preds, target_names=target_names)
print(report)

              precision    recall  f1-score   support

    NoTarget       0.69      0.81      0.74       593
      Target       0.75      0.60      0.67       554

    accuracy                           0.71      1147
   macro avg       0.72      0.71      0.71      1147
weighted avg       0.72      0.71      0.71      1147



In [16]:
contamination_factor=0.05

In [21]:
from sklearn.model_selection import train_test_split

# Split dataset in training and testing
X_train, X_test = train_test_split(X, test_size=0.10, random_state=42,shuffle=True)

In [22]:
from pyod.models.ocsvm import OCSVM
clf = OCSVM(kernel='rbf',gamma=0.0001, nu=0.3, contamination=contamination_factor)

nsamples, nx, ny = X_train.shape
d2_train_dataset = X_train.reshape((nsamples,nx*ny))

nsamples, nx, ny = X_test.shape
d2_test_dataset = X_test.reshape((nsamples,nx*ny))

# Model training
clf.fit(d2_train_dataset)

OCSVM(cache_size=200, coef0=0.0, contamination=0.05, degree=3, gamma=0.0001,
   kernel='rbf', max_iter=-1, nu=0.3, shrinking=True, tol=0.001,
   verbose=False)

In [23]:
pred=clf.predict(d2_test_dataset)
unique_elements, counts_elements = np.unique(pred, return_counts=True)
print("\t",unique_elements,"    ",counts_elements)

	 [0 1]      [109   6]


## Noise-based cyberattacks

In [144]:
from matplotlib import pyplot as plt

# max_noise = 0.000000001 # imperceptible a nivel de gráfica
max_noise = 60 # perceptible a nivel de gráfica
# Se crean muestras parametrizadas de una distribución normal (gaussiana) para generar ruido en la señal
noise = np.random.normal(0.0, max_noise, X.shape)

print (noise[:50]) # muestra con los primeros 50 datos

[[[ 1.81731200e+01  5.44955718e+01  2.72337507e+01 ...  1.86841469e+01
   -7.10882139e+01 -8.66169560e+01]
  [ 2.51466269e+01  9.63286207e+01 -3.66448350e+00 ...  7.03541437e+01
   -5.12753231e+00 -7.95808717e+01]
  [ 2.43983599e+01 -5.51822047e+01 -8.19190552e+01 ...  1.78891129e+01
    7.57618551e+01 -4.96923094e+00]
  ...
  [-4.09839862e+01  6.33002176e+01 -3.43444104e+01 ...  4.12098019e+01
   -3.83263387e+01 -2.00239200e+01]
  [ 2.30350285e+01 -9.48192259e+01  2.91118209e+01 ... -5.87915279e+01
   -4.14197317e+00  1.40609370e+01]
  [ 2.95113720e+01  4.89443953e+01  6.83105049e+01 ...  2.04240405e+01
    4.75163489e+01  1.32228019e+01]]

 [[-6.25911852e+01 -2.76218302e+01 -2.25274519e+01 ...  6.91964078e+01
   -8.90847251e+01  2.50608657e+00]
  [-1.27710442e+02  6.84338961e+00 -1.17924409e+02 ...  5.22719766e+01
   -2.91328037e+01  7.91087344e+01]
  [ 7.46230934e+01  7.86592785e+00  3.02742156e+01 ...  6.19403816e+01
   -6.26506693e+00 -1.16781802e+01]
  ...
  [-4.90217240e+01  1.4

In [98]:
X_test_old = X_test

In [146]:
X_noise = X + noise

In [115]:
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from mne.decoding import Vectorizer
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import classification_report

# Instancia validador cruzado
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Instancia clasificador
#clf = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression(solver='liblinear', C=1, class_weight="balanced"))

# Proceso de validación cruzada
preds = np.empty(len(Y))
for train, test in cv.split(X_noise, Y):
    #clf.fit(X[train], Y[train]) # ajustar
    preds[test] = clf.predict(X_noise[test]) #

# Info del proceso
target_names = ['NoTarget', 'Target']
report = classification_report(Y, preds, target_names=target_names)
print(report)

              precision    recall  f1-score   support

    NoTarget       0.54      0.53      0.54       593
      Target       0.50      0.51      0.50       554

    accuracy                           0.52      1147
   macro avg       0.52      0.52      0.52      1147
weighted avg       0.52      0.52      0.52      1147



In [124]:
X[:50]

array([[[-0.00607122, -0.00792563, -0.00626351, ..., -0.09586221,
         -0.04329529, -0.01368828],
        [ 0.40667981,  0.45346471,  0.59110476, ...,  0.63501148,
          0.57190432,  0.47600092],
        [-0.7141134 , -0.93781709, -0.98571197, ..., -0.01788325,
         -0.07345043, -0.31417595],
        ...,
        [-0.6225276 , -0.76264099, -0.93415827, ..., -0.39480391,
         -0.4759363 , -0.52571045],
        [-0.1144651 , -0.21776891, -0.27530452, ...,  0.35140523,
          0.21258341,  0.06141631],
        [-0.35945459, -0.18271518, -0.05851307, ..., -0.29697577,
         -0.47845268, -0.51292607]],

       [[ 0.00814517, -0.13171996, -0.23511876, ...,  0.29541404,
          0.25491735,  0.17224839],
        [-0.46279415, -0.62823862, -0.72327141, ..., -0.10169246,
         -0.14676999, -0.25234123],
        [-0.19118119, -0.43835464, -0.56069431, ...,  0.67634258,
          0.51980291,  0.22665177],
        ...,
        [ 0.35911066,  0.17996659,  0.1358882 , ...,  

In [125]:
X_noise[:50]

array([[[  0.99322572,  29.41391119,  40.0538963 , ...,  -1.13174455,
         -14.74042131, -17.27461733],
        [ 30.44626024,  15.26523495,  -6.92387071, ..., -22.98161313,
          10.96040373, -13.57461064],
        [ -5.75866532, -63.51008456,  10.90519738, ..., -37.10770197,
          14.07636644, -20.0204995 ],
        ...,
        [ 13.21271989, -18.87152391, -14.34468651, ...,  11.00117338,
          44.50952519,  16.58201228],
        [ 59.75125429,   9.63126468,  50.85311742, ...,   9.52959037,
          -8.40122145, -26.82614221],
        [-18.41776841,  -2.07672118, -12.46561235, ...,  -1.01279197,
          41.08576033,   0.27096004]],

       [[-54.34760409, -30.72704097,   4.62914014, ..., -19.90393964,
         -10.14599531, -25.44696485],
        [  3.21444666,   4.86307453,  17.64397366, ..., -12.68265389,
          19.94083672, -19.427678  ],
        [-17.71001233, -23.52226437,  31.30115137, ...,  37.12258926,
           0.15328172, -14.8399654 ],
        ...,


In [149]:
from pyod.models.ocsvm import OCSVM
from pyod.models.iforest import IForest

#clf = OCSVM(kernel='rbf',gamma=0.0001, nu=0.3, contamination=contamination_factor)
clf = IForest(random_state=42, contamination=contamination_factor)
nsamples, nx, ny = X.shape
d2_train_dataset = X.reshape((nsamples,nx*ny))

X_train_clean, X_test_clean = train_test_split(d2_train_dataset, test_size=0.10, random_state=42, shuffle=False)

nsamples, nx, ny = X.shape
d2_test_noise = X_noise.reshape((nsamples,nx*ny))

X_train_noise, X_test_noise = train_test_split(d2_test_noise, test_size=0.10, random_state=42, shuffle=False)

X_train = np.concatenate([X_train_clean, X_train_noise])
X_test = np.concatenate([X_test_clean,X_test_noise])

# Model training
clf.fit(X_train)

IForest(behaviour='old', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=1,
    random_state=42, verbose=0)

In [152]:
pred=clf.predict(X_test_clean)
unique_elements, counts_elements = np.unique(pred, return_counts=True)
print("\t",unique_elements,"    ",counts_elements)

	 [0]      [115]


In [154]:
pred=clf.predict(X_test)
unique_elements, counts_elements = np.unique(pred, return_counts=True)
print("\t",unique_elements,"    ",counts_elements)

	 [0 1]      [211  19]
