In [None]:
!pip install adversarial-robustness-toolbox

Collecting adversarial-robustness-toolbox
  Downloading adversarial_robustness_toolbox-1.18.2-py3-none-any.whl.metadata (11 kB)
Downloading adversarial_robustness_toolbox-1.18.2-py3-none-any.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: adversarial-robustness-toolbox
Successfully installed adversarial-robustness-toolbox-1.18.2


In [None]:
# Import libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from itertools import combinations
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import roc_curve,auc
from sklearn.metrics import confusion_matrix,classification_report,ConfusionMatrixDisplay
import tarfile
from pandas.errors import EmptyDataError
import time
from sklearn.model_selection import GridSearchCV


In [None]:
# Synthetic per-device-class captures; each one is tagged with its class name.
audio = pd.read_csv('synthetic_IoT_audio.csv').assign(label='Audio')
camera = pd.read_csv('synthetic_IoT_camera.csv').assign(label='Camera')
ha = pd.read_csv('synthetic_IoT_home_automation.csv').assign(label='Home Automation')

# Stack the three synthetic frames row-wise and discard the first column
# (presumably the index column written by a previous to_csv - TODO confirm).
train = pd.concat([audio, camera, ha]).iloc[:, 1:]

# Real profiling data: its train split is used later as the membership-inference
# "member" candidates and its test split as the "non-member" candidates.
attack = pd.read_csv('Combined_IoT_profiling_dataset_2022_train_set.csv').iloc[:, 1:]
test = pd.read_csv('Combined_IoT_profiling_dataset_2022_test_set.csv').iloc[:, 1:]

In [None]:
# Turn +/- infinity into NaN and then drop every row that contains a NaN.
# All three frames are cleaned in place, one after the other.
for frame in (train, test, attack):
    frame.replace([np.inf, -np.inf], np.nan, inplace=True)
    frame.dropna(inplace=True)

In [None]:
# Remove fully duplicated rows from each frame (in place).
for frame in (train, attack, test):
    frame.drop_duplicates(inplace=True)

In [None]:
# Columns removed from the real (attack / test) frames before modelling.
# The variable name suggests these were identified as correlated features
# - TODO confirm. Each column is listed exactly once.
corr_cols = [
    'L7_https', 'most_freq_sport', 'pck_size', 'cnt', 'most_freq_dport',
    'most_freq_prot', 'min_et', 'max_et', 'med_et', 'average_et', 'q3', 'q1',
    'min_e', 'max_e', 'med', 'average', 'q3_e', 'q1_e',
    'time_since_previously_displayed_frame', 'port_class_src', 'port_class_dst',
    'total_length', 'ip_dst_new', 'L4_tcp', 'NTP_count', 'most_freq_d_ip',
    'sum_e', 'skew_e', 'iqr', 'var', 'iqr_e', 'L4_udp',
]

# `train` is intentionally left untouched here.
# NOTE(review): presumably the synthetic CSVs were generated without these
# columns already - the shape checks that follow are the place to verify it.
attack = attack.drop(columns=corr_cols)
test = test.drop(columns=corr_cols)

In [None]:
# Sanity check: (rows, columns) of the real train-split frame after cleaning and column removal.
attack.shape

(183959, 17)

In [None]:
# Sanity check: (rows, columns) of the synthetic training frame; the column count
# should match the attack/test frames, since they are later fed to the same model.
train.shape

(184835, 17)

In [None]:
# Sanity check: (rows, columns) of the real test-split frame after cleaning and column removal.
test.shape

(46184, 17)

In [None]:
# Work on a copy and collapse every label that mentions a device class to the
# bare class name. The classes are applied in a fixed order (Camera, Audio,
# Home Automation), so a label matching several of them keeps the first hit.
train_df = train.copy()
for device_class in ('Camera', 'Audio', 'Home Automation'):
    mask = train_df['label'].str.contains(device_class)
    train_df.loc[mask, 'label'] = device_class

In [None]:
# Work on a copy and collapse every label that mentions a device class to the
# bare class name. The classes are applied in a fixed order (Camera, Audio,
# Home Automation), so a label matching several of them keeps the first hit.
test_df = test.copy()
for device_class in ('Camera', 'Audio', 'Home Automation'):
    mask = test_df['label'].str.contains(device_class)
    test_df.loc[mask, 'label'] = device_class

In [None]:
# Work on a copy and collapse every label that mentions a device class to the
# bare class name. The classes are applied in a fixed order (Camera, Audio,
# Home Automation), so a label matching several of them keeps the first hit.
attack_df = attack.copy()
for device_class in ('Camera', 'Audio', 'Home Automation'):
    mask = attack_df['label'].str.contains(device_class)
    attack_df.loc[mask, 'label'] = device_class

In [None]:
# Learn the string -> integer mapping from the training labels, then replace
# the label column with the integer codes.
label_encoder = LabelEncoder().fit(train_df['label'])
train_df['label'] = label_encoder.transform(train_df['label'])

# Show the distinct encoded classes.
train_df['label'].unique()

array([0, 1, 2])

In [None]:
# Rows per encoded class in the training frame - shows how imbalanced the data is
# before the undersampling step later in the notebook.
train_df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,153937
2,15864
0,15034


In [None]:
# Encode against the fixed, explicit class list instead of whatever labels this
# frame happens to contain. LabelEncoder sorts its classes, so this yields
# Audio=0, Camera=1, Home Automation=2 regardless of which classes are present,
# keeping the integer codes comparable across frames. `transform` raises a
# ValueError on any label outside the list instead of silently assigning it a
# new code.
label_encoder = LabelEncoder().fit(['Audio', 'Camera', 'Home Automation'])
test_df['label'] = label_encoder.transform(test_df['label'])

# Show the distinct encoded classes.
test_df['label'].unique()

array([0, 2, 1])

In [None]:
# Encode against the fixed, explicit class list instead of whatever labels this
# frame happens to contain. LabelEncoder sorts its classes, so this yields
# Audio=0, Camera=1, Home Automation=2 regardless of which classes are present,
# keeping the integer codes comparable across frames. `transform` raises a
# ValueError on any label outside the list instead of silently assigning it a
# new code.
label_encoder = LabelEncoder().fit(['Audio', 'Camera', 'Home Automation'])
attack_df['label'] = label_encoder.transform(attack_df['label'])

# Show the distinct encoded classes.
attack_df['label'].unique()

array([0, 2, 1])

In [None]:

def _split_xy(frame):
    """Return (features, integer target) for a frame holding a 'label' column."""
    return frame.drop(columns='label'), frame['label'].astype('int')


# NOTE(review): the model is later fitted on `.values`, so column *order* matters;
# nothing here checks that the three frames share the same columns in the same
# order - worth verifying.
train_X, train_y = _split_xy(train_df)
test_X, test_y = _split_xy(test_df)
attack_X, attack_y = _split_xy(attack_df)

In [None]:
# Cast the real-data feature frames to integers.
# NOTE(review): `train_X` is not cast here, so any fractional feature values are
# truncated for test/attack only - confirm this asymmetry is intended.
test_X, attack_X = (frame.astype('int') for frame in (test_X, attack_X))

In [None]:
# Balance the classes in each dataset by random undersampling.
# The same sampler object (fixed seed) is applied to train, test and attack, in that order.
rus = RandomUnderSampler(random_state=0)
(x_rus, y_rus), (x_test, y_test), (x_attack, y_attack) = (
    rus.fit_resample(features, target)
    for features, target in (
        (train_X, train_y),
        (test_X, test_y),
        (attack_X, attack_y),
    )
)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from art.estimators.classification.scikitlearn import ScikitlearnRandomForestClassifier

# Hold out 20% of the balanced synthetic data for a quick accuracy check.
# The split is stratified on the label and seeded, so the reported accuracy and
# the selected hyper-parameters are reproducible across Restart & Run All.
# (`train_test_split` and `GridSearchCV` come from the import cell at the top.)
X_train, X_test, Y_train, Y_test = train_test_split(
    x_rus, y_rus, stratify=y_rus, test_size=0.2, random_state=42
)

# Small grid: split criterion and tree depth, with a fixed forest size.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'n_estimators': [200],
    'max_depth': [2, 4],
}
rf = RandomForestClassifier(random_state=42)

# 5-fold cross-validated grid search scored on accuracy. The model is fitted on
# bare NumPy arrays, so it carries no feature names.
model = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy')
model.fit(X_train.values, Y_train)
print("Best parameters of Random Forest: ", model.best_params_)

# Wrap the best estimator so ART attacks can query it.
art_classifier = ScikitlearnRandomForestClassifier(model.best_estimator_)

print('Base model accuracy: ', model.best_estimator_.score(X_test.values, Y_test))

  _data = np.array(data, dtype=dtype, copy=copy,


Best parameters of Random Forest:  {'criterion': 'entropy', 'max_depth': 4, 'n_estimators': 200}
Base model accuracy:  0.8711894468462477


In [None]:
from art.attacks.inference.membership_inference import MembershipInferenceBlackBoxRuleBased

# NOTE: this rebinds `attack`, which earlier cells used for the real train-split
# DataFrame; re-running those earlier cells after this one requires reloading it.
attack = MembershipInferenceBlackBoxRuleBased(art_classifier)

# Infer membership (1 = judged "member", 0 = "non-member") for both candidate sets.
# Per the ART documentation, the rule-based attack flags a sample as a member
# when the classifier predicts its label correctly.
# NumPy arrays are passed because the model was fitted on `.values` (no feature
# names) and ART's `infer` is documented to take ndarrays.
inferred_train = attack.infer(x_attack.to_numpy(), y_attack.to_numpy())
inferred_test = attack.infer(x_test.to_numpy(), y_test.to_numpy())

# Attack accuracy: fraction of member candidates flagged as members, fraction of
# non-member candidates flagged as non-members, and their size-weighted average.
train_acc = np.mean(inferred_train)
test_acc = 1 - np.mean(inferred_test)
acc = (train_acc * len(inferred_train) + test_acc * len(inferred_test)) / (len(inferred_train) + len(inferred_test))
print(f"Members Accuracy: {train_acc:.4f}")
print(f"Non-members Accuracy: {test_acc:.4f}")
print(f"Overall Attack Accuracy: {acc:.4f}")


Members Accuracy: 0.8378
