# Demonstrate the Aggregated Prediction Methods

In [155]:
import helper
import aggregated_predict as ap
import pandas as pd
import joblib
import time
import warnings
warnings.simplefilter('ignore')

## 1. Testing each model and demonstrating the aggregated prediction methods

In [156]:
# Fetch the testing data from the project helper: one table for the sensor
# readings and one for the network traffic.
sensor_test, network_test = helper.load_test_set()

# File paths of the serialized models. Nothing is deserialized in this cell;
# the paths are handed to the evaluation helper below.
sensor_models = [
    './received_models/client_1.joblib',
    './received_models/client_2.joblib',
    './received_models/client_3.joblib',
]
network_model = './received_models/global_model.joblib'

# Evaluate the sensor models on the sensor test set and the global model on
# the network test set, keeping both the predictions and the metrics for the
# aggregation steps that follow.
models_predictions, models_metrics = ap.get_predictions_and_metrics(
    sensor_test=sensor_test,
    network_test=network_test,
    local_models=sensor_models,
    global_model=network_model,
)

Sensor Model 1 Prediction Results:
-----------------------------
Accuracy : 0.9948945802161988
Precision: 0.9949071505553502
Recall   : 0.9948945802161988
F1 Score : 0.9948368269221267
-----------------------------

Sensor Model 2 Prediction Results:
-----------------------------
Accuracy : 0.9951157025153479
Precision: 0.9951292350952945
Recall   : 0.9951157025153479
F1 Score : 0.9950620924030881
-----------------------------

Sensor Model 3 Prediction Results:
-----------------------------
Accuracy : 0.9951178810601671
Precision: 0.9951288620267749
Recall   : 0.9951178810601671
F1 Score : 0.99506536079325
-----------------------------

Network Model Prediction Results:
-----------------------------
Accuracy : 0.9921430781095459
Precision: 0.9921319168507041
Recall   : 0.9921430781095459
F1 Score : 0.9920233509476573
-----------------------------


In [157]:
# Load network testing set, set the weights
y_test = network_test.iloc[:, -1]
model_weights = [0.15, 0.15, 0.15, 0.55]

In [158]:
# Combine the per-model predictions into a single prediction using the
# weighted-score method, then report its metrics against the reference labels.
# NOTE(review): in the recorded run these metrics are identical to the Network
# Model's own metrics - check whether the 0.55 weight lets that model decide
# every row on its own.
print("Aggregated Prediction by Score:")
score = ap.aggregate_predict_by_score(
    models_weights=model_weights,
    models_metrics=models_metrics,
    models_predictions=models_predictions,
)
helper.get_metrics(y_test, score, printout=True)

Aggregated Prediction by Score:
-----------------------------
Accuracy : 0.9921430781095459
Precision: 0.9921319168507041
Recall   : 0.9921430781095459
F1 Score : 0.9920233509476573
-----------------------------


(0.9921430781095459,
 0.9921319168507041,
 0.9921430781095459,
 0.9920233509476573)

In [159]:
# Combine the per-model predictions with the voting method (no weights or
# metrics are passed), then report its metrics against the reference labels.
print("Aggregated Prediction by Vote:")
vote = ap.aggregate_predict_by_vote(
    models_predictions=models_predictions,
)
helper.get_metrics(y_test, vote, printout=True)

Aggregated Prediction by Vote:
-----------------------------
Accuracy : 0.9840606768303044
Precision: 0.9843363473211927
Recall   : 0.9840606768303044
F1 Score : 0.9833946776071637
-----------------------------



(0.9840606768303044,
 0.9843363473211927,
 0.9840606768303044,
 0.9833946776071637)

## 2. Simulation of Intrusion Detection in Wireless Sensor Networks

Once the WSN simulation starts, the sensors in the nodes collect new sensing data. These data must first pass validation by the node's local model: data that passes is sent to the server, and data that does not pass is discarded. When the server receives the sensed data, it validates both the sensor data and the network traffic generated by transmitting that data, using the aggregated prediction method to produce the final validation result. If the data passes this validation it is saved; otherwise it is discarded.

In [160]:
# Class balance of the sensor testing set, inspected before drawing a
# balanced sample from it.
sensor_test.loc[:, 'target'].value_counts()

target
1    831450
0     86594
Name: count, dtype: int64

In [161]:
# Draw a class-balanced subset of the testing set: SAMPLES_PER_CLASS rows of
# each label, i.e. 2 * SAMPLES_PER_CLASS = 100,000 rows in total. A fixed seed
# keeps the draw reproducible across runs.
SAMPLES_PER_CLASS = 50_000
SAMPLE_SEED = 42

s0 = sensor_test[sensor_test['target'] == 0].sample(SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)
s1 = sensor_test[sensor_test['target'] == 1].sample(SAMPLES_PER_CLASS, random_state=SAMPLE_SEED)
sampled_sensor_data = pd.concat([s0, s1])

# Take the network rows carrying the same index labels as the sampled sensor
# rows. This must happen before the index is reset below.
# NOTE(review): assumes sensor_test and network_test share an index so that
# equal labels refer to the same event - confirm in helper.load_test_set.
sampled_network_data = network_test.loc[sampled_sensor_data.index]

# Renumber both frames 0..n-1 so later positional results line up with them.
sampled_sensor_data = sampled_sensor_data.reset_index(drop=True)
sampled_network_data = sampled_network_data.reset_index(drop=True)

# Sanity check: both frames should report the same balanced label counts.
print(sampled_sensor_data['target'].value_counts())
print(sampled_network_data['target'].value_counts())

target
0    50000
1    50000
Name: count, dtype: int64
target
0    50000
1    50000
Name: count, dtype: int64


In [162]:
# Use Client 1 as the example node for the simulation.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code; only load model files that come from a trusted source.
client_1_model = joblib.load('./received_models/client_1.joblib')

# Every column except the last is fed to the model; the last column is used
# as the reference label when computing the metrics.
client_1_features = sampled_sensor_data.iloc[:, :-1]
client_1_labels = sampled_sensor_data.iloc[:, -1]

client_1_predict = client_1_model.predict(client_1_features)
helper.get_metrics(client_1_labels, client_1_predict, printout=True)

-----------------------------
Accuracy : 0.97395
Precision: 0.9752162728759824
Recall   : 0.97395
F1 Score : 0.9739326350658382
-----------------------------


(0.97395, 0.9752162728759824, 0.97395, 0.9739326350658382)

In [163]:
# Wrap the predictions in a one-column frame named 'Pred'. The name
# `client_1_predict` is rebound to this frame; the local-filtering step that
# indexes client_1_predict['Pred'] relies on this form.
client_1_predict = pd.DataFrame(data=client_1_predict, columns=['Pred'])

# How many rows were predicted as each label.
pred_column = client_1_predict['Pred']
pred_column.value_counts()

Pred
1    52581
0    47419
Name: count, dtype: int64

In [164]:
# Local validation on the node: Client 1 only forwards the rows its own model
# predicts as 0. Rows predicted as 1 are treated as false data and dropped.
# The filter acts on the prediction column, not on the true `target` label.
passed_local_check = client_1_predict['Pred'] == 0
rows_to_keep = client_1_predict[passed_local_check]
kept_index = rows_to_keep.index

# Apply the same row selection to the sensor data and its network traffic.
filtered_sensor_data = sampled_sensor_data.loc[kept_index]
filtered_network_data = sampled_network_data.loc[kept_index]

# True label counts of what was forwarded, for both frames.
for forwarded in (filtered_sensor_data, filtered_network_data):
    print(forwarded['target'].value_counts())

target
0    47407
1       12
Name: count, dtype: int64
target
0    47407
1       12
Name: count, dtype: int64


In [165]:
# Server side: run every model (the three sensor models and the global
# network model) on the rows that Client 1 forwarded, keeping predictions and
# metrics for the aggregation steps below.
new_models_predictions, new_models_metrics = ap.get_predictions_and_metrics(
    sensor_test=filtered_sensor_data,
    network_test=filtered_network_data,
    local_models=sensor_models,
    global_model=network_model,
)

Sensor Model 1 Prediction Results:
-----------------------------
Accuracy : 0.9997469368818406
Precision: 0.9994939378046229
Recall   : 0.9997469368818406
F1 Score : 0.9996204213350223
-----------------------------

Sensor Model 2 Prediction Results:
-----------------------------
Accuracy : 0.9701596406503722
Precision: 0.9997271538872711
Recall   : 0.9701596406503722
F1 Score : 0.9846049143025739
-----------------------------

Sensor Model 3 Prediction Results:
-----------------------------
Accuracy : 0.9713827790548093
Precision: 0.9997053745329233
Recall   : 0.9713827790548093
F1 Score : 0.9852348615698598
-----------------------------

Network Model Prediction Results:
-----------------------------
Accuracy : 0.9229422805204665
Precision: 0.9997477652390195
Recall   : 0.9229422805204665
F1 Score : 0.9596753619144258
-----------------------------


In [168]:
# Weights for aggregating the predictions on Client 1's forwarded data.
# This rebinds `model_weights`, replacing the weights used in section 1, so
# re-running the section-1 aggregation cell after this one uses these values.
# NOTE(review): presumably ordered as sensor models 1-3 followed by the
# network model - confirm against aggregated_predict.
model_weights = [0.3, 0.05, 0.05, 0.6]

# Reference labels for the forwarded rows: the last column of the filtered
# network data (presumably the `target` column - TODO confirm).
new_y_test = filtered_network_data.iloc[:, -1]

In [169]:
# Time the score-based aggregation on Client 1's forwarded data and report
# its metrics together with the total and per-row latency.
print("Client 1 Aggregated Prediction by Score:")

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals. time.time() is a wall clock that can be coarse
# and can jump, which distorts a millisecond-scale measurement like this one.
start_time = time.perf_counter()

new_score = ap.aggregate_predict_by_score(
    models_predictions=new_models_predictions,
    models_metrics=new_models_metrics,
    models_weights=model_weights)

end_time = time.perf_counter()
rows = len(new_models_predictions[0])
time_spend = end_time - start_time
# Avoid a ZeroDivisionError if no rows were forwarded by the local check.
average_time = time_spend / rows if rows else float('nan')

helper.get_metrics(new_y_test, new_score, printout=True)
print(f"Time spent predicting {rows} pieces of data: {time_spend} second, average time spent: {average_time} seconds.")

Client 1 Aggregated Prediction by Score:
-----------------------------
Accuracy : 0.9229422805204665
Precision: 0.9997477652390195
Recall   : 0.9229422805204665
F1 Score : 0.9596753619144258
-----------------------------


Time spent predicting 47419 pieces of data: 0.0009996891021728516 second, average time spent: 2.10820367821517e-08 seconds.


In [170]:
# Time the vote-based aggregation on Client 1's forwarded data and report its
# metrics together with the total and per-row latency.
print("Client 1 Aggregated Prediction by Vote:")

# Count the rows here instead of reusing a value left behind by another cell,
# so this cell still works when run on its own.
rows = len(new_models_predictions[0])

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short intervals; time.time() is a wall clock that can be coarse
# and can jump.
start_time = time.perf_counter()

new_vote = ap.aggregate_predict_by_vote(models_predictions=new_models_predictions)

end_time = time.perf_counter()
time_spend = end_time - start_time
# Avoid a ZeroDivisionError if no rows were forwarded by the local check.
average_time = time_spend / rows if rows else float('nan')

helper.get_metrics(new_y_test, new_vote, printout=True)
print(f"Time spent predicting {rows} pieces of data: {time_spend} second, average time spent: {average_time} seconds.")

Client 1 Aggregated Prediction by Vote:
-----------------------------
Accuracy : 0.8758303633564605
Precision: 0.9997474515864877
Recall   : 0.8758303633564605
F1 Score : 0.9335523637657995
-----------------------------


Time spent predicting 47419 pieces of data: 0.028255939483642578 second, average time spent: 5.958780126877956e-07 seconds.
