# Setup

Initial module setup.

In [40]:
import typing
import time
import statistics
import numpy.typing as np_types
import pandas as pd

from concrete.ml.sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from eeg_auth_models_framework import data, pre_process, features, training, model, processor
from eeg_auth_models_framework.utils import conversion

# Constants

In [41]:
# Sampling rate handed to the MNE data-frame converter (Hz, per the constant's name).
DATASET_SAMPLE_FREQ_HZ = 200
# Channel names handed to the MNE data-frame converter.
DATA_CHANNEL_NAMES = ['T7','F8','Cz','P4']
# Frequency bands given to the band-pass filter step.
# NOTE(review): bounds are presumably in Hz, and a None bound presumably means
# "no limit on that side" (so 'Raw' would be unfiltered) - confirm against
# pre_process.FrequencyBand.
FREQUENCIES = [
    pre_process.FrequencyBand(lower=8.0, upper=12.0, label='Alpha'),
    pre_process.FrequencyBand(lower=12.0, upper=35.0, label='Beta'),
    pre_process.FrequencyBand(lower=4.0, upper=8.0, label='Theta'),
    pre_process.FrequencyBand(lower=35.0, upper=None, label='Gamma'),
    pre_process.FrequencyBand(lower=None, upper=None, label='Raw'),
]
# Arguments of the data windowing step.
# NOTE(review): presumably WINDOW_SIZE is in samples (1200 samples = 6 s at 200 Hz)
# and WINDOW_OVERLAP is a fraction of the window - confirm against DataWindowStep.
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0.5
# Value passed to the model builder's train() call.
K_FOLDS = 10
# Seed used as the classifier's random_state.
RANDOM_SEED = 42

# Model Builder Configuration

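Define the model builder, which specifies how the classifier is created, trained, and scored. The data source, data reading method, data labelling method, and training process are configured in the sections that follow.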

In [42]:
class ARSVMBuilder(model.ModelBuilder[LinearSVC]):
    """Model builder whose classifier is the Concrete ML ``LinearSVC``."""

    def create_classifier(self) -> LinearSVC:
        """Return a new, unfitted ``LinearSVC`` seeded with ``RANDOM_SEED``."""
        svc_settings = {
            'random_state': RANDOM_SEED,
            'dual': True,
            'max_iter': 2000,
        }
        return LinearSVC(**svc_settings)

    def train_classifier(self, classifier: LinearSVC, x_data: np_types.ArrayLike, y_data: np_types.ArrayLike):
        """Fit ``classifier`` on the given samples (``x_data``) and labels (``y_data``)."""
        classifier.fit(x_data, y_data)

    def score_classifier(self, classifier: LinearSVC, x_data: np_types.ArrayLike, y_data: np_types.ArrayLike) -> float:
        """Return the value of ``classifier.score`` for the given samples and labels."""
        score = classifier.score(x_data, y_data)
        return score

# Configuration

In [43]:
# Dataset access and labelling components later handed to the model builder.
downloader, reader = data.AuditoryDataDownloader(), data.AuditoryDataReader()
labeller = training.SubjectDataLabeller()

# Converter used by the band-pass filter step.
converter_settings = {
    'channels': DATA_CHANNEL_NAMES,
    'sample_frequency': DATASET_SAMPLE_FREQ_HZ,
}
converter = conversion.MNEDataFrameConverter(**converter_settings)

# Data Processing

## Pre-Processing Steps

Define the pre-processing steps to be used by the model.

In [44]:
# Step 1: band-pass filtering over the configured frequency bands.
bandpass_step = pre_process.EEGBandpassFilterStep(FREQUENCIES, converter)
# Step 2: windowing with the configured window size and overlap.
windowing_step = pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)

pre_process_steps = pre_process.PreProcessingPipeline([bandpass_step, windowing_step])

## Feature Extraction Steps

Define feature extraction steps to be applied to the pre-processed data.

In [45]:
# Single extraction step: the AR feature extractor configured with 25 lags.
ar_extractor = features.ARFeatureExtractor({'lags': 25})
feature_extraction_steps = features.FeatureExtractPipeline([ar_extractor])

## Data Processor

In [46]:
# Bundle the pre-processing and feature extraction pipelines into one processor.
processor_stages = {
    'pre_process': pre_process_steps,
    'feature_extraction': feature_extraction_steps,
}
data_processor = processor.DataProcessor(**processor_stages)

# Training

Execute training of authentication models.

In [47]:
# Wire the configured components into the builder, then train with K_FOLDS.
builder_components = {
    'data_downloader': downloader,
    'data_reader': reader,
    'data_labeller': labeller,
    'data_processor': data_processor,
}
ar_svm_builder = ARSVMBuilder(**builder_components)
results = ar_svm_builder.train(K_FOLDS)

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

In [48]:
# One row per subject: identifier, average score, and training duration.
training_stats = results.training_statistics
subject_score_rows = [
    [
        subject_id,
        training_stats[subject_id].average_score,
        training_stats[subject_id].training_duration,
    ]
    for subject_id in training_stats
]
score_columns = ['Subject', 'Average Score', 'Training Duration']
subject_scores = pd.DataFrame(subject_score_rows, columns=score_columns)
subject_scores

Unnamed: 0,Subject,Average Score,Training Duration
0,S17,0.548148,7.531289
1,S12,0.751019,5.784782
2,S18,0.768519,5.069022
3,S02,0.66483,5.912776
4,S01,0.627099,6.547088
5,S05,0.662253,6.622164
6,S07,0.816466,7.083066
7,S19,0.897222,2.886192
8,S08,0.728889,6.067623
9,S20,0.711358,7.218823


In [49]:
# One row per subject: identifier, average false accept rate, average false reject rate.
rate_stats = results.training_statistics
subject_rates_rows = [
    [
        subject_id,
        rate_stats[subject_id].average_false_accept_rate,
        rate_stats[subject_id].average_false_reject_rate,
    ]
    for subject_id in rate_stats
]
rate_columns = ['Subject', 'Average FAR', 'Average FRR']
subject_rates = pd.DataFrame(subject_rates_rows, columns=rate_columns)
subject_rates

Unnamed: 0,Subject,Average FAR,Average FRR
0,S17,0.431869,0.85
1,S12,0.226777,0.7
2,S18,0.222659,0.4
3,S02,0.327837,0.483333
4,S01,0.359706,0.65
5,S05,0.322937,0.519048
6,S07,0.146104,0.925
7,S19,0.102837,0.1
8,S08,0.260509,0.5
9,S20,0.259245,0.866667


In [50]:
# Single-row summary of the global training statistics.
# NOTE(review): in the recorded output the global FAR (~0.58) and FRR (~0.30) look
# swapped relative to the per-subject table shown earlier (FAR ~0.10-0.43, FRR mostly
# 0.4-0.93). That table may only show a subset of subjects, so confirm whether
# `global_average_far_rate` / `global_average_frr_rate` are labelled correctly.
global_columns = ['Global Average Score', 'Global Average FAR', 'Global Average FRR', 'Global Average Time']
global_values = [
    results.global_average_score,
    results.global_average_far_rate,
    results.global_average_frr_rate,
    results.global_average_time,
]
global_stats = pd.DataFrame([global_values], columns=global_columns)
global_stats

Unnamed: 0,Global Average Score,Global Average FAR,Global Average FRR,Global Average Time
0,0.689282,0.581369,0.296784,6.186456


# Simulated Execution

In [None]:
T = typing.TypeVar("T")


def slice_by_percentage(content: typing.List[T], percentage: float) -> typing.List[T]:
    """Return the leading ``percentage`` percent of ``content``.

    The number of items kept is ``len(content) * percentage / 100`` truncated
    towards zero. ``percentage`` is clamped to the range [0, 100]: values above
    100 return everything (as slicing already did), and negative values return
    an empty slice instead of being interpreted as a negative slice bound,
    which would have dropped items from the end.

    Args:
        content: Sliceable sequence to take the leading items from.
        percentage: Share of ``content`` to keep, expressed in percent.

    Returns:
        ``content[:k]`` where ``k`` is the truncated item count described above.
    """
    # Clamp first so a negative value can never become a negative slice bound.
    bounded_percentage = min(max(percentage, 0), 100)
    slice_size = int(len(content) * bounded_percentage / 100)
    return content[:slice_size]


def simulate_model_executions(models: typing.Dict[str, LinearSVC], 
                              test_downloader: data.AuditoryDataDownloader, 
                              test_reader: data.AuditoryDataReader,
                              test_processor: processor.DataProcessor,
                              tests: int = 10,
                              compile_percentage: float = 20) -> typing.Tuple[int, float, float]:
    """Time compilation and encrypted prediction for every subject's model.

    The dataset is retrieved and read once, and each subject's samples are run
    through ``test_processor``. Then, ``tests`` times over, each subject's model
    is compiled on the leading ``compile_percentage`` percent of that subject's
    processed data and asked to predict on all of it with ``fhe="execute"``.
    Both calls are timed with ``time.perf_counter``.

    Args:
        models: Models keyed by subject; looked up by the subject keys of the
            data returned by ``test_reader``.
        test_downloader: Provides the dataset location via ``retrieve()``.
        test_reader: Turns that location into per-subject data via ``format_data()``.
        test_processor: Applied to each subject's data via ``process()``.
        tests: Number of timing rounds over all subjects.
        compile_percentage: Leading share (in percent) of each subject's
            processed data passed to ``compile``.

    Returns:
        A tuple ``(executions, mean_compile_seconds, mean_predict_seconds)``,
        where ``executions`` is the number of timed predictions.

    Raises:
        KeyError: If a subject in the dataset has no entry in ``models``.
        statistics.StatisticsError: If nothing was timed (``tests`` below 1 or
            no subjects in the dataset).
    """
    data_path = test_downloader.retrieve()
    raw_data = test_reader.format_data(data_path)
    # Build a separate mapping rather than overwriting the reader's entries, so the
    # object returned by the reader is left untouched.
    processed_data = {
        subject: test_processor.process(data_samples)
        for subject, data_samples in raw_data.items()
    }
    compile_timings = []
    predict_timings = []
    for _ in range(tests):
        for subject, prepared_data in processed_data.items():
            compile_set = slice_by_percentage(prepared_data, compile_percentage)
            test_model = models[subject]
            # Time compilation and prediction separately.
            start_compile = time.perf_counter()
            test_model.compile(compile_set)
            end_compile = time.perf_counter()
            start_predict = time.perf_counter()
            test_model.predict(prepared_data, fhe="execute")
            end_predict = time.perf_counter()
            compile_timings.append(end_compile - start_compile)
            predict_timings.append(end_predict - start_predict)
    return len(predict_timings), statistics.mean(compile_timings), statistics.mean(predict_timings)

In [None]:
# Run the timing simulation with the trained models and the same data components.
simulation_summary = simulate_model_executions(
    models=results.models,
    test_downloader=downloader,
    test_reader=reader,
    test_processor=data_processor,
)
executions, average_compile_time, average_predict_time = simulation_summary
print(f"Executions: {executions}")
print(f"Overall average compile time: {average_compile_time} seconds")
print(f"Overall average predict time (encrypted): {average_predict_time} seconds")

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..