# Setup

In [1]:
import numpy as np
import pandas as pd
import dataclasses
import typing
import random
import statistics
import eeg_bloom_template
import eeg_bloom_template.backend

from eeg_auth_models_framework import data, pre_process, features, processor, normalization
from eeg_auth_models_framework.utils import conversion
from eeg_bloom_template.utils.iteration import iter_ratio_slices

# Constants

In [2]:
# Hit-ratio cut-offs swept during evaluation: a comparison counts as a match
# when its hit ratio is >= the threshold.
AUTHENTICATION_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
# Ratio values handed to EEGTemplate.make_template (one template per value).
# NOTE(review): the last entry is the int 1 while the others are floats.
SEGMENTATION_RATIOS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
# Rate values handed to EEGTemplate.make_template (one template per value);
# presumably the target Bloom filter false-positive rate - confirm against eeg_bloom_template.
FALSE_POSITIVE_RATES = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
# Hash backend instances; every template configuration is built once per backend.
HASH_BACKENDS = [
    eeg_bloom_template.backend.FNVBloomFilterBackend(),
    eeg_bloom_template.backend.MMH3BloomFilterBackend()
]
# Sampling frequency passed to the MNE data frame converter.
DATASET_SAMPLE_FREQ_HZ = 200
# Channel names passed to the MNE data frame converter.
DATA_CHANNEL_NAMES = ['T7','F8','Cz','P4']
# Frequency bands given to the band-pass filter step. Bounds are presumably in Hz;
# a None bound presumably means "unbounded on that side" - TODO confirm.
FREQUENCIES = [
    pre_process.FrequencyBand(lower=8.0, upper=12.0, label='Alpha'),
    pre_process.FrequencyBand(lower=12.0, upper=35.0, label='Beta'),
    pre_process.FrequencyBand(lower=4.0, upper=8.0, label='Theta'),
    pre_process.FrequencyBand(lower=35.0, upper=None, label='Gamma'),
    pre_process.FrequencyBand(lower=None, upper=None, label='Raw'),
]
# Arguments of the windowing step; presumably expressed in samples
# (1200 samples would be 6 s at 200 Hz) - TODO confirm.
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0
# Bounds shared by the rescale and histogram-equalization normalization steps.
RESCALE_LOWER = 0
RESCALE_UPPER = 255
# Ratio passed to iter_ratio_slices when cutting subject data into test samples.
SAMPLE_PERCENTAGE = 0.1
# Dedicated, seeded generator so the shuffling of negative-sample subjects is reproducible.
RANDOM_SEED = 100000000000
RANDOM_GENERATOR = random.Random(RANDOM_SEED)

# Utilities

In [3]:
@dataclasses.dataclass
class TemplateTest:
    """One labelled probe to run against a template.

    ``expected_result`` is the outcome the comparison should produce (``True``
    when the probe is meant to match, ``False`` when it is not) and
    ``test_data`` is the sample handed to the template's ``compare`` method.
    """

    expected_result: bool
    test_data: typing.List[np.ndarray]

    def run_test(self, test_template: eeg_bloom_template.EEGTemplate, test_threshold: float):
        """Check whether the template classifies this probe as expected.

        The probe is considered a match when the hit ratio reported by
        ``test_template.compare`` is greater than or equal to ``test_threshold``.

        :param test_template: Template the probe data is compared against.
        :param test_threshold: Minimum hit ratio that counts as a match.
        :return: ``True`` when the match decision equals ``expected_result``,
            ``False`` otherwise.
        """
        outcome = test_template.compare(self.test_data)
        matched = outcome.hit_ratio >= test_threshold
        # ``not`` turns the mismatch flag into a plain bool, so callers always
        # receive True/False regardless of the comparison's own result type.
        return not (matched != self.expected_result)
    

@dataclasses.dataclass
class TestTemplateData:
    """A generated template together with the settings it was built from."""
    # Template under test.
    template: eeg_bloom_template.EEGTemplate
    # Rate value the template was built with; used as part of the results grouping key.
    false_positive_rate: float
    # Hash backend instance the template was built with.
    hash_backend: eeg_bloom_template.backend.BaseBloomFilterHashBackend
    # Key of the subject whose data produced the template.
    original_subject: str
    
    
@dataclasses.dataclass
class TemplateTestSet:
    """All probes to run against one template at one authentication threshold."""
    # Minimum hit ratio that counts as a match for every case in this set.
    threshold: float
    # Template under test plus the settings it was generated with.
    template_data: TestTemplateData
    # Probes that are expected to match the template.
    positive_cases: typing.List[TemplateTest]
    # Probes that are expected NOT to match the template.
    negative_cases: typing.List[TemplateTest]

# Configuration

In [4]:
# Dataset helpers: `downloader.retrieve()` and `reader.format_data()` are used
# in the "Subject Data" cell to obtain the per-subject recordings.
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()
# Converter configured with the dataset's channel names and sampling frequency;
# it is handed to the band-pass filter step of the pre-processing pipeline.
converter = conversion.MNEDataFrameConverter(
    channels=DATA_CHANNEL_NAMES, 
    sample_frequency=DATASET_SAMPLE_FREQ_HZ
)

# Data Processing Setup

## Sample Data Processor

### Pre-Processing Steps

In [5]:
# Pre-processing pipeline: a band-pass filter step configured with the
# FREQUENCIES bands, followed by a windowing step configured with
# WINDOW_SIZE and WINDOW_OVERLAP.
pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    ),
    pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)
])

### Feature Extraction Steps

In [6]:
# Feature extraction pipeline: a single statistical extractor producing the
# min, max, mean and zero-crossing-rate features
# (presumably computed per window - TODO confirm against the framework).
feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

### Normalization Steps

In [7]:
# Normalization pipeline: a rescale step followed by histogram equalization,
# both configured with the RESCALE_LOWER / RESCALE_UPPER bounds.
normalization_steps = normalization.NormalizationPipeline([
    normalization.RescaleNormalizationStep(RESCALE_LOWER, RESCALE_UPPER),
    normalization.HistogramEqualizationStep(RESCALE_LOWER, RESCALE_UPPER)
])

### Data Processor Setup

In [8]:
# Bundle the pre-processing, feature extraction and normalization pipelines
# into the processor that is applied to every subject's data.
data_processor = processor.DataProcessor(
    pre_process=pre_process_steps,
    feature_extraction=feature_extraction_steps,
    normalization=normalization_steps
)

# Subject Data

In [9]:
# Obtain the dataset location and turn it into a mapping keyed by subject
# (it is iterated and indexed by subject when the data is processed).
data_path = downloader.retrieve()
subject_data_map = reader.format_data(data_path)

# Processing

## Template Setup

### Template Generation

In [10]:
def iter_test_templates(subject_template_data: typing.Dict[str, typing.List[np.ndarray]]) -> typing.Iterator[TestTemplateData]:
    """Lazily build every template configuration for every subject.

    For each subject, one template is generated per combination of hash
    backend (``HASH_BACKENDS``), segmentation ratio (``SEGMENTATION_RATIOS``)
    and false-positive rate (``FALSE_POSITIVE_RATES``).

    Note that the segmentation ratio is used to build the template but is not
    recorded on the yielded ``TestTemplateData``.

    :param subject_template_data: Mapping of subject key to that subject's
        processed data, as passed to ``EEGTemplate.make_template``.
    :return: Iterator of templates paired with the false-positive rate, hash
        backend and subject they were generated from.
    """
    # The local is deliberately not called ``data`` so it does not shadow the
    # ``data`` module imported at the top of the notebook.
    for subject, subject_samples in subject_template_data.items():
        for backend in HASH_BACKENDS:
            for ratio in SEGMENTATION_RATIOS:
                for rate in FALSE_POSITIVE_RATES:
                    template = eeg_bloom_template.EEGTemplate.make_template(
                        subject_samples,
                        backend,
                        ratio,
                        rate
                    )
                    yield TestTemplateData(
                        template=template,
                        false_positive_rate=rate,
                        hash_backend=backend,
                        original_subject=subject
                    )

## Sample Data Setup

In [11]:
# Run the full processing pipeline (pre-processing, feature extraction,
# normalization) on every subject; the result keeps the same subject keys.
processed_data_map = {
    subject: data_processor.process(subject_data_map[subject])
    for subject in subject_data_map
}

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

# Test Set Assembly

## Gathering Test Sets

In [12]:
def get_experiment_samples(subject_data_map: typing.Dict[str, typing.List[np.ndarray]],
                           target: str) -> typing.Tuple[typing.List[np.ndarray], typing.List[np.ndarray]]:
    """Collect positive and negative test samples for one target subject.

    Positive samples are the slices ``iter_ratio_slices`` produces from the
    target's own data using ``SAMPLE_PERCENTAGE``. Negative samples are taken
    the same way from the other subjects, visited in an order shuffled by the
    shared ``RANDOM_GENERATOR``, until there are at least as many negatives as
    positives or no other subjects remain. Because a whole subject's slices are
    added at once, the negative list may end up longer than the positive one.

    :param subject_data_map: Mapping of subject key to that subject's data.
    :param target: Key of the subject the samples are gathered for.
    :return: Tuple of ``(positive_samples, negative_samples)``.
    """
    positive_samples = [*iter_ratio_slices(subject_data_map[target], SAMPLE_PERCENTAGE)]

    # Every subject except the target is a candidate source of negatives.
    other_subjects = list(subject_data_map.keys())
    other_subjects.remove(target)
    RANDOM_GENERATOR.shuffle(other_subjects)

    negative_samples = []
    # Walk the shuffled list from its tail, i.e. in the order pop() would give.
    for other_subject in reversed(other_subjects):
        if len(negative_samples) >= len(positive_samples):
            break
        negative_samples.extend(
            iter_ratio_slices(subject_data_map[other_subject], SAMPLE_PERCENTAGE)
        )
    return positive_samples, negative_samples


def iter_template_test_sets(subject_data_map: typing.Dict[str, typing.List[np.ndarray]]) -> typing.Iterator[TemplateTestSet]:
    """Yield one test set per generated template and authentication threshold.

    :param subject_data_map: Mapping of subject key to that subject's data;
        used both to build the templates and to draw the test samples.
    :return: Iterator of test sets, each holding the threshold, the template
        data, and the positive / negative probes to run against the template.
    """
    for template_data in iter_test_templates(subject_data_map):
        for threshold in AUTHENTICATION_THRESHOLDS:
            # NOTE(review): samples are drawn again for every threshold, so the
            # shared random generator advances and each threshold may be scored
            # against a different set of negative samples.
            matching, non_matching = get_experiment_samples(
                subject_data_map,
                template_data.original_subject
            )
            positive_cases = [
                TemplateTest(expected_result=True, test_data=probe)
                for probe in matching
            ]
            negative_cases = [
                TemplateTest(expected_result=False, test_data=probe)
                for probe in non_matching
            ]
            yield TemplateTestSet(
                threshold=threshold,
                template_data=template_data,
                positive_cases=positive_cases,
                negative_cases=negative_cases
            )

# Execute Tests

In [13]:
def collect_test_results(test_items: typing.List[TemplateTest],
                         template: eeg_bloom_template.EEGTemplate,
                         threshold: float) -> int:
    """Count how many test items produce their expected outcome.

    :param test_items: Probes to run; each is executed exactly once, in order.
    :param template: Template every probe is compared against.
    :param threshold: Hit-ratio threshold forwarded to each probe.
    :return: Number of probes whose ``run_test`` call returned a truthy value.
    """
    return sum(1 for probe in test_items if probe.run_test(template, threshold))


def run_template_test_set(test_set: TemplateTestSet) -> float:
    """Compute the accuracy of one template test set as a percentage.

    Positive cases are evaluated first, then negative cases, all against the
    set's template and threshold.

    :param test_set: Template, threshold and labelled probes to evaluate.
    :return: Share of probes that produced their expected outcome, scaled to
        the range 0-100.
    :raises ZeroDivisionError: If the set contains no positive and no
        negative cases.
    """
    case_groups = (test_set.positive_cases, test_set.negative_cases)
    total_tests = sum(len(group) for group in case_groups)
    template = test_set.template_data.template
    correct = sum(
        collect_test_results(group, template, test_set.threshold)
        for group in case_groups
    )
    return (correct / total_tests) * 100

In [14]:
# Accuracy percentages grouped by (false-positive rate, threshold, hash backend
# class name). Each bucket collects one score per test set that shares the key,
# i.e. across all subjects and segmentation ratios.
results_data: typing.Dict[typing.Tuple[float, float, str], typing.List[float]] = {}
for template_test_set in iter_template_test_sets(processed_data_map):
    false_positive_rate = template_test_set.template_data.false_positive_rate
    threshold = template_test_set.threshold
    hash_backend = template_test_set.template_data.hash_backend.__class__.__name__
    result_key = (false_positive_rate, threshold, hash_backend)
    # setdefault creates the bucket on first sight of a key, then the score is appended.
    results_data.setdefault(result_key, []).append(
        run_template_test_set(template_test_set)
    )

In [16]:
# Collapse every (FPR, threshold, backend) bucket into a single row holding the
# mean of its accuracy scores.
results_data_rows = []
for data_key, accuracy_results in results_data.items():
    fpr, threshold, backend = data_key
    average_accuracy = statistics.mean(accuracy_results)
    results_data_rows.append([fpr, threshold, backend, average_accuracy])
results_dataframe = pd.DataFrame(results_data_rows, columns=['FPR', 'Threshold', 'Backend', 'Average Accuracy'])
# Display the ten configurations with the highest average accuracy.
results_dataframe.sort_values(by=['Average Accuracy'], ascending=False).head(10)

Unnamed: 0,FPR,Threshold,Backend,Average Accuracy
132,0.2,0.3,MMH3BloomFilterBackend,80.832873
32,0.2,0.3,FNVBloomFilterBackend,79.688902
123,0.15,0.4,MMH3BloomFilterBackend,79.560906
72,0.4,0.3,FNVBloomFilterBackend,79.301498
102,0.05,0.3,MMH3BloomFilterBackend,79.164939
122,0.15,0.3,MMH3BloomFilterBackend,79.156068
142,0.25,0.3,MMH3BloomFilterBackend,78.880031
22,0.15,0.3,FNVBloomFilterBackend,78.843971
23,0.15,0.4,FNVBloomFilterBackend,78.737097
12,0.1,0.3,FNVBloomFilterBackend,78.431068
