# Setup

In [32]:
import numpy as np
import pandas as pd
import dataclasses
import enum
import typing
import random
import eeg_bloom_template
import eeg_bloom_template.backend

from eeg_auth_models_framework import data, pre_process, features, processor, normalization
from eeg_auth_models_framework.utils import conversion
from eeg_bloom_template.utils.iteration import iter_ratio_slices

# Constants

In [33]:
AUTHENTICATION_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
SEGMENTATION_RATIOS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
FALSE_POSITIVE_RATES = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
HASH_BACKENDS = [
    eeg_bloom_template.backend.FNVBloomFilterBackend(),
    eeg_bloom_template.backend.MMH3BloomFilterBackend()
]
DATASET_SAMPLE_FREQ_HZ = 200
DATA_CHANNEL_NAMES = ['T7','F8','Cz','P4']
FREQUENCIES = [
    pre_process.FrequencyBand(lower=8.0, upper=12.0, label='Alpha'),
    pre_process.FrequencyBand(lower=12.0, upper=35.0, label='Beta'),
    pre_process.FrequencyBand(lower=4.0, upper=8.0, label='Theta'),
    pre_process.FrequencyBand(lower=35.0, upper=None, label='Gamma'),
    pre_process.FrequencyBand(lower=None, upper=None, label='Raw'),
]
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0
RESCALE_LOWER = 0
RESCALE_UPPER = 255
SAMPLE_PERCENTAGE = 0.1
RANDOM_SEED = 100000000000
RANDOM_GENERATOR = random.Random(RANDOM_SEED)

# Utilities

In [34]:
class TestResultType(enum.Enum):
    TRUE_POSITIVE = enum.auto()
    FALSE_POSITIVE = enum.auto()
    FALSE_NEGATIVE = enum.auto()
    TRUE_NEGATIVE = enum.auto()


@dataclasses.dataclass
class TestResultsSummary:
    true_positives: int = 0
    false_positives: int = 0
    false_negatives: int = 0
    true_negatives: int = 0
    
    @classmethod
    def merge_summaries(cls, 
                        summary_a: 'TestResultsSummary', 
                        summary_b: 'TestResultsSummary') -> 'TestResultsSummary':
        return TestResultsSummary(
            true_positives=summary_a.true_positives + summary_b.true_positives,
            false_positives=summary_a.false_positives + summary_b.false_negatives,
            false_negatives=summary_a.false_negatives + summary_b.false_negatives,
            true_negatives=summary_a.true_negatives + summary_b.true_negatives
        )
    
    def increment_count(self, result_type: TestResultType):
        if result_type == TestResultType.TRUE_POSITIVE:
            self.true_positives += 1
        elif result_type == TestResultType.FALSE_POSITIVE:
            self.false_positives += 1
        elif result_type == TestResultType.FALSE_NEGATIVE:
            self.false_negatives += 1
        else:
            self.true_negatives += 1
    
    @property
    def accuracy(self) -> float:
        hits = self.true_positives + self.true_negatives
        total = (
            self.true_positives + self.true_negatives + 
            self.false_positives + self.false_negatives
        )
        return hits / total

    @property
    def false_accept_rate(self) -> float:
        return self.false_positives / (self.false_positives + self.true_negatives)
    
    @property
    def false_reject_rate(self):
        return self.false_negatives / (self.false_negatives + self.true_positives)


@dataclasses.dataclass
class TemplateTest:
    expected_result: bool
    if_expected: TestResultType
    if_unexpected: TestResultType
    test_data: typing.List[np.ndarray]
    
    def run_test(self, test_template: eeg_bloom_template.EEGTemplate, test_threshold: float):
        comparison_result = test_template.compare(self.test_data)
        is_match = comparison_result.hit_ratio >= test_threshold
        if is_match != self.expected_result:
            return self.if_unexpected
        return self.if_expected
    

@dataclasses.dataclass
class TestTemplateData:
    template: eeg_bloom_template.EEGTemplate
    false_positive_rate: float
    hash_backend: eeg_bloom_template.backend.BaseBloomFilterHashBackend
    original_subject: str
    
    
@dataclasses.dataclass
class TemplateTestSet:
    threshold: float
    template_data: TestTemplateData
    positive_cases: typing.List[TemplateTest]
    negative_cases: typing.List[TemplateTest]

# Configuration

In [35]:
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()
converter = conversion.MNEDataFrameConverter(
    channels=DATA_CHANNEL_NAMES, 
    sample_frequency=DATASET_SAMPLE_FREQ_HZ
)

# Data Processing Setup

## Sample Data Processor

### Pre-Processing Steps

In [36]:
pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    ),
    pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)
])

### Feature Extraction Steps

In [37]:
feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

### Normalization Steps

In [38]:
normalization_steps = normalization.NormalizationPipeline([
    normalization.RescaleNormalizationStep(RESCALE_LOWER, RESCALE_UPPER),
    normalization.HistogramEqualizationStep(RESCALE_LOWER, RESCALE_UPPER)
])

### Data Processor Setup

In [39]:
data_processor = processor.DataProcessor(
    pre_process=pre_process_steps,
    feature_extraction=feature_extraction_steps,
    normalization=normalization_steps
)

# Subject Data

In [40]:
data_path = downloader.retrieve()
subject_data_map = reader.format_data(data_path)

# Processing

## Template Setup

### Template Generation

In [41]:
def iter_test_templates(subject_template_data: typing.Dict[str, typing.List[np.ndarray]]) -> typing.Iterator[TestTemplateData]:
    for subject in subject_template_data:
        subject_data = subject_template_data[subject]
        for backend_option in HASH_BACKENDS:
            for ratio in SEGMENTATION_RATIOS:
                for rate in FALSE_POSITIVE_RATES:
                    template = eeg_bloom_template.EEGTemplate.make_template(
                        subject_data,
                        backend_option,
                        ratio,
                        rate
                    )
                    yield TestTemplateData(
                        template=template, 
                        false_positive_rate=rate, 
                        hash_backend=backend_option,
                        original_subject=subject
                    )

## Sample Data Setup

In [42]:
processed_data_map = {
    subject: data_processor.process(subject_data_map[subject])
    for subject in subject_data_map
}

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

# Test Set Assembly

## Gathering Test Sets

In [43]:
def get_experiment_samples(data_map: typing.Dict[str, typing.List[np.ndarray]], 
                           target: str) -> typing.Tuple[typing.List[np.ndarray], typing.List[np.ndarray]]:
    positive_samples = list(
        iter_ratio_slices(
            data_map[target], 
            SAMPLE_PERCENTAGE
        )
    )
    negative_samples = []
    subjects_available = list(data_map.keys())
    subjects_available.remove(target)
    RANDOM_GENERATOR.shuffle(subjects_available)
    while subjects_available and len(negative_samples) < len(positive_samples):
        example_subject = subjects_available.pop()
        negative_samples.extend(
            list(
                iter_ratio_slices(
                    data_map[example_subject], 
                    SAMPLE_PERCENTAGE
                )
            )
        )
    return positive_samples, negative_samples


def iter_template_test_sets(data_map: typing.Dict[str, typing.List[np.ndarray]]) -> typing.Iterator[TemplateTestSet]:
    for template_data in iter_test_templates(data_map):
        for auth_threshold in AUTHENTICATION_THRESHOLDS:
            positive_samples, negative_samples = get_experiment_samples(
                data_map, 
                template_data.original_subject
            )
            yield TemplateTestSet(
                threshold=auth_threshold,
                template_data=template_data,
                positive_cases=[
                    TemplateTest(
                        expected_result=True, 
                        test_data=sample,
                        if_expected=TestResultType.TRUE_POSITIVE,
                        if_unexpected=TestResultType.FALSE_NEGATIVE
                    )
                    for sample in positive_samples
                ],
                negative_cases=[
                    TemplateTest(
                        expected_result=False, 
                        test_data=sample,
                        if_expected=TestResultType.TRUE_NEGATIVE,
                        if_unexpected=TestResultType.FALSE_POSITIVE
                    )
                    for sample in negative_samples
                ]
            )

# Execute Tests

In [44]:
def run_template_test_set(test_set: TemplateTestSet) -> TestResultsSummary:
    summary = TestResultsSummary()
    for test_item in test_set.positive_cases:
        result_type = test_item.run_test(
            test_set.template_data.template,
            test_set.threshold
        )
        summary.increment_count(result_type)
    for test_item in test_set.negative_cases:
        result_type = test_item.run_test(
            test_set.template_data.template,
            test_set.threshold
        )
        summary.increment_count(result_type)
    return summary

In [45]:
results_data: typing.Dict[typing.Tuple[float, float, str], TestResultsSummary] = {}
for template_test_set in iter_template_test_sets(processed_data_map):
    false_positive_rate = template_test_set.template_data.false_positive_rate
    threshold = template_test_set.threshold
    hash_backend = template_test_set.template_data.hash_backend.__class__.__name__
    result_key = (
        false_positive_rate,
        threshold,
        hash_backend
    )
    if result_key not in results_data:
        results_data[result_key] = TestResultsSummary()
    results_data[result_key] = TestResultsSummary.merge_summaries(
        results_data[result_key],
        run_template_test_set(template_test_set)
    )

In [46]:
results_data_rows = []
for data_key, results_summary in results_data.items():
    fpr, threshold, backend = data_key
    results_data_rows.append([
        fpr, threshold, 
        backend, results_summary.false_accept_rate, 
        results_summary.false_reject_rate, results_summary.accuracy
    ])
results_dataframe = pd.DataFrame(
    results_data_rows, 
    columns=['FPR', 'Threshold', 'Backend', 'FAR', 'FRR', 'Accuracy']
)
results_dataframe.sort_values(by=['Accuracy'], ascending=False).head(10)

Unnamed: 0,FPR,Threshold,Backend,FAR,FRR,Accuracy
190,0.5,0.1,MMH3BloomFilterBackend,0.186441,0.005473,0.989367
180,0.45,0.1,MMH3BloomFilterBackend,0.277778,0.007463,0.985465
90,0.5,0.1,FNVBloomFilterBackend,0.181818,0.00796,0.984747
70,0.4,0.1,FNVBloomFilterBackend,0.246575,0.008955,0.982717
60,0.35,0.1,FNVBloomFilterBackend,0.294118,0.014925,0.971591
150,0.3,0.1,MMH3BloomFilterBackend,0.240876,0.016418,0.969259
160,0.35,0.1,MMH3BloomFilterBackend,0.232877,0.016915,0.96846
80,0.45,0.1,FNVBloomFilterBackend,0.408602,0.018905,0.963861
50,0.3,0.1,FNVBloomFilterBackend,0.3,0.020896,0.96093
170,0.4,0.1,MMH3BloomFilterBackend,0.454545,0.022388,0.957326
