# Setup

In [69]:
import numpy as np
import pandas as pd
import dataclasses
import enum
import typing
import random
import secrets
import eeg_bloom_template
import eeg_bloom_template.backend

from eeg_bloom_template.backend import BaseBloomFilterHashBackend
from eeg_auth_models_framework import data, pre_process, features, processor, normalization
from eeg_auth_models_framework.utils import conversion
from eeg_bloom_template.utils.iteration import iter_ratio_slices

# Constants

In [70]:
AUTHENTICATION_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
SEGMENTATION_RATIOS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
FALSE_POSITIVE_RATES = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
HASH_BACKENDS = [
    eeg_bloom_template.backend.FNVBloomFilterBackend(),
    eeg_bloom_template.backend.MMH3BloomFilterBackend()
]
DATASET_SAMPLE_FREQ_HZ = 200
DATA_CHANNEL_NAMES = ['T7','F8','Cz','P4']
FREQUENCIES = [
    pre_process.FrequencyBand(lower=8.0, upper=12.0, label='Alpha'),
    pre_process.FrequencyBand(lower=12.0, upper=35.0, label='Beta'),
    pre_process.FrequencyBand(lower=4.0, upper=8.0, label='Theta'),
    pre_process.FrequencyBand(lower=35.0, upper=None, label='Gamma'),
    pre_process.FrequencyBand(lower=None, upper=None, label='Raw'),
]
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0
RESCALE_LOWER = 0
RESCALE_UPPER = 255
SAMPLE_PERCENTAGE = 0.1
RANDOM_SEED = 100000000000
RANDOM_GENERATOR = random.Random(RANDOM_SEED)

# Utilities

In [71]:
class HashBackendWrapper(BaseBloomFilterHashBackend):
    def __init__(self, 
                 real_backend: BaseBloomFilterHashBackend, 
                 label: str):
        self.real_backend = real_backend
        self.label = label
        super().__init__()
    
    def run_hash_function(self, input_data: bytes) -> int:
        return self.real_backend.run_hash_function(input_data)


class TestResultType(enum.Enum):
    TRUE_POSITIVE = enum.auto()
    FALSE_POSITIVE = enum.auto()
    FALSE_NEGATIVE = enum.auto()
    TRUE_NEGATIVE = enum.auto()


@dataclasses.dataclass
class TestResultsSummary:
    true_positives: int = 0
    false_positives: int = 0
    false_negatives: int = 0
    true_negatives: int = 0
    
    @classmethod
    def merge_summaries(cls, 
                        summary_a: 'TestResultsSummary', 
                        summary_b: 'TestResultsSummary') -> 'TestResultsSummary':
        return TestResultsSummary(
            true_positives=summary_a.true_positives + summary_b.true_positives,
            false_positives=summary_a.false_positives + summary_b.false_negatives,
            false_negatives=summary_a.false_negatives + summary_b.false_negatives,
            true_negatives=summary_a.true_negatives + summary_b.true_negatives
        )
    
    def increment_count(self, result_type: TestResultType):
        if result_type == TestResultType.TRUE_POSITIVE:
            self.true_positives += 1
        elif result_type == TestResultType.FALSE_POSITIVE:
            self.false_positives += 1
        elif result_type == TestResultType.FALSE_NEGATIVE:
            self.false_negatives += 1
        else:
            self.true_negatives += 1
    
    @property
    def accuracy(self) -> float:
        hits = self.true_positives + self.true_negatives
        total = (
            self.true_positives + self.true_negatives + 
            self.false_positives + self.false_negatives
        )
        return hits / total

    @property
    def false_accept_rate(self) -> float:
        return self.false_positives / (self.false_positives + self.true_negatives)
    
    @property
    def false_reject_rate(self) -> float:
        return self.false_negatives / (self.false_negatives + self.true_positives)
    
    @property
    def half_total_error_rate(self) -> float:
        return (self.false_accept_rate + self.false_reject_rate) / 2


@dataclasses.dataclass
class TemplateTest:
    expected_result: bool
    if_expected: TestResultType
    if_unexpected: TestResultType
    test_data: typing.List[np.ndarray]
    
    def run_test(self, test_template: eeg_bloom_template.EEGTemplate, test_threshold: float):
        comparison_result = test_template.compare(self.test_data)
        is_match = comparison_result.hit_ratio >= test_threshold
        if is_match != self.expected_result:
            return self.if_unexpected
        return self.if_expected
    

@dataclasses.dataclass
class TestTemplateData:
    template: eeg_bloom_template.EEGTemplate
    false_positive_rate: float
    hash_backend: eeg_bloom_template.backend.BaseBloomFilterHashBackend
    original_subject: str
    
    
@dataclasses.dataclass
class TemplateTestSet:
    threshold: float
    template_data: TestTemplateData
    positive_cases: typing.List[TemplateTest]
    negative_cases: typing.List[TemplateTest]

# Configuration

In [72]:
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()
converter = conversion.MNEDataFrameConverter(
    channels=DATA_CHANNEL_NAMES, 
    sample_frequency=DATASET_SAMPLE_FREQ_HZ
)

# Data Processing Setup

## Sample Data Processor

### Pre-Processing Steps

In [73]:
pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    ),
    pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)
])

### Feature Extraction Steps

In [74]:
feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

### Normalization Steps

In [75]:
normalization_steps = normalization.NormalizationPipeline([
    normalization.RescaleNormalizationStep(RESCALE_LOWER, RESCALE_UPPER),
    normalization.HistogramEqualizationStep(RESCALE_LOWER, RESCALE_UPPER)
])

### Data Processor Setup

In [76]:
data_processor = processor.DataProcessor(
    pre_process=pre_process_steps,
    feature_extraction=feature_extraction_steps,
    normalization=normalization_steps
)

# Subject Data

In [77]:
data_path = downloader.retrieve()
subject_data_map = reader.format_data(data_path)

## Token Setup

In [78]:
subject_tokens_map = {subject: secrets.randbits(32) for subject in subject_data_map}

# Processing

## Hash Backend Construction

In [79]:
def generate_hash_backends(token: int) -> typing.List[HashBackendWrapper]:
    return [
        HashBackendWrapper(
            eeg_bloom_template.backend.FNVBloomFilterBackend(),
            'FNV Backend'
        ),
        HashBackendWrapper(
            eeg_bloom_template.backend.MMH3BloomFilterBackend(),
            'MMH3 Backend (No Seed)'
        ),
        HashBackendWrapper(
            eeg_bloom_template.backend.MMH3BloomFilterBackend(
                seed=token
            ),
            'MMH3 Backend (Seed)'
        )
    ]

## Template Setup

### Template Generation

In [80]:
def iter_test_templates(subject_template_data: typing.Dict[str, typing.List[np.ndarray]], 
                        subject_tokens: typing.Dict[str, int]) -> typing.Iterator[TestTemplateData]:
    for subject in subject_template_data:
        subject_data = subject_template_data[subject]
        token = subject_tokens[subject]
        for row_wise_setting in (True, False):
            for backend_option in generate_hash_backends(token):
                for ratio in SEGMENTATION_RATIOS:
                    for rate in FALSE_POSITIVE_RATES:
                        template = eeg_bloom_template.EEGTemplate.make_template(
                            feature_data=subject_data,
                            hash_backend=backend_option,
                            segment_ratio=ratio,
                            false_positive_ratio=rate,
                            row_wise=row_wise_setting
                        )
                        yield TestTemplateData(
                            template=template, 
                            false_positive_rate=rate, 
                            hash_backend=backend_option,
                            original_subject=subject
                        )

## Sample Data Setup

In [81]:
processed_data_map = {
    subject: data_processor.process(subject_data_map[subject])
    for subject in subject_data_map
}

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

# Test Set Assembly

## Gathering Test Sets

In [82]:
def get_experiment_samples(data_map: typing.Dict[str, typing.List[np.ndarray]], 
                           target: str) -> typing.Tuple[typing.List[np.ndarray], typing.List[np.ndarray]]:
    positive_samples = list(
        iter_ratio_slices(
            data_map[target], 
            SAMPLE_PERCENTAGE
        )
    )
    negative_samples = []
    subjects_available = list(data_map.keys())
    subjects_available.remove(target)
    RANDOM_GENERATOR.shuffle(subjects_available)
    while subjects_available and len(negative_samples) < len(positive_samples):
        example_subject = subjects_available.pop()
        negative_samples.extend(
            list(
                iter_ratio_slices(
                    data_map[example_subject], 
                    SAMPLE_PERCENTAGE
                )
            )
        )
    return positive_samples, negative_samples


def iter_template_test_sets(data_map: typing.Dict[str, typing.List[np.ndarray]],
                            subject_tokens: typing.Dict[str, int]) -> typing.Iterator[TemplateTestSet]:
    for template_data in iter_test_templates(data_map, subject_tokens):
        for auth_threshold in AUTHENTICATION_THRESHOLDS:
            positive_samples, negative_samples = get_experiment_samples(
                data_map, 
                template_data.original_subject
            )
            yield TemplateTestSet(
                threshold=auth_threshold,
                template_data=template_data,
                positive_cases=[
                    TemplateTest(
                        expected_result=True, 
                        test_data=sample,
                        if_expected=TestResultType.TRUE_POSITIVE,
                        if_unexpected=TestResultType.FALSE_NEGATIVE
                    )
                    for sample in positive_samples
                ],
                negative_cases=[
                    TemplateTest(
                        expected_result=False, 
                        test_data=sample,
                        if_expected=TestResultType.TRUE_NEGATIVE,
                        if_unexpected=TestResultType.FALSE_POSITIVE
                    )
                    for sample in negative_samples
                ]
            )

# Execute Tests

In [83]:
def run_template_test_set(test_set: TemplateTestSet) -> TestResultsSummary:
    summary = TestResultsSummary()
    for test_item in test_set.positive_cases:
        result_type = test_item.run_test(
            test_set.template_data.template,
            test_set.threshold
        )
        summary.increment_count(result_type)
    for test_item in test_set.negative_cases:
        result_type = test_item.run_test(
            test_set.template_data.template,
            test_set.threshold
        )
        summary.increment_count(result_type)
    return summary

In [84]:
results_data: typing.Dict[typing.Tuple[float, float, str], TestResultsSummary] = {}
for template_test_set in iter_template_test_sets(processed_data_map, subject_tokens_map):
    false_positive_rate = template_test_set.template_data.false_positive_rate
    threshold = template_test_set.threshold
    hash_backend_label = template_test_set.template_data.hash_backend.label
    row_wise = template_test_set.template_data.template.row_wise
    result_key = (
        false_positive_rate,
        threshold,
        hash_backend_label,
        row_wise
    )
    if result_key not in results_data:
        results_data[result_key] = TestResultsSummary()
    results_data[result_key] = TestResultsSummary.merge_summaries(
        results_data[result_key],
        run_template_test_set(template_test_set)
    )

In [85]:
results_data_rows = []
for data_key, results_summary in results_data.items():
    fpr, threshold, backend_label, row_wise = data_key
    row_wise_label = 'Rows' if row_wise else 'Columns'
    results_data_rows.append([
        fpr, threshold, row_wise_label,
        backend_label, results_summary.false_accept_rate, 
        results_summary.false_reject_rate, results_summary.half_total_error_rate, 
        results_summary.accuracy
    ])
results_dataframe = pd.DataFrame(
    results_data_rows, 
    columns=['FPR', 'Threshold', 'Processing', 'Backend', 'FAR', 'FRR', 'HTER', 'Accuracy']
)
results_dataframe.sort_values(by=['Accuracy'], ascending=False).head(10)

Unnamed: 0,FPR,Threshold,Processing,Backend,FAR,FRR,HTER,Accuracy
290,0.5,0.1,Rows,MMH3 Backend (Seed),0.231884,0.00796,0.119922,0.984608
190,0.5,0.1,Rows,MMH3 Backend (No Seed),0.339623,0.008955,0.174289,0.98255
90,0.5,0.1,Rows,FNV Backend,0.371429,0.012935,0.192182,0.975
280,0.45,0.1,Rows,MMH3 Backend (Seed),0.318681,0.014428,0.166555,0.972394
60,0.35,0.1,Rows,FNV Backend,0.232558,0.014925,0.123742,0.97195
80,0.45,0.1,Rows,FNV Backend,0.241935,0.014925,0.12843,0.971884
180,0.45,0.1,Rows,MMH3 Backend (No Seed),0.402439,0.016418,0.209428,0.968451
260,0.35,0.1,Rows,MMH3 Backend (Seed),0.248227,0.017413,0.13282,0.967457
270,0.4,0.1,Rows,MMH3 Backend (Seed),0.318584,0.01791,0.168247,0.966086
70,0.4,0.1,Rows,FNV Backend,0.277419,0.021393,0.149406,0.960277


In [87]:
results_dataframe.sort_values(by=['HTER'], ascending=True).head(10)

Unnamed: 0,FPR,Threshold,Processing,Backend,FAR,FRR,HTER,Accuracy
290,0.5,0.1,Rows,MMH3 Backend (Seed),0.231884,0.00796,0.119922,0.984608
60,0.35,0.1,Rows,FNV Backend,0.232558,0.014925,0.123742,0.97195
80,0.45,0.1,Rows,FNV Backend,0.241935,0.014925,0.12843,0.971884
260,0.35,0.1,Rows,MMH3 Backend (Seed),0.248227,0.017413,0.13282,0.967457
191,0.5,0.2,Rows,MMH3 Backend (No Seed),0.201483,0.081095,0.141289,0.884356
150,0.3,0.1,Rows,MMH3 Backend (No Seed),0.26506,0.021891,0.143475,0.959559
70,0.4,0.1,Rows,FNV Backend,0.277419,0.021393,0.149406,0.960277
50,0.3,0.1,Rows,FNV Backend,0.281106,0.030348,0.155727,0.945218
170,0.4,0.1,Rows,MMH3 Backend (No Seed),0.291391,0.021891,0.156641,0.959278
292,0.5,0.3,Rows,MMH3 Backend (Seed),0.172226,0.141294,0.15676,0.844766


In [88]:
print(f'Total number of test results: {len(results_data)}')

Total number of test results: 600
