# Setup

Initial module setup.

In [69]:
import numpy as np
import pandas as pd
import dataclasses
import enum
import typing
import random
import auth_biohash.bio_hash
import auth_biohash.random_token
import feature_encoding.base
import feature_encoding.threshold

from eeg_auth_models_framework import data, pre_process, features, processor
from eeg_auth_models_framework.utils import conversion

# Constants

In [70]:
# Match thresholds swept by the evaluation. A comparison score at or below the
# threshold counts as a match (see HashTest.run_test).
AUTHENTICATION_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Sampling rate and channel layout handed to the MNE data-frame converter.
DATASET_SAMPLE_FREQ_HZ = 200
DATA_CHANNEL_NAMES = ['T7', 'F8', 'Cz', 'P4']

# Frequency bands given to the band-pass filter step, as (lower, upper, label).
# A bound of None leaves that side of the band open.
FREQUENCIES = [
    pre_process.FrequencyBand(lower=lower_bound, upper=upper_bound, label=band_label)
    for lower_bound, upper_bound, band_label in (
        (8.0, 12.0, 'Alpha'),
        (12.0, 35.0, 'Beta'),
        (4.0, 8.0, 'Theta'),
        (35.0, None, 'Gamma'),
        (None, None, 'Raw'),
    )
]

# Windowing parameters for the sample pipeline (DataWindowStep).
# NOTE(review): presumably expressed in samples (1200 / 200 Hz = 6 s) - confirm.
WINDOW_SIZE = 1_200
WINDOW_OVERLAP = 0

# Threshold passed to ThresholdBinaryEncoder when binarising feature vectors.
BINARY_THRESHOLD = 50

# Seeded generator used when sampling test cases, so a fresh run is repeatable.
RANDOM_SEED = 100_000_000_000
RANDOM_GENERATOR = random.Random(RANDOM_SEED)

# Utilities

In [71]:
@enum.unique
class TestResultType(enum.Enum):
    """Confusion-matrix outcome assigned to a single hash comparison test."""

    # Explicit values mirror what enum.auto() would assign (1, 2, 3, 4).
    TRUE_POSITIVE = 1
    FALSE_POSITIVE = 2
    FALSE_NEGATIVE = 3
    TRUE_NEGATIVE = 4


@dataclasses.dataclass
class TestResultsSummary:
    """Running confusion-matrix tally for a batch of hash comparison tests.

    The four counters start at zero and are advanced one test at a time via
    ``increment_count``. The rate properties return ``nan`` instead of raising
    ``ZeroDivisionError`` when their denominator is zero (for example when no
    tests were recorded, or only one population was tested).
    """
    true_positives: int = 0
    false_positives: int = 0
    false_negatives: int = 0
    true_negatives: int = 0

    def increment_count(self, result_type: 'TestResultType') -> None:
        """Add one to the counter matching ``result_type``.

        Raises:
            ValueError: if ``result_type`` is not one of the four known
                outcomes. Previously any unrecognised value was silently
                counted as a true negative.
        """
        if result_type == TestResultType.TRUE_POSITIVE:
            self.true_positives += 1
        elif result_type == TestResultType.FALSE_POSITIVE:
            self.false_positives += 1
        elif result_type == TestResultType.FALSE_NEGATIVE:
            self.false_negatives += 1
        elif result_type == TestResultType.TRUE_NEGATIVE:
            self.true_negatives += 1
        else:
            raise ValueError(f'Unsupported test result type: {result_type!r}')

    @staticmethod
    def _ratio(numerator: int, denominator: int) -> float:
        """Return ``numerator / denominator``, or ``nan`` for a zero denominator."""
        if denominator == 0:
            return float('nan')
        return numerator / denominator

    @property
    def accuracy(self) -> float:
        """Fraction of all recorded tests that were true positives or true negatives."""
        hits = self.true_positives + self.true_negatives
        total = (
            self.true_positives + self.true_negatives +
            self.false_positives + self.false_negatives
        )
        return self._ratio(hits, total)

    @property
    def false_accept_rate(self) -> float:
        """False positives divided by all should-not-match tests (FP + TN)."""
        return self._ratio(
            self.false_positives, self.false_positives + self.true_negatives
        )

    @property
    def false_reject_rate(self) -> float:
        """False negatives divided by all should-match tests (FN + TP)."""
        return self._ratio(
            self.false_negatives, self.false_negatives + self.true_positives
        )


@dataclasses.dataclass
class HashTest:
    """A single comparison between two hashes with a known expected outcome.

    ``run_test`` compares the pair, treats a comparison score at or below
    ``threshold`` as a match, and reports ``if_expected`` when the match
    decision equals ``expected_result`` and ``if_unexpected`` otherwise.
    """
    expected_result: bool
    threshold: float
    if_expected: TestResultType
    if_unexpected: TestResultType
    hashes: typing.Tuple[auth_biohash.bio_hash.BioHash, auth_biohash.bio_hash.BioHash]

    def run_test(self) -> TestResultType:
        """Compare the two hashes and classify the outcome."""
        score = auth_biohash.bio_hash.BioHash.compare(self.hashes[0], self.hashes[1])
        matched = score <= self.threshold
        return self.if_expected if matched == self.expected_result else self.if_unexpected


@dataclasses.dataclass
class ThresholdTestSet:
    """Hashes gathered for one target subject at one authentication threshold."""
    # Threshold this set belongs to. Consumers in this notebook convert it with
    # float() / str() before use, so either representation works downstream.
    threshold: str
    # The target subject's template hash that every case is compared against.
    template_hash: auth_biohash.bio_hash.BioHash
    # Sample hashes from the target subject (should match the template).
    positive_cases: typing.List[auth_biohash.bio_hash.BioHash]
    # Sample hashes from every other subject (should not match the template).
    negative_cases: typing.List[auth_biohash.bio_hash.BioHash]


@dataclasses.dataclass
class SubjectTestSet:
    """All per-threshold test sets built with one subject as the target."""
    # Identifier of the target subject (a key of the subject hashes map).
    subject_id: str
    # One ThresholdTestSet per authentication threshold.
    threshold_tests: typing.List[ThresholdTestSet]

# Configuration

In [72]:
# Dataset helpers: downloader.retrieve() yields the data path that
# reader.format_data() turns into the per-subject data map (Subject Data section).
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()
# Converter shared by the band-pass filter step of both processing pipelines.
converter = conversion.MNEDataFrameConverter(
    channels=DATA_CHANNEL_NAMES, 
    sample_frequency=DATASET_SAMPLE_FREQ_HZ
)

# Data Processing Setup

## Template Hash Processor

### Pre-Processing Steps

In [73]:
# Template pipeline: band-pass filtering over FREQUENCIES only. Unlike the sample
# pipeline, no DataWindowStep is applied here.
template_pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    )
])

### Feature Extraction Steps

In [74]:
# Statistical features extracted for the template: min, max, mean and
# zero-crossing rate (the same set the sample pipeline uses).
template_feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

### Processor

In [75]:
# Combines the template pre-processing and feature-extraction pipelines into the
# processor used to build each subject's template data.
template_data_processor = processor.DataProcessor(
    pre_process=template_pre_process_steps,
    feature_extraction=template_feature_extraction_steps
)

## Sample Hash Processor

### Pre-Processing Steps

In [76]:
# Sample pipeline: the same band-pass filtering as the template pipeline, followed
# by a windowing step configured with WINDOW_SIZE and WINDOW_OVERLAP.
sample_pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    ),
    pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)
])

### Feature Extraction Steps

In [77]:
# Statistical features extracted for the samples: min, max, mean and
# zero-crossing rate (the same set the template pipeline uses).
sample_feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

### Processor

In [78]:
# Combines the sample pre-processing and feature-extraction pipelines into the
# processor used to build each subject's sample data.
sample_data_processor = processor.DataProcessor(
    pre_process=sample_pre_process_steps,
    feature_extraction=sample_feature_extraction_steps
)

# Subject Data

In [79]:
# Fetch the dataset, then load it into a map that is iterated by subject and
# indexed as subject_data_map[subject] in the cells below.
data_path = downloader.retrieve()
subject_data_map = reader.format_data(data_path)

## Token Setup

In [80]:
# One generated token per subject; the same token is used for that subject's
# template hash and for its sample hashes.
subject_tokens_map = {
    subject_id: auth_biohash.random_token.generate_token()
    for subject_id in subject_data_map
}

## Template Hash Setup

### Processing

In [81]:
# Run every subject's recording through the template processor.
processed_template_data_map = {
    subject_id: template_data_processor.process(subject_data_map[subject_id])
    for subject_id in subject_data_map
}

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

### Hashing

In [82]:
# subject id -> threshold key (str(threshold)) -> list of hashes for that subject.
SubjectHashesMap = typing.Dict[str, typing.Dict[str, typing.List[auth_biohash.bio_hash.BioHash]]]
# subject id -> threshold key -> the single template hash kept for that subject.
SubjectTemplateHashesMap = typing.Dict[str, typing.Dict[str, auth_biohash.bio_hash.BioHash]]
# threshold key -> list of hashes, one per hashed feature vector.
ThresholdHashesMap = typing.Dict[str, typing.List[auth_biohash.bio_hash.BioHash]]

In [83]:
def make_map_of_threshold_hashes(vectors_to_hash: typing.List[np.ndarray],
                                 token: str,
                                 encoder: feature_encoding.base.BinaryEncoder) -> ThresholdHashesMap:
    """Hash every feature vector once per authentication threshold.

    Args:
        vectors_to_hash: feature vectors to turn into hashes.
        token: token passed to ``BioHash.generate_hash`` for every vector.
        encoder: binary encoder passed to ``BioHash.generate_hash``.

    Returns:
        A dict keyed by ``str(threshold)`` for each entry of
        ``AUTHENTICATION_THRESHOLDS``; each value is a freshly generated list
        with one hash per input vector, in input order.
    """
    def hash_all_vectors():
        # NOTE(review): the threshold is not an input to generate_hash, so each
        # threshold repeats the same hashing work - confirm whether it can be shared.
        return [
            auth_biohash.bio_hash.BioHash.generate_hash(feature_vector, token, encoder)
            for feature_vector in vectors_to_hash
        ]

    return {
        str(threshold): hash_all_vectors()
        for threshold in AUTHENTICATION_THRESHOLDS
    }


def make_subject_hashes_map(template_data_map: typing.Dict[str, typing.List[np.ndarray]],
                            tokens_map: typing.Dict[str, str],
                            encoder: feature_encoding.base.BinaryEncoder) -> SubjectHashesMap:
    """Build the per-threshold hashes of every subject in ``template_data_map``.

    Args:
        template_data_map: feature vectors keyed by subject.
        tokens_map: token keyed by subject; must contain every subject of
            ``template_data_map`` (a missing subject raises ``KeyError``).
        encoder: binary encoder forwarded to the hashing helper.

    Returns:
        A dict mapping each subject to the result of
        ``make_map_of_threshold_hashes`` for that subject's vectors and token.
    """
    return {
        subject_id: make_map_of_threshold_hashes(
            subject_vectors, tokens_map[subject_id], encoder
        )
        for subject_id, subject_vectors in template_data_map.items()
    }


def normalize_template_hashes_map(template_hashes_map: SubjectHashesMap) -> SubjectTemplateHashesMap:
    """Collapse each single-element list of template hashes into the hash itself.

    Args:
        template_hashes_map: subject -> threshold key -> list of hashes. Each
            list is expected to hold exactly one template hash.

    Returns:
        subject -> threshold key -> hash. A threshold whose list does not hold
        exactly one hash is reported with a printed warning and left out of
        that subject's entry (the subject key itself is always present).
    """
    result = {}
    for subject, hashes_by_threshold in template_hashes_map.items():
        result[subject] = {}
        for threshold, hashes_list in hashes_by_threshold.items():
            hash_count = len(hashes_list)
            if hash_count != 1:
                # The count may also be zero, so report the actual number found
                # together with the affected threshold.
                print(
                    f'[warning] Expected exactly 1 template hash for subject {subject} '
                    f'at threshold {threshold}, found {hash_count}; skipping.'
                )
                continue
            result[subject][threshold] = hashes_list[0]
    return result

In [84]:
# Hash each subject's template data with that subject's token, then reduce every
# single-element hash list to the template hash it contains.
subject_template_hashes_map = normalize_template_hashes_map(
    make_subject_hashes_map(
        processed_template_data_map, 
        subject_tokens_map, 
        feature_encoding.threshold.ThresholdBinaryEncoder(BINARY_THRESHOLD)
    )
)

## Sample Hash Setup

### Processing

In [85]:
# Run every subject's recording through the sample processor.
processed_data_map = {
    subject_id: sample_data_processor.process(subject_data_map[subject_id])
    for subject_id in subject_data_map
}

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

### Hashing

#### Encoding

In [86]:
# Hash each subject's sample vectors with that subject's token. A fresh encoder
# is constructed for every subject.
subject_hashes_map = {
    subject_id: make_map_of_threshold_hashes(
        processed_data_map[subject_id],
        subject_tokens_map[subject_id],
        feature_encoding.threshold.ThresholdBinaryEncoder(BINARY_THRESHOLD),
    )
    for subject_id in processed_data_map
}

# Test Set Assembly

## Gathering Test Sets

In [87]:
def make_threshold_test_sets(sample_hashes_map: typing.Dict[str, typing.Dict[str, typing.List[auth_biohash.bio_hash.BioHash]]],
                             template_hashes_map: typing.Dict[str, typing.Dict[str, auth_biohash.bio_hash.BioHash]],
                             target_subject: str) -> typing.List[ThresholdTestSet]:
    """Build one ThresholdTestSet per authentication threshold for a target subject.

    Args:
        sample_hashes_map: subject -> threshold key -> sample hashes.
        template_hashes_map: subject -> threshold key -> template hash.
        target_subject: subject whose template hash is compared against.

    Returns:
        One ``ThresholdTestSet`` per entry of ``AUTHENTICATION_THRESHOLDS``, in
        that order. The target subject's own sample hashes are the positive
        cases; the sample hashes of every other subject are the negative cases.

    Raises:
        KeyError: if the template map lacks the target subject or one of the
            threshold keys, or if the sample map contains a threshold key that
            is not derived from ``AUTHENTICATION_THRESHOLDS``.
    """
    threshold_test_sets: typing.Dict[str, ThresholdTestSet] = {}
    for threshold in AUTHENTICATION_THRESHOLDS:
        threshold_key = str(threshold)
        threshold_test_sets[threshold_key] = ThresholdTestSet(
            # Store the string key so the value matches the field's declared
            # ``str`` type; consumers convert it with float()/str() as needed.
            threshold=threshold_key,
            template_hash=template_hashes_map[target_subject][threshold_key],
            positive_cases=[], negative_cases=[]
        )
    for subject, hashes_by_threshold in sample_hashes_map.items():
        is_target = subject == target_subject
        for threshold_key, sample_hashes in hashes_by_threshold.items():
            test_set = threshold_test_sets[threshold_key]
            if is_target:
                test_set.positive_cases.extend(sample_hashes)
            else:
                test_set.negative_cases.extend(sample_hashes)
    return list(threshold_test_sets.values())

In [88]:
# Treat every subject in turn as the target: its own samples become the positive
# cases and all other subjects' samples the negative cases.
subject_test_sets = [
    SubjectTestSet(
        target_id,
        make_threshold_test_sets(subject_hashes_map, subject_template_hashes_map, target_id),
    )
    for target_id in subject_hashes_map
]

## Generating Hash Tests

In [89]:
def make_hash_tests(test_set: ThresholdTestSet) -> typing.List[HashTest]:
    """Create a balanced list of should-match and should-not-match tests.

    The same number of cases is drawn at random (without replacement, using
    ``RANDOM_GENERATOR``) from the positive and the negative population. That
    number is the size of the smaller population, so both outcomes are equally
    represented even though negatives usually outnumber positives.

    Returns:
        All should-match tests first, followed by all should-not-match tests.
    """
    def build_test(candidate: auth_biohash.bio_hash.BioHash, should_match: bool) -> HashTest:
        # A correct decision is a true positive/negative; a wrong one is a
        # false negative (missed match) or a false positive (wrong match).
        if should_match:
            on_expected = TestResultType.TRUE_POSITIVE
            on_unexpected = TestResultType.FALSE_NEGATIVE
        else:
            on_expected = TestResultType.TRUE_NEGATIVE
            on_unexpected = TestResultType.FALSE_POSITIVE
        return HashTest(
            expected_result=should_match,
            hashes=(test_set.template_hash, candidate),
            threshold=float(test_set.threshold),
            if_expected=on_expected,
            if_unexpected=on_unexpected,
        )

    balanced_size = min(len(test_set.positive_cases), len(test_set.negative_cases))
    # Positives are sampled before negatives; the order matters because both
    # draws advance the same seeded generator.
    genuine_cases = RANDOM_GENERATOR.sample(test_set.positive_cases, balanced_size)
    impostor_cases = RANDOM_GENERATOR.sample(test_set.negative_cases, balanced_size)

    return (
        [build_test(candidate, True) for candidate in genuine_cases]
        + [build_test(candidate, False) for candidate in impostor_cases]
    )

def make_threshold_tests_map(subject_tests: typing.List[SubjectTestSet]) -> typing.Dict[str, typing.List[HashTest]]:
    """Pool the hash tests of all subjects, grouped by threshold.

    Returns:
        A dict keyed by ``str(threshold)`` for every entry of
        ``AUTHENTICATION_THRESHOLDS``; each value holds the tests generated
        from every subject's test set for that threshold.
    """
    tests_by_threshold = {}
    for threshold in AUTHENTICATION_THRESHOLDS:
        tests_by_threshold[str(threshold)] = []

    for subject_entry in subject_tests:
        for threshold_set in subject_entry.threshold_tests:
            bucket = tests_by_threshold[str(threshold_set.threshold)]
            bucket.extend(make_hash_tests(threshold_set))
    return tests_by_threshold

In [90]:
# Pool every subject's generated hash tests into one list per threshold key.
threshold_tests_map = make_threshold_tests_map(subject_test_sets)

# Execute Tests

In [91]:
def run_threshold_tests(test_data: typing.Dict[str, typing.List[HashTest]]) -> typing.Dict[str, TestResultsSummary]:
    """Run every test and tally the outcomes separately for each threshold.

    Args:
        test_data: threshold key -> tests to execute for that threshold.

    Returns:
        threshold key -> ``TestResultsSummary`` holding the outcome counts.
    """
    summaries = {}
    for threshold, hash_tests in test_data.items():
        tally = TestResultsSummary()
        for outcome in (hash_test.run_test() for hash_test in hash_tests):
            tally.increment_count(outcome)
        summaries[threshold] = tally
    return summaries

In [92]:
# Execute all tests, then tabulate one row per threshold:
# false accept rate, false reject rate and accuracy.
test_results_map = run_threshold_tests(threshold_tests_map)

data_results = []
for threshold_type in test_results_map:
    result_summary = test_results_map[threshold_type]
    data_results.append([
        threshold_type,
        result_summary.false_accept_rate,
        result_summary.false_reject_rate,
        result_summary.accuracy,
    ])

test_results = pd.DataFrame(
    data_results,
    columns=['Threshold', 'FAR', 'FRR', 'Accuracy'],
)
test_results

Unnamed: 0,Threshold,FAR,FRR,Accuracy
0,0.1,0.0,0.099274,0.950363
1,0.2,0.0,0.077482,0.961259
2,0.3,0.0,0.077482,0.961259
3,0.4,0.046005,0.0,0.976998
4,0.5,0.556901,0.0,0.72155
5,0.6,0.968523,0.0,0.515738
6,0.7,1.0,0.0,0.5
7,0.8,1.0,0.0,0.5
8,0.9,1.0,0.0,0.5
