# Setup

Initial module setup.

In [63]:
import numpy as np
import dataclasses
import typing
import math
import random
import auth_biohash.hash
import auth_biohash.random_token
import feature_encoding.threshold

from eeg_auth_models_framework import data, pre_process, features, processor
from eeg_auth_models_framework.utils import conversion

# Constants

In [64]:
# Candidate authentication thresholds to evaluate. Each value is passed to
# BioHash.generate_hash, and its str() form is used as the dictionary key for the
# hashes and tests that belong to that threshold.
AUTHENTICATION_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# Sampling rate handed to the MNE data-frame converter.
DATASET_SAMPLE_FREQ_HZ = 200
# Channel names handed to the MNE data-frame converter.
DATA_CHANNEL_NAMES = ['T7','F8','Cz','P4']
# Frequency bands supplied to the band-pass filter step.
# NOTE(review): a None bound presumably means "unbounded on that side" - confirm against FrequencyBand.
FREQUENCIES = [
    pre_process.FrequencyBand(lower=8.0, upper=12.0, label='Alpha'),
    pre_process.FrequencyBand(lower=12.0, upper=35.0, label='Beta'),
    pre_process.FrequencyBand(lower=4.0, upper=8.0, label='Theta'),
    pre_process.FrequencyBand(lower=35.0, upper=None, label='Gamma'),
    pre_process.FrequencyBand(lower=None, upper=None, label='Raw'),
]
# Arguments for DataWindowStep.
# NOTE(review): presumably expressed in samples (1200 samples would be 6 s at 200 Hz, which
# is consistent with 24000-sample recordings yielding 20 feature vectors) - confirm.
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0
# Constructor argument for ThresholdBinaryEncoder.
# NOTE(review): the unit/meaning of this value is not visible in this notebook - confirm.
BINARY_THRESHOLD = 50

# Utilities

In [65]:
@dataclasses.dataclass
class HashTest:
    """
    A single pairwise comparison between two BioHash instances.

    ``expected_result`` records whether the two hashes are supposed to compare
    equal; ``hashes`` holds the pair being compared.
    """
    expected_result: bool
    hashes: typing.Tuple[auth_biohash.hash.BioHash, auth_biohash.hash.BioHash]

    def run_test(self):
        """Return True when the equality of the two hashes agrees with ``expected_result``."""
        reference, candidate = self.hashes[0], self.hashes[1]
        outcome = (reference == candidate)
        # The test is a hit exactly when the observed outcome does not differ from the expectation.
        return not (outcome != self.expected_result)

@dataclasses.dataclass
class ThresholdTestSet:
    """
    Hashes generated at one authentication threshold, split relative to a target subject.

    ``positive_cases`` holds the target subject's hashes and ``negative_cases`` holds
    the hashes of every other subject.
    """
    # Numeric threshold value taken from AUTHENTICATION_THRESHOLDS; code that needs a
    # dictionary key converts it with str().
    threshold: float
    positive_cases: typing.List[auth_biohash.hash.BioHash]
    negative_cases: typing.List[auth_biohash.hash.BioHash]

@dataclasses.dataclass
class SubjectTestSet:
    """All per-threshold test sets assembled with one subject acting as the target."""
    # Identifier of the target subject.
    subject_id: str
    # One ThresholdTestSet per authentication threshold.
    threshold_tests: typing.List[ThresholdTestSet]

# Configuration

In [66]:
# Dataset helpers: the downloader provides the data location and the reader turns it
# into the per-subject mapping used in the "Subject Data" section.
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()
# Converter configured with the channel names and sampling rate from the constants cell;
# it is handed to the band-pass filter step.
converter = conversion.MNEDataFrameConverter(
    channels=DATA_CHANNEL_NAMES, 
    sample_frequency=DATASET_SAMPLE_FREQ_HZ
)

# Data Processing Setup

## Pre-Processing Steps

In [67]:
# Pre-processing pipeline: a band-pass filter step configured with FREQUENCIES and the
# converter, then a windowing step configured with WINDOW_SIZE and WINDOW_OVERLAP.
# NOTE(review): list order is presumably execution order - confirm against PreProcessingPipeline.
pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    ),
    pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)
])

## Feature Extraction Steps

In [68]:
# Feature-extraction pipeline with a single statistical extractor that is asked for the
# MIN, MAX, MEAN and ZERO_CROSSING_RATE features.
feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

## Data Processor

In [69]:
# Combine the pre-processing and feature-extraction pipelines into the single processor
# that is applied to every subject's data in the "Processing" section.
data_processor = processor.DataProcessor(
    pre_process=pre_process_steps,
    feature_extraction=feature_extraction_steps
)

# Subject Data

In [70]:
# Ask the downloader for the dataset location.
# NOTE(review): presumably downloads the data on first use - confirm against AuditoryDataDownloader.
data_path = downloader.retrieve()
# Read that location into a mapping that is iterated and indexed by subject below.
subject_data_map = reader.format_data(data_path)

## Token Setup

In [71]:
# One generated token per subject; the token is later passed to MatrixGenerator when
# that subject's vectors are normalized.
# NOTE(review): generate_token() presumably returns a fresh random token on each call, which
# would make results differ between runs - confirm.
subject_tokens_map = {subject: auth_biohash.random_token.generate_token() for subject in subject_data_map}

## Processing

In [72]:
# Feed every subject's data through data_processor; the result per subject is the list
# of feature vectors that is normalized in the "Hashing" section.
processed_data_map = {subject: data_processor.process(subject_data_map[subject]) for subject in subject_data_map}

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

# Hashing

## Token Normalization

In [73]:
def normalize_vectors(vectors_to_normalize: typing.List[np.ndarray], token: str) -> typing.List[np.ndarray]:
    """
    Normalize every feature vector with a normalization built from ``token``.

    A MatrixGenerator is created from the token and wrapped in a
    TokenMatrixNormalization; each vector is then passed through its
    ``normalize`` method. The output list keeps the order of the input list.
    """
    normalizer = auth_biohash.hash.TokenMatrixNormalization(
        auth_biohash.random_token.MatrixGenerator(token)
    )
    normalized = []
    for vector in vectors_to_normalize:
        normalized.append(normalizer.normalize(vector))
    return normalized

In [74]:
# Normalize each subject's feature vectors using that subject's own token.
normalized_data_map = {subject: normalize_vectors(processed_data_map[subject], subject_tokens_map[subject]) for subject in processed_data_map}

In [75]:
# Sanity check: report how many normalized feature vectors each subject produced.
for subject, vectors in normalized_data_map.items():
    print(f'Subject {subject}: {len(vectors)} normalized feature vectors')

Subject S01: 20 normalized feature vectors
Subject S02: 20 normalized feature vectors
Subject S03: 20 normalized feature vectors
Subject S04: 20 normalized feature vectors
Subject S05: 33 normalized feature vectors
Subject S06: 20 normalized feature vectors
Subject S07: 20 normalized feature vectors
Subject S08: 20 normalized feature vectors
Subject S09: 20 normalized feature vectors
Subject S10: 20 normalized feature vectors
Subject S11: 20 normalized feature vectors
Subject S12: 20 normalized feature vectors
Subject S13: 20 normalized feature vectors
Subject S14: 20 normalized feature vectors
Subject S15: 20 normalized feature vectors
Subject S16: 20 normalized feature vectors
Subject S17: 20 normalized feature vectors
Subject S18: 20 normalized feature vectors
Subject S19: 20 normalized feature vectors
Subject S20: 20 normalized feature vectors


## Encoding

In [76]:
def hash_vectors(vectors_to_hash: typing.List[np.ndarray], threshold: float) -> typing.List[auth_biohash.hash.BioHash]:
    """
    Generate one BioHash per vector at the given threshold.

    A single ThresholdBinaryEncoder, constructed with BINARY_THRESHOLD, is shared by
    every ``BioHash.generate_hash`` call. The output keeps the order of the input.
    """
    binary_encoder = feature_encoding.threshold.ThresholdBinaryEncoder(BINARY_THRESHOLD)
    generated = []
    for vector in vectors_to_hash:
        generated.append(auth_biohash.hash.BioHash.generate_hash(vector, threshold, binary_encoder))
    return generated


def make_map_of_threshold_hashes(vectors_to_hash: typing.List[np.ndarray]) -> typing.Dict[str, typing.List[auth_biohash.hash.BioHash]]:
    """
    Hash the vectors once for every entry in AUTHENTICATION_THRESHOLDS.

    Returns a dictionary keyed by ``str(threshold)`` whose values are the hashes
    produced by ``hash_vectors`` at that threshold.
    """
    return {
        str(level): hash_vectors(vectors_to_hash, level)
        for level in AUTHENTICATION_THRESHOLDS
    }

In [77]:
# For every subject, hash the normalized vectors once per authentication threshold
# (subject -> str(threshold) -> list of hashes).
subject_hashes_map = {subject: make_map_of_threshold_hashes(normalized_data_map[subject]) for subject in normalized_data_map}

In [78]:
# Sanity check: for each subject, print the number of hashes generated at every threshold.
for subject, hashes in subject_hashes_map.items():
    # One "threshold --> count" fragment per threshold, joined into a single line.
    hash_counts = ', '.join([f'{hash_threshold} --> {len(hash_instances)}' for hash_threshold, hash_instances in hashes.items()])
    print(f'Subject {subject} BioHash counts (per threshold): {hash_counts}')

Subject S01 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S02 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S03 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S04 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S05 BioHash counts (per threshold): 0.1 --> 33, 0.2 --> 33, 0.3 --> 33, 0.4 --> 33, 0.5 --> 33, 0.6 --> 33, 0.7 --> 33, 0.8 --> 33, 0.9 --> 33
Subject S06 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S07 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.

# Test Set Assembly

## Gathering Test Sets

In [79]:
def make_threshold_test_sets(hashes_map: typing.Dict[str, typing.Dict[str, typing.List[auth_biohash.hash.BioHash]]],
                             target_subject: str) -> typing.List[ThresholdTestSet]:
    """
    Split every subject's hashes into positive and negative cases for one target subject.

    One ThresholdTestSet is created per entry in AUTHENTICATION_THRESHOLDS. Hashes that
    belong to ``target_subject`` are appended to the positive cases of the matching
    threshold; hashes of all other subjects are appended to its negative cases.

    Returns the test sets in the order of AUTHENTICATION_THRESHOLDS.
    """
    sets_by_threshold: typing.Dict[str, ThresholdTestSet] = {}
    for level in AUTHENTICATION_THRESHOLDS:
        sets_by_threshold[str(level)] = ThresholdTestSet(threshold=level, positive_cases=[], negative_cases=[])

    for subject, hashes_by_threshold in hashes_map.items():
        is_target = (subject == target_subject)
        for threshold_key, subject_hashes in hashes_by_threshold.items():
            bucket = sets_by_threshold[threshold_key]
            destination = bucket.positive_cases if is_target else bucket.negative_cases
            destination.extend(subject_hashes)

    return list(sets_by_threshold.values())

In [80]:
# Build one SubjectTestSet per subject, each time treating that subject as the target
# whose hashes are the positive cases.
subject_test_sets = [
    SubjectTestSet(subject, make_threshold_test_sets(subject_hashes_map, subject)) 
    for subject in subject_hashes_map
]

## Generating Hash Tests

In [81]:
def make_hash_tests(test_set: ThresholdTestSet,
                    rng: typing.Optional[random.Random] = None) -> typing.List[HashTest]:
    """
    Build the expected-match and expected-mismatch comparisons for one threshold test set.

    The positive cases are split at their midpoint: the first half acts as the reference
    hashes, the second half supplies the comparisons that are expected to match, and an
    equally sized random sample of the negative cases supplies the comparisons that are
    expected not to match.

    Args:
        test_set: positive and negative hashes for a single threshold.
        rng: optional ``random.Random`` used to draw the negative sample. Pass a seeded
            instance to get a repeatable selection; when omitted, the module-level
            generator of ``random`` is used.

    Returns:
        The expected-match tests followed by the expected-mismatch tests.

    Raises:
        ValueError: raised by ``sample`` when there are fewer negative cases than half
            of the positive cases.
    """
    sampler = random if rng is None else rng
    half_point = len(test_set.positive_cases) // 2
    initial_cases = test_set.positive_cases[:half_point]
    should_match_cases = test_set.positive_cases[half_point:]
    should_not_match_cases: typing.List[auth_biohash.hash.BioHash] = sampler.sample(test_set.negative_cases, half_point)
    # zip stops at the shorter sequence, so with an odd number of positive cases the
    # last entry of the second half is left unused.
    tests = [HashTest(True, (sample, comparison))
             for sample, comparison in zip(initial_cases, should_match_cases)]
    tests.extend(HashTest(False, (sample, comparison))
                 for sample, comparison in zip(initial_cases, should_not_match_cases))
    return tests

def make_threshold_tests_map(subject_tests: typing.List[SubjectTestSet]) -> typing.Dict[str, typing.List[HashTest]]:
    """
    Pool the hash tests of every subject, grouped by threshold.

    The result has one key per entry in AUTHENTICATION_THRESHOLDS (``str(threshold)``);
    each value collects the tests produced by ``make_hash_tests`` for that threshold
    across all subjects, in the order the subjects are given.
    """
    tests_by_threshold = {}
    for level in AUTHENTICATION_THRESHOLDS:
        tests_by_threshold[str(level)] = []

    for subject_entry in subject_tests:
        for threshold_set in subject_entry.threshold_tests:
            bucket = tests_by_threshold[str(threshold_set.threshold)]
            bucket.extend(make_hash_tests(threshold_set))

    return tests_by_threshold

In [82]:
# The negative cases are drawn with random.sample inside make_hash_tests. Seed the
# module-level generator first so the same cases are selected every time this cell
# runs against the same hashes.
TEST_SAMPLING_SEED = 0
random.seed(TEST_SAMPLING_SEED)
threshold_tests_map = make_threshold_tests_map(subject_test_sets)

# Execute Tests

In [83]:
def run_threshold_tests(test_data: typing.Dict[str, typing.List[HashTest]]) -> typing.Dict[str, float]:
    """
    Run every hash test and report the accuracy per threshold.

    Args:
        test_data: tests grouped by threshold key.

    Returns:
        A dictionary with the same keys as ``test_data``. Each value is the percentage
        (0-100) of tests whose ``run_test`` returned a truthy result, or NaN when the
        threshold has no tests.
    """
    results = {}
    for threshold, tests in test_data.items():
        if not tests:
            # Accuracy is undefined without any comparison; report NaN instead of
            # failing with a division by zero.
            results[threshold] = float('nan')
            continue
        hits = sum(1 for test in tests if test.run_test())
        results[threshold] = (hits / len(tests)) * 100
    return results

In [84]:
# Execute every generated comparison and print the accuracy (in percent) per threshold.
test_results_map = run_threshold_tests(threshold_tests_map)
for threshold_type, accuracy in test_results_map.items():
    print(f'Threshold: {threshold_type}, Accuracy: {accuracy}%')

Threshold: 0.1, Accuracy: 99.51456310679612%
Threshold: 0.2, Accuracy: 99.75728155339806%
Threshold: 0.3, Accuracy: 99.75728155339806%
Threshold: 0.4, Accuracy: 97.81553398058253%
Threshold: 0.5, Accuracy: 70.14563106796116%
Threshold: 0.6, Accuracy: 51.45631067961165%
Threshold: 0.7, Accuracy: 50.0%
Threshold: 0.8, Accuracy: 50.0%
Threshold: 0.9, Accuracy: 50.0%
