# Setup

Import the standard-library, third-party, and project modules used throughout this notebook.

In [85]:
import numpy as np
import dataclasses
import typing
import math
import random
import auth_biohash.hash
import auth_biohash.random_token
import feature_encoding.threshold

from eeg_auth_models_framework import data, pre_process, features, processor
from eeg_auth_models_framework.utils import conversion

# Constants

In [86]:
# Thresholds swept by the evaluation. Each value is passed to
# BioHash.generate_hash, and its str() form is used as a dictionary key in the
# per-threshold maps built later in the notebook.
AUTHENTICATION_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# Sampling rate handed to the MNE data-frame converter.
DATASET_SAMPLE_FREQ_HZ = 200
# Channel names handed to the converter; they match the DataFrame columns.
DATA_CHANNEL_NAMES = ['T7','F8','Cz','P4']
# Bands given to the band-pass filter step.
# NOTE(review): a bound of None presumably means "unbounded on that side" (so
# 'Raw' would be unfiltered) -- confirm against pre_process.FrequencyBand.
FREQUENCIES = [
    pre_process.FrequencyBand(lower=8.0, upper=12.0, label='Alpha'),
    pre_process.FrequencyBand(lower=12.0, upper=35.0, label='Beta'),
    pre_process.FrequencyBand(lower=4.0, upper=8.0, label='Theta'),
    pre_process.FrequencyBand(lower=35.0, upper=None, label='Gamma'),
    pre_process.FrequencyBand(lower=None, upper=None, label='Raw'),
]
# Arguments for pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP).
# NOTE(review): units are presumably samples -- confirm.
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0
# Constructor argument for feature_encoding.threshold.ThresholdBinaryEncoder.
BINARY_THRESHOLD = 50

# Utilities

In [87]:
@dataclasses.dataclass
class HashTest:
    """A single pairwise hash comparison together with its expected outcome."""

    # Whether the two hashes are expected to compare equal.
    expected_result: bool
    # The pair of hashes compared by run_test().
    hashes: typing.Tuple[auth_biohash.hash.BioHash, auth_biohash.hash.BioHash]

    def run_test(self):
        """Compare the two hashes with ``==`` and check the outcome.

        Returns:
            True when the comparison outcome agrees with ``expected_result``,
            False otherwise.
        """
        observed = (self.hashes[0] == self.hashes[1])
        mismatch = observed != self.expected_result
        return not mismatch

@dataclasses.dataclass
class ThresholdTestSet:
    """Hashes for one target subject at one authentication threshold."""

    # Authentication threshold the hashes were generated with. Instances are
    # built from the float entries of AUTHENTICATION_THRESHOLDS, so this is a
    # float; callers apply str() when they need the dictionary-key form.
    threshold: float
    # Hashes that belong to the target subject.
    positive_cases: typing.List[auth_biohash.hash.BioHash]
    # Hashes that belong to every other subject.
    negative_cases: typing.List[auth_biohash.hash.BioHash]

@dataclasses.dataclass
class SubjectTestSet:
    """All per-threshold test sets built for a single target subject."""
    # Identifier of the target subject (a key of the subject maps, e.g. 'S01').
    subject_id: str
    # Test sets for this subject, one ThresholdTestSet per entry.
    threshold_tests: typing.List[ThresholdTestSet]

# Configuration

In [88]:
# Downloader and reader for the dataset used in this evaluation.
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()
# Converter configured with the channel names and sampling rate; it is handed
# to the band-pass filter step of the pre-processing pipeline.
converter = conversion.MNEDataFrameConverter(
    channels=DATA_CHANNEL_NAMES, 
    sample_frequency=DATASET_SAMPLE_FREQ_HZ
)

# Data Processing Setup

## Pre-Processing Steps

In [89]:
# Pre-processing pipeline: band-pass filtering over FREQUENCIES, followed by
# windowing with WINDOW_SIZE / WINDOW_OVERLAP.
# NOTE(review): steps are presumably applied in list order -- confirm against
# pre_process.PreProcessingPipeline.
pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    ),
    pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)
])

## Feature Extraction Steps

In [90]:
# Feature-extraction pipeline: a single statistical extractor configured with
# four features (min, max, mean, zero-crossing rate).
feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

## Data Processor

In [91]:
# Combine the pre-processing and feature-extraction pipelines into the single
# processor that is applied to every subject's data.
data_processor = processor.DataProcessor(
    pre_process=pre_process_steps,
    feature_extraction=feature_extraction_steps
)

# Subject Data

In [92]:
# Fetch the dataset and parse it into a mapping of subject id -> list of
# DataFrames with one column per channel (see the displayed output).
data_path = downloader.retrieve()
subject_data_map = reader.format_data(data_path)
# NOTE(review): this displays the entire mapping; a per-subject summary
# (e.g. frame shapes) would keep the notebook output small.
subject_data_map

{'S01': [               T7           F8          Cz          P4
  13200  431.251617 -1189.493896  454.405334  345.306824
  13201  444.240265 -1194.415649  471.231140  363.666016
  13202  439.064270 -1188.719727  457.135437  325.425537
  13203  442.071136 -1193.476929  458.751099  340.463654
  13204  435.933960 -1197.149414  442.688232  333.630859
  ...           ...          ...         ...         ...
  37195  453.833130 -1196.178955  476.693268  332.022705
  37196  434.430511 -1195.045776  459.874939  336.181488
  37197  417.862762 -1210.839355  458.289215  340.183167
  37198  431.681702 -1197.667480  463.925232  339.917633
  37199  446.768463 -1189.394775  482.639709  349.117798
  
  [24000 rows x 4 columns]],
 'S02': [               T7           F8          Cz          P4
  11000  457.670258 -1004.481018  508.474915  187.082886
  11001  433.761078 -1013.741028  507.588562  186.177826
  11002  435.257050 -1009.795410  508.231842  188.552673
  11003  444.831177 -1006.882019  512.4541

## Token Setup

In [93]:
# Generate one token per subject; the token is later passed to MatrixGenerator
# inside normalize_vectors.
# NOTE(review): tokens are presumably random, so they (and everything derived
# from them) change on every run -- confirm whether a fixed seed is wanted.
subject_tokens_map = {subject: auth_biohash.random_token.generate_token() for subject in subject_data_map}
# NOTE(review): this prints every token in full into the notebook output.
subject_tokens_map

{'S01': '83df8244561f2436a13d260c291d25fcd38244d834c898a539bf5621d0933827c9c8531dcd60f9c4dae30e0f30d46a66e56009a5de2aaa60d1bc1ce323e98791905819a190945c9605d4aa2c6315a1113ab4cb427dd5a4c4f37fc52a9402be31fc9505f4c55f1a6070798e7fedb923b52ca776b5f8b8d162786c67b625639d2368eb13a480d3314fbafe3f6afd3dd9e807242cde340fa3929d4b77468119f5d39982a9b2662bc3405bf30363bb9fffc3d67a442b97ed7fb3df88438379e65f6a3db9c5f99779dc4fdc9f0a6b21131ed14520619740268adc172c6c2d3a40df132b9c16749d4bfe462eaf0b918d2b6286f6b5b55f5d23e4be516babafe2087ea2',
 'S02': '9f4331048372b3c600fae9689ad1c6901643636d6c99625c21b2a904d0872c636a9eb39109066fdf5bc7fa87b185985521aca0f84bdf02fb0d0a56d6466db81d6186e0a2610633b971e9f6fe3df17fe3b4c070173719d3d2a2ee8e3aa6e45c4b189dfd1848511762e28e1b078726e17befbc55668e09eb6294c3e90ad4556d0067e81b65551c3775cb381a4897a9a800a16e50489410acf7cc5fe12e80aba112220d771fb9c2e1dc11834e54e9cc6c127c2f511b854ba16faa16db7bac84ae627cc992c7a521b93720575fc0dc70655369570e80567578781582263c0ccf8b0d58735b40263d72bdf50

## Processing

In [94]:
# Run the processing pipeline on each subject's data. The result maps
# subject id -> list of feature arrays (see the displayed output).
processed_data_map = {subject: data_processor.process(subject_data_map[subject]) for subject in subject_data_map}
processed_data_map

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

{'S01': [array([-1.38215250e+07,  1.39524481e+07,  2.30669831e+04,  1.04166667e-01,
         -1.33188938e+07,  1.23118873e+07,  1.07595349e+04,  2.13333333e-01,
         -8.50688623e+06,  7.58143167e+06,  5.38479622e+03,  6.33333333e-02,
         -3.11927915e+07,  3.15010114e+07,  3.08707558e+03,  5.43333333e-01,
          3.86584106e+08,  4.81055847e+08,  4.40934159e+08,  0.00000000e+00,
         -1.63194953e+07,  1.54157468e+07, -1.19110791e+04,  1.05000000e-01,
         -1.22098452e+07,  1.36056335e+07,  9.85539811e+02,  2.15833333e-01,
         -1.09752728e+07,  9.74591110e+06, -1.56998768e+04,  6.33333333e-02,
         -3.36370308e+07,  2.89119063e+07, -3.10105768e+03,  5.55833333e-01,
         -1.23803223e+09, -1.14405408e+09, -1.19490520e+09,  0.00000000e+00,
         -1.44408963e+07,  1.33289983e+07,  2.09954706e+04,  1.02500000e-01,
         -1.29073672e+07,  9.43481925e+06, -5.55832882e+03,  2.05000000e-01,
         -6.66783987e+06,  6.50344327e+06, -5.37477503e+03,  6.166666

# Hashing

## Token Normalization

In [95]:
def normalize_vectors(vectors_to_normalize: typing.List[np.ndarray], token: str) -> typing.List[np.ndarray]:
    """Normalize every vector with a normalization built from ``token``.

    Args:
        vectors_to_normalize: Feature vectors to normalize.
        token: Token used to construct the ``MatrixGenerator`` that backs the
            ``TokenMatrixNormalization``.

    Returns:
        The normalized vectors, in the same order as the input.
    """
    normalizer = auth_biohash.hash.TokenMatrixNormalization(
        auth_biohash.random_token.MatrixGenerator(token)
    )
    normalized_vectors = []
    for vector in vectors_to_normalize:
        normalized_vectors.append(normalizer.normalize(vector))
    return normalized_vectors

In [96]:
# Normalize each subject's feature vectors with that subject's own token.
normalized_data_map = {subject: normalize_vectors(processed_data_map[subject], subject_tokens_map[subject]) for subject in processed_data_map}
normalized_data_map

{'S01': [array([-3.64967812e+07, -3.44723721e+08, -1.21342992e+08,  1.26701345e+08,
         -2.34380432e+08, -4.81277219e+08,  6.74642235e+08, -8.20720586e+07,
          3.68080978e+08,  9.65435584e+07, -3.08941602e+08,  4.80956223e+08,
         -1.93238606e+08,  3.51142717e+08,  1.26419371e+08, -3.35559510e+07,
          4.47855074e+08, -2.90719789e+08,  1.29059110e+08, -1.11074010e+07,
         -9.50848890e+07, -5.87412616e+08,  1.74695831e+08, -9.58835291e+07,
          3.25013041e+08, -1.68329731e+08, -1.23642929e+06,  1.42645049e+07,
         -2.26708973e+08,  7.10460471e+06,  1.11504615e+08, -2.15915180e+08,
         -4.00338528e+08,  3.22027765e+08,  1.36812886e+07, -9.16808926e+07,
          9.63989689e+07,  4.41906701e+08,  4.37274591e+08, -1.57424680e+08,
          2.42324526e+08, -1.15720262e+08, -9.42186795e+07,  1.15376606e+07,
         -3.11236229e+08, -1.72042708e+08, -2.77215405e+08,  1.62523222e+07,
         -4.06487897e+08,  1.36898164e+08,  2.32487778e+08,  1.902455

## Encoding

In [97]:
def hash_vectors(vectors_to_hash: typing.List[np.ndarray], threshold: float) -> typing.List[auth_biohash.hash.BioHash]:
    """Generate one BioHash per vector for the given threshold.

    Args:
        vectors_to_hash: Vectors to hash.
        threshold: Threshold forwarded to ``BioHash.generate_hash``.

    Returns:
        The generated hashes, in the same order as the input vectors.
    """
    # One encoder, built from BINARY_THRESHOLD, is shared by every hash.
    binary_encoder = feature_encoding.threshold.ThresholdBinaryEncoder(BINARY_THRESHOLD)
    hashes = []
    for vector in vectors_to_hash:
        hashes.append(auth_biohash.hash.BioHash.generate_hash(vector, threshold, binary_encoder))
    return hashes


def make_map_of_threshold_hashes(vectors_to_hash: typing.List[np.ndarray]) -> typing.Dict[str, typing.List[auth_biohash.hash.BioHash]]:
    """Hash the vectors once for every entry of AUTHENTICATION_THRESHOLDS.

    Args:
        vectors_to_hash: Vectors to hash.

    Returns:
        Mapping of ``str(threshold)`` -> hashes generated with that threshold.
    """
    return {
        str(threshold): hash_vectors(vectors_to_hash, threshold)
        for threshold in AUTHENTICATION_THRESHOLDS
    }

In [98]:
# For each subject, hash the normalized vectors once per authentication
# threshold: subject id -> threshold key -> list of hashes.
subject_hashes_map = {subject: make_map_of_threshold_hashes(normalized_data_map[subject]) for subject in normalized_data_map}
subject_hashes_map

{'S01': {'0.1': [<auth_biohash.hash.BioHash at 0x1e7bde2de10>,
   <auth_biohash.hash.BioHash at 0x1e7bde2d0f0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2c130>,
   <auth_biohash.hash.BioHash at 0x1e7bde2ea10>,
   <auth_biohash.hash.BioHash at 0x1e7bde2eaa0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2f040>,
   <auth_biohash.hash.BioHash at 0x1e7bde2d420>,
   <auth_biohash.hash.BioHash at 0x1e7bde2d510>,
   <auth_biohash.hash.BioHash at 0x1e7bde2d6c0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2dcf0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2dd50>,
   <auth_biohash.hash.BioHash at 0x1e7bde2dd80>,
   <auth_biohash.hash.BioHash at 0x1e7bde2e4d0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2ee30>,
   <auth_biohash.hash.BioHash at 0x1e7bde2f760>,
   <auth_biohash.hash.BioHash at 0x1e7bde2ded0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2e110>,
   <auth_biohash.hash.BioHash at 0x1e7bde2d5a0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2c4f0>,
   <auth_biohash.hash.BioHash at 0x1e7bde2d270>],
  '0.

# Test Set Assembly

## Gathering Test Sets

In [99]:
def make_threshold_test_sets(hashes_map: typing.Dict[str, typing.Dict[str, typing.List[auth_biohash.hash.BioHash]]],
                             target_subject: str) -> typing.List[ThresholdTestSet]:
    """Split every subject's hashes into positive and negative cases per threshold.

    Args:
        hashes_map: Mapping of subject id -> threshold key -> hashes.
        target_subject: Subject whose hashes become the positive cases; the
            hashes of all other subjects become the negative cases.

    Returns:
        One ThresholdTestSet per entry of AUTHENTICATION_THRESHOLDS, in that
        order.
    """
    sets_by_threshold: typing.Dict[str, ThresholdTestSet] = {}
    for threshold in AUTHENTICATION_THRESHOLDS:
        sets_by_threshold[str(threshold)] = ThresholdTestSet(
            threshold=threshold, positive_cases=[], negative_cases=[]
        )

    for subject, hashes_by_threshold in hashes_map.items():
        is_target = (subject == target_subject)
        for threshold_key, subject_hashes in hashes_by_threshold.items():
            bucket = sets_by_threshold[threshold_key]
            destination = bucket.positive_cases if is_target else bucket.negative_cases
            destination.extend(subject_hashes)

    return list(sets_by_threshold.values())

In [100]:
# Build one SubjectTestSet per subject, treating that subject as the target:
# its own hashes are the positive cases, everyone else's are the negatives.
subject_test_sets = [
    SubjectTestSet(subject, make_threshold_test_sets(subject_hashes_map, subject)) 
    for subject in subject_hashes_map
]
subject_test_sets

[SubjectTestSet(subject_id='S01', threshold_tests=[ThresholdTestSet(threshold=0.1, positive_cases=[<auth_biohash.hash.BioHash object at 0x000001E7BDE2DE10>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2D0F0>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2C130>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2EA10>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2EAA0>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2F040>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2D420>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2D510>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2D6C0>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2DCF0>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2DD50>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2DD80>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2E4D0>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2EE30>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2F760>, <auth_biohash.hash.BioHash obje

## Generating Hash Tests

In [101]:
def make_hash_tests(test_set: ThresholdTestSet,
                    rng: typing.Optional[random.Random] = None) -> typing.List[HashTest]:
    """Build matching and non-matching hash comparisons for one test set.

    The positive cases are split at their midpoint. Each hash in the first
    half is paired with the hash at the same position in the second half
    (expected to match) and with one randomly drawn negative case (expected
    not to match).

    Args:
        test_set: Positive and negative hashes for one subject and threshold.
        rng: Optional ``random.Random`` used to draw the negative cases. Pass
            a seeded instance to make the generated tests reproducible. When
            omitted, the module-level ``random`` generator is used.

    Returns:
        The "should match" tests followed by the "should not match" tests.
        If there are fewer negative cases than half of the positive cases,
        only as many non-matching tests as there are negative cases are
        produced.
    """
    half_point = len(test_set.positive_cases) // 2
    initial_cases = test_set.positive_cases[:half_point]
    should_match_cases = test_set.positive_cases[half_point:]

    sampler = random if rng is None else rng
    # random.sample raises ValueError when asked for more items than the
    # population holds, so never request more negatives than are available.
    sample_size = min(half_point, len(test_set.negative_cases))
    should_not_match_cases: typing.List[auth_biohash.hash.BioHash] = sampler.sample(
        test_set.negative_cases, sample_size
    )

    # zip stops at the shorter sequence, so an odd number of positive cases
    # simply leaves the last hash of the second half unused.
    tests = [HashTest(True, pair) for pair in zip(initial_cases, should_match_cases)]
    tests.extend(HashTest(False, pair) for pair in zip(initial_cases, should_not_match_cases))
    return tests

def make_threshold_tests_map(subject_tests: typing.List[SubjectTestSet]) -> typing.Dict[str, typing.List[HashTest]]:
    """Collect the hash tests of all subjects, grouped by threshold.

    Args:
        subject_tests: Per-subject test sets to turn into hash tests.

    Returns:
        Mapping of ``str(threshold)`` -> every HashTest generated for that
        threshold across all subjects. Every entry of
        AUTHENTICATION_THRESHOLDS is present, possibly with an empty list.
    """
    tests_by_threshold = {}
    for threshold in AUTHENTICATION_THRESHOLDS:
        tests_by_threshold[str(threshold)] = []

    for subject_entry in subject_tests:
        for threshold_set in subject_entry.threshold_tests:
            key = str(threshold_set.threshold)
            tests_by_threshold[key] += make_hash_tests(threshold_set)
    return tests_by_threshold

In [102]:
# Turn the per-subject test sets into one list of HashTest objects per
# threshold key.
threshold_tests_map = make_threshold_tests_map(subject_test_sets)
threshold_tests_map

{'0.1': [HashTest(expected_result=True, hashes=(<auth_biohash.hash.BioHash object at 0x000001E7BDE2DE10>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2DD50>)),
  HashTest(expected_result=True, hashes=(<auth_biohash.hash.BioHash object at 0x000001E7BDE2D0F0>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2DD80>)),
  HashTest(expected_result=True, hashes=(<auth_biohash.hash.BioHash object at 0x000001E7BDE2C130>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2E4D0>)),
  HashTest(expected_result=True, hashes=(<auth_biohash.hash.BioHash object at 0x000001E7BDE2EA10>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2EE30>)),
  HashTest(expected_result=True, hashes=(<auth_biohash.hash.BioHash object at 0x000001E7BDE2EAA0>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2F760>)),
  HashTest(expected_result=True, hashes=(<auth_biohash.hash.BioHash object at 0x000001E7BDE2F040>, <auth_biohash.hash.BioHash object at 0x000001E7BDE2DED0>)),
  HashTest(expected_result=True, hashes

# Execute Tests

In [103]:
def run_threshold_tests(test_data: typing.Dict[str, typing.List[HashTest]]) -> typing.Dict[str, float]:
    """Run every hash test and report the pass rate for each threshold.

    Args:
        test_data: Mapping of threshold key -> tests to execute.

    Returns:
        Mapping of threshold key -> percentage (0-100) of tests whose
        ``run_test()`` returned a truthy value. A threshold with no tests
        maps to NaN, because its accuracy is undefined.
    """
    results = {}
    for threshold, tests in test_data.items():
        if not tests:
            # No tests for this threshold: report NaN instead of dividing by
            # zero.
            results[threshold] = float('nan')
            continue
        hits = sum(1 for test in tests if test.run_test())
        results[threshold] = (hits / len(tests)) * 100
    return results

In [104]:
# Execute all tests and print the accuracy obtained for every threshold.
test_results_map = run_threshold_tests(threshold_tests_map)
for threshold_type, accuracy in test_results_map.items():
    print(f'Threshold: {threshold_type}, Accuracy: {accuracy}%')

Threshold: 0.1, Accuracy: 99.02912621359224%
Threshold: 0.2, Accuracy: 99.75728155339806%
Threshold: 0.3, Accuracy: 99.75728155339806%
Threshold: 0.4, Accuracy: 98.30097087378641%
Threshold: 0.5, Accuracy: 72.0873786407767%
Threshold: 0.6, Accuracy: 52.66990291262136%
Threshold: 0.7, Accuracy: 50.0%
Threshold: 0.8, Accuracy: 50.0%
Threshold: 0.9, Accuracy: 50.0%
