# Setup

In [91]:
import numpy as np
import dataclasses
import typing
import math
import random
import fuzzy_hash_lib
import feature_encoding.threshold

from eeg_auth_models_framework import data, pre_process, features, processor, normalization
from eeg_auth_models_framework.utils import conversion

# Constants

In [92]:
AUTHENTICATION_THRESHOLDS = [10, 20, 30, 40, 50, 60, 70, 80, 90]
DATASET_SAMPLE_FREQ_HZ = 200
DATA_CHANNEL_NAMES = ['T7','F8','Cz','P4']
FREQUENCIES = [
    pre_process.FrequencyBand(lower=8.0, upper=12.0, label='Alpha'),
    pre_process.FrequencyBand(lower=12.0, upper=35.0, label='Beta'),
    pre_process.FrequencyBand(lower=4.0, upper=8.0, label='Theta'),
    pre_process.FrequencyBand(lower=35.0, upper=None, label='Gamma'),
    pre_process.FrequencyBand(lower=None, upper=None, label='Raw'),
]
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0
BINARY_THRESHOLD = 127
RESCALE_LOWER = 0
RESCALE_UPPER = 255

# Utilities

In [93]:
@dataclasses.dataclass
class HashTest:
    expected_result: bool
    threshold: int
    hashes: typing.Tuple[fuzzy_hash_lib.FuzzyHash, fuzzy_hash_lib.FuzzyHash]
    
    def run_test(self):
        similarity = fuzzy_hash_lib.FuzzyHash.compare(self.hashes[0], self.hashes[1])
        is_match = similarity >= self.threshold
        if is_match != self.expected_result:
            return False
        return True
    
@dataclasses.dataclass
class ThresholdTestSet:
    threshold: int
    positive_cases: typing.List[fuzzy_hash_lib.FuzzyHash]
    negative_cases: typing.List[fuzzy_hash_lib.FuzzyHash]
    
@dataclasses.dataclass
class SubjectTestSet:
    subject_id: str
    threshold_tests: typing.List[ThresholdTestSet]

# Configuration

In [94]:
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()
converter = conversion.MNEDataFrameConverter(
    channels=DATA_CHANNEL_NAMES, 
    sample_frequency=DATASET_SAMPLE_FREQ_HZ
)

# Data Processing Setup

## Pre-Processing Steps

In [95]:
pre_process_steps = pre_process.PreProcessingPipeline([
    pre_process.EEGBandpassFilterStep(
        FREQUENCIES,
        converter
    ),
    pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP)
])

## Feature Extraction Steps

In [96]:
feature_extraction_steps = features.FeatureExtractPipeline([
    features.StatisticalFeatureExtractor([
        features.StatisticalFeature.MIN,
        features.StatisticalFeature.MAX,
        features.StatisticalFeature.MEAN,
        features.StatisticalFeature.ZERO_CROSSING_RATE
    ])
])

## Data Processor

In [97]:
data_processor = processor.DataProcessor(
    pre_process=pre_process_steps,
    feature_extraction=feature_extraction_steps
)

# Subject Data

In [98]:
data_path = downloader.retrieve()
subject_data_map = reader.format_data(data_path)
subject_data_map

{'S01': [               T7           F8          Cz          P4
  13200  431.251617 -1189.493896  454.405334  345.306824
  13201  444.240265 -1194.415649  471.231140  363.666016
  13202  439.064270 -1188.719727  457.135437  325.425537
  13203  442.071136 -1193.476929  458.751099  340.463654
  13204  435.933960 -1197.149414  442.688232  333.630859
  ...           ...          ...         ...         ...
  37195  453.833130 -1196.178955  476.693268  332.022705
  37196  434.430511 -1195.045776  459.874939  336.181488
  37197  417.862762 -1210.839355  458.289215  340.183167
  37198  431.681702 -1197.667480  463.925232  339.917633
  37199  446.768463 -1189.394775  482.639709  349.117798
  
  [24000 rows x 4 columns]],
 'S02': [               T7           F8          Cz          P4
  11000  457.670258 -1004.481018  508.474915  187.082886
  11001  433.761078 -1013.741028  507.588562  186.177826
  11002  435.257050 -1009.795410  508.231842  188.552673
  11003  444.831177 -1006.882019  512.4541

# Processing

## Metadata

In [99]:
metadata_map = {subject: data_processor.extract_metadata(subject_data_map[subject]) for subject in subject_data_map}
metadata_map

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

{'S01': [FeatureMetaData(std_dev=2096944.1149435174, median_abs_dev=1726596.0982965212, mean=-10396321.719805066, median=-10808777.094044454, min=-14424378.979548188, max=-7238479.692338924),
  FeatureMetaData(std_dev=2079935.410999143, median_abs_dev=1793046.0548945442, mean=10269358.11383951, median=10598499.364171408, min=7172003.839554481, max=13952448.12252462),
  FeatureMetaData(std_dev=13171.176818779444, median_abs_dev=7919.963904167902, mean=278.1357476047748, median=5807.190716188714, min=-25585.73710349899, max=23066.983067869634),
  FeatureMetaData(std_dev=0.0031277765443060534, median_abs_dev=0.00208333333333334, mean=0.10562500000000001, median=0.10458333333333333, min=0.10083333333333333, max=0.11166666666666666),
  FeatureMetaData(std_dev=8897717.200830884, median_abs_dev=6188167.491192213, mean=-23040999.11890118, median=-22161310.768497907, min=-41616074.440649, max=-9863264.660145074),
  FeatureMetaData(std_dev=8449184.086131306, median_abs_dev=5391237.872631128, mea

## Subject Data

In [100]:
original_data_normalization = data_processor.normalization_steps
processed_data_map = {}
for subject in subject_data_map:
    data_processor.normalization_steps = normalization.NormalizationPipeline([
        normalization.RescaleNormalizationStep(RESCALE_LOWER, RESCALE_UPPER),
        normalization.HistogramEqualizationStep(metadata_map[subject], RESCALE_LOWER, RESCALE_UPPER)
    ])
    processed_data_map[subject] = data_processor.process(subject_data_map[subject])
data_processor.normalization_steps = original_data_normalization
processed_data_map

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

{'S01': [array([239.,  73., 197., 197., 239.,  73., 197., 197., 216.,  73., 197.,
         197., 242.,  35., 197., 197.,  19.,   6.,  12., 197., 239.,  44.,
         197., 197., 216.,  73., 197., 197., 216.,  73., 197., 197., 245.,
          35., 197., 197.,   0., 248., 251., 197., 239.,  73., 197., 197.,
         239.,  73., 197., 197., 216.,  79., 197., 197., 239.,  41., 197.,
         197.,  15.,   3.,   9., 197., 216.,  73., 197., 197., 216.,  73.,
         197., 197., 197.,  79., 197., 197., 239.,  41., 197., 197.,  28.,
          22.,  25., 197.]),
  array([232.,  54., 194., 194., 219.,  54., 194., 194., 219.,  79., 194.,
         194., 245.,  41., 194., 194.,  19.,   6.,  12., 194., 219.,  79.,
         194., 194., 232.,  54., 194., 194., 219.,  79., 194., 194., 245.,
          31., 194., 194.,   0., 248., 251., 194., 232.,  54., 194., 194.,
         232.,  79., 194., 194., 219.,  79., 194., 194., 235.,  41., 194.,
         194.,  15.,   3.,   9., 194., 219.,  79., 194., 194., 2

# Encoding

In [101]:
def hash_vectors(vectors_to_hash: typing.List[np.ndarray]) -> typing.List[fuzzy_hash_lib.FuzzyHash]:
    encoder = feature_encoding.threshold.ThresholdBinaryEncoder(BINARY_THRESHOLD)
    binary_vectors = [encoder.encode(v) for v in vectors_to_hash]
    return [fuzzy_hash_lib.FuzzyHash.from_text(bv) for bv in binary_vectors]

In [102]:
subject_hashes_map = {subject: hash_vectors(processed_data_map[subject]) for subject in processed_data_map}
subject_hashes_map

{'S01': [<fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f37a60>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f35ff0>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f36c50>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f37dc0>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f36350>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f37a00>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f36290>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f361a0>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f36800>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f35a50>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f36ad0>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f35c30>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f369b0>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f37760>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f358a0>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f36320>,
  <fuzzy_hash_lib.fuzzy_hash.FuzzyHash at 0x17ef4f36bf0>,
  <fuzz

# Test Set Assembly

## Gathering Test Sets

In [103]:
def make_threshold_test_sets(hashes_map: typing.Dict[str, typing.List[fuzzy_hash_lib.FuzzyHash]], target_subject) -> typing.List[ThresholdTestSet]:
    results = []
    for threshold in AUTHENTICATION_THRESHOLDS:
        threshold_test_set = ThresholdTestSet(
            threshold=threshold, positive_cases=[], negative_cases=[]
        )
        for subject in hashes_map:
            if subject == target_subject:
                threshold_test_set.positive_cases.extend(hashes_map[subject])
            else:
                threshold_test_set.negative_cases.extend(hashes_map[subject])
        results.append(threshold_test_set)
    return results

In [104]:
subject_test_sets = [
    SubjectTestSet(subject, make_threshold_test_sets(subject_hashes_map, subject))
    for subject in subject_hashes_map
]
subject_test_sets

[SubjectTestSet(subject_id='S01', threshold_tests=[ThresholdTestSet(threshold=10, positive_cases=[<fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F37A60>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F35FF0>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36C50>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F37DC0>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36350>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F37A00>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36290>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F361A0>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36800>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F35A50>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36AD0>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F35C30>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F369B0>, <fuzzy_hash_lib.fu

## Generating Hash Tests

In [105]:
def make_hash_tests(test_set: ThresholdTestSet) -> typing.List[HashTest]:
    tests = []
    number_of_positive = len(test_set.positive_cases)
    half_point = math.floor(number_of_positive / 2)
    initial_cases = test_set.positive_cases[:half_point]
    should_match_cases = test_set.positive_cases[half_point:]
    should_not_match_cases: typing.List[fuzzy_hash_lib.FuzzyHash] = random.sample(test_set.negative_cases, half_point)
    for sample, comparison in zip(initial_cases, should_match_cases):
        tests.append(HashTest(True, test_set.threshold, (sample, comparison)))
    for sample, comparison in zip(initial_cases, should_not_match_cases):
        tests.append(HashTest(False, test_set.threshold, (sample, comparison)))
    return tests

def make_threshold_tests_map(subject_tests: typing.List[SubjectTestSet]) -> typing.Dict[str, typing.List[HashTest]]:
    threshold_tests = {str(threshold): [] for threshold in AUTHENTICATION_THRESHOLDS}
    for subject_test in subject_tests:
        for threshold_test_data in subject_test.threshold_tests:
            threshold_tests[str(threshold_test_data.threshold)].extend(
                make_hash_tests(threshold_test_data)
            )
    return threshold_tests

In [106]:
threshold_tests_map = make_threshold_tests_map(subject_test_sets)
threshold_tests_map

{'10': [HashTest(expected_result=True, threshold=10, hashes=(<fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F37A60>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36AD0>)),
  HashTest(expected_result=True, threshold=10, hashes=(<fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F35FF0>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F35C30>)),
  HashTest(expected_result=True, threshold=10, hashes=(<fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36C50>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F369B0>)),
  HashTest(expected_result=True, threshold=10, hashes=(<fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F37DC0>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F37760>)),
  HashTest(expected_result=True, threshold=10, hashes=(<fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F36350>, <fuzzy_hash_lib.fuzzy_hash.FuzzyHash object at 0x0000017EF4F358A0>)),
  HashTest(expected_result=Tr

# Execute Tests

In [107]:
def run_threshold_tests(test_data: typing.Dict[str, typing.List[HashTest]]) -> typing.Dict[str, float]:
    results = {}
    for threshold in test_data:
        hits = 0
        for test in test_data[threshold]:
            is_hit = test.run_test()
            if is_hit:
                hits += 1
        results[threshold] = (hits / len(test_data[threshold])) * 100
    return results

In [108]:
test_results_map = run_threshold_tests(threshold_tests_map)
for threshold_type, accuracy in test_results_map.items():
    print(f'Threshold: {threshold_type}, Accuracy: {accuracy}%')

Threshold: 10, Accuracy: 50.728155339805824%
Threshold: 20, Accuracy: 51.213592233009706%
Threshold: 30, Accuracy: 61.89320388349514%
Threshold: 40, Accuracy: 86.40776699029125%
Threshold: 50, Accuracy: 85.67961165048543%
Threshold: 60, Accuracy: 85.67961165048543%
Threshold: 70, Accuracy: 87.13592233009709%
Threshold: 80, Accuracy: 85.67961165048543%
Threshold: 90, Accuracy: 85.92233009708737%
