# Setup

Import the third-party libraries and project modules used throughout this notebook.

In [113]:
import numpy as np
import dataclasses
import typing
import auth_biohash.hash
import auth_biohash.random_token
import feature_encoding.threshold

from eeg_auth_models_framework import data, pre_process, features, processor
from eeg_auth_models_framework.utils import conversion

# Constants

In [114]:
# Authentication thresholds for which BioHashes are generated and test sets are built.
AUTHENTICATION_THRESHOLDS = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Sampling rate and channel layout handed to the MNE data-frame converter.
DATASET_SAMPLE_FREQ_HZ = 200
DATA_CHANNEL_NAMES = ['T7', 'F8', 'Cz', 'P4']

# Bands passed to the band-pass filter step. A bound of None is passed through as-is;
# presumably it leaves that side of the band open -- TODO confirm against FrequencyBand.
FREQUENCIES = [
    pre_process.FrequencyBand(label='Alpha', lower=8.0, upper=12.0),
    pre_process.FrequencyBand(label='Beta', lower=12.0, upper=35.0),
    pre_process.FrequencyBand(label='Theta', lower=4.0, upper=8.0),
    pre_process.FrequencyBand(label='Gamma', lower=35.0, upper=None),
    pre_process.FrequencyBand(label='Raw', lower=None, upper=None),
]

# Arguments of DataWindowStep. Presumably expressed in samples (1200 samples = 6 s
# at 200 Hz) -- TODO confirm against DataWindowStep.
WINDOW_SIZE = 1200
WINDOW_OVERLAP = 0

# NOTE(review): not referenced by any cell visible in this notebook.
REDUCTION_WINDOW_SIZE = 5

# Cut-off given to ThresholdBinaryEncoder when vectors are hashed.
BINARY_THRESHOLD = 0.5

# Utilities

In [ ]:
@dataclasses.dataclass
class ThresholdTestSet:
    """Positive and negative BioHash cases collected for one authentication threshold.

    Attributes:
        threshold: Authentication threshold this test set belongs to. Instances
            are created with the numeric entries of AUTHENTICATION_THRESHOLDS,
            so the field is annotated as a float rather than a string.
        positive_cases: Hashes that belong to the subject under test.
        negative_cases: Hashes that belong to all other subjects.
    """
    threshold: float
    positive_cases: typing.List[auth_biohash.hash.BioHash]
    negative_cases: typing.List[auth_biohash.hash.BioHash]

@dataclasses.dataclass
class SubjectTestSet:
    """All per-threshold test sets assembled for a single target subject.

    Attributes:
        subject_id: Identifier of the subject whose hashes form the positive cases.
        threshold_tests: One ThresholdTestSet per authentication threshold.
    """
    subject_id: str
    threshold_tests: typing.List[ThresholdTestSet]

# Configuration

In [115]:
# Dataset access objects: one retrieves the data, the other formats it per subject.
downloader = data.AuditoryDataDownloader()
reader = data.AuditoryDataReader()

# Converter configured with the dataset's channel names and sampling rate;
# it is handed to the band-pass filter step of the pre-processing pipeline.
converter = conversion.MNEDataFrameConverter(
    sample_frequency=DATASET_SAMPLE_FREQ_HZ,
    channels=DATA_CHANNEL_NAMES,
)

# Data Processing Setup

## Pre-Processing Steps

In [116]:
# Pre-processing order: band-pass filtering over FREQUENCIES, then windowing.
pre_process_steps = pre_process.PreProcessingPipeline(
    [
        pre_process.EEGBandpassFilterStep(FREQUENCIES, converter),
        pre_process.DataWindowStep(WINDOW_SIZE, WINDOW_OVERLAP),
    ]
)

## Feature Extraction Steps

In [117]:
# Feature extraction: a single statistical extractor computing four features.
feature_extraction_steps = features.FeatureExtractPipeline(
    [
        features.StatisticalFeatureExtractor(
            [
                features.StatisticalFeature.MIN,
                features.StatisticalFeature.MAX,
                features.StatisticalFeature.MEAN,
                features.StatisticalFeature.ZERO_CROSSING_RATE,
            ]
        ),
    ]
)

## Data Processor

In [118]:
# Combine the pre-processing and feature-extraction pipelines into one processor.
data_processor = processor.DataProcessor(
    feature_extraction=feature_extraction_steps,
    pre_process=pre_process_steps,
)

# Subject Data

In [119]:
# Retrieve the dataset; the returned value is what the reader is pointed at.
data_path = downloader.retrieve()
# Format the data into a mapping that later cells iterate and index by subject.
subject_data_map = reader.format_data(data_path)

## Token Setup

In [120]:
# One freshly generated token per subject; it later seeds that subject's
# token-matrix normalization.
subject_tokens_map = {
    subject: auth_biohash.random_token.generate_token()
    for subject in subject_data_map
}

## Processing

In [121]:
# Run every subject's data through the processor (pre-processing + features).
processed_data_map = {
    subject: data_processor.process(subject_data_map[subject])
    for subject in subject_data_map
}

Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=40114
    Range : 0 ... 40113 =      0.000 ...   200.565 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ...   119.995 secs
Ready.
Creating RawArray with float64 data, n_channels=4, n_times=24000
    Range : 0 ... 23999 =      0.000 ..

# Hashing

## Token Normalization

In [122]:
def normalize_vectors(vectors: typing.List[np.ndarray], token: str) -> typing.List[np.ndarray]:
    """Normalize each feature vector with a token-derived matrix normalization.

    Args:
        vectors: Feature vectors to normalize.
        token: Token used to build the matrix generator.

    Returns:
        The normalized vectors, in the same order as ``vectors``.
    """
    # A single normalizer is built from the token and reused for every vector.
    normalizer = auth_biohash.hash.TokenMatrixNormalization(
        auth_biohash.random_token.MatrixGenerator(token)
    )
    normalized = []
    for vector in vectors:
        normalized.append(normalizer.normalize(vector))
    return normalized

In [123]:
# Normalize each subject's feature vectors with that subject's own token.
normalized_data_map = {
    subject: normalize_vectors(feature_vectors, subject_tokens_map[subject])
    for subject, feature_vectors in processed_data_map.items()
}

In [124]:
# Sanity check: report how many normalized vectors each subject produced.
for subject, vectors in normalized_data_map.items():
    vector_count = len(vectors)
    print(f'Subject {subject}: {vector_count} normalized feature vectors')

Subject S01: 20 normalized feature vectors
Subject S02: 20 normalized feature vectors
Subject S03: 20 normalized feature vectors
Subject S04: 20 normalized feature vectors
Subject S05: 33 normalized feature vectors
Subject S06: 20 normalized feature vectors
Subject S07: 20 normalized feature vectors
Subject S08: 20 normalized feature vectors
Subject S09: 20 normalized feature vectors
Subject S10: 20 normalized feature vectors
Subject S11: 20 normalized feature vectors
Subject S12: 20 normalized feature vectors
Subject S13: 20 normalized feature vectors
Subject S14: 20 normalized feature vectors
Subject S15: 20 normalized feature vectors
Subject S16: 20 normalized feature vectors
Subject S17: 20 normalized feature vectors
Subject S18: 20 normalized feature vectors
Subject S19: 20 normalized feature vectors
Subject S20: 20 normalized feature vectors


## Encoding

In [125]:
def hash_vectors(vectors: typing.List[np.ndarray], threshold: float) -> typing.List[auth_biohash.hash.BioHash]:
    """Generate one BioHash per vector for the given authentication threshold.

    Args:
        vectors: Vectors to hash.
        threshold: Threshold passed to ``BioHash.generate_hash`` for every vector.

    Returns:
        The generated hashes, in the same order as ``vectors``.
    """
    # All vectors share one binary encoder configured with BINARY_THRESHOLD.
    binary_encoder = feature_encoding.threshold.ThresholdBinaryEncoder(BINARY_THRESHOLD)
    hashes = []
    for vector in vectors:
        hashes.append(auth_biohash.hash.BioHash.generate_hash(vector, threshold, binary_encoder))
    return hashes


def make_map_of_threshold_hashes(vectors: typing.List[np.ndarray]) -> typing.Dict[str, typing.List[auth_biohash.hash.BioHash]]:
    """Hash the vectors once for every entry of AUTHENTICATION_THRESHOLDS.

    Args:
        vectors: Vectors to hash.

    Returns:
        A dictionary keyed by ``str(threshold)`` whose values are the hashes
        generated for that threshold.
    """
    return {
        str(candidate): hash_vectors(vectors, candidate)
        for candidate in AUTHENTICATION_THRESHOLDS
    }

In [126]:
# For every subject, build the threshold -> BioHash list mapping.
subject_hashes_map = {
    subject: make_map_of_threshold_hashes(normalized_vectors)
    for subject, normalized_vectors in normalized_data_map.items()
}

In [127]:
# Sanity check: report the number of hashes per threshold for each subject.
for subject, hashes in subject_hashes_map.items():
    per_threshold_counts = [
        f'{hash_threshold} --> {len(hash_instances)}'
        for hash_threshold, hash_instances in hashes.items()
    ]
    hash_counts = ', '.join(per_threshold_counts)
    print(f'Subject {subject} BioHash counts (per threshold): {hash_counts}')

Subject S01 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S02 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S03 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S04 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S05 BioHash counts (per threshold): 0.1 --> 33, 0.2 --> 33, 0.3 --> 33, 0.4 --> 33, 0.5 --> 33, 0.6 --> 33, 0.7 --> 33, 0.8 --> 33, 0.9 --> 33
Subject S06 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.5 --> 20, 0.6 --> 20, 0.7 --> 20, 0.8 --> 20, 0.9 --> 20
Subject S07 BioHash counts (per threshold): 0.1 --> 20, 0.2 --> 20, 0.3 --> 20, 0.4 --> 20, 0.

# Test Set Assembly

In [ ]:
def make_threshold_test_sets(hashes_map: typing.Dict[str, typing.Dict[str, typing.List[auth_biohash.hash.BioHash]]], 
                             target_subject: str) -> typing.List[ThresholdTestSet]:
    """Assemble one test set per authentication threshold for a target subject.

    Hashes of ``target_subject`` become positive cases; hashes of every other
    subject become negative cases.

    Args:
        hashes_map: Subject -> (threshold key -> hashes) mapping.
        target_subject: Subject whose hashes are treated as positive cases.

    Returns:
        The test sets, one per entry of AUTHENTICATION_THRESHOLDS, in that order.

    Raises:
        KeyError: If ``hashes_map`` holds a threshold key that does not match
            ``str(threshold)`` for any entry of AUTHENTICATION_THRESHOLDS.
    """
    # Start with an empty test set for each configured threshold, keyed the
    # same way as the per-subject hash dictionaries.
    sets_by_key: typing.Dict[str, ThresholdTestSet] = {}
    for value in AUTHENTICATION_THRESHOLDS:
        sets_by_key[str(value)] = ThresholdTestSet(threshold=value, positive_cases=[], negative_cases=[])

    for subject, hashes_by_key in hashes_map.items():
        is_target = subject == target_subject
        for key, subject_hashes in hashes_by_key.items():
            current_set = sets_by_key[key]
            destination = current_set.positive_cases if is_target else current_set.negative_cases
            destination.extend(subject_hashes)

    return list(sets_by_key.values())

In [ ]:
# Build the full set of threshold tests with each subject in turn as the target.
subject_test_sets = [
    SubjectTestSet(
        subject_id=subject,
        threshold_tests=make_threshold_test_sets(subject_hashes_map, subject),
    )
    for subject in subject_hashes_map
]

# Execute Tests

In [ ]:
# TODO: mix test data and get scores/accuracy