In [1]:
import os
import numpy as np
import pandas as pd

from hmog import HmogHelper
from tqdm import tqdm

# Directory (relative path) that holds the preprocessed CSV files loaded by this notebook.
dataset_path = 'preprocessed_data/'

In [2]:
def _read_unified_csv(file_name):
    """Read one CSV from `dataset_path`, using its first column as the index."""
    return pd.read_csv(os.path.join(dataset_path, file_name), index_col=0)


# Base frame (the "ids_types_indices" file).
df = _read_unified_csv('hmog_dataset_unified_df_ids_types_indices.csv')

# One frame per window-length file (20s ... 140s).
df_20s = _read_unified_csv('hmog_dataset_unified_df_20s.csv')
df_40s = _read_unified_csv('hmog_dataset_unified_df_40s.csv')
df_60s = _read_unified_csv('hmog_dataset_unified_df_60s.csv')
df_80s = _read_unified_csv('hmog_dataset_unified_df_80s.csv')
df_100s = _read_unified_csv('hmog_dataset_unified_df_100s.csv')
df_120s = _read_unified_csv('hmog_dataset_unified_df_120s.csv')
df_140s = _read_unified_csv('hmog_dataset_unified_df_140s.csv')

In [10]:
# Distinct user ids, in the order they first appear in `df`.
unique_user_ids = list(df['user_id'].unique())

# The first 10 ids are set aside as the pool of negative training users;
# every remaining id stays in `subjects_list`.
train_negative_users, subjects_list = unique_user_ids[:10], unique_user_ids[10:]

In [None]:
# df_test = df[df['user_id'].isin(test_subjects)]
# df = df[df['user_id'].isin(subjects_list)]
# 
# df_20s_test = df_20s[df_20s['user_id'].isin(test_subjects)]
# df_20s = df_20s[df_20s['user_id'].isin(subjects_list)]
# 
# df_40s_test = df_40s[df_40s['user_id'].isin(test_subjects)]
# df_40s = df_40s[df_40s['user_id'].isin(subjects_list)]

In [4]:
# Let's scale the data

from sklearn.preprocessing import StandardScaler

# The feature columns are named with the strings '1' through '64'.
hmog_features = [str(column_number) for column_number in range(1, 65)]

# Lookup from window size (the keys are divided by 1000 when printed as seconds)
# to the frame loaded for that window length.
window_size_to_df = {
    20000: df_20s,
    40000: df_40s,
    60000: df_60s,
    80000: df_80s,
    100000: df_100s,
    120000: df_120s,
    140000: df_140s,
}

scaler = StandardScaler()

# The scaler statistics come from `df` only; the windowed frames are transformed
# with that same fitted scaler so all frames share one feature scale.
# NOTE(review): the scaler is fitted on every row of `df` (no split is applied
# before fitting) - confirm this is intended for the evaluation protocol.
df[hmog_features] = scaler.fit_transform(df[hmog_features])
for windowed_df in window_size_to_df.values():
    windowed_df[hmog_features] = scaler.transform(windowed_df[hmog_features])

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def create_model():
    """Build and compile a fresh two-layer binary classifier.

    The network takes 64 input features, has one hidden Dense layer of 60 ReLU
    units and a single sigmoid output unit. It is compiled with binary
    cross-entropy loss, the Adam optimizer and accuracy as the tracked metric.

    Returns:
        The compiled, untrained Keras `Sequential` model.
    """
    layers = [
        Dense(60, input_shape=(64,), activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(layers)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [11]:
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

# Window sizes evaluated: 20000 up to and including 140000 in steps of 20000
# (presumably milliseconds - the values are divided by 1000 when printed as seconds).
WINDOW_SIZES = list(range(20000, 140001, 20000))

# Maps the one-letter session keys used in this notebook to the integer codes
# compared against the 'session_type' column.
session_key_to_int = dict(s=0, w=1)

def get_all_users_test_vectors(subject: int, session_type: str, window_size: int):
    """Return the test feature rows of every user in `subjects_list` except `subject`.

    Args:
        subject: user id to exclude.
        session_type: key of `session_key_to_int` selecting the session type.
        window_size: key of `window_size_to_df` selecting the frame to read.

    Returns:
        NumPy array with the `hmog_features` columns of the rows whose user is
        another member of `subjects_list`, whose session type matches, and whose
        session index is 2 or 3.
    """
    frame = window_size_to_df[window_size]

    is_other_listed_user = (frame['user_id'] != subject) & frame['user_id'].isin(subjects_list)
    is_session_type = frame['session_type'] == session_key_to_int[session_type]
    is_test_session = frame['session_index'].isin([2, 3])

    other_users_rows = frame[is_other_listed_user & is_session_type & is_test_session]
    return other_users_rows[hmog_features].to_numpy()

def get_user_test_vector(subject: int, session_type: str, window_size: int):
    """Return the test feature rows that belong to `subject`.

    Args:
        subject: user id whose rows are selected.
        session_type: key of `session_key_to_int` selecting the session type.
        window_size: key of `window_size_to_df` selecting the frame to read.

    Returns:
        NumPy array with the `hmog_features` columns of the subject's rows whose
        session type matches and whose session index is 2 or 3.
    """
    frame = window_size_to_df[window_size]

    is_subject = frame['user_id'] == subject
    is_session_type = frame['session_type'] == session_key_to_int[session_type]
    is_test_session = frame['session_index'].isin([2, 3])

    subject_rows = frame[is_subject & is_session_type & is_test_session]
    return subject_rows[hmog_features].to_numpy()

def get_negative_train_df(users: set, session_type: str):
    """Return the rows of `df` used as negative training examples.

    Args:
        users: user ids whose rows are kept.
        session_type: key of `session_key_to_int` selecting the session type.

    Returns:
        The subset of `df` whose user is in `users`, whose session type matches,
        and whose session index is 0 or 1.
    """
    is_selected_user = df['user_id'].isin(users)
    is_session_type = df['session_type'] == session_key_to_int[session_type]
    is_train_session = df['session_index'].isin([0, 1])
    return df[is_selected_user & is_session_type & is_train_session]

def test_eer(train_key: str, test_key: str, other_subjects_count: int):
    """Train one binary classifier per subject and report the mean EER per window size.

    For each subject in `subjects_list`, a fresh model from `create_model()` is
    fitted on rows of `df` with session index 0 or 1 and session type `train_key`:
    the subject's own rows are labelled 1, and the rows of the first
    `other_subjects_count` entries of `train_negative_users` are labelled 0.
    The model is then scored, for every size in `WINDOW_SIZES`, on the subject's
    test rows (label 1) and on the test rows of all other users in
    `subjects_list` (label 0) for session type `test_key`. The equal error rate
    is taken at the ROC point where the false-positive rate and the
    false-negative rate are closest.

    Args:
        train_key: key of `session_key_to_int` for the training session type.
        test_key: key of `session_key_to_int` for the test session type.
        other_subjects_count: how many users from the start of
            `train_negative_users` supply negative training rows.

    Returns:
        dict mapping each window size to the mean EER over all subjects.

    Side effects:
        Prints the negative-user count and the mean EER for every window size.
    """
    window_eers = {window_size: [] for window_size in WINDOW_SIZES}

    # The negative training rows do not depend on the subject, so build them
    # once instead of re-filtering `df` on every iteration.
    negative_users = set(train_negative_users[:other_subjects_count])
    train_hmog_vector_negative = get_negative_train_df(negative_users, train_key)

    for subject in tqdm(subjects_list):
        train_hmog_vector_positive = df[
            (df['user_id'] == subject)
            & (df['session_type'] == session_key_to_int[train_key])
            & df['session_index'].isin([0, 1])
        ]

        data = pd.concat([train_hmog_vector_positive, train_hmog_vector_negative], ignore_index=True)

        X = data[hmog_features]
        # Derive the labels in a single comparison. Rewriting 'user_id' in two
        # steps (others -> 0, then subject -> 1) would label every row 1 for a
        # subject whose id is 0, because the negatives would match it as well.
        y = (data['user_id'] == subject).astype(int)

        model = create_model()
        model.fit(X, y, epochs=3, verbose=0)

        for window_size in WINDOW_SIZES:
            test_hmog_vector_positive_np = get_user_test_vector(subject, test_key, window_size)
            test_hmog_vectors_all_users_np = get_all_users_test_vectors(subject, test_key, window_size)

            predictions_self = model.predict(test_hmog_vector_positive_np, verbose=0)
            predictions_others = model.predict(test_hmog_vectors_all_users_np, verbose=0)

            # Flatten the stacked predictions so roc_curve receives a 1-D score vector.
            scores = np.concatenate([predictions_self, predictions_others]).ravel()
            y_true = [1] * len(predictions_self) + [0] * len(predictions_others)

            fpr, tpr, _ = roc_curve(y_true, scores)
            fnr = 1 - tpr

            # EER: the ROC point where FPR and FNR are closest; report their average there.
            eer_index = np.argmin(np.abs(fpr - fnr))
            eer = (fpr[eer_index] + fnr[eer_index]) / 2

            window_eers[window_size].append(eer)

    print(f'Count of negative users: {other_subjects_count}')
    for window_size in WINDOW_SIZES:
        # Collapse the per-subject EER list into its mean before reporting it.
        window_eers[window_size] = np.mean(window_eers[window_size])
        print(f'Window size: {window_size / 1000}s\nPopulation EER: {window_eers[window_size]}')

    return window_eers

# Prints a marker once every statement in this cell has run.
print('Done')

Done


In [12]:
# Largest negative-user count for which a result slot is prepared.
neg_user_n = 5

# neg_user_eers[count] holds one (initially empty) result dict per
# train/test session-key pair: 'ww' and 'ss'.
neg_user_eers = dict(
    (negative_count, {'ww': {}, 'ss': {}})
    for negative_count in range(1, neg_user_n + 1)
)

In [10]:
import pickle

# Restore previously saved results; this rebinds `neg_user_eers`, replacing
# whatever it held before this cell ran.
# NOTE(review): unpickling can execute arbitrary code - only load result files
# that were produced by this project.
# The context manager closes the file handle as soon as loading finishes.
with open('results/neg_user_eers_2.pkl', 'rb') as results_file:
    neg_user_eers = pickle.load(results_file)

In [13]:
# One negative training user: train and test on the same session type,
# first 'w' (stored under 'ww'), then 's' (stored under 'ss').
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt during
# the second call, so the 'ss' entry may not have been stored in that run.
for result_key, session_key in (('ww', 'w'), ('ss', 's')):
    neg_user_eers[1][result_key] = test_eer(session_key, session_key, 1)

100%|██████████| 82/82 [13:38<00:00,  9.98s/it]


Count of negative users: 1
Window size: 20.0s
Population EER: 0.18727449096044818
Window size: 40.0s
Population EER: 0.181422779326252
Window size: 60.0s
Population EER: 0.1795454173233801
Window size: 80.0s
Population EER: 0.17756879245698662
Window size: 100.0s
Population EER: 0.17609543876485692
Window size: 120.0s
Population EER: 0.17542108060727316
Window size: 140.0s
Population EER: 0.17484376386809908


  1%|          | 1/82 [00:13<18:02, 13.36s/it]


KeyboardInterrupt: 

In [15]:
# Two negative training users: train and test on the same session type,
# first 'w' (stored under 'ww'), then 's' (stored under 'ss').
# NOTE(review): the saved output of this cell ends in a KeyboardInterrupt during
# the second call, so the 'ss' entry may not have been stored in that run.
for result_key, session_key in (('ww', 'w'), ('ss', 's')):
    neg_user_eers[2][result_key] = test_eer(session_key, session_key, 2)

100%|██████████| 82/82 [13:20<00:00,  9.76s/it]


Count of negative users: 2
Window size: 20.0s
Population EER: 0.17804708405327
Window size: 40.0s
Population EER: 0.17456860126747864
Window size: 60.0s
Population EER: 0.17319471440084613
Window size: 80.0s
Population EER: 0.17203858417342993
Window size: 100.0s
Population EER: 0.17099773117294567
Window size: 120.0s
Population EER: 0.16960695502341905
Window size: 140.0s
Population EER: 0.1688673085299616


  1%|          | 1/82 [00:17<23:20, 17.29s/it]


KeyboardInterrupt: 

In [13]:
# Three negative training users: train and test on the same session type,
# first 'w' (stored under 'ww'), then 's' (stored under 'ss').
# NOTE(review): the saved output of this cell shows 92 subjects per run, while the
# saved outputs for counts 1 and 2 show 82 - `subjects_list` apparently differed
# between those runs; confirm the results are comparable.
for result_key, session_key in (('ww', 'w'), ('ss', 's')):
    neg_user_eers[3][result_key] = test_eer(session_key, session_key, 3)

100%|██████████| 92/92 [16:23<00:00, 10.69s/it]


Count of negative users: 3
Window size: 20.0s
Population EER: 0.22828150163569869
Window size: 40.0s
Population EER: 0.22980169882944648
Window size: 60.0s
Population EER: 0.23029180321791715
Window size: 80.0s
Population EER: 0.23031096419903274
Window size: 100.0s
Population EER: 0.23035470099484953
Window size: 120.0s
Population EER: 0.2303240247121612
Window size: 140.0s
Population EER: 0.23025844734288956


100%|██████████| 92/92 [15:44<00:00, 10.26s/it]

Count of negative users: 3
Window size: 20.0s
Population EER: 0.2427218387594926
Window size: 40.0s
Population EER: 0.24335400897632178
Window size: 60.0s
Population EER: 0.24366371482877386
Window size: 80.0s
Population EER: 0.24367654356694016
Window size: 100.0s
Population EER: 0.24371201933440034
Window size: 120.0s
Population EER: 0.24366502978055346
Window size: 140.0s
Population EER: 0.24362169383784463





In [14]:
import pickle

# Persist the collected results. The context manager flushes and closes the file
# deterministically instead of leaving the handle open until garbage collection.
with open('results/neg_user_eers_3.pkl', 'wb') as results_file:
    pickle.dump(neg_user_eers, results_file)