In [1]:
import pandas as pd
from dummy_rg import create_dummy_rg_df
from itertools import combinations, product

from tqdm import tqdm

In [2]:
data = create_dummy_rg_df()

In [3]:
user_ids = data['user'].unique()
user_ids

array([1, 2, 3, 4, 5])

In [None]:
pairs = []

for user_id1, user_id2 in tqdm(product(user_ids, repeat=2), desc='User pairs'):
    session_ids_user1 = data[data['user'] == user_id1]['session'].unique()
    session_ids_user2 = data[data['user'] == user_id2]['session'].unique()
    for session_user1, session_user2 in tqdm(product(session_ids_user1, session_ids_user2), desc='Session pairs'):
        if session_user1 == session_user2 and user_id1 == user_id2:
            continue
        session_data_user1 = data[(data['user'] == user_id1) & (data['session'] == session_user1)]
        session_data_user2 = data[(data['user'] == user_id2) & (data['session'] == session_user2)]
        pair_type = '-'
        if user_id1 == user_id2:
            pair_type = '+'
        for idx_session_user_1, idx_session_user_2 in product(session_data_user1.index, session_data_user2.index):
            pairs.append(((idx_session_user_1, idx_session_user_2), pair_type))

25it [00:00, 2939.82it/s]
20it [00:00, 2456.83it/s]
15it [00:00, 2391.82it/s]
15it [00:00, 2388.37it/s]
15it [00:00, 2495.03it/s]
20it [00:00, 2425.01it/s]
16it [00:00, 3149.91it/s]
12it [00:00, 2385.84it/s]
12it [00:00, 2391.17it/s]
12it [00:00, 2349.97it/s]
15it [00:00, 2371.63it/s]
12it [00:00, 2419.79it/s]
9it [00:00, 3338.23it/s]
9it [00:00, 2421.50it/s]
9it [00:00, 2325.86it/s]
15it [00:00, 2196.74it/s]
12it [00:00, 2414.34it/s]
9it [00:00, 2339.56it/s]
9it [00:00, 3462.55it/s]
9it [00:00, 2389.46it/s]
15it [00:00, 2483.40it/s]
12it [00:00, 2484.29it/s]
9it [00:00, 2377.12it/s]
9it [00:00, 2374.73it/s]
9it [00:00, 3595.46it/s]
25it [00:00, 145.01it/s]


In [5]:
positive_pairs = [pair[0] for pair in pairs if pair[1] == '+']
negative_pairs = [pair[0] for pair in pairs if pair[1] == '-']

In [6]:
def generate_df_from_pairs(pairs, data: pd.DataFrame, label: int, preffixes = ['accel-x', 'accel-y', 'accel-z'], additional_columns = ['user', 'session']):
    columns = [val for val in data.columns for preffix in preffixes if val.startswith(preffix)] + additional_columns
    # Extract the ids  
    pair_elem_1 = [val[0] for val in pairs]
    pair_elem_2 = [val[1] for val in pairs]
    # Extract the data from the pairs
    s1 = data.loc[pair_elem_1, columns].reset_index(drop=True)
    s2 = data.loc[pair_elem_2, columns].reset_index(drop=True)
    # Add a preffix to the columns
    s1 = s1.rename(lambda x: f'S1-{x}', axis=1)
    s2 = s2.rename(lambda x: f'S2-{x}', axis=1)
    # Concatenate the data and add the label
    result_df = pd.concat([s1, s2], axis=1)
    result_df['label'] = label
    return result_df

In [7]:
generate_df_from_pairs(positive_pairs, data, 1)

Unnamed: 0,S1-accel-x-0,S1-accel-x-1,S1-accel-x-2,S1-accel-x-3,S1-accel-x-4,S1-accel-y-0,S1-accel-y-1,S1-accel-y-2,S1-accel-y-3,S1-accel-y-4,...,S2-accel-y-3,S2-accel-y-4,S2-accel-z-0,S2-accel-z-1,S2-accel-z-2,S2-accel-z-3,S2-accel-z-4,S2-user,S2-session,label
0,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,...,1-2-1,1-2-1,1-2-1,1-2-1,1-2-1,1-2-1,1-2-1,1,2,1
1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,...,1-2-2,1-2-2,1-2-2,1-2-2,1-2-2,1-2-2,1-2-2,1,2,1
2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,...,1-2-1,1-2-1,1-2-1,1-2-1,1-2-1,1-2-1,1-2-1,1,2,1
3,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,1-1-2,...,1-2-2,1-2-2,1-2-2,1-2-2,1-2-2,1-2-2,1-2-2,1,2,1
4,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,1-1-1,...,1-3-1,1-3-1,1-3-1,1-3-1,1-3-1,1-3-1,1-3-1,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,...,5-1-2,5-1-2,5-1-2,5-1-2,5-1-2,5-1-2,5-1-2,5,1,1
196,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,...,5-2-1,5-2-1,5-2-1,5-2-1,5-2-1,5-2-1,5-2-1,5,2,1
197,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,5-3-1,...,5-2-2,5-2-2,5-2-2,5-2-2,5-2-2,5-2-2,5-2-2,5,2,1
198,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,5-3-2,...,5-2-1,5-2-1,5-2-1,5-2-1,5-2-1,5-2-1,5-2-1,5,2,1


In [8]:
data = pd.read_csv('../data/authentication/unbalanced/RecodGait_v2/standartized_unbalanced.csv')