# Dataset Creation

In [1]:
import pandas as pd

# Sensor samples. The file is read as header-less, so the 13 column names are supplied here.
data = pd.read_csv(
    "data/full-game.csv",
    header=None,
    names=['sid', 'ts', 'x', 'y', 'z', '|v|', '|a|', 'vx', 'vy', 'vz', 'ax', 'ay', 'az'],
)

# Event list with four header-less columns (presumably the labelled passes -- confirm).
df_passList = pd.read_csv(
    "data/passList.csv",
    header=None,
    names=['ts', 'sid', 'tsRec', 'sidRec'],
)

# Event list with two header-less columns (presumably the non-pass samples -- confirm).
df_randomList = pd.read_csv(
    "data/randomListFin.csv",
    header=None,
    names=['ts', 'sid'],
)

In [57]:
# Additional (ts, sid) event lists used when building the imbalanced test sets.
# Each file is read as header-less with the same two column names.
df_addRL1, df_addRL2, df_addRL3 = (
    pd.read_csv(csv_path, header=None, names=['ts', 'sid'])
    for csv_path in ("data/addRL1.csv", "data/addRL2.csv", "data/addRL3.csv")
)

# Balanced Dataset

## Mean Computation

In [2]:
# Balanced dataset -- mean of 'ts', '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList. For each one, keep the
# samples of its sensor ('sid') with  ts - 0.3e12 < data.ts <= ts + 0.3e12;
# an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
mean_cols = ['ts', '|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
mean_rows = []
for events in (df_passList, df_randomList):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], mean_cols]
        if not window.empty:
            mean_rows.append(window.mean().to_dict())

# 'ts' keeps its name (later cells sort on it); the other columns get a _mean suffix.
mean_df = pd.DataFrame(mean_rows).rename(
    columns={'|v|': '|v|_mean', '|a|': '|a|_mean', 'vz': 'vz_mean', 'az': 'az_mean'}
)
mean_df.to_csv('data/meanFin_df.csv', index=False, header=True)

## Standard Deviation Computation

In [3]:
# Balanced dataset -- standard deviation of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList. For each one, keep the
# samples of its sensor ('sid') with  ts - 0.3e12 < data.ts <= ts + 0.3e12;
# an event with no such sample yields no row.
# DataFrame.std() defaults to ddof=1, so a one-sample window gives NaN.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
std_rows = []
for events in (df_passList, df_randomList):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            std_rows.append(window.std().to_dict())

std_df = pd.DataFrame(std_rows).rename(
    columns={'|v|': '|v|_std', '|a|': '|a|_std', 'vz': 'vz_std', 'az': 'az_std'}
)
std_df.to_csv('data/stdFin_df.csv', index=False, header=True)

## Minimum Computation

In [4]:
# Balanced dataset -- minimum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList. For each one, keep the
# samples of its sensor ('sid') with  ts - 0.3e12 < data.ts <= ts + 0.3e12;
# an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
min_rows = []
for events in (df_passList, df_randomList):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            min_rows.append(window.min().to_dict())

min_df = pd.DataFrame(min_rows).rename(
    columns={'|v|': '|v|_min', '|a|': '|a|_min', 'vz': 'vz_min', 'az': 'az_min'}
)
min_df.to_csv('data/minFin_df.csv', index=False, header=True)

## Maximum Computation

In [5]:
# Balanced dataset -- maximum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList. For each one, keep the
# samples of its sensor ('sid') with  ts - 0.3e12 < data.ts <= ts + 0.3e12;
# an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
max_rows = []
for events in (df_passList, df_randomList):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            max_rows.append(window.max().to_dict())

max_df = pd.DataFrame(max_rows).rename(
    columns={'|v|': '|v|_max', '|a|': '|a|_max', 'vz': 'vz_max', 'az': 'az_max'}
)
max_df.to_csv('data/maxFin_df.csv', index=False, header=True)

## Balanced Dataset with labels

In [8]:
# Reload the four balanced feature tables and the label list, then place them
# side by side. Column-wise concat matches rows on the index, which read_csv
# sets to 0..n-1 for each file.
# NOTE(review): if the files differ in row count the shorter ones are padded
# with NaN instead of raising -- confirm the lengths agree.
mean_df, std_df, min_df, max_df, label_df = map(
    pd.read_csv,
    (
        "data/meanFin_df.csv",
        "data/stdFin_df.csv",
        "data/minFin_df.csv",
        "data/maxFin_df.csv",
        "data/labelListFin.csv",
    ),
)

dataset = pd.concat([mean_df, std_df, min_df, max_df, label_df], axis='columns')
dataset.to_csv('data/datasetFin.csv', index=False)

## Ordered Balanced Dataset

In [9]:
# Re-read the assembled balanced dataset and order its rows by the 'ts' column
old_ds = pd.read_csv('data/datasetFin.csv').sort_values('ts')

# Write the time-ordered copy (row index is not stored)
old_ds.to_csv('data/new_dataset_fin.csv', index=False)

# Imbalanced Test Set 40-60

In [12]:
# Imbalanced 40-60 test set -- mean of 'ts', '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL1. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
mean_cols = ['ts', '|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
mean_rows = []
for events in (df_passList, df_randomList, df_addRL1):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], mean_cols]
        if not window.empty:
            mean_rows.append(window.mean().to_dict())

# 'ts' keeps its name (later cells sort on it); the other columns get a _mean suffix.
mean_df = pd.DataFrame(mean_rows).rename(
    columns={'|v|': '|v|_mean', '|a|': '|a|_mean', 'vz': 'vz_mean', 'az': 'az_mean'}
)
mean_df.to_csv('data/meanRL1_df.csv', index=False, header=True)

In [13]:
# Imbalanced 40-60 test set -- standard deviation of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL1. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# DataFrame.std() defaults to ddof=1, so a one-sample window gives NaN.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
std_rows = []
for events in (df_passList, df_randomList, df_addRL1):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            std_rows.append(window.std().to_dict())

std_df = pd.DataFrame(std_rows).rename(
    columns={'|v|': '|v|_std', '|a|': '|a|_std', 'vz': 'vz_std', 'az': 'az_std'}
)
std_df.to_csv('data/stdRL1_df.csv', index=False, header=True)

In [14]:
# Imbalanced 40-60 test set -- minimum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL1. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
min_rows = []
for events in (df_passList, df_randomList, df_addRL1):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            min_rows.append(window.min().to_dict())

min_df = pd.DataFrame(min_rows).rename(
    columns={'|v|': '|v|_min', '|a|': '|a|_min', 'vz': 'vz_min', 'az': 'az_min'}
)
min_df.to_csv('data/minRL1_df.csv', index=False, header=True)

In [16]:
# Imbalanced 40-60 test set -- maximum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL1. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
max_rows = []
for events in (df_passList, df_randomList, df_addRL1):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            max_rows.append(window.max().to_dict())

max_df = pd.DataFrame(max_rows).rename(
    columns={'|v|': '|v|_max', '|a|': '|a|_max', 'vz': 'vz_max', 'az': 'az_max'}
)
max_df.to_csv('data/maxRL1_df.csv', index=False, header=True)

In [18]:
# Reload the four RL1 feature tables and the RL1 label list, then place them
# side by side. Column-wise concat matches rows on the index, which read_csv
# sets to 0..n-1 for each file.
# NOTE(review): if the files differ in row count the shorter ones are padded
# with NaN instead of raising -- confirm the lengths agree.
mean_df, std_df, min_df, max_df, label_df = map(
    pd.read_csv,
    (
        "data/meanRL1_df.csv",
        "data/stdRL1_df.csv",
        "data/minRL1_df.csv",
        "data/maxRL1_df.csv",
        "data/labelListRL1.csv",
    ),
)

dataset = pd.concat([mean_df, std_df, min_df, max_df, label_df], axis='columns')
dataset.to_csv('data/datasetRL1.csv', index=False)

In [19]:
# Re-read the assembled RL1 dataset and order its rows by the 'ts' column
old_ds = pd.read_csv('data/datasetRL1.csv').sort_values('ts')

# Write the time-ordered copy (row index is not stored)
old_ds.to_csv('data/new_dataset_4060.csv', index=False)

# Imbalanced Test Set 30-70

In [36]:
# Imbalanced 30-70 test set -- mean of 'ts', '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL2. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
mean_cols = ['ts', '|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
mean_rows = []
for events in (df_passList, df_randomList, df_addRL2):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], mean_cols]
        if not window.empty:
            mean_rows.append(window.mean().to_dict())

# 'ts' keeps its name (later cells sort on it); the other columns get a _mean suffix.
mean_df = pd.DataFrame(mean_rows).rename(
    columns={'|v|': '|v|_mean', '|a|': '|a|_mean', 'vz': 'vz_mean', 'az': 'az_mean'}
)
mean_df.to_csv('data/meanRL2_df.csv', index=False, header=True)

In [37]:
# Imbalanced 30-70 test set -- standard deviation of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL2. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# DataFrame.std() defaults to ddof=1, so a one-sample window gives NaN.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
std_rows = []
for events in (df_passList, df_randomList, df_addRL2):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            std_rows.append(window.std().to_dict())

std_df = pd.DataFrame(std_rows).rename(
    columns={'|v|': '|v|_std', '|a|': '|a|_std', 'vz': 'vz_std', 'az': 'az_std'}
)
std_df.to_csv('data/stdRL2_df.csv', index=False, header=True)

In [38]:
# Imbalanced 30-70 test set -- minimum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL2. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
min_rows = []
for events in (df_passList, df_randomList, df_addRL2):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            min_rows.append(window.min().to_dict())

min_df = pd.DataFrame(min_rows).rename(
    columns={'|v|': '|v|_min', '|a|': '|a|_min', 'vz': 'vz_min', 'az': 'az_min'}
)
min_df.to_csv('data/minRL2_df.csv', index=False, header=True)

In [39]:
# Imbalanced 30-70 test set -- maximum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL2. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
max_rows = []
for events in (df_passList, df_randomList, df_addRL2):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            max_rows.append(window.max().to_dict())

max_df = pd.DataFrame(max_rows).rename(
    columns={'|v|': '|v|_max', '|a|': '|a|_max', 'vz': 'vz_max', 'az': 'az_max'}
)
max_df.to_csv('data/maxRL2_df.csv', index=False, header=True)

In [40]:
# Reload the four RL2 feature tables and the RL2 label list, then place them
# side by side. Column-wise concat matches rows on the index, which read_csv
# sets to 0..n-1 for each file.
# NOTE(review): if the files differ in row count the shorter ones are padded
# with NaN instead of raising -- confirm the lengths agree.
mean_df, std_df, min_df, max_df, label_df = map(
    pd.read_csv,
    (
        "data/meanRL2_df.csv",
        "data/stdRL2_df.csv",
        "data/minRL2_df.csv",
        "data/maxRL2_df.csv",
        "data/labelListRL2.csv",
    ),
)

dataset = pd.concat([mean_df, std_df, min_df, max_df, label_df], axis='columns')
dataset.to_csv('data/datasetRL2.csv', index=False)

In [41]:
# Re-read the assembled RL2 dataset and order its rows by the 'ts' column
old_ds = pd.read_csv('data/datasetRL2.csv').sort_values('ts')

# Write the time-ordered copy (row index is not stored)
old_ds.to_csv('data/new_dataset_3070.csv', index=False)

# Imbalanced Test Set 20-80

In [58]:
# Imbalanced 20-80 test set -- mean of 'ts', '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL3. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
mean_cols = ['ts', '|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
mean_rows = []
for events in (df_passList, df_randomList, df_addRL3):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], mean_cols]
        if not window.empty:
            mean_rows.append(window.mean().to_dict())

# 'ts' keeps its name (later cells sort on it); the other columns get a _mean suffix.
mean_df = pd.DataFrame(mean_rows).rename(
    columns={'|v|': '|v|_mean', '|a|': '|a|_mean', 'vz': 'vz_mean', 'az': 'az_mean'}
)
mean_df.to_csv('data/meanRL3_df.csv', index=False, header=True)

In [59]:
# Imbalanced 20-80 test set -- standard deviation of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL3. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# DataFrame.std() defaults to ddof=1, so a one-sample window gives NaN.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
std_rows = []
for events in (df_passList, df_randomList, df_addRL3):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            std_rows.append(window.std().to_dict())

std_df = pd.DataFrame(std_rows).rename(
    columns={'|v|': '|v|_std', '|a|': '|a|_std', 'vz': 'vz_std', 'az': 'az_std'}
)
std_df.to_csv('data/stdRL3_df.csv', index=False, header=True)

In [60]:
# Imbalanced 20-80 test set -- minimum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL3. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
min_rows = []
for events in (df_passList, df_randomList, df_addRL3):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            min_rows.append(window.min().to_dict())

min_df = pd.DataFrame(min_rows).rename(
    columns={'|v|': '|v|_min', '|a|': '|a|_min', 'vz': 'vz_min', 'az': 'az_min'}
)
min_df.to_csv('data/minRL3_df.csv', index=False, header=True)

In [61]:
# Imbalanced 20-80 test set -- maximum of '|v|', '|a|', 'vz', 'az' per event window.
# Events come from df_passList, then df_randomList, then df_addRL3. For each
# one, keep the samples of its sensor ('sid') with
# ts - 0.3e12 < data.ts <= ts + 0.3e12; an event with no such sample yields no row.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12
stat_cols = ['|v|', '|a|', 'vz', 'az']

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
max_rows = []
for events in (df_passList, df_randomList, df_addRL3):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        window = near_event.loc[near_event['sid'] == event['sid'], stat_cols]
        if not window.empty:
            max_rows.append(window.max().to_dict())

max_df = pd.DataFrame(max_rows).rename(
    columns={'|v|': '|v|_max', '|a|': '|a|_max', 'vz': 'vz_max', 'az': 'az_max'}
)
max_df.to_csv('data/maxRL3_df.csv', index=False, header=True)

In [62]:
# Reload the four RL3 feature tables and the RL3 label list, then place them
# side by side. Column-wise concat matches rows on the index, which read_csv
# sets to 0..n-1 for each file.
# NOTE(review): if the files differ in row count the shorter ones are padded
# with NaN instead of raising -- confirm the lengths agree.
mean_df, std_df, min_df, max_df, label_df = map(
    pd.read_csv,
    (
        "data/meanRL3_df.csv",
        "data/stdRL3_df.csv",
        "data/minRL3_df.csv",
        "data/maxRL3_df.csv",
        "data/labelListRL3.csv",
    ),
)

dataset = pd.concat([mean_df, std_df, min_df, max_df, label_df], axis='columns')
dataset.to_csv('data/datasetRL3.csv', index=False)

In [63]:
# Re-read the assembled RL3 dataset and order its rows by the 'ts' column
old_ds = pd.read_csv('data/datasetRL3.csv').sort_values('ts')

# Write the time-ordered copy (row index is not stored)
old_ds.to_csv('data/new_dataset_2080.csv', index=False)

# Left-Right Dataset

## Sid Computation

In [11]:
# Record the sensor id ('sid') of every event that has at least one sample in
# its window, so the rows line up with the balanced feature tables (which skip
# events under the same condition).
# Events come from df_passList, then df_randomList; the window is
# ts - 0.3e12 < data.ts <= ts + 0.3e12, restricted to the event's own sensor.
# NOTE(review): 0.3 * 1e12 is presumably 0.3 s in the unit of 'ts' -- confirm.
half_window = 0.3 * 1e12

# Rows are gathered in a list and the frame is built once; pd.concat inside the
# loop would re-copy the growing frame on every iteration.
sid_rows = []
for events in (df_passList, df_randomList):
    for _, event in events.iterrows():
        lo, hi = event['ts'] - half_window, event['ts'] + half_window
        near_event = data[(data['ts'] > lo) & (data['ts'] <= hi)]
        # Only the existence of a matching sample matters here, not its values.
        if (near_event['sid'] == event['sid']).any():
            sid_rows.append({'sid': event['sid']})

sid_df = pd.DataFrame(sid_rows)
sid_df.to_csv('data/sid_df.csv', index=False, header=True)

## Left-Right Computation

In [12]:
# Load the per-row sensor ids and relabel the column as 'foot'
sid_df = pd.read_csv('data/sid_df.csv').rename(columns={'sid': 'foot'})

# Odd sensor ids become 'left'; everything else (even ids) becomes 'right'
sid_df['foot'] = sid_df['foot'].map(
    lambda sensor_id: 'left' if sensor_id % 2 else 'right'
)

# Store the foot labels without the row index
sid_df.to_csv('data/foot_df.csv', index=False)

In [13]:
# Balanced mean-feature table and the per-row foot labels
meanFin_df = pd.read_csv('data/meanFin_df.csv')
foot_df = pd.read_csv('data/foot_df.csv')

# Place 'foot' at position 1, i.e. right after the first column.
# The values are matched to rows through the index, not by any key column.
meanFin_df.insert(loc=1, column='foot', value=foot_df['foot'])

# Store the extended table without the row index
meanFin_df.to_csv('data/meanFoot_df.csv', index=False)

## Add Labels

In [14]:
# Reload the foot-annotated mean table, the other balanced feature tables and
# the label list, then place them side by side. Column-wise concat matches rows
# on the index, which read_csv sets to 0..n-1 for each file.
# NOTE(review): if the files differ in row count the shorter ones are padded
# with NaN instead of raising -- confirm the lengths agree.
mean_df, std_df, min_df, max_df, label_df = map(
    pd.read_csv,
    (
        "data/meanFoot_df.csv",
        "data/stdFin_df.csv",
        "data/minFin_df.csv",
        "data/maxFin_df.csv",
        "data/labelListFin.csv",
    ),
)

dataset = pd.concat([mean_df, std_df, min_df, max_df, label_df], axis='columns')
dataset.to_csv('data/datasetFoot.csv', index=False)

## Ordered Left-Right Dataset

In [15]:
# Re-read the assembled left/right dataset and order its rows by the 'ts' column
old_ds = pd.read_csv('data/datasetFoot.csv').sort_values('ts')

# Write the time-ordered copy (row index is not stored)
old_ds.to_csv('data/new_dataset_foot.csv', index=False)

# Player Dataset

In [16]:
# Sensor id -> player number. Every player number is shared by exactly two
# sensor ids, so the table is written as (player, (sid, sid)) pairs and
# flattened into the lookup dict.
mapping = {
    sensor_id: player
    for player, sensor_ids in (
        (1, (62, 61)), (2, (64, 63)), (3, (66, 65)), (8, (68, 67)),
        (7, (38, 69)), (15, (40, 71)), (6, (74, 73)), (9, (44, 75)),
        (12, (14, 13)), (4, (16, 47)), (5, (88, 49)), (11, (52, 19)),
        (14, (54, 53)), (13, (24, 23)), (16, (58, 57)), (10, (28, 59)),
    )
    for sensor_id in sensor_ids
}

# Load the per-row sensor ids and relabel the column as 'player'
sid_df = pd.read_csv('data/sid_df.csv').rename(columns={'sid': 'player'})

# Translate each sensor id; ids missing from the table become NaN
sid_df['player'] = sid_df['player'].map(mapping)

# Store the player labels without the row index
sid_df.to_csv('data/player_df.csv', index=False)

In [17]:
# Balanced mean-feature table and the per-row player labels
meanFin_df = pd.read_csv('data/meanFin_df.csv')
player_df = pd.read_csv('data/player_df.csv')

# Place 'player' at position 1, i.e. right after the first column.
# The values are matched to rows through the index, not by any key column.
meanFin_df.insert(loc=1, column='player', value=player_df['player'])

# Store the extended table without the row index
meanFin_df.to_csv('data/meanPlayer_df.csv', index=False)

## Add Labels

In [18]:
# Reload the player-annotated mean table, the other balanced feature tables and
# the label list, then place them side by side. Column-wise concat matches rows
# on the index, which read_csv sets to 0..n-1 for each file.
# NOTE(review): if the files differ in row count the shorter ones are padded
# with NaN instead of raising -- confirm the lengths agree.
mean_df, std_df, min_df, max_df, label_df = map(
    pd.read_csv,
    (
        "data/meanPlayer_df.csv",
        "data/stdFin_df.csv",
        "data/minFin_df.csv",
        "data/maxFin_df.csv",
        "data/labelListFin.csv",
    ),
)

dataset = pd.concat([mean_df, std_df, min_df, max_df, label_df], axis='columns')
dataset.to_csv('data/datasetPlayer.csv', index=False)

## Ordered Player Dataset

In [19]:
# Re-read the assembled player dataset and order its rows by the 'ts' column
old_ds = pd.read_csv('data/datasetPlayer.csv').sort_values('ts')

# Write the time-ordered copy (row index is not stored)
old_ds.to_csv('data/new_dataset_player.csv', index=False)

# PlayerFoot Dataset

In [20]:
# Per-row player labels, per-row foot labels and the balanced mean-feature table
player_df = pd.read_csv('data/player_df.csv')
foot_df = pd.read_csv('data/foot_df.csv')
meanFin_df = pd.read_csv('data/meanFin_df.csv')

# Insert 'player' at position 1 and then 'foot' at position 2, so both follow
# the first column in that order. Values are matched to rows through the index.
for position, (label, source) in enumerate((('player', player_df), ('foot', foot_df)), start=1):
    meanFin_df.insert(loc=position, column=label, value=source[label])

# Store the extended table without the row index
meanFin_df.to_csv('data/meanPlayerFoot_df.csv', index=False)

## Add Labels

In [21]:
# Reload the player/foot-annotated mean table, the other balanced feature
# tables and the label list, then place them side by side. Column-wise concat
# matches rows on the index, which read_csv sets to 0..n-1 for each file.
# NOTE(review): if the files differ in row count the shorter ones are padded
# with NaN instead of raising -- confirm the lengths agree.
mean_df, std_df, min_df, max_df, label_df = map(
    pd.read_csv,
    (
        "data/meanPlayerFoot_df.csv",
        "data/stdFin_df.csv",
        "data/minFin_df.csv",
        "data/maxFin_df.csv",
        "data/labelListFin.csv",
    ),
)

dataset = pd.concat([mean_df, std_df, min_df, max_df, label_df], axis='columns')
dataset.to_csv('data/datasetPlayerFoot.csv', index=False)

## Ordered PlayerFoot Dataset

In [22]:
# Re-read the assembled player/foot dataset and order its rows by the 'ts' column
old_ds = pd.read_csv('data/datasetPlayerFoot.csv').sort_values('ts')

# Write the time-ordered copy (row index is not stored)
old_ds.to_csv('data/new_dataset_player-foot.csv', index=False)