In [None]:
# Copy the app's "classification-log" database file and its "-shm" / "-wal"
# companion files from the connected Android device into the current directory.
# `run-as <package>` runs `cat` as the app's own user so its private
# `databases/` directory is readable; `exec-out` streams the raw bytes to stdout.
# NOTE(review): the -shm / -wal files are presumably SQLite's shared-memory and
# write-ahead-log sidecars, needed so recent writes are visible — confirm.
!adb exec-out run-as si.fri.matevzfa.approxhpvmdemo cat "databases/classification-log" > classification-log
!adb exec-out run-as si.fri.matevzfa.approxhpvmdemo cat "databases/classification-log-shm" > classification-log-shm
!adb exec-out run-as si.fri.matevzfa.approxhpvmdemo cat "databases/classification-log-wal" > classification-log-wal

In [None]:
import sqlite3
import pandas as pd
import re


def info_mapper(key, transform):
    """Build a function that pulls ``key=<value>`` out of an info string.

    Parameters
    ----------
    key : str
        Literal name to look for; it is matched verbatim (regex
        metacharacters in ``key`` are escaped).
    transform : callable
        Applied to the captured value text (the run of non-whitespace
        characters following ``key=``), e.g. ``int``.

    Returns
    -------
    callable
        ``mapper(info) -> transform(value)`` for the first ``key=value``
        occurrence in ``info``.

    Raises
    ------
    ValueError
        Raised by the returned mapper when ``info`` has no ``key=...`` entry.
    """
    # Raw f-string avoids the invalid "\s" escape of a plain string literal;
    # compiling here means the pattern is built once per mapper, not per row.
    pattern = re.compile(rf"{re.escape(key)}=(\S+)")

    def mapper(info):
        m = pattern.search(info)
        if m is None:
            # Fail with a message naming the key instead of the opaque
            # "'NoneType' object is not subscriptable".
            raise ValueError(f"key {key!r} not found in info string {info!r}")
        return transform(m[1])

    return mapper


def fix_datetime(series):
    """Parse a column of date/time values into pandas datetimes.

    Parameters
    ----------
    series : pandas.Series
        Values accepted by ``pandas.to_datetime`` (e.g. timestamp strings).

    Returns
    -------
    pandas.Series
        The parsed datetime values.
    """
    # `infer_datetime_format=True` is deprecated since pandas 2.0, where format
    # inference is the default behaviour and passing the flag only emits a
    # warning, so it is intentionally not passed.
    return pd.to_datetime(series)


def map_timestamp_to_seconds(grp):
    """Convert a group's ``timestamp`` column to seconds since its earliest row.

    Parameters
    ----------
    grp : pandas.DataFrame
        Frame with a datetime ``timestamp`` column.

    Returns
    -------
    pandas.DataFrame
        A copy of ``grp`` whose ``timestamp`` column holds float seconds
        elapsed since ``grp["timestamp"].min()``. ``grp`` itself is left
        untouched: pandas documents mutating the object handed to
        ``groupby.apply`` as unsupported.
    """
    elapsed = grp["timestamp"] - grp["timestamp"].min()
    # Vectorised accessor instead of mapping Timedelta.total_seconds per element.
    return grp.assign(timestamp=elapsed.dt.total_seconds())


def label_mapper():
    """Return a callable that turns an integer class index into its activity label."""
    labels = ["Walking", "W. Upstairs", "W. Downstairs", "Sitting", "Standing", "Lying"]

    def to_label(x):
        # Plain list indexing: position 0 is "Walking", position 5 is "Lying".
        return labels[x]

    return to_label


# Load the classification trace from the local copy of the database.
con = sqlite3.connect("classification-log")
try:
    df = pd.read_sql_query("SELECT * FROM trace_classification", con)
finally:
    # Close explicitly so the database file handle is not leaked.
    con.close()

# Parse the date/time columns into pandas datetimes.
for col in ("timestamp", "run_start", "trace_run_start"):
    df[col] = fix_datetime(df[col])

print(df.shape)

# The user id and the ground-truth class are encoded in the free-text `info`
# column as "user=<int>" and "baseline=<int>".
df["user"] = df["info"].map(info_mapper("user", int))
df["ground_truth"] = df["info"].map(info_mapper("baseline", int))

print(df.user.sort_values().unique())
print(df.ground_truth.sort_values().unique())

# Seconds elapsed since each user's first sample. `transform` keeps the original
# row index; `groupby("user").apply(...)` on pandas >= 2.0 would add "user" as an
# index level while it is also a column, making the later
# `sort_values(["user", ...])` / `groupby(["user", ...])` calls ambiguous.
first_seen = df.groupby("user")["timestamp"].transform("min")
df["timestamp"] = (df["timestamp"] - first_seen).dt.total_seconds()

df["timestamp_s"] = df["timestamp"]

selection = df[[
    "user",
    "timestamp_s",
    "used_config",
    "argmax",
    "argmax_baseline",
    "ground_truth",
    "used_engine",
]].copy()

# Add a human-readable "<column>_NAME" label next to each class-index column.
to_label = label_mapper()
for col in ("argmax", "argmax_baseline", "ground_truth"):
    selection[f"{col}_NAME"] = selection[col].map(to_label)

selection = selection.sort_values(["user", "timestamp_s"])

selection.to_csv("adaptation_v2.csv", index=False)


In [None]:
# Accuracy (fraction of rows where the prediction equals the ground truth) for
# every (user, engine) pair, then averaged over engines for each user.
records = []

for (user, engine), grp in selection.groupby(["user", "used_engine"]):
    is_correct = grp["argmax"] == grp["ground_truth"]
    # The mean of a boolean mask is the fraction of True values.
    accuracy = is_correct.mean()

    records.append([user, engine, accuracy])

# numeric_only=True: on pandas >= 2.0 a non-numeric `engine` column would make
# `.mean()` raise TypeError instead of being dropped silently as it was before.
pd.DataFrame(records, columns=["user", "engine", "accuracy"]).groupby("user").mean(numeric_only=True)