# Imports

In [1]:
# Import components
import datetime as dt
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot as pltw
from sklearn import metrics

plt.rcParams["font.family"] = "Hiragino Maru Gothic Pro"
plt.style.use("ggplot")  # グラフのスタイル
plt.rcParams["figure.figsize"] = [12, 9]  # グラフサイズ設定

df = pd.read_csv("./datasets/fixed_battery_log_2.csv")
df["date"] = pd.to_datetime(df["date"])
users = df["User"].unique()
first = 1
last = 31


def Frequency_Distribution(data, bins, class_width=None):
    data = np.asarray(data)

    #     bins = np.arange(0, 110, 10)
    hist = np.histogram(data, bins)[0]
    cumsum = hist.cumsum()

    return pd.DataFrame(
        {
            "階級値": (bins[1:] + bins[:-1]) / 2,
            "度数": hist,
            "累積度数": cumsum,
            "相対度数": hist / cumsum[-1],
            "累積相対度数": cumsum / cumsum[-1],
        },
        index=pd.Index(
            [f"{bins[i]}以上{bins[i+1]}未満" for i in range(hist.size)], name="階級"
        ),
    )

# ターゲットのテンプレート作成

In [75]:
target = users[0]
t_T = pd.DataFrame(
    df.loc[
        (df["User"] == target)
        & (df["state"] == "ON")
        & (df["date"] > dt.datetime(2021, 12, first))
        & (df["date"] < dt.datetime(2021, 12, last))
    ]["battery"].copy()
)
t_T = t_T.set_index(np.arange(0, len(t_T)))
t_T = Frequency_Distribution(t_T, np.arange(0, 105, 5))

# ターゲットのクエリ作成

In [86]:
first_date_cand = pd.date_range("2022-01-01", "2022-02-01")
t_Q_list = []
for i in range(30):
    first_date = first_date_cand[np.random.randint(0, len(first_date_cand), 1)]
    last_date = first_date + dt.timedelta(days=30)
    t_Q = pd.DataFrame(
        df.loc[
            (df["User"] == target)
            & (df["state"] == "ON")
            & (df["date"] > first_date[0])
            & (df["date"] < last_date[0])
        ]["battery"].copy()
    )
    t_Q = t_Q.set_index(np.arange(0, len(t_Q)))
    t_Q = Frequency_Distribution(t_Q, np.arange(0, 105, 5))
    t_Q_list.append(t_Q)

# 他人のクエリ作成

In [126]:
first_date_cand = pd.date_range("2022-01-01", "2022-02-01")
i_Q_list = []
target = users[0]
imposters = users.copy()
imposters = imposters[~(imposters == target)]

for i in range(30):
    first_date = first_date_cand[np.random.randint(0, len(first_date_cand), 1)]
    last_date = first_date + dt.timedelta(days=30)
    i_Q = pd.DataFrame(
        df.loc[
            (df["User"] == np.random.choice(imposters))
            & (df["state"] == "ON")
            & (df["date"] > first_date[0])
            & (df["date"] < last_date[0])
        ]["battery"].copy()
    )
    i_Q = i_Q.set_index(np.arange(0, len(i_Q)))
    i_Q = Frequency_Distribution(i_Q, np.arange(0, 105, 5))
    i_Q_list.append(i_Q)

# 本人類似度チェック(ユークリッド距離使用) def check_sim_gen(t_last, q_last):

In [2]:
def check_sim_gen(t_last, q_last):
    diff_list_gen = []
    for h in range(len(users)):
        target = users[h]
        t_first_date = dt.datetime(2021, 12, 1)

        t_T = pd.DataFrame(
            df.loc[
                (df["User"] == target)
                & (df["state"] == "ON")
                & (df["date"] > t_first_date)
                & (df["date"] < (t_first_date + dt.timedelta(days=t_last)))
            ]["battery"].copy()
        )

        t_T = t_T.set_index(np.arange(0, len(t_T)))
        t_T = Frequency_Distribution(t_T, np.arange(0, 105, 5))
        first_date_cand = pd.date_range("2022-01-01", "2022-02-01")
        t_Q_list = []

        for i in range(0, 31):
            first_date = first_date_cand[np.random.randint(0, len(first_date_cand), 1)]
            last_date = first_date + dt.timedelta(days=q_last)
            t_Q = pd.DataFrame(
                df.loc[
                    (df["User"] == target)
                    & (df["state"] == "ON")
                    & (df["date"] > first_date[0])
                    & (df["date"] < last_date[0])
                ]["battery"].copy()
            )
            t_Q = t_Q.set_index(np.arange(0, len(t_Q)))
            t_Q = Frequency_Distribution(t_Q, np.arange(0, 105, 5))
            t_Q_list.append(t_Q)

        for j in range(len(t_Q_list)):
            diff = 0
            for g in range(len(t_T)):
                diff += (t_T["度数"][g] - t_Q_list[j]["度数"][g]) ** 2
            diff = 1 / (math.sqrt(diff) + 1)
            diff_list_gen.append(diff)
    return diff_list_gen

# 他人類似度調査(ユークリッド距離) def check_sim_im(t_last, q_last):

In [3]:
def check_sim_im(t_last, q_last):
    diff_list_im = []
    for h in range(len(users)):
        target = users[h]
        t_first_date = dt.datetime(2021, 12, 1)

        t_T = pd.DataFrame(
            df.loc[
                (df["User"] == target)
                & (df["state"] == "ON")
                & (df["date"] > t_first_date)
                & (df["date"] < (t_first_date + dt.timedelta(days=t_last)))
            ]["battery"].copy()
        )

        t_T = t_T.set_index(np.arange(0, len(t_T)))
        t_T = Frequency_Distribution(t_T, np.arange(0, 105, 5))

        first_date_cand = pd.date_range("2022-01-01", "2022-02-01")
        i_Q_list = []
        target = users[h]
        imposters = users.copy()
        imposters = imposters[~(imposters == target)]

        for i in range(30):
            first_date = first_date_cand[np.random.randint(0, len(first_date_cand), 1)]
            last_date = first_date + dt.timedelta(days=q_last)
            i_Q = pd.DataFrame(
                df.loc[
                    (df["User"] == np.random.choice(imposters))
                    & (df["state"] == "ON")
                    & (df["date"] > first_date[0])
                    & (df["date"] < last_date[0])
                ]["battery"].copy()
            )
            i_Q = i_Q.set_index(np.arange(0, len(i_Q)))
            i_Q = Frequency_Distribution(i_Q, np.arange(0, 105, 5))
            i_Q_list.append(i_Q)

            for j in range(len(i_Q_list)):
                diff = 0
                for g in range(len(t_T)):
                    diff += (t_T["度数"][g] - i_Q_list[j]["度数"][g]) ** 2
                diff = 1 / (math.sqrt(diff) + 1)
                diff_list_im.append(diff)
    return diff_list_im

In [74]:
# hoge = check_sim_gen(30, 30)
# sim_FD_gen = Frequency_Distribution(hoge, np.arange(0, 0.15, 0.01))
# sim_FD_gen["相対度数"].plot()

# foo = check_sim_im(30,30)
# sim_FD_im = Frequency_Distribution(foo, np.arange(0, 0.15, 0.01))
# sim_FD_im["相対度数"].plot()
# sim_FD_gen["相対度数"].plot(c="r")
# sim_FD_im["相対度数"].plot(c="b")

# def visualizer_roc_curve(gen, im, flag):

In [4]:
def visualizer_roc_curve(gen, im, flag):
    grand_truth = np.concatenate((np.ones(len(gen)), np.zeros(len(im))))
    score = np.concatenate((gen, im))
    far, tpr, threshold = metrics.roc_curve(grand_truth, score)
    auc = metrics.auc(far, tpr)
    frr = 1.0 - tpr
    eer = far[np.where((far - frr) < 0)[0][-1]]
    if flag == 0:
        print(eer)
        plt.plot(far, tpr, label="ROC curve (area = %.2f)" % auc)
        plt.legend()
        plt.xlabel("FPR: False positive rate")
        plt.ylabel("TPR: True positive rate")
        plt.grid()
        plt.show()

        plt.plot(far, frr)
        plt.xlabel("FAR")
        plt.ylabel("FRR")
        plt.show()

        far_kai = pd.DataFrame(far[1:-1]).copy().set_index(threshold[1:-1])
        frr_kai = pd.DataFrame(frr[1:-1]).copy().set_index(threshold[1:-1])
        plt.plot(far_kai, color="r")
        plt.plot(frr_kai, color="b")
        plt.show()
    else:
        return eer

# T_7days

In [5]:
q_last_list = [7, 14, 21, 30]
t_day = 7
for a in q_last_list:
    print("T: %2d days, Q: %d days" % (t_day, a))
    eer_list = []
    for b in range(0, 100):
        Gen = check_sim_gen(t_day, a)
        Im = check_sim_im(t_day, a)
        eer_list.append(visualizer_roc_curve(Gen, Im, 1))
    print("Avg EER: %.4f" % np.mean(eer_list))

T:  7 days, Q: 7 days
Avg EER: 0.3630
T:  7 days, Q: 14 days
Avg EER: 0.4383
T:  7 days, Q: 21 days
Avg EER: 0.4397
T:  7 days, Q: 30 days
Avg EER: 0.4777


# T_14days

In [6]:
q_last_list = [7, 14, 21, 30]
t_day = 7
for a in q_last_list:
    print("T: %2d days, Q: %d days" % (t_day, a))
    eer_list = []
    for b in range(0, 100):
        Gen = check_sim_gen(t_day, a)
        Im = check_sim_im(t_day, a)
        eer_list.append(visualizer_roc_curve(Gen, Im, 1))
    print("Avg EER: %.4f" % np.mean(eer_list))
    print("Std EER: %.4f" % np.std(eer_list))

q_last_list = [7, 14, 21, 30]
t_day = 14
for a in q_last_list:
    print("T: %2d days, Q: %d days" % (t_day, a))
    eer_list = []
    for b in range(0, 100):
        Gen = check_sim_gen(t_day, a)
        Im = check_sim_im(t_day, a)
        eer_list.append(visualizer_roc_curve(Gen, Im, 1))
    print("Avg EER: %.4f" % np.mean(eer_list))
    print("Std EER: %.4f" % np.std(eer_list))

q_last_list = [7, 14, 21, 30]
t_day = 21
for a in q_last_list:
    print("T: %2d days, Q: %d days" % (t_day, a))
    eer_list = []
    for b in range(0, 100):
        Gen = check_sim_gen(t_day, a)
        Im = check_sim_im(t_day, a)
        eer_list.append(visualizer_roc_curve(Gen, Im, 1))
    print("Avg EER: %.4f" % np.mean(eer_list))
    print("Std EER: %.4f" % np.std(eer_list))

q_last_list = [7, 14, 21, 30]
t_day = 30
for a in q_last_list:
    print("T: %2d days, Q: %d days" % (t_day, a))
    eer_list = []
    for b in range(0, 100):
        Gen = check_sim_gen(t_day, a)
        Im = check_sim_im(t_day, a)
        eer_list.append(visualizer_roc_curve(Gen, Im, 1))
    print("Avg EER: %.4f" % np.mean(eer_list))
    print("Std EER: %.4f" % np.std(eer_list))

T:  7 days, Q: 7 days
Avg EER: 0.3597
Std EER: 0.0214
T:  7 days, Q: 14 days
Avg EER: 0.4323
Std EER: 0.0213
T:  7 days, Q: 21 days
Avg EER: 0.4397
Std EER: 0.0237
T:  7 days, Q: 30 days
Avg EER: 0.4790
Std EER: 0.0190
T: 14 days, Q: 7 days
Avg EER: 0.3227
Std EER: 0.0172
T: 14 days, Q: 14 days
Avg EER: 0.3365
Std EER: 0.0185
T: 14 days, Q: 21 days
Avg EER: 0.3618
Std EER: 0.0187
T: 14 days, Q: 30 days
Avg EER: 0.4208
Std EER: 0.0283
T: 21 days, Q: 7 days
Avg EER: 0.2968
Std EER: 0.0187
T: 21 days, Q: 14 days
Avg EER: 0.2266
Std EER: 0.0196
T: 21 days, Q: 21 days
Avg EER: 0.2167
Std EER: 0.0174
T: 21 days, Q: 30 days
Avg EER: 0.2745
Std EER: 0.0173
T: 30 days, Q: 7 days
Avg EER: 0.3913
Std EER: 0.0102
T: 30 days, Q: 14 days
Avg EER: 0.2717
Std EER: 0.0125
T: 30 days, Q: 21 days
Avg EER: 0.2043
Std EER: 0.0177
T: 30 days, Q: 30 days
Avg EER: 0.1971
Std EER: 0.0207
