# Imports

In [1]:
# Import components
import datetime as dt
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot as pltw
from sklearn import metrics

# Global matplotlib configuration applied to every figure drawn in this notebook.
# NOTE(review): this font is presumably selected so Japanese text renders in plots;
# confirm it is installed on the machine that runs the notebook.
plt.rcParams["font.family"] = "Hiragino Maru Gothic Pro"
plt.style.use("ggplot")  # plot style
plt.rcParams["figure.figsize"] = [12, 9]  # default figure size

# Load the battery log and parse "date" into datetimes so it can be compared with
# the timestamp bounds used by the window filters in the similarity functions.
df = pd.read_csv("./datasets/fixed_battery_log_2.csv")
df["date"] = pd.to_datetime(df["date"])
# Distinct values of the "User" column; the similarity functions iterate over this array.
users = df["User"].unique()
# NOTE(review): `first` and `last` are not referenced by any of the visible cells.
# NOTE(review): no random seed is set, although later cells draw from np.random.
first = 1
last = 31


def Frequency_Distribution(data, bins, class_width=None):
    """Build a frequency-distribution table for ``data``.

    Parameters
    ----------
    data : array-like
        Values to tabulate. Converted with ``np.asarray``; ``np.histogram``
        flattens multi-dimensional input.
    bins : sequence of scalars
        Monotonically increasing class edges (list, tuple or ndarray).
    class_width : optional
        Unused; kept only so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per class, indexed by a "階級" label, with the columns
        "階級値" (class midpoint), "度数" (count), "累積度数" (cumulative
        count), "相対度数" (relative frequency) and "累積相対度数"
        (cumulative relative frequency). The two relative columns are NaN
        when no value falls inside the edges.
    """
    data = np.asarray(data)
    # Coerce the edges so list/tuple input supports the slice arithmetic below
    # (adding two list slices would concatenate them instead of summing).
    edges = np.asarray(bins)

    hist = np.histogram(data, edges)[0]
    cumsum = hist.cumsum()
    total = cumsum[-1]

    # With no observations the ratios are 0/0: keep the NaN result but do not
    # emit a RuntimeWarning for it.
    with np.errstate(divide="ignore", invalid="ignore"):
        relative = hist / total
        cumulative_relative = cumsum / total

    # NOTE: np.histogram treats the last class as closed on the right, so the
    # final label's "未満" (less than) also covers values equal to the top edge.
    labels = [f"{edges[i]}以上{edges[i+1]}未満" for i in range(hist.size)]

    return pd.DataFrame(
        {
            "階級値": (edges[1:] + edges[:-1]) / 2,
            "度数": hist,
            "累積度数": cumsum,
            "相対度数": relative,
            "累積相対度数": cumulative_relative,
        },
        index=pd.Index(labels, name="階級"),
    )

In [2]:
# Show the loaded log table (bare last expression, so the notebook renders the DataFrame).
df

Unnamed: 0,User,date,battery,state,weekday,time_to_num
0,Narita,2021-12-04 01:52:00,60,ON,5,6773
1,Narita,2021-12-04 02:22:00,73,OFF,5,8566
2,Narita,2021-12-04 07:06:00,71,ON,5,25564
3,Narita,2021-12-04 07:15:00,73,OFF,5,26105
4,Narita,2021-12-04 07:15:00,73,ON,5,26113
...,...,...,...,...,...,...
3487,Isshin,2022-02-18 14:38:00,100,OFF,4,52734
3488,Isshin,2022-02-18 16:46:00,42,ON,4,60411
3489,Isshin,2022-02-18 17:32:00,80,OFF,4,63150
3490,Isshin,2022-02-18 19:36:00,20,ON,4,70589


# def visualizer_roc_curve(gen, im, flag):

In [19]:
def check_sim_gen(t_last, q_last):
    """Compute one genuine (same-user) similarity score per user.

    For each entry of the module-level ``users`` array a template-window start
    is drawn at random from 2021-12-01..2022-01-10 using the global NumPy RNG.
    The user's "ON" battery readings strictly inside the template window
    (``t_last`` days long) and strictly inside the query window (``q_last``
    days long, starting one day after the template window ends) are binned
    into 5-point classes over 0..100, and the two count histograms are compared.

    Parameters
    ----------
    t_last : number
        Length of the template window in days.
    q_last : number
        Length of the query window in days.

    Returns
    -------
    list of float
        ``1 / (1 + d)`` per user, in ``users`` order, where ``d`` is the
        Euclidean distance between the two count histograms (1.0 means the
        histograms are identical).

    Notes
    -----
    Reads the module-level ``df``, ``users`` and ``Frequency_Distribution``.
    NOTE(review): raw counts are compared, not relative frequencies, so windows
    of different lengths differ in scale as well as shape; confirm this is intended.
    """
    bin_edges = np.arange(0, 105, 5)
    # Loop-invariant: the candidate start dates are the same for every user.
    first_date_cand = pd.date_range("2021-12-01", "2022-01-10")
    is_on = df["state"] == "ON"

    diff_list_gen = []
    for target in users:
        # Size-1 draw kept so the global RNG is consumed exactly as before.
        start_idx = np.random.randint(0, len(first_date_cand), 1)
        template_start = first_date_cand[start_idx][0]
        template_end = template_start + dt.timedelta(days=t_last)
        query_start = template_end + dt.timedelta(days=1)
        query_end = query_start + dt.timedelta(days=q_last)

        target_on = (df["User"] == target) & is_on
        in_template = (df["date"] > template_start) & (df["date"] < template_end)
        in_query = (df["date"] > query_start) & (df["date"] < query_end)

        template_counts = Frequency_Distribution(
            df.loc[target_on & in_template, "battery"], bin_edges
        )["度数"].to_numpy()
        query_counts = Frequency_Distribution(
            df.loc[target_on & in_query, "battery"], bin_edges
        )["度数"].to_numpy()

        # Work on positional arrays: integer-key lookups on the string-labelled
        # "階級" index rely on a deprecated pandas fallback.
        squared_dist = float(np.sum((template_counts - query_counts) ** 2))
        diff_list_gen.append(1 / (math.sqrt(squared_dist) + 1))
    return diff_list_gen

In [20]:
def check_sim_im(t_last, q_last):
    """Compute one impostor (different-user) similarity score per user.

    For each entry of the module-level ``users`` array a template-window start
    is drawn at random from 2021-12-01..2022-01-10 and one other user is drawn
    at random as the impostor (both via the global NumPy RNG). The target's
    "ON" battery readings strictly inside the template window (``t_last`` days
    long) are compared with the impostor's "ON" readings strictly inside the
    query window (``q_last`` days long, starting one day after the template
    window ends), after binning both into 5-point classes over 0..100.

    Parameters
    ----------
    t_last : number
        Length of the template window in days.
    q_last : number
        Length of the query window in days.

    Returns
    -------
    list of float
        ``1 / (1 + d)`` per user, in ``users`` order, where ``d`` is the
        Euclidean distance between the two count histograms.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``users`` holds a single user, since
        no impostor can be drawn.

    Notes
    -----
    Reads the module-level ``df``, ``users`` and ``Frequency_Distribution``.
    NOTE(review): raw counts are compared, not relative frequencies, so windows
    of different lengths differ in scale as well as shape; confirm this is intended.
    """
    bin_edges = np.arange(0, 105, 5)
    # Loop-invariant: the candidate start dates are the same for every user.
    first_date_cand = pd.date_range("2021-12-01", "2022-01-10")
    is_on = df["state"] == "ON"

    diff_list_im = []
    for target in users:
        # Size-1 draw kept so the global RNG is consumed exactly as before;
        # the window draw must precede the impostor draw for the same reason.
        start_idx = np.random.randint(0, len(first_date_cand), 1)
        template_start = first_date_cand[start_idx][0]
        template_end = template_start + dt.timedelta(days=t_last)
        query_start = template_end + dt.timedelta(days=1)
        query_end = query_start + dt.timedelta(days=q_last)

        in_template = (df["date"] > template_start) & (df["date"] < template_end)
        in_query = (df["date"] > query_start) & (df["date"] < query_end)

        template_counts = Frequency_Distribution(
            df.loc[(df["User"] == target) & is_on & in_template, "battery"],
            bin_edges,
        )["度数"].to_numpy()

        # Every user except the target is a candidate impostor.
        imposters = users[users != target]
        imposter = np.random.choice(imposters)
        query_counts = Frequency_Distribution(
            df.loc[(df["User"] == imposter) & is_on & in_query, "battery"],
            bin_edges,
        )["度数"].to_numpy()

        # Work on positional arrays: integer-key lookups on the string-labelled
        # "階級" index rely on a deprecated pandas fallback.
        squared_dist = float(np.sum((template_counts - query_counts) ** 2))
        diff_list_im.append(1 / (math.sqrt(squared_dist) + 1))
    return diff_list_im

In [31]:
def visualizer_roc_curve(gen, im, flag):
    """Estimate the equal error rate from genuine and impostor scores.

    Genuine scores are labelled 1 and impostor scores 0, then passed to
    ``metrics.roc_curve``. The EER estimate is the false-acceptance rate at the
    last ROC point where FAR is still below FRR (FRR being ``1 - TPR``).

    Parameters
    ----------
    gen : sequence of float
        Similarity scores of genuine comparisons.
    im : sequence of float
        Similarity scores of impostor comparisons.
    flag : int
        ``0`` prints the EER and draws three figures (ROC curve, FAR vs FRR,
        FAR/FRR against the threshold) and returns ``None``; any other value
        returns the EER without plotting.
    """
    labels = np.concatenate((np.ones(len(gen)), np.zeros(len(im))))
    scores = np.concatenate((gen, im))

    false_accept, true_accept, cutoffs = metrics.roc_curve(labels, scores)
    area = metrics.auc(false_accept, true_accept)
    false_reject = 1.0 - true_accept

    # Indices where FAR is still below FRR; the last one sits at the crossing.
    below_crossing = np.where((false_accept - false_reject) < 0)[0]
    eer = false_accept[below_crossing[-1]]

    if flag != 0:
        return eer

    print(eer)

    # Figure 1: ROC curve annotated with its AUC.
    plt.plot(false_accept, true_accept, label="ROC curve (area = %.2f)" % area)
    plt.legend()
    plt.xlabel("FPR: False positive rate")
    plt.ylabel("TPR: True positive rate")
    plt.grid()
    plt.show()

    # Figure 2: trade-off between the two error rates.
    plt.plot(false_accept, false_reject)
    plt.xlabel("FAR")
    plt.ylabel("FRR")
    plt.show()

    # Figure 3: both error rates against the decision threshold, with the first
    # and last ROC points left out.
    inner_cutoffs = cutoffs[1:-1]
    far_by_cutoff = pd.DataFrame(false_accept[1:-1]).copy().set_index(inner_cutoffs)
    frr_by_cutoff = pd.DataFrame(false_reject[1:-1]).copy().set_index(inner_cutoffs)
    plt.plot(far_by_cutoff, color="r")
    plt.plot(frr_by_cutoff, color="b")
    plt.show()

In [32]:
# Template window fixed at 7 days; sweep the query-window length and report the
# EER averaged over repeated random trials.
q_last_list = [7, 14, 21, 30]
t_day = 7

for a in q_last_list:
    print("T: %2d days, Q: %d days" % (t_day, a))
    eer_list = []
    # 100 repeated trials: each call draws fresh random windows and yields one
    # genuine / one impostor score per user.
    for b in range(0, 100):
        Gen = check_sim_gen(t_day, a)
        Im = check_sim_im(t_day, a)
        # Non-zero flag: return the EER without plotting.
        eer_list.append(visualizer_roc_curve(Gen, Im, 1))
    print("Avg EER: %.4f" % np.mean(eer_list))

T:  7 days, Q: 7 days
Avg EER: 0.2475
T:  7 days, Q: 14 days
Avg EER: 0.2550
T:  7 days, Q: 21 days
Avg EER: 0.2900
T:  7 days, Q: 30 days
Avg EER: 0.3387


In [None]:
# Accumulate, over 100 repetitions, the EER for a 7-day template window (s = 7)
# against query windows of 7, 14, 21 and 30 days (result_1[0..3] respectively).
# NOTE(review): result_1 holds the SUM of the 100 EERs; no division by 100 appears
# in the visible cells, so divide before reporting a mean.
# NOTE(review): this cell differs from the result_2 / result_3 / result_4 cells
# only in the value of `s` and the result name.
result_1 = [0, 0, 0, 0]
for i in range(0, 100):
    # Genuine scores pooled over 100 random window draws per query length.
    gen_7 = []
    gen_14 = []
    gen_21 = []
    gen_30 = []
    s = 7
    for a in range(0, 100):
        gen_7 += check_sim_gen(s, 7)
        gen_14 += check_sim_gen(s, 14)
        gen_21 += check_sim_gen(s, 21)
        gen_30 += check_sim_gen(s, 30)

    # Impostor scores pooled the same way.
    im_7 = []
    im_14 = []
    im_21 = []
    im_30 = []

    for b in range(0, 100):
        im_7 += check_sim_im(s, 7)
        im_14 += check_sim_im(s, 14)
        im_21 += check_sim_im(s, 21)
        im_30 += check_sim_im(s, 30)

    # Non-zero flag: visualizer_roc_curve returns the EER without plotting.
    result_1[0] += visualizer_roc_curve(gen_7, im_7, 1)
    result_1[1] += visualizer_roc_curve(gen_14, im_14, 1)
    result_1[2] += visualizer_roc_curve(gen_21, im_21, 1)
    result_1[3] += visualizer_roc_curve(gen_30, im_30, 1)

In [None]:
# Accumulate, over 100 repetitions, the EER for a 14-day template window (s = 14)
# against query windows of 7, 14, 21 and 30 days (result_2[0..3] respectively).
# NOTE(review): result_2 holds the SUM of the 100 EERs; no division by 100 appears
# in the visible cells, so divide before reporting a mean.
# NOTE(review): this cell differs from the result_1 / result_3 / result_4 cells
# only in the value of `s` and the result name.
result_2 = [0, 0, 0, 0]
for i in range(0, 100):
    # Genuine scores pooled over 100 random window draws per query length.
    gen_7 = []
    gen_14 = []
    gen_21 = []
    gen_30 = []
    s = 14
    for a in range(0, 100):
        gen_7 += check_sim_gen(s, 7)
        gen_14 += check_sim_gen(s, 14)
        gen_21 += check_sim_gen(s, 21)
        gen_30 += check_sim_gen(s, 30)

    # Impostor scores pooled the same way.
    im_7 = []
    im_14 = []
    im_21 = []
    im_30 = []

    for b in range(0, 100):
        im_7 += check_sim_im(s, 7)
        im_14 += check_sim_im(s, 14)
        im_21 += check_sim_im(s, 21)
        im_30 += check_sim_im(s, 30)

    # Non-zero flag: visualizer_roc_curve returns the EER without plotting.
    result_2[0] += visualizer_roc_curve(gen_7, im_7, 1)
    result_2[1] += visualizer_roc_curve(gen_14, im_14, 1)
    result_2[2] += visualizer_roc_curve(gen_21, im_21, 1)
    result_2[3] += visualizer_roc_curve(gen_30, im_30, 1)

In [None]:
# Accumulate, over 100 repetitions, the EER for a 21-day template window (s = 21)
# against query windows of 7, 14, 21 and 30 days (result_3[0..3] respectively).
# NOTE(review): result_3 holds the SUM of the 100 EERs; no division by 100 appears
# in the visible cells, so divide before reporting a mean.
# NOTE(review): this cell differs from the result_1 / result_2 / result_4 cells
# only in the value of `s` and the result name.
result_3 = [0, 0, 0, 0]
for i in range(0, 100):
    # Genuine scores pooled over 100 random window draws per query length.
    gen_7 = []
    gen_14 = []
    gen_21 = []
    gen_30 = []
    s = 21
    for a in range(0, 100):
        gen_7 += check_sim_gen(s, 7)
        gen_14 += check_sim_gen(s, 14)
        gen_21 += check_sim_gen(s, 21)
        gen_30 += check_sim_gen(s, 30)

    # Impostor scores pooled the same way.
    im_7 = []
    im_14 = []
    im_21 = []
    im_30 = []

    for b in range(0, 100):
        im_7 += check_sim_im(s, 7)
        im_14 += check_sim_im(s, 14)
        im_21 += check_sim_im(s, 21)
        im_30 += check_sim_im(s, 30)

    # Non-zero flag: visualizer_roc_curve returns the EER without plotting.
    result_3[0] += visualizer_roc_curve(gen_7, im_7, 1)
    result_3[1] += visualizer_roc_curve(gen_14, im_14, 1)
    result_3[2] += visualizer_roc_curve(gen_21, im_21, 1)
    result_3[3] += visualizer_roc_curve(gen_30, im_30, 1)

In [None]:
# Accumulate, over 100 repetitions, the EER for a 30-day template window (s = 30)
# against query windows of 7, 14, 21 and 30 days (result_4[0..3] respectively).
# NOTE(review): result_4 holds the SUM of the 100 EERs; no division by 100 appears
# in the visible cells, so divide before reporting a mean.
# NOTE(review): this cell differs from the result_1 / result_2 / result_3 cells
# only in the value of `s` and the result name.
result_4 = [0, 0, 0, 0]
for i in range(0, 100):
    # Genuine scores pooled over 100 random window draws per query length.
    gen_7 = []
    gen_14 = []
    gen_21 = []
    gen_30 = []
    s = 30
    for a in range(0, 100):
        gen_7 += check_sim_gen(s, 7)
        gen_14 += check_sim_gen(s, 14)
        gen_21 += check_sim_gen(s, 21)
        gen_30 += check_sim_gen(s, 30)

    # Impostor scores pooled the same way.
    im_7 = []
    im_14 = []
    im_21 = []
    im_30 = []

    for b in range(0, 100):
        im_7 += check_sim_im(s, 7)
        im_14 += check_sim_im(s, 14)
        im_21 += check_sim_im(s, 21)
        im_30 += check_sim_im(s, 30)

    # Non-zero flag: visualizer_roc_curve returns the EER without plotting.
    result_4[0] += visualizer_roc_curve(gen_7, im_7, 1)
    result_4[1] += visualizer_roc_curve(gen_14, im_14, 1)
    result_4[2] += visualizer_roc_curve(gen_21, im_21, 1)
    result_4[3] += visualizer_roc_curve(gen_30, im_30, 1)

In [164]:
import time

# Wall-clock timestamp before the work starts.
t1 = time.time()

# Work being timed.
# hoge / foo: genuine / impostor scores for a 7-day template and a 21-day query;
# hoge2 / foo2: the same with a 21-day template and a 7-day query.
hoge = []
hoge2 = []
for a in range(0, 1000):
    hoge += check_sim_gen(7, 21)
    hoge2 += check_sim_gen(21, 7)
foo = []
foo2 = []
for b in range(0, 1000):
    foo += check_sim_im(7, 21)
    foo2 += check_sim_im(21, 7)

# Non-zero flag: visualizer_roc_curve returns the EER, which is printed here.
print(visualizer_roc_curve(hoge, foo, 1))
print(visualizer_roc_curve(hoge2, foo2, 1))

# Timestamp after the work finishes.
t2 = time.time()

# Report the elapsed time in seconds.
elapsed_time = t2 - t1
print(f"経過時間：{elapsed_time}")

0.403375
0.3765
経過時間：236.23642492294312


In [4]:
# Pairwise correlation between the numeric columns of the log.
# Non-numeric columns (user name, state, timestamp) are dropped explicitly,
# because DataFrame.corr() no longer skips them silently on pandas >= 2.0.
df_corr = df.select_dtypes(include="number").corr()
print(df_corr)

              battery   weekday  time_to_num
battery      1.000000 -0.034259     0.000172
weekday     -0.034259  1.000000     0.014776
time_to_num  0.000172  0.014776     1.000000
