In [1]:
# Import components
import datetime as dt
import math
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pmdarima as pm
import seaborn as sns
from matplotlib import pyplot as pltw
from pmdarima import arima, datasets, model_selection, utils
from scipy.spatial import distance
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.neighbors import LocalOutlierFactor
from statsmodels.tsa.seasonal import STL

warnings.simplefilter("ignore")

plt.rcParams["font.family"] = "Hiragino Maru Gothic Pro"
plt.style.use("ggplot")  # グラフのスタイル
plt.rcParams["figure.figsize"] = [12, 9]  # グラフサイズ設定

In [2]:
def DDTW(Q, C):
    """
    Args:
        Q (np.array or list): 一つ目の波形
        C (np.array or list): 二つ目の波形

    Returns:
        γ_mat (np.array): DDTWを計算するための行列
        arrows (np.array): 各時点で←・↙︎・↓のどのマスが最小だったかを示す記号を保存する行列
        ddtw (float): DDTW
    """
    Q, C = np.array(Q), np.array(C)
    assert Q.shape[0] > 3, "一つ目の波形のフォーマットがおかしいです。"
    assert C.shape[0] > 3, "二つ目の波形のフォーマットがおかしいです。"

    # 3.1 Algorithm details の式
    def _Dq(q):
        return ((q[1] - q[0]) + (q[2] - q[0]) / 2) / 2

    # 二つの時点間の距離
    def _γ(x, y):
        return abs(_Dq(x) - _Dq(y))

    # 各変数
    n, m = Q.shape[0] - 2, C.shape[0] - 2
    γ_mat = np.zeros((n, m))
    arrows = np.array(np.zeros((n, m)), dtype=str)  # 可視化用の行列でDDTWの値とは無関係

    # 一番左下のスタート地点
    γ_mat[0, 0] = _γ(Q[0:3], C[0:3])

    # 一列目を計算
    for i in range(1, n):
        γ_mat[i, 0] = γ_mat[i - 1, 0] + _γ(Q[i - 1 : i + 2], C[0:3])
        arrows[i, 0] = "↓"

    # 一行目を計算
    for j in range(1, m):
        γ_mat[0, j] = γ_mat[0, j - 1] + _γ(Q[0:3], C[j - 1 : j + 2])
        arrows[0, j] = "←"

    # 残りのマスを計算
    for i in range(1, n):
        for j in range(1, m):
            # DDTWを求めるためのマトリクスを埋める
            d_ij = _γ(Q[i - 1 : i + 2], C[j - 1 : j + 2])
            γ_mat[i, j] = d_ij + np.min(
                [γ_mat[i - 1, j - 1], γ_mat[i - 1, j], γ_mat[i, j - 1]]
            )

            # 矢印を書くための行列(DDTWの値とは関係無い処理)
            if (
                square_index := np.argmin(
                    [γ_mat[i - 1, j - 1], γ_mat[i - 1, j], γ_mat[i, j - 1]]
                )
            ) == 0:
                arrows[i, j] = "↙︎"
            elif square_index == 1:
                arrows[i, j] = "↓"
            elif square_index == 2:
                arrows[i, j] = "←"

    return γ_mat, arrows, γ_mat[n - 1, m - 1]

In [3]:
df = pd.read_csv("../datasets/fixed_battery_log_2_copy.csv")
df["date"] = pd.to_datetime(df["date"])
users = df["User"].unique()

In [4]:
first_date_cand = pd.date_range("2022-01-01", "2022-02-01")
score_G_raw = []
score_I_raw = []

raw_i_Q_avg_q = []
raw_t_T_avg_q = []
raw_t_Q_avg_q = []

clf = LocalOutlierFactor(n_neighbors=2)
days = [7, 14, 21, 30]
weekdays = df["weekday"].unique()

for a in range(len(days)):
    diff_day_1 = days[a]
    for b in range(len(days)):
        diff_day_2 = days[b]
        for h in range(len(users)):
            user = users[h]
            for i in range(0, 101):
                first_date = first_date_cand[
                    np.random.randint(0, len(first_date_cand), 1)
                ]
                last_date = first_date + dt.timedelta(days=diff_day_1)
                weekday = np.random.choice(weekdays)
                imposters = users.copy()
                imposters = imposters[~(imposters == user)]
                imposter = np.random.choice(imposters)
                t_Q_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == user)
                        & (df["state"] == "ON")
                        & (df["date"] > first_date[0] + dt.timedelta(seconds=1))
                        & (df["date"] < last_date[0])
                        & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                t_Q_O = t_Q_O.sort_values("time_to_num").reset_index(drop=True)

                t_T_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == user)
                        & (df["state"] == "ON")
                        & (df["date"] > (first_date[0] - dt.timedelta(days=diff_day_2)))
                        & (df["date"] < (first_date[0]))
                        & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                t_T_O = t_T_O.sort_values("time_to_num").reset_index(drop=True)

                i_Q_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == imposter)
                        & (df["state"] == "ON")
                        & (df["date"] > first_date[0] + dt.timedelta(seconds=1))
                        & (df["date"] < last_date[0])
                        & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                i_Q_O = i_Q_O.sort_values("time_to_num").reset_index(drop=True)

                raw_t_Q_q = []
                raw_t_T_q = []
                raw_i_Q_q = []
                for j in range(0, 24):
                    # print("{}:H".format(j))
                    raw_t_Q = t_Q_O.loc[
                        (t_Q_O["time_to_num"] > 3600 * j)
                        & (t_Q_O["time_to_num"] < 3600 * (j + 1))
                    ]["battery"]
                    if len(raw_t_Q) != 0:
                        raw_t_Q_q.append(raw_t_Q.values.mean())
                    else:
                        raw_t_Q_q.append(0)

                    raw_t_T = t_T_O.loc[
                        (t_T_O["time_to_num"] > 3600 * j)
                        & (t_T_O["time_to_num"] < 3600 * (j + 1))
                    ]["battery"]
                    if len(raw_t_T) != 0:
                        raw_t_T_q.append(raw_t_T.values.mean())
                    else:
                        raw_t_T_q.append(0)

                    raw_i_Q = i_Q_O.loc[
                        (i_Q_O["time_to_num"] > 3600 * j)
                        & (i_Q_O["time_to_num"] < 3600 * (j + 1))
                    ]["battery"]
                    if len(raw_i_Q) != 0:
                        raw_i_Q_q.append(raw_i_Q.values.mean())
                    else:
                        raw_i_Q_q.append(0)
                try:
                    hoge, foo, ddtw_1 = DDTW(raw_t_T_q, raw_t_Q_q)
                    score_G_raw.append(ddtw_1)
                    hoge, foo, ddtw_2 = DDTW(raw_t_T_q, raw_i_Q_q)
                    score_I_raw.append(ddtw_2)
                except:
                    pass
        grand_truth_raw = np.concatenate(
            (np.ones(len(score_G_raw)), np.zeros(len(score_I_raw)))
        )
        score_raw = np.concatenate((score_G_raw, score_I_raw))
        far, tpr, threshold = metrics.roc_curve(grand_truth_raw, score_raw)
        auc = metrics.auc(far, tpr)
        frr = 1.0 - tpr
        eer_raw = far[np.where((far - frr) < 0)[0][-1]]
        # print("RAW")
        print(eer_raw)
print("FIN")

0.5420792079207921
0.5383663366336634
0.5445544554455446
0.5454826732673267
0.5457920792079208
0.5552805280528053
0.5650636492220651
0.5649752475247525
0.5638063806380638
0.5669554455445545
0.5749324932493249
0.5783828382838284
0.5731150038080731
0.5741690240452617
0.5760726072607261
0.5774288366336634
FIN


In [5]:
first_date_cand = pd.date_range("2022-01-01", "2022-02-01")
score_G_raw = []
score_I_raw = []

raw_i_Q_avg_q = []
raw_t_T_avg_q = []
raw_t_Q_avg_q = []

clf = LocalOutlierFactor(n_neighbors=2)
days = [7, 14, 21, 30]
weekdays = df["weekday"].unique()

for a in range(len(days)):
    diff_day_1 = days[a]
    for b in range(len(days)):
        diff_day_2 = days[b]
        for h in range(len(users)):
            user = users[h]
            for i in range(0, 101):
                first_date = first_date_cand[
                    np.random.randint(0, len(first_date_cand), 1)
                ]
                last_date = first_date + dt.timedelta(days=diff_day_1)
                weekday = np.random.choice(weekdays)
                imposters = users.copy()
                imposters = imposters[~(imposters == user)]
                imposter = np.random.choice(imposters)
                t_Q_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == user)
                        & (df["state"] == "ON")
                        & (df["date"] > first_date[0] + dt.timedelta(seconds=1))
                        & (df["date"] < last_date[0])
                        & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                t_Q_O = t_Q_O.sort_values("time_to_num").reset_index(drop=True)
                q1 = t_Q_O.battery.quantile(0.25)
                q3 = t_Q_O.battery.quantile(0.75)
                t_Q_O = t_Q_O.query("@q1 < battery < @q3")

                t_T_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == user)
                        & (df["state"] == "ON")
                        & (df["date"] > (first_date[0] - dt.timedelta(days=diff_day_2)))
                        & (df["date"] < (first_date[0]))
                        & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                t_T_O = t_T_O.sort_values("time_to_num").reset_index(drop=True)
                q1 = t_T_O.battery.quantile(0.25)
                q3 = t_T_O.battery.quantile(0.75)
                t_T_O = t_T_O.query("@q1 < battery < @q3")

                i_Q_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == imposter)
                        & (df["state"] == "ON")
                        & (df["date"] > first_date[0] + dt.timedelta(seconds=1))
                        & (df["date"] < last_date[0])
                        & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                i_Q_O = i_Q_O.sort_values("time_to_num").reset_index(drop=True)
                q1 = i_Q_O.battery.quantile(0.25)
                q3 = i_Q_O.battery.quantile(0.75)
                i_Q_O = i_Q_O.query("@q1 < battery < @q3")

                raw_t_Q_q = []
                raw_t_T_q = []
                raw_i_Q_q = []
                for j in range(0, 24):
                    # print("{}:H".format(j))
                    raw_t_Q = t_Q_O.loc[
                        (t_Q_O["time_to_num"] > 3600 * j)
                        & (t_Q_O["time_to_num"] < 3600 * (j + 1))
                    ]["battery"]
                    if len(raw_t_Q) != 0:
                        raw_t_Q_q.append(raw_t_Q.values.mean())
                    else:
                        raw_t_Q_q.append(0)

                    raw_t_T = t_T_O.loc[
                        (t_T_O["time_to_num"] > 3600 * j)
                        & (t_T_O["time_to_num"] < 3600 * (j + 1))
                    ]["battery"]
                    if len(raw_t_T) != 0:
                        raw_t_T_q.append(raw_t_T.values.mean())
                    else:
                        raw_t_T_q.append(0)

                    raw_i_Q = i_Q_O.loc[
                        (i_Q_O["time_to_num"] > 3600 * j)
                        & (i_Q_O["time_to_num"] < 3600 * (j + 1))
                    ]["battery"]
                    if len(raw_i_Q) != 0:
                        raw_i_Q_q.append(raw_i_Q.values.mean())
                    else:
                        raw_i_Q_q.append(0)
                try:
                    hoge, foo, ddtw_1 = DDTW(raw_t_T_q, raw_t_Q_q)
                    score_G_raw.append(ddtw_1)
                    hoge, foo, ddtw_2 = DDTW(raw_t_T_q, raw_i_Q_q)
                    score_I_raw.append(ddtw_2)
                except:
                    pass
        grand_truth_raw = np.concatenate(
            (np.ones(len(score_G_raw)), np.zeros(len(score_I_raw)))
        )
        score_raw = np.concatenate((score_G_raw, score_I_raw))
        far, tpr, threshold = metrics.roc_curve(grand_truth_raw, score_raw)
        auc = metrics.auc(far, tpr)
        frr = 1.0 - tpr
        eer_raw = far[np.where((far - frr) < 0)[0][-1]]
        # print("Q13")
        print(eer_raw)
print("FIN")

0.5148514851485149
0.5297029702970297
0.5317656765676567
0.5222772277227723
0.5282178217821782
0.5307343234323433
0.535007072135785
0.5357363861386139
0.5298404840484049
0.5341584158415842
0.5361161116111611
0.5372318481848185
0.5368431073876618
0.5391619519094767
0.5412541254125413
0.5455600247524752
FIN


In [None]:
first_date_cand = pd.date_range("2022-01-01", "2022-02-01")
score_G_raw = []
score_I_raw = []

raw_i_Q_avg_q = []
raw_t_T_avg_q = []
raw_t_Q_avg_q = []

clf = LocalOutlierFactor(n_neighbors=2)
days = [7, 14, 21, 30]
weekdays = df["weekday"].unique()

for a in range(len(days)):
    diff_day_1 = days[a]
    for b in range(len(days)):
        diff_day_2 = days[b]
        for h in range(len(users)):
            user = users[h]
            for i in range(0, 101):
                first_date = first_date_cand[
                    np.random.randint(0, len(first_date_cand), 1)
                ]
                last_date = first_date + dt.timedelta(days=diff_day_1)
                weekday = np.random.choice(weekdays)
                imposters = users.copy()
                imposters = imposters[~(imposters == user)]
                imposter = np.random.choice(imposters)
                t_Q_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == user)
                        & (df["state"] == "ON")
                        & (df["date"] > first_date[0] + dt.timedelta(seconds=1))
                        & (df["date"] < last_date[0])
                        # & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                t_Q_O = t_Q_O.sort_values("time_to_num").reset_index(drop=True)
                t_Q = np.array(t_Q_O)
                t_T_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == user)
                        & (df["state"] == "ON")
                        & (df["date"] > (first_date[0] - dt.timedelta(days=diff_day_2)))
                        & (df["date"] < (first_date[0]))  # & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                t_T_O = t_T_O.sort_values("time_to_num").reset_index(drop=True)
                t_T = np.array(t_T_O)
                i_Q_O = pd.DataFrame(
                    df.loc[
                        (df["User"] == imposter)
                        & (df["state"] == "ON")
                        & (df["date"] > first_date[0] + dt.timedelta(seconds=1))
                        & (df["date"] < last_date[0])
                        # & (df["weekday"] == weekday)
                    ][["battery", "time_to_num"]].copy()
                )
                i_Q_O = i_Q_O.sort_values("time_to_num").reset_index(drop=True)
                i_Q = np.array(i_Q_O)
                raw_t_Q_q = []
                raw_t_T_q = []
                raw_i_Q_q = []
                for j in range(0, 24):
                    try:
                        pred = clf.fit_predict(t_Q)
                        raw_t_Q = t_Q_O.loc[
                            (t_Q_O["time_to_num"] > 3600 * j)
                            & (t_Q_O["time_to_num"] < 3600 * (j + 1))
                        ]["battery"]

                        raw_t_Q = pd.DataFrame(
                            [
                                t_Q[:, 0][np.where(pred > 0)],
                                t_Q[:, 1][np.where(pred > 0)],
                            ]
                        ).T
                        raw_t_Q = raw_t_Q.rename(
                            columns={0: "time_to_num", 1: "battery"}
                        )
                        # print(raw_t_Q)
                        raw_t_Q_q.append(raw_t_Q.values.mean())
                    except:
                        raw_t_Q_q.append(0)
                    print(raw_t_Q_q)

                    try:
                        pred = clf.fit_predict(t_T)
                        raw_t_T = t_T_O.loc[
                            (t_T_O["time_to_num"] > 3600 * j)
                            & (t_T_O["time_to_num"] < 3600 * (j + 1))
                        ]["battery"]
                        raw_t_T = pd.DataFrame(
                            [
                                t_T[:, 0][np.where(pred > 0)],
                                t_T[:, 1][np.where(pred > 0)],
                            ]
                        ).T
                        raw_t_T = raw_t_T.rename(
                            columns={0: "time_to_num", 1: "battery"}
                        )
                        # print(raw_t_T)
                        raw_t_T_q.append(raw_t_T.values.mean())
                    except:
                        raw_t_T_q.append(0)

                    print(raw_t_T_q)
                    try:
                        pred = clf.fit_predict(i_Q_O)
                        raw_i_Q = i_Q_O.loc[
                            (i_Q_O["time_to_num"] > 3600 * j)
                            & (i_Q_O["time_to_num"] < 3600 * (j + 1))
                        ]["battery"]
                        raw_i_Q = pd.DataFrame(
                            [
                                i_Q[:, 0][np.where(pred > 0)],
                                i_Q[:, 1][np.where(pred > 0)],
                            ]
                        ).T
                        raw_i_Q = raw_i_Q.rename(
                            columns={0: "time_to_num", 1: "battery"}
                        )
                        # print(raw_i_Q)
                        raw_i_Q_q.append(raw_i_Q.values.mean())
                    except:
                        raw_i_Q_q.append(0)
                    print(raw_i_Q_q)
                try:
                    hoge, foo, ddtw_1 = DDTW(raw_t_T_q, raw_t_Q_q)
                    score_G_raw.append(ddtw_1)
                    hoge, foo, ddtw_2 = DDTW(raw_t_T_q, raw_i_Q_q)
                    score_I_raw.append(ddtw_2)
                except:
                    pass
        grand_truth_raw = np.concatenate(
            (np.ones(len(score_G_raw)), np.zeros(len(score_I_raw)))
        )
        score_raw = np.concatenate((score_G_raw, score_I_raw))
        far, tpr, threshold = metrics.roc_curve(grand_truth_raw, score_raw)
        auc = metrics.auc(far, tpr)
        frr = 1.0 - tpr
        eer_raw = far[np.where((far - frr) < 0)[0][-1]]
        # print("lof")
        print(eer_raw)
print("FIN")