In [9]:
# Import components
import datetime as dt
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

plt.rcParams["font.family"] = "Hiragino Maru Gothic Pro"
plt.style.use("ggplot")  # グラフのスタイル
plt.rcParams["figure.figsize"] = [12, 9]  # グラフサイズ設定

df = pd.read_csv("./datasets/fixed_battery_log_2.csv")
df["date"] = pd.to_datetime(df["date"])
users = df["User"].unique()

In [None]:
target = ["time_to_num", "battery"]
cover_name = ["a", "b", "c", "d", "e", "f", "g", "h"]

for i in range(len(users)):
    user = users[i]
    x_range = np.arange(0, 86400, 3600)
    color = ["red", "green", "blue", "yellow"]
    labels = ["a", "b", "c", "d", "e", "f", "g", "h"]
    fig = plt.figure(figsize=(10, 3))

    first = 1
    last = 31

    ax1 = fig.add_subplot(311, xticks=x_range, ylim=(0, 10))
    ax2 = fig.add_subplot(312, xticks=x_range, ylim=(0, 10))

    ax1.hist(
        df.loc[
            (df["User"] == user)
            & (df["state"] == "ON")
            & (df["date"] > dt.datetime(2021, 12, first))
            & (df["date"] < dt.datetime(2021, 12, last))
        ][target[0]],
        bins=24,
        range=(0, 86400),
        ec="black",
        color=color[0],
        alpha=0.7,
    )

    ax2.hist(
        df.loc[
            (df["User"] == user)
            & (df["state"] == "ON")
            & (df["date"] > dt.datetime(2022, 1, first))
            & (df["date"] < dt.datetime(2022, 1, last))
        ][target[0]],
        bins=24,
        range=(0, 86400),
        ec="black",
        color=color[1],
        alpha=0.7,
    )

    fig.suptitle(cover_name[i])
    # fig.savefig("bs_2_results/2months_{}_simple_compare_time_to_num.jpg".format(user))
    fig.show()

In [None]:
target = ["time_to_num", "battery"]
cover_name = ["a", "b", "c", "d", "e", "f", "g", "h"]


for i in range(len(users)):
    user = users[i]
    x_range = np.arange(0, 105, 5)
    color = ["red", "green", "blue", "yellow"]
    labels = ["a", "b", "c", "d", "e", "f", "g", "h"]
    fig = plt.figure(figsize=(10, 3))

    first = 1
    last = 31

    ax1 = fig.add_subplot(311, xticks=x_range, ylim=(0, 20))
    ax2 = fig.add_subplot(312, xticks=x_range, ylim=(0, 20))

    ax1.hist(
        df.loc[
            (df["User"] == user)
            & (df["state"] == "ON")
            & (df["date"] > dt.datetime(2021, 12, first))
            & (df["date"] < dt.datetime(2021, 12, last))
        ][target[1]],
        bins=20,
        range=(0, 100),
        ec="black",
        color=color[0],
        alpha=0.7,
    )

    ax2.hist(
        df.loc[
            (df["User"] == user)
            & (df["state"] == "ON")
            & (df["date"] > dt.datetime(2022, 1, first))
            & (df["date"] < dt.datetime(2022, 1, last))
        ][target[1]],
        bins=20,
        range=(0, 100),
        ec="black",
        color=color[1],
        alpha=0.7,
    )

    fig.suptitle(cover_name[i])
    # fig.savefig("bs_2_results/2months_{}_simple_compare_BL.jpg".format(user))
    fig.show()

In [4]:
def Frequency_Distribution(data, bins, class_width=None):
    data = np.asarray(data)

    #     bins = np.arange(0, 110, 10)
    hist = np.histogram(data, bins)[0]
    cumsum = hist.cumsum()

    return pd.DataFrame(
        {
            "階級値": (bins[1:] + bins[:-1]) / 2,
            "度数": hist,
            "累積度数": cumsum,
            "相対度数": hist / cumsum[-1],
            "累積相対度数": cumsum / cumsum[-1],
        },
        index=pd.Index(
            [f"{bins[i]}以上{bins[i+1]}未満" for i in range(hist.size)], name="階級"
        ),
    )

In [6]:
diff_list = {}


def lookup(dict, x):
    for k, v in dict.items():
        if x == v:
            return k


for j in range(len(users)):
    target = users[j]
    t_T = pd.DataFrame(
        df.loc[
            (df["User"] == target)
            & (df["state"] == "ON")
            & (df["date"] > dt.datetime(2021, 12, first))
            & (df["date"] < dt.datetime(2021, 12, last))
        ]["battery"].copy()
    )
    t_T = t_T.set_index(np.arange(0, len(t_T)))
    t_T = Frequency_Distribution(t_T, np.arange(0, 105, 5))

    diff_user_all = []

    for i in range(len(users)):
        c_user = users[i]
        c_T = pd.DataFrame(
            df.loc[
                (df["User"] == c_user)
                & (df["state"] == "ON")
                & (df["date"] > dt.datetime(2022, 1, first))
                & (df["date"] < dt.datetime(2022, 1, last))
            ]["battery"].copy()
        )
        c_T = c_T.set_index(np.arange(0, len(c_T)))
        c_T = Frequency_Distribution(c_T, np.arange(0, 105, 5))
        diff = 0

        for g in range(len(t_T)):
            diff += (t_T["度数"][g] - c_T["度数"][g]) ** 2
        diff = 1 / (math.sqrt(diff) + 1)
        names = target + ":" + c_user
        diff_user_all.append(diff)
        diff_list.update({names: diff})
        # print("%-8s : %-8s → %.4f" % (target, c_user, diff))

    print(
        "MAX_SIM: "
        + "%-17s : %.4f" % (lookup(diff_list, max(diff_user_all)), max(diff_user_all))
    )

MAX_SIM: Narita:Shunya     : 0.0753
MAX_SIM: Yusuke:Yusuke     : 0.1035
MAX_SIM: Ayumi:Ayumi       : 0.0777
MAX_SIM: Daiki:Ayumi       : 0.0595
MAX_SIM: Shunya:Shunya     : 0.0710
MAX_SIM: Hayate:Hayate     : 0.0790
MAX_SIM: Moriyama:Moriyama : 0.0708
MAX_SIM: Isshin:Isshin     : 0.0860


In [7]:
diff_list = {}


def lookup(dict, x):
    for k, v in dict.items():
        if x == v:
            return k


for j in range(len(users)):
    target = users[j]
    t_T = pd.DataFrame(
        df.loc[
            (df["User"] == target)
            & (df["state"] == "ON")
            & (df["date"] > dt.datetime(2021, 12, first))
            & (df["date"] < dt.datetime(2021, 12, last))
        ]["time_to_num"].copy()
    )
    t_T = t_T.set_index(np.arange(0, len(t_T)))
    t_T = Frequency_Distribution(t_T, np.arange(0, 86400, 3600))

    diff_user_all = []

    for i in range(len(users)):
        c_user = users[i]
        c_T = pd.DataFrame(
            df.loc[
                (df["User"] == c_user)
                & (df["state"] == "ON")
                & (df["date"] > dt.datetime(2022, 1, first))
                & (df["date"] < dt.datetime(2022, 1, last))
            ]["time_to_num"].copy()
        )
        c_T = c_T.set_index(np.arange(0, len(c_T)))
        c_T = Frequency_Distribution(c_T, np.arange(0, 86400, 3600))
        diff = 0

        for g in range(len(t_T)):
            diff += (t_T["度数"][g] - c_T["度数"][g]) ** 2
        diff = 1 / (math.sqrt(diff) + 1)
        names = target + ":" + c_user
        diff_user_all.append(diff)
        diff_list.update({names: diff})
        # print("%-8s : %-8s → %.4f" % (target, c_user, diff))
    # if max(diff_user_all) in diff_list.values():
    print(
        "MAX_SIM: "
        + "%-17s : %.4f" % (lookup(diff_list, max(diff_user_all)), max(diff_user_all))
    )

MAX_SIM: Narita:Hayate     : 0.0767
MAX_SIM: Yusuke:Yusuke     : 0.0853
MAX_SIM: Ayumi:Ayumi       : 0.1029
MAX_SIM: Daiki:Yusuke      : 0.0612
MAX_SIM: Shunya:Shunya     : 0.0979
MAX_SIM: Hayate:Hayate     : 0.1011
MAX_SIM: Ayumi:Narita      : 0.0676
MAX_SIM: Isshin:Yusuke     : 0.0812
