In [None]:
from itertools import islice
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import os
from tqdm import tqdm 
import sys
import datetime
from scipy.optimize import curve_fit
from scipy import stats
import seaborn as sns

# When the kernel starts inside a directory named "notebook", move up one level
# so the working directory matches the one the .py scripts use.
from pathlib import Path
if Path.cwd().name == "notebook":
    os.chdir("..")

# Use Meiryo as Matplotlib's default font (a Windows font; presumably chosen so
# that the Japanese labels used in this notebook render correctly).
plt.rcParams['font.family'] = 'Meiryo'


# Make the project packages importable.
# NOTE(review): this appends the directory TWO levels above the current working
# directory (dirname applied twice), not the direct parent — confirm this is the
# intended location of `utils` and `RS_filliing_rate`.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))
from utils.point_history_utils import open_point_history_per_shop, aggregate_date
from RS_filliing_rate.RS_filling_time_detecter import extract_low_recycling_days, extract_high_recycling_days


# Display settings: show up to 500 rows / 500 columns before pandas truncates output.
pd.set_option('display.max_rows', 500)
pd.set_option('display.min_rows', 500)
pd.set_option('display.max_columns', 500)

In [None]:
def calc_filling_rate(df, max_filling_hour, kg_threshold, full_capacity_kg=1700):
    """
    Compute a running 'filling_rate' for every row of a shop's point history.

    Steps:
      1. Take the rows returned by extract_high_recycling_days(df, kg_threshold)
         and flag (filling_rate = 1) those whose 'interval_compared_to_next'
         exceeds max_filling_hour. A flag is carried forward to the following
         row when that row is on the same calendar date at 20:00 or later.
      2. Take the rows returned by extract_high_recycling_days(df, full_capacity_kg)
         and flag the last row of every calendar date among them.
      3. Aggregate with aggregate_date(). For each date whose aggregated
         'filling_rate' equals 1, the aggregated 'amount_kg' of that date is the
         denominator; for every other date the denominator is full_capacity_kg.
         Each row then receives the running per-day sum of 'amount_kg'
         ('total_amount_kg_per_day') and that sum divided by the denominator
         ('filling_rate').

    Fixes compared with the previous version:
      * kg_threshold is honoured (it used to be overwritten with 1300).
      * "same day" is decided on the full calendar date, not on the day of month.
      * the last row of the final full-capacity day is flagged too.
      * full-capacity rows are matched to the merged frame through 'use_date'
        instead of through index labels of a differently indexed frame.

    args:
        df: dataframe with at least 'use_date' (datetime-like), 'amount_kg' and
            '年月日' columns. Rows are assumed to be in chronological order and
            'use_date' is assumed to be unique per row — TODO confirm with the
            loader.
        max_filling_hour: float, threshold for 'interval_compared_to_next'
        kg_threshold: float, threshold passed to extract_high_recycling_days
            for step 1
        full_capacity_kg: float, threshold passed to extract_high_recycling_days
            for step 2 and default denominator in step 3 (default 1700)
    return:
        df2: dataframe, df left-merged with the flags and extended with
            'total_amount_kg_per_day' and 'filling_rate'
    """
    # Work on a copy so the flag column never writes into a view of df.
    df_high = extract_high_recycling_days(df, kg_threshold).copy()
    if 'filling_rate' not in df_high.columns:
        df_high['filling_rate'] = np.nan
    df_high.loc[df_high['interval_compared_to_next'] > max_filling_hour, 'filling_rate'] = 1

    # Carry a flag forward to the next row when that row is on the same calendar
    # date and at 20:00 or later. Rows are visited in order so a flag can chain.
    labels = df_high.index
    for current_label, next_label in zip(labels[:-1], labels[1:]):
        if df_high.loc[current_label, 'filling_rate'] == 1:
            current_ts = df_high.loc[current_label, 'use_date']
            next_ts = df_high.loc[next_label, 'use_date']
            if current_ts.date() == next_ts.date() and next_ts.hour >= 20:
                df_high.loc[next_label, 'filling_rate'] = 1
    df2 = pd.merge(df, df_high[['use_date', 'filling_rate']], on='use_date', how='left')

    # On full-capacity days, flag the last row of each calendar date. Comparing
    # with the shifted series also marks the very last row (its "next" is NaT).
    df_full = extract_high_recycling_days(df, full_capacity_kg)
    full_dates = pd.to_datetime(df_full['use_date']).dt.normalize()
    is_last_row_of_day = (full_dates != full_dates.shift(-1)).to_numpy()
    last_use_dates = df_full.loc[is_last_row_of_day, 'use_date']
    # df2 comes out of merge with a fresh RangeIndex, so match on 'use_date'.
    df2.loc[df2['use_date'].isin(last_use_dates), 'filling_rate'] = 1

    # aggregate_date is expected to yield one row per '年月日' with 'amount_kg'
    # and 'filling_rate' columns — TODO confirm against its implementation.
    aggregate_df = aggregate_date(df2)
    for date, day_total_kg, day_flag in zip(aggregate_df['年月日'], aggregate_df['amount_kg'], aggregate_df['filling_rate']):
        # A flagged day is treated as having reached 100 % at its own total;
        # any other day is measured against the nominal capacity.
        capacity_kg = day_total_kg if day_flag == 1.0 else full_capacity_kg
        day_rows = df2['年月日'] == date
        # skipna=False keeps the original accumulation semantics (NaN propagates).
        running_kg = df2.loc[day_rows, 'amount_kg'].cumsum(skipna=False)
        df2.loc[day_rows, 'total_amount_kg_per_day'] = running_kg
        df2.loc[day_rows, 'filling_rate'] = running_kg / capacity_kg

    return df2


In [None]:
# Load the shop list from CSV.
df_shop_list = pd.read_csv('data/input/shop_list.csv', encoding="utf-8")
# The loop breaks on its first iteration, so afterwards `super`, `shop_name_1`
# and `max_filling_hour` hold the values of the FIRST row of df_shop_list
# (they stay undefined if the CSV has no rows). Later cells read these names.
# NOTE(review): `super` shadows the Python builtin of the same name.
for super, shop_name_1, max_filling_hour in zip(df_shop_list['super'], df_shop_list['shop_name_1'], df_shop_list['max_filling_hour']):
    break
# Disabled code below: it would copy location fields from each shop's point
# history into df_shop_list.
#     df_temp = open_point_history_per_shop(super, shop_name_1)
#     # Add the prefectures, municipality, address, store_latitude and store_longitude columns to the df_shop_list row matching super and shop_name_1
#     df_shop_list.loc[(df_shop_list['super'] == super) & (df_shop_list['shop_name_1'] == shop_name_1), 'prefectures'] = df_temp['prefectures'].unique()[0]
#     df_shop_list.loc[(df_shop_list['super'] == super) & (df_shop_list['shop_name_1'] == shop_name_1), 'municipality'] = df_temp['municipality'].unique()[0]
#     df_shop_list.loc[(df_shop_list['super'] == super) & (df_shop_list['shop_name_1'] == shop_name_1), 'address'] = df_temp['address'].unique()[0]
#     df_shop_list.loc[(df_shop_list['super'] == super) & (df_shop_list['shop_name_1'] == shop_name_1), 'store_latitude'] = df_temp['store_latitude'].unique()[0]
#     df_shop_list.loc[(df_shop_list['super'] == super) & (df_shop_list['shop_name_1'] == shop_name_1), 'store_longitude'] = df_temp['store_longitude'].unique()[0]


In [None]:
# Load the point history of the shop held in `super` / `shop_name_1`, attach
# filling rates, aggregate with aggregate_date() and show the mean amount_kg.
df = open_point_history_per_shop(super, shop_name_1)
df = calc_filling_rate(df, max_filling_hour, kg_threshold=1300)
aggregated_df = aggregate_date(df)
aggregated_df['amount_kg'].mean()
#aggregated_df.loc[aggregated_df['年月日'].dt.year >= 2023,'amount_kg'].mean()

In [None]:
# Line plot of the aggregated amount_kg of the first shop (x axis = row index).
plt.plot(aggregated_df['amount_kg'])

In [None]:
# Second shop, processed the same way as the first one.
df2 = open_point_history_per_shop('ヨークベニマル', '南中山店')
# Use this shop's own max_filling_hour from the shop list. The global
# `max_filling_hour` belongs to the first row of df_shop_list, which is not
# necessarily this shop; it is only kept as a fallback when the shop is not
# listed, which reproduces the previous behaviour.
shop2_rows = df_shop_list[
    (df_shop_list['super'] == 'ヨークベニマル') & (df_shop_list['shop_name_1'] == '南中山店')
]
max_filling_hour_2 = shop2_rows['max_filling_hour'].iloc[0] if len(shop2_rows) else max_filling_hour
df2 = calc_filling_rate(df2, max_filling_hour_2, kg_threshold=1300)
aggregated_df2 = aggregate_date(df2)
aggregated_df2['amount_kg'].mean()
#aggregated_df2.loc[aggregated_df2['年月日'].dt.year == 2022,'amount_kg'].mean()

In [None]:
# Stack both shops' aggregates into one frame with a fresh 0..n-1 index.
aggregated_df3 = pd.concat([aggregated_df, aggregated_df2], ignore_index=True)

In [None]:
# Peek at the first two rows of the combined frame.
aggregated_df3.head(2)

In [None]:
# Line plot of the aggregated amount_kg of the second shop (x axis = row index).
plt.plot(aggregated_df2['amount_kg'])

In [None]:

# Inspect rows at positions 140-144 of the first shop's frame.
df.iloc[140:145]

In [None]:
# Rebuild the aggregate of the first shop and show its first five rows.
aggregated_df = aggregate_date(df)
aggregated_df.head(5)

In [None]:
# Mean of amount_kg over the rows of aggregated_df.
aggregated_df.loc[:, 'amount_kg'].mean()

In [None]:
# Replace missing filling_rate values with 0. The result is assigned back to
# the column: calling fillna(..., inplace=True) on a column selection is
# chained assignment, which raises a FutureWarning on recent pandas and leaves
# aggregated_df unchanged once Copy-on-Write is enabled.
aggregated_df['filling_rate'] = aggregated_df['filling_rate'].fillna(0)

In [None]:
# Time series of filling_rate against the date column.
fig, ax = plt.subplots()
ax.plot(aggregated_df['年月日'], aggregated_df['filling_rate'])
ax.set(xlabel='年月日', ylabel='filling_rate')

# One major x tick per year, labelled with the four-digit year.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
ax.xaxis.set_major_locator(mdates.YearLocator())

# Spacing between subplots.
fig.subplots_adjust(hspace=0.5, wspace=0.3)

# Tilt the x tick labels so they stay readable.
plt.setp(ax.get_xmajorticklabels(), rotation=60)

In [None]:
# Correlation heatmap of the aggregated numeric columns plus weather indicators.

# One-hot encode the '天気' (weather) column as 0/1 integer columns.
weather_dummies = pd.get_dummies(aggregated_df['天気']).astype(int)

# Append the indicator columns to the aggregated frame (column-wise concat).
df_with_dummies = pd.concat([aggregated_df, weather_dummies], axis=1)

# Keep numeric columns only; this drops string columns.
numeric_df = df_with_dummies.select_dtypes(include=[np.number])

# Drop columns with a single distinct value (their correlation is undefined).
non_constant_columns = numeric_df.loc[:, numeric_df.nunique() > 1]

# Pairwise correlation matrix (pandas default method).
corr = non_constant_columns.corr()

# Draw the matrix as an annotated heatmap with a blue-white-red colormap.
plt.figure(figsize=(12, 10))
sns.heatmap(corr, annot=True, fmt=".1f",cmap="bwr")
plt.show()

In [None]:
# First 30 rows of the aggregated frame.
aggregated_df.head(30)

In [None]:
# df_high is only a local variable inside calc_filling_rate, so this cell used
# to raise NameError on a fresh kernel. Rebuild it at notebook scope with the
# same kg_threshold (1300) that is passed to calc_filling_rate in this notebook.
df_high = extract_high_recycling_days(df, 1300)
# Shape of the rows whose 'interval_compared_to_next' exceeds max_filling_hour.
df_high[df_high['interval_compared_to_next'] > max_filling_hour].shape

In [None]:
# use_date of the df_high rows whose 'interval_compared_to_next' exceeds max_filling_hour.
df_high.loc[df_high['interval_compared_to_next'] > max_filling_hour, 'use_date']

In [None]:
# use_date of the df_high rows whose 'interval_compared_to_previous' exceeds max_filling_hour.
df_high.loc[df_high['interval_compared_to_previous'] > max_filling_hour, 'use_date']

In [None]:
# NOTE(review): df_maxfilling_date is never assigned in the visible notebook,
# so this cell raises NameError on Restart & Run All. It likely relies on
# leftover kernel state from a deleted cell — define it or remove this cell.
df_maxfilling_date