In [29]:
from pathlib import Path
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import japanize_matplotlib
# Global plotting defaults for the notebook: ggplot theme and a 12x9 inch canvas.
plt.style.use('ggplot')
plt.rcParams.update({'figure.figsize': [12, 9]})

In [30]:
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')

# Drop vegetables from the training data that never appear in the test data.
kinds = test_df['kind'].unique()
train_df = train_df[train_df['kind'].isin(kinds)]

# Build a date x kind table of `mode_price` for the analysis window.
# `date` is compared as a YYYYMMDD number; duplicate (date, kind) pairs are
# averaged by pivot_table's default aggregation.
# Windows tried previously: 20181101-20221031 and 20160101-20211231.
vis_df = (
    train_df
    .query('20060101 <= date <= 20211231')
    .reset_index(drop=True)
    .pivot_table(index='date', columns='kind', values='mode_price')
    .reset_index()
)
vis_df.head()

kind,date,かぼちゃ,きゅうり,さといも,たまねぎ,だいこん,なましいたけ,にんじん,ねぎ,はくさい,ほうれんそう,キャベツ,トマト,ピーマン,ミニトマト,レタス
0,20060105,,1680.0,5250.0,1916.25,1233.75,163.0,1890.0,2100.0,945.0,179.0,1995.0,1312.5,64.25,,5040.0
1,20060106,,1785.0,5250.0,1785.0,1155.0,147.5,1207.5,2205.0,892.5,163.0,1890.0,1155.0,65.5,,6090.0
2,20060107,,1785.0,5250.0,2257.5,1155.0,131.5,1680.0,1890.0,892.5,168.5,1890.0,1155.0,66.75,,6195.0
3,20060110,,2100.0,5250.0,1863.75,1260.0,116.0,1680.0,1890.0,1155.0,168.0,1785.0,1050.0,74.0,,5355.0
4,20060111,,2310.0,1995.0,1680.0,1260.0,110.5,1575.0,1890.0,1050.0,137.0,1680.0,1260.0,74.0,,3780.0


In [31]:
# Aggregate the daily date x kind price table into monthly mean prices.
#
# Input : `vis_df` with an integer YYYYMMDD `date` column plus one price column per kind.
# Output: `vis_df` re-indexed by datetime (with helper `year` / `month` columns) and
#         `vis_df_month`, one row per (year, month) labelled "<year>_<month>".
#
# NOTE: this cell overwrites `vis_df` and drops its `date` column, so it cannot be
# re-run on its own; re-run the cell that builds `vis_df` first.
vis_df = vis_df.copy()

# Split the YYYYMMDD integer with vectorised arithmetic (no per-row Python calls).
vis_df['year'] = vis_df['date'] // 10000
vis_df['month'] = vis_df['date'] // 100 % 100

vis_df.index = pd.to_datetime(vis_df['date'], format='%Y%m%d')
vis_df = vis_df.drop(columns='date')

# Mean price per kind for every calendar month present in the data.
vis_df_month = vis_df.groupby(['year', 'month']).mean()

# Flatten the (year, month) MultiIndex into string labels such as "2006_1".
vis_df_month.index = [f'{year}_{month}' for year, month in vis_df_month.index]

# Months in which a kind has no price at all are filled with 0.
# NOTE(review): those zeros are then treated as real prices by anything that
# consumes `vis_df_month` -- confirm this is intended rather than e.g. interpolation.
vis_df_month = vis_df_month.fillna(0)
vis_df_month

kind,かぼちゃ,きゅうり,さといも,たまねぎ,だいこん,なましいたけ,にんじん,ねぎ,はくさい,ほうれんそう,キャベツ,トマト,ピーマン,ミニトマト,レタス
2006_1,0.000000,2598.750000,3811.500000,1874.250000,1164.187500,117.375000,1417.500000,1548.750000,1086.750000,126.462500,1428.000000,1126.125000,96.400000,0.000000,3559.500000
2006_2,0.000000,2225.000000,4030.000000,1991.250000,1342.500000,105.523810,1486.250000,1705.000000,965.000000,90.130952,975.000000,1195.000000,120.214286,0.000000,2255.000000
2006_3,3055.937500,2021.250000,4764.375000,1977.500000,1128.750000,96.583333,1520.312500,1308.125000,990.937500,71.239583,905.625000,1502.812500,111.687500,0.000000,1415.312500
2006_4,3445.909091,1412.727273,0.000000,2068.852273,987.954545,106.636364,2135.795455,1405.568182,1260.000000,113.159091,1384.090909,1517.727273,63.681818,0.000000,1737.272727
2006_5,3620.217391,1150.434783,0.000000,1781.347826,939.293478,106.804348,1862.608696,1641.195652,1291.043478,82.065217,808.043478,1027.173913,47.173913,0.000000,1301.086957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021_8,1738.800000,1765.800000,0.000000,2959.200000,958.500000,0.000000,1145.475000,1115.100000,1225.800000,165.700000,869.400000,1323.000000,69.700000,123.875000,1368.900000
2021_9,1486.350000,1976.400000,3849.882353,3078.000000,1090.800000,113.625000,940.950000,1532.250000,2143.800000,167.950000,1144.800000,2008.800000,84.900000,211.450000,2748.600000
2021_10,1350.000000,1497.272727,3107.454545,3465.818182,945.000000,112.977273,697.704545,1485.000000,866.454545,101.500000,800.181818,1580.727273,57.363636,137.500000,1098.409091
2021_11,1514.454545,1512.000000,2577.272727,4899.272727,648.000000,106.500000,967.090909,967.090909,589.090909,76.090909,687.272727,2160.000000,61.295455,144.068182,1281.272727


In [32]:
from statsmodels.tsa.seasonal import STL

# Length of one seasonal cycle in rows.
# Assumes `vis_df_month` holds one row per calendar month -- TODO confirm no months are missing.
STL_PERIOD = 12
# Month (string suffix of the "<year>_<month>" index label) whose residuals are inspected.
TARGET_MONTH = '11'

# For every vegetable kind, run a robust STL decomposition of its monthly mean price
# and show the residual component for TARGET_MONTH of each year.
for col in vis_df_month.columns:
    print(col)

    # STL decomposition: observed = trend + seasonal + resid.
    stl_result = STL(vis_df_month[col], period=STL_PERIOD, robust=True).fit()

    # Only the residuals are needed here; tag each row with its month so it can be filtered.
    resid_df = stl_result.resid.to_frame()
    resid_df['month'] = resid_df.index.str.split('_').str[-1]

    display(resid_df[resid_df['month'] == TARGET_MONTH])

かぼちゃ


Unnamed: 0,resid,month
2006_1,-134.779957,1
2006_2,-123.372027,2
2006_3,54.022666,3
2006_4,299.947496,4
2006_5,-14.083547,5
...,...,...
2021_8,-91.346194,8
2021_9,105.557909,9
2021_10,-107.394919,10
2021_11,-24.941102,11


きゅうり


Unnamed: 0,resid,month
2006_1,-103.921038,1
2006_2,72.949644,2
2006_3,132.000026,3
2006_4,-8.674850,4
2006_5,34.247461,5
...,...,...
2021_8,-25.197475,8
2021_9,103.282919,9
2021_10,-55.342071,10
2021_11,62.359005,11


さといも


Unnamed: 0,resid,month
2006_1,1718.522048,1
2006_2,718.393479,2
2006_3,3020.678579,3
2006_4,2.553585,4
2006_5,0.114719,5
...,...,...
2021_8,-0.465399,8
2021_9,6.047258,9
2021_10,1.714499,10
2021_11,-5.925501,11


たまねぎ


Unnamed: 0,resid,month
2006_1,-36.233503,1
2006_2,-67.952199,2
2006_3,-58.556788,3
2006_4,164.047879,4
2006_5,131.452216,5
...,...,...
2021_8,1.522197,8
2021_9,-45.789378,9
2021_10,134.352447,10
2021_11,-5.525659,11


だいこん


Unnamed: 0,resid,month
2006_1,104.170765,1
2006_2,124.535833,2
2006_3,2.482221,3
2006_4,-46.514591,4
2006_5,-70.687264,5
...,...,...
2021_8,-17.500188,8
2021_9,36.099561,9
2021_10,-37.925331,10
2021_11,-1.161849,11


なましいたけ


Unnamed: 0,resid,month
2006_1,-0.119626,1
2006_2,-1.376480,2
2006_3,0.039800,3
2006_4,-0.932739,4
2006_5,0.181403,5
...,...,...
2021_8,-3.010996,8
2021_9,4.327211,9
2021_10,1.772278,10
2021_11,0.934499,11


にんじん


Unnamed: 0,resid,month
2006_1,-62.518608,1
2006_2,9.863898,2
2006_3,20.792618,3
2006_4,59.879701,4
2006_5,-209.119262,5
...,...,...
2021_8,72.344276,8
2021_9,16.009712,9
2021_10,-177.455945,10
2021_11,-10.304174,11


ねぎ


Unnamed: 0,resid,month
2006_1,25.805722,1
2006_2,-35.171785,2
2006_3,71.345362,3
2006_4,-20.367660,4
2006_5,90.438427,5
...,...,...
2021_8,16.210000,8
2021_9,29.467480,9
2021_10,4.998034,10
2021_11,-83.844733,11


はくさい


Unnamed: 0,resid,month
2006_1,9.675575,1
2006_2,30.380690,2
2006_3,-111.778848,3
2006_4,85.031315,4
2006_5,83.788233,5
...,...,...
2021_8,35.122529,8
2021_9,149.321727,9
2021_10,11.014825,10
2021_11,6.486996,11


ほうれんそう


Unnamed: 0,resid,month
2006_1,10.765658,1
2006_2,1.267569,2
2006_3,-0.411952,3
2006_4,1.811375,4
2006_5,-7.403011,5
...,...,...
2021_8,1.038748,8
2021_9,-1.103573,9
2021_10,-5.533029,10
2021_11,1.092357,11


キャベツ


Unnamed: 0,resid,month
2006_1,-11.026732,1
2006_2,53.748050,2
2006_3,-53.072079,3
2006_4,141.848438,4
2006_5,-66.907449,5
...,...,...
2021_8,-57.958217,8
2021_9,-8.868823,9
2021_10,-24.144975,10
2021_11,92.161736,11


トマト


Unnamed: 0,resid,month
2006_1,21.410957,1
2006_2,-105.902032,2
2006_3,-60.204263,3
2006_4,35.531741,4
2006_5,24.207756,5
...,...,...
2021_8,-99.520553,8
2021_9,51.822902,9
2021_10,-56.943071,10
2021_11,-71.646329,11


ピーマン


Unnamed: 0,resid,month
2006_1,-44.520225,1
2006_2,-1.636615,2
2006_3,2.730671,3
2006_4,2.020980,4
2006_5,-2.873805,5
...,...,...
2021_8,-2.427415,8
2021_9,1.849702,9
2021_10,-0.818571,10
2021_11,0.645015,11


ミニトマト


Unnamed: 0,resid,month
2006_1,0.000723,1
2006_2,0.000679,2
2006_3,0.000580,3
2006_4,0.000429,4
2006_5,0.000218,5
...,...,...
2021_8,67.109371,8
2021_9,68.457256,9
2021_10,2.271296,10
2021_11,-22.772062,11


レタス


Unnamed: 0,resid,month
2006_1,1861.678881,1
2006_2,148.314226,2
2006_3,-113.064488,3
2006_4,-16.309967,4
2006_5,22.374024,5
...,...,...
2021_8,-8.067869,8
2021_9,150.682818,9
2021_10,-56.076669,10
2021_11,67.377159,11
