<a href="https://colab.research.google.com/github/ShotaSasaki-HU/ASTRO-CAMP-2025/blob/main/sarimax.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ライブラリ

In [9]:
import pandas as pd
import numpy as np
import statsmodels
import datetime as dt

# データ準備

## 目的変数

### 5年間の時系列化

In [76]:
all_years = []

for year in range(2020, 2024 + 1):
    # セジロウンカ
    path = f"https://github.com/ShotaSasaki-HU/ASTRO-CAMP-2025/raw/refs/heads/main/data/%E9%A6%99%E5%B7%9D%E7%9C%8C%E8%BE%B2%E6%A5%AD%E8%A9%A6%E9%A8%93%E5%A0%B4%E7%97%85%E5%AE%B3%E8%99%AB%E9%98%B2%E9%99%A4%E6%89%80_%E6%A5%AD%E5%8B%99%E5%B9%B4%E5%A0%B1_%E5%90%84%E3%82%A4%E3%83%8D%E3%82%A6%E3%83%B3%E3%82%AB%E9%A1%9E%E3%81%AE%E6%97%A5%E5%88%A5%E8%AA%98%E6%AE%BA%E6%95%B0/%E3%82%BB%E3%82%B8%E3%83%AD%E3%82%A6%E3%83%B3%E3%82%AB%E6%97%A5%E5%88%A5%E8%AA%98%E6%AE%BA%E6%95%B0_{year}.csv"
    df_year = pd.read_csv(path, encoding="utf-8", header=0)
    df_year.drop('Unnamed: 0', axis=1, inplace=True)

    # 「欠測」を0と見做す
    df_year.replace('欠測', 0, inplace=True)

    # メスとオスの和
    for month in range(4, 11 + 1):
        df_year[f'{month}月'] = df_year[f'{month}月_メス'].astype('float64') + df_year[f'{month}月_オス'].astype('float64') # NaNを扱うためにfloat
        df_year.drop([f'{month}月_メス', f'{month}月_オス'], axis=1, inplace=True)

    # 縦持ちへ変換
    df_year = df_year.melt(
        id_vars=['日'], # そのまま残る列
        value_vars=['4月', '5月', '6月', '7月', '8月', '9月', '10月', '11月'], # 値となる列
        var_name='月', # 新しい列名
        value_name='value'
        )

    # 「西向く侍」のNaN値を排除
    df_year.dropna(inplace=True)

    # '月'を数値化
    df_year['月'] = df_year['月'].str.replace('月', '').astype(int)

    # datetime生成
    df_year['date'] = pd.to_datetime({
        'year': year,
        'month': df_year['月'],
        'day': df_year['日'].astype(int)
    }, errors='raise')
    df_year = df_year.sort_values('date').reset_index(drop=True) # 時系列順にソート

    df_year = df_year[['date', 'value']]
    all_years.append(df_year)

# 5年分を結合
df_resp = pd.concat(all_years, ignore_index=True)
df_resp['value'] = df_resp['value'].astype(int)

print(df_resp)


           date  value
0    2020-04-01      0
1    2020-04-02      0
2    2020-04-03      0
3    2020-04-04      0
4    2020-04-05      0
...         ...    ...
1215 2024-11-26      0
1216 2024-11-27      0
1217 2024-11-28      0
1218 2024-11-29      0
1219 2024-11-30      0

[1220 rows x 2 columns]


### 月半旬（5日ごと）で分割

In [77]:
df_resp['year'] = df_resp['date'].dt.year
df_resp['month'] = df_resp['date'].dt.month
df_resp['day'] = df_resp['date'].dt.day

df_resp['hanjun'] = pd.cut(
    df_resp['day'],
    bins=[0, 5, 10, 15, 20, 25, 31], # 区切り
    labels=[1, 2, 3, 4, 5, 6]        # 月半旬番号
)

df_resp = df_resp.groupby(['year', 'month', 'hanjun'], as_index=False)['value'].sum()

df_resp


  df_resp = df_resp.groupby(['year', 'month', 'hanjun'], as_index=False)['value'].sum()


Unnamed: 0,year,month,hanjun,value
0,2020,4,1,0
1,2020,4,2,0
2,2020,4,3,0
3,2020,4,4,0
4,2020,4,5,0
...,...,...,...,...
235,2024,11,2,0
236,2024,11,3,0
237,2024,11,4,0
238,2024,11,5,0


## 説明変数

In [None]:
pass