In [None]:
import requests
import os
from datetime import datetime, timedelta
import pandas as pd
import json
import numpy as np


# When the kernel starts inside the "notebook" directory, move up one level
# so that relative paths resolve the same way as for the .py scripts.
from pathlib import Path
if Path.cwd().name == "notebook":
    os.chdir("..")


# Display settings: show up to 100 rows / columns when rendering frames.
for option_name in ('display.max_rows', 'display.min_rows', 'display.max_columns'):
    pd.set_option(option_name, 100)

天気データを結合

In [None]:
# Inner-join the station file (我孫子.csv) with data.csv (cloud cover, snowfall,
# sunshine duration, ...) on the date column '年月日' and write the result to data2.csv.
file = 'data/input/weather_data/我孫子.csv'
df1 = pd.read_csv(file, encoding="shift-jis")
df2 = pd.read_csv('data/input/weather_data/data.csv', encoding="cp932")


df_concat = pd.merge(df1, df2, on='年月日')
print(df_concat[:10])
# Use the daytime (06:00-18:00) weather summary as the '天気' column.
df_concat = df_concat.rename(columns={'天気概況(昼：06時～18時)': '天気'})
# Reduce the free-text summary to one of 晴 / 曇 / 雨 / 雪. When several appear,
# the left-most one wins (e.g. 雨曇晴 -> 雨).
weather_text = df_concat['天気']
df_concat['天気'] = weather_text.str.extract('(晴|曇|雨|雪)', expand=False)
# A description that contains none of the four labels (e.g. みぞれ) counts as 雨.
# Rows whose description is missing altogether stay NaN instead of being
# silently recorded as rain.
unmatched_text = df_concat['天気'].isnull() & ~weather_text.isnull()
df_concat.loc[unmatched_text, '天気'] = '雨'
df_concat = df_concat.drop(columns=['日照時間(時間).1', '天気概況(夜：18時～翌日06時)'])


df_concat.to_csv('data/input/weather_data/data2.csv', index=False, encoding="cp932")

In [None]:
# Preview the first ten merged rows.
df_concat.head(10)

In [None]:
# Load the city -> weather-file mapping (data/input/weather_data/CityToFileMapping.csv).
city_to_file_mapping = pd.read_csv("data/input/weather_data/CityToFileMapping.csv", encoding="utf-8")

# Read every mapped CSV from data/input/weather_data/, tag it with its
# prefecture / city / coordinates, and collect the frames so they can be
# concatenated once (concatenating inside the loop re-copies all rows
# accumulated so far on every iteration).
weather_frames = []
for index, row in city_to_file_mapping.iterrows():
    prefecture = row["県"]
    municipality = row["市"]
    file_name = row["ファイル名"]
    lat = row["lat"]
    lon = row["lon"]
    df_tmp = pd.read_csv(f"data/input/weather_data/{file_name}.csv", encoding="cp932")
    df_tmp['県'] = prefecture
    df_tmp['市'] = municipality
    df_tmp['lat'] = lat
    df_tmp['lon'] = lon
    # Some files name the date column '日付'; normalise it to '年月日'.
    if '日付' in df_tmp.columns:
        df_tmp = df_tmp.rename(columns={'日付': '年月日'})

    # Drop rows from the top until no row without a date remains.
    while df_tmp['年月日'].isnull().any():
        df_tmp = df_tmp[1:]

    weather_frames.append(df_tmp)

df_weather = pd.concat(weather_frames, axis=0)

# Remove the duplicated '.1' precipitation / snowfall columns when present.
df_weather = df_weather.drop(columns=['降水量の合計(mm).1', '降雪量合計(cm).1'], errors='ignore')

# Column order: date, prefecture, city, weather, then everything else.
leading_columns = ['年月日', '県', '市', '天気']
df_weather = df_weather[leading_columns + [col for col in df_weather.columns if col not in leading_columns]]

In [None]:
# Helper for parsing dates that arrive in more than one textual format.
def parse_date(date):
    """Convert ``date`` to a pandas timestamp.

    The value is first handed to ``pd.to_datetime`` without a format. If that
    raises ``ValueError`` the format '%Y年%m月%d日' is tried, and finally
    '%Y/%m/%d'. A ``ValueError`` from the last attempt propagates to the caller.
    """
    try:
        return pd.to_datetime(date)
    except ValueError:
        pass
    try:
        return pd.to_datetime(date, format='%Y年%m月%d日')
    except ValueError:
        pass
    return pd.to_datetime(date, format='%Y/%m/%d')

In [None]:
# Turn the textual missing-value markers found in the weather files into real NaN.
df_weather = df_weather.replace(['--', 'NaN', '///', 'nan'], np.nan)
# Target dtypes. Measurements use float32 rather than float16: half precision
# keeps only about three significant digits, so a pressure such as 1013.2 hPa
# would be stored as 1013.0.
column_types = {
    '県': str,
    '市': str,
    '天気': str,
    '平均気温(℃)': np.float32,
    '最高気温(℃)': np.float32,
    '最低気温(℃)': np.float32,
    '降水量の合計(mm)': np.float32,
    '平均風速(m/s)': np.float32,
    '平均湿度(％)': np.float32,
    '平均現地気圧(hPa)': np.float32,
    'lat': np.double,
    'lon': np.double,
    '平均雲量(10分比)': np.float32,
    '降雪量合計(cm)': np.float32,
    '日照時間(時間)': np.float32,
    '合計全天日射量(MJ/㎡)': np.float32,
}
# Dates come in several textual formats, so each value goes through parse_date;
# the outer to_datetime guarantees a datetime64 column.
df_weather['年月日'] = pd.to_datetime(df_weather['年月日'].apply(parse_date))
# Note: casting to str renders missing values as the literal text 'nan'.
df_weather = df_weather.astype(column_types)    # dtype conversion

In [None]:
# Map the textual missing-value markers (including the 'nan' text produced by
# casting to str) to real NaN in one pass.
missing_markers = ['--', 'NaN', '///', 'nan']
df_weather = df_weather.replace(missing_markers, np.nan)


In [None]:
# Preview the first ten weather rows.
df_weather.head(10)

不足データは近い地点のもので代替

In [None]:
# Number of rows per weather label in df_weather.
df_weather['天気'].value_counts()

In [None]:
# Rows whose weather label is the literal text "nan".
has_nan_text = df_weather['天気'].eq("nan")
df_weather.loc[has_nan_text]

In [None]:
# Within each (date, prefecture) group, fill the missing values of every listed
# column with the largest non-missing value found in that group.
fill_target_columns = [
    '天気',
    '平均気温(℃)',
    '最高気温(℃)',
    '最低気温(℃)',
    '降水量の合計(mm)',
    '日照時間(時間)',
    '降雪量合計(cm)',
    '平均風速(m/s)',
    '平均湿度(％)',
    '平均雲量(10分比)',
    '合計全天日射量(MJ/㎡)',
    '平均現地気圧(hPa)',
]
for column_name in fill_target_columns:
    df_weather[column_name] = (
        df_weather
        .groupby(['年月日', '県'])[column_name]
        .transform(lambda x: x.fillna(x.dropna().max()))
    )


In [None]:
# Number of rows per weather label, checked again after the group-wise fill.
df_weather['天気'].value_counts()

In [None]:
# Rows whose weather label is none of the four expected values (missing included).
df_weather[~df_weather["天気"].isin(["晴", "曇", "雨", "雪"])]

In [None]:
# Write the combined weather table to disk.
weather_csv_path = 'data/input/weather_data/weather.csv'
df_weather.to_csv(weather_csv_path, index=False, encoding="utf-8")

point_historyと結合

In [None]:
# Reload the combined weather table from disk.
weather_csv_path = 'data/input/weather_data/weather.csv'
df_weather = pd.read_csv(weather_csv_path, encoding="utf-8")

In [None]:
# Load the cleansed point history (per the original note it includes prefecture and city).
point_history_path = 'data/input/point_history_cleansing_2.csv'
df_point_history = pd.read_csv(point_history_path, encoding="utf-8")
df_point_history.head(3)

In [None]:
# Within each (date, prefecture) group, fill the missing values of every listed
# column with the largest non-missing value found in that group.
fill_target_columns = [
    '天気',
    '平均気温(℃)',
    '最高気温(℃)',
    '最低気温(℃)',
    '降水量の合計(mm)',
    '日照時間(時間)',
    '降雪量合計(cm)',
    '平均風速(m/s)',
    '平均湿度(％)',
    '平均雲量(10分比)',
    '合計全天日射量(MJ/㎡)',
    '平均現地気圧(hPa)',
]
for column_name in fill_target_columns:
    df_weather[column_name] = (
        df_weather
        .groupby(['年月日', '県'])[column_name]
        .transform(lambda x: x.fillna(x.dropna().max()))
    )


In [None]:
# Parse 'use_date' with pd.to_datetime, truncate the time of day, and store the
# result in 'use_date_2'. 'D' is the documented day alias for floor(); the
# lowercase spelling is not.
df_point_history['use_date_2'] = pd.to_datetime(df_point_history['use_date']).dt.floor('D')
df_point_history.head(3)

In [None]:
# Keep the municipality name up to and including its first '市' or '郡' suffix
# and drop whatever follows (ward, town, street, ...). At least one character
# is required before the suffix so that names that begin with one of these
# characters (e.g. 郡山市, 市川市) are not truncated to the bare '郡' / '市'.
df_point_history['municipality'] = df_point_history['municipality'].str.replace(
    r'^(.+?(?:市|郡)).*', r'\1', regex=True
)

In [None]:
# Manual corrections of municipality names. The row mask and the column are
# passed to a single .loc call so the assignment reaches df_point_history itself;
# chaining `.loc[mask].loc['municipality'] = ...` only writes to a temporary copy.
# NOTE(review): "群" / "郡山群" look like typos (possibly "郡" / "郡山市") — the
# literals are kept as written; confirm the intended values.
df_point_history.loc[df_point_history['municipality'] == "群", 'municipality'] = "郡山群"
df_point_history.loc[df_point_history['municipality'] == "塩竉市", 'municipality'] = "塩竈市"

In [None]:
# Distinct municipality names present in the point history.
unique_municipalities = pd.unique(df_point_history['municipality'])
unique_municipalities

In [None]:
# Both join keys must be datetime64 before merging.
df_point_history['use_date_2'] = pd.to_datetime(df_point_history['use_date_2'])
df_weather['年月日'] = pd.to_datetime(df_weather['年月日'])

# Left-join the weather onto the point history by (city, date); the weather
# table's prefecture / city columns are dropped after the join.
df_point_history_weather = df_point_history.merge(
    df_weather,
    how='left',
    left_on=['municipality', 'use_date_2'],
    right_on=['市', '年月日'],
).drop(columns=['県', '市'])

df_point_history_weather.head(3)

In [None]:
# Number of merged rows per weather label.
df_point_history_weather["天気"].value_counts()

In [None]:
# Map textual missing-value markers in df_point_history to real NaN.
# NOTE(review): this updates df_point_history only, not the merged frame.
point_history_missing_markers = ['--', 'NaN', 'nan']
df_point_history = df_point_history.replace(point_history_missing_markers, np.nan)

In [None]:
# Merged rows whose weather label is none of the four expected values (missing included).
a = df_point_history_weather[~df_point_history_weather["天気"].isin(["晴", "曇", "雨", "雪"])]

In [None]:
# (rows, columns) of `a`, the merged rows without one of the four weather labels.
a.shape

In [None]:
# Prefecture breakdown of the rows in `a`.
a["prefectures"].value_counts()

In [None]:
# Shop breakdown of the rows in `a`.
a["shop_name"].value_counts()

In [None]:
# Rows 100-119 (by position) of the merged rows without one of the four weather labels.
rows_without_label = df_point_history_weather[~df_point_history_weather["天気"].isin(["晴", "曇", "雨", "雪"])]
rows_without_label.iloc[100:120]

In [None]:
# Map textual missing-value markers in the merged frame to real NaN.
# NOTE(review): 'N' is kept from the original marker list — confirm it is a
# genuine missing-value marker and not a legitimate value in some column.
df_point_history_weather = df_point_history_weather.replace(['N', 'nan', 'NaN'], np.nan)
# Rows of one shop that do have a prefecture. `!= np.nan` is True for every
# value (NaN compares unequal to everything), so notna() is used, and both
# conditions are combined into a single mask aligned to the frame.
df_point_history_weather[
    (df_point_history_weather["shop_name"] == "スーパービッグ吉成店")
    & df_point_history_weather["prefectures"].notna()
]

In [None]:
# Write the point history with weather columns to disk.
point_history_weather_path = 'data/input/point_history_cleansing_weather.csv'
df_point_history_weather.to_csv(point_history_weather_path, index=False, encoding="utf-8")

試しに分析

In [None]:
# Reload the point history with weather columns.
point_history_weather_path = 'data/input/point_history_cleansing_weather.csv'
df = pd.read_csv(point_history_weather_path, encoding="utf-8")

In [None]:
# Mean amount_kg for each weather label.
amount_by_weather = df.groupby('天気')['amount_kg']
amount_by_weather.mean()

# 以降はボツ

In [None]:


def get_weather_forecast(api_key, lat, lon, timeout=10):
    """
    Fetch the weather forecast for a latitude/longitude from the OpenWeatherMap API.

    Args:
        api_key: OpenWeatherMap API key.
        lat: Latitude.
        lon: Longitude.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The entries under the response's 'list' key as a list. An empty list is
        returned when the key is absent or the HTTP status is not 200 (the
        response object is printed in the latter case).
    """
    url = "https://api.openweathermap.org/data/2.5/forecast"
    params = {
        'lat': lat,
        'lon': lon,
        'appid': api_key,
    }
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        # Best effort: report the failing response and hand back no data.
        print(response)
        return []
    return list(response.json().get('list', []))




In [None]:
# Fetch the forecast with get_weather_forecast for fixed coordinates
# (Sendai, per the original note); the API key comes from the environment.
lat, lon = 38.2682, 140.8694
api_key = os.environ["WEATHER_API_KEY"]

jsondata = get_weather_forecast(api_key=api_key, lat=lat, lon=lon)
jsondata

In [None]:
# Print the main fields of the first forecast entry. The list is empty when
# the request failed, so guard against indexing into it.
if jsondata:
    first_forecast = jsondata[0]
    main_values = first_forecast["main"]

    print("天気：", first_forecast["weather"][0]["main"])
    print("天気詳細：", first_forecast["weather"][0]["description"])

    print("気温：", main_values["temp"])
    print("最高気温：", main_values["temp_max"])
    print("最低気温：", main_values["temp_min"])
    print("体感気温：", main_values["feels_like"])
    print("気圧：", main_values["grnd_level"])
    print("湿度：", main_values["humidity"])
    # NOTE(review): a precipitation print (["rain"]["3h"]) was commented out in
    # the original; presumably the 'rain' key is not always present — confirm.

    print("風速：", first_forecast["wind"]["speed"])
    print("風の方角：", first_forecast["wind"]["deg"])
    print("雲量：", first_forecast["clouds"]["all"])
    print("降水確率：", first_forecast["pop"])
    print("日時：", first_forecast["dt_txt"])
else:
    print("No forecast data available")

In [None]:
# Save the fetched forecast entries as indented JSON.
with open('data/weatherdata.json', mode='w') as output_file:
    json.dump(jsondata, output_file, indent=4)

In [None]:
def get_weather_history(api_key, lat, lon, timeout=10):
    """
    Fetch weather data for a latitude/longitude from the OpenWeatherMap
    "timemachine" endpoint.

    Args:
        api_key: OpenWeatherMap API key.
        lat: Latitude.
        lon: Longitude.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The entries under the response's 'list' key as a list. An empty list is
        returned when the key is absent or the HTTP status is not 200 (the
        response and an error line are printed in the latter case).
    """
    # NOTE(review): no timestamp parameter is sent and the payload is read from
    # 'list' exactly as in the forecast helper — confirm both against the
    # endpoint's documentation.
    url = "https://api.openweathermap.org/data/2.5/onecall/timemachine"
    params = {
        'lat': lat,
        'lon': lon,
        'appid': api_key,
    }
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        print(response)
        # The message previously referenced an undefined `start_date`, which
        # raised NameError on every failed request; report the coordinates instead.
        print(f"Error fetching data for lat={lat}, lon={lon}: {response.status_code}")
        return []
    return list(response.json().get('list', []))

In [None]:
# Load the cleansed point history.
point_history_path = 'data/input/point_history_cleansing_2.csv'
df = pd.read_csv(point_history_path)

In [None]:
# Sort the point history by use_date and preview the first ten rows.
df = df.sort_values(by='use_date')
df.head(10)

In [None]:
# Keep the municipality name up to and including its first '市' or '郡' suffix
# and drop whatever follows. At least one character is required before the
# suffix so that names that begin with one of these characters (e.g. 郡山市,
# 市川市) are not truncated to the bare '郡' / '市'.
df['municipality'] = df['municipality'].str.replace(r'^(.+?(?:市|郡)).*', r'\1', regex=True)
unique_municipalities = df['municipality'].unique()
unique_municipalities

In [None]:
# Group the point history by prefecture and municipality.
municipality_keys = ['prefectures', 'municipality']
df_municipality = df.groupby(by=municipality_keys)

In [None]:
# Mean store latitude / longitude for every (prefecture, municipality) pair.
coordinate_columns = ['store_latitude', 'store_longitude']
date_range_per_municipality = (
    df
    .groupby(['prefectures', 'municipality'])
    .agg({column: 'mean' for column in coordinate_columns})
)
date_range_per_municipality