In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns #seabornない人はpip installしてね
import os
from datetime import datetime
import numpy as np
import matplotlib as mpl

# Japanese-capable Matplotlib font, selected once per operating system.
# The notebook used to assign rcParams['font.family'] three times in a row
# (IPAexGothic, Hiragino Sans, Meiryo), so the last value, 'Meiryo', always
# won no matter which platform the notebook was running on.
import platform

_JP_FONT_BY_OS = {
    'Darwin': 'Hiragino Sans',  # macOS
    'Windows': 'Meiryo',
}
# Any other platform (e.g. Linux) falls back to IPAexGothic.
mpl.rcParams['font.family'] = _JP_FONT_BY_OS.get(platform.system(), 'IPAexGothic')

# Align the working directory with the .py scripts: when the kernel starts
# inside a folder named "notebook", move up one level.
from pathlib import Path
if Path.cwd().name == "notebook":
    os.chdir("..")


# pandas display settings
pd.set_option('display.max_rows', 500)
pd.set_option('display.min_rows', 500)
pd.set_option('display.max_columns', 500)

In [None]:
# Read the input CSV (UTF-8); its first column becomes the index.
df = pd.read_csv(
    'data/input/point_history_cleansing_weather.csv',
    index_col=0,
    encoding="utf-8",
)
# Show the first three rows as a quick sanity check.
df.head(3)

In [None]:
# Preview up to three rows whose weather label is none of 晴 / 曇 / 雨 / 雪
# (rows with a missing label are included as well).
df[~df["天気"].isin(["晴", "曇", "雨", "雪"])].head(3)

In [None]:
# Display the column labels of the loaded frame.
df.columns

型変換

In [None]:
# Treat the placeholder 'N' and the literal string 'nan' as missing values.
df = df.replace(['N', 'nan'], np.nan)

# Target dtype for every column.
# Numeric columns use float64 rather than float16: half precision cannot
# represent integers above 2048 exactly and overflows to inf above 65504, which
# corrupts ids, running totals and readings such as pressure in hPa. A float
# type (not int) is kept so that missing values remain representable.
# NOTE(review): astype(str) may turn missing values into the string 'nan'
# (pandas-version dependent) — confirm this is acceptable for the text columns.
column_types = {
    'id' : np.float64,
    'user_id' : int,
    'series_id' : np.float64,
    'shop_id' : np.float64,
    'shop_name' : str,
    'card_id' : str,
    'リサイクル分類ID' : np.float64,
    'amount' : np.float64,
    'amount_kg' : np.float64,
    'point' : np.float64,
    'total_point' : np.float64,
    'status' : np.float64,
    'total_amount' : np.float64,
    'coin' : np.float64,
    'rank_id' : np.float64,
    'use_date' :   'datetime64[ns]',
    'created_at' : 'datetime64[ns]',
    'updated_at' : 'datetime64[ns]',
    '支店ID' : np.float64,
    'super' : str,
    'prefectures' : str,
    'municipality' : str,
    'shop_name_1' :  str,
    'shop_id_1' :    str,
    'created_at_1' : 'datetime64[ns]',
    'updated_at_1' : 'datetime64[ns]',
    'store_latitude' : np.float64,
    'store_longitude' : np.float64,
    'use_date_2' : 'datetime64[ns]',
    '年月日' : 'datetime64[ns]',
    '天気': str,
    '平均気温(℃)': np.float64,
    '最高気温(℃)': np.float64,
    '最低気温(℃)': np.float64,
    '降水量の合計(mm)': np.float64,
    '平均風速(m/s)': np.float64,
    '平均湿度(％)': np.float64,
    '平均現地気圧(hPa)': np.float64,
    '平均雲量(10分比)': np.float64,
    '降雪量合計(cm)': np.float64,
    '日照時間(時間)': np.float64,
    '合計全天日射量(MJ/㎡)': np.float64,
}
# Apply all casts in one pass; raises if a column is missing or not castable.
df = df.astype(column_types)

In [None]:
# Show the first two rows to eyeball the result of the type conversion.
df.head(2)

相関マップ

In [None]:
# One-hot encode the weather label into 0/1 integer columns.
weather_dummies = pd.get_dummies(df['天気'], dtype=int)

# Append the indicator columns to the original frame.
df_with_dummies = pd.concat((df, weather_dummies), axis="columns")

# Keep numeric columns only (text and datetime columns are dropped).
numeric_df = df_with_dummies.select_dtypes(include="number")

# Pairwise correlation matrix of the numeric columns.
corr = numeric_df.corr()

# Draw the matrix as an annotated heatmap on a blue-white-red colour scale.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, annot=True, fmt=".1f", cmap="bwr", ax=ax)
plt.show()

In [None]:
# Rows whose weather label is none of 晴 / 曇 / 雨 / 雪.
df_tmp = df.loc[~df["天気"].isin(["晴", "曇", "雨", "雪"])]

In [None]:
# Frequency of each distinct weather label in the full frame.
df["天気"].value_counts()

In [None]:
# Inspect the weather labels of the rows collected in df_tmp.
df_tmp["天気"]