# 개화일 - 최고온도/최저온도 시각화

> 1월 1일부터 개화일까지의 일일 최고/최저온도(7일/14일/30일 이동평균)를 y축, 1월 1일부터 경과한 날 수를 x축으로 하여 지난 52년(1973-2024)의 데이터를 연도별 그래프로 시각화

> 사용한 데이터 [google drive](https://drive.google.com/drive/folders/1NikMqK8_iOisRQf6vHreLQw7X8T8piK4?usp=drive_link)
- weather_data_csv -> weather_서울.csv
- flowering days -> flowering_days_서울.csv


In [19]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np


# Load the Seoul weather observations and the per-year flowering-day table.
weather_data = pd.read_csv("weather_서울.csv", sep = ',')
flowering_data = pd.read_csv("flowering_days_서울.csv", sep = ',')

# Parse the observation date column into datetimes so the .dt accessor works.
weather_data['tm'] = pd.to_datetime(weather_data['tm'])

# Keep only the date and the daily max/min temperature columns. Take an
# explicit copy: columns are added to weather_df later, and writing to a
# column slice of weather_data triggers pandas' SettingWithCopyWarning.
weather_df = weather_data[['tm', 'maxTa', 'minTa']].copy()

# Convert the flowering-day table into a {year: flowering day} dictionary.
flowering_days = dict(zip(flowering_data['Year:'], flowering_data['Flowering Days']))

In [None]:
# Derive the calendar year and the day-of-year of every observation.
# assign() returns a new frame, so nothing is written into a slice of
# weather_data (avoids pandas' SettingWithCopyWarning).
weather_df = weather_df.assign(
    year=weather_df['tm'].dt.year,
    day_of_year=weather_df['tm'].dt.dayofyear,
)

# For each year that has a recorded flowering day, keep only the rows up to
# and including that day. groupby yields (year, rows-of-that-year) pairs.
filtered_data = [
    group[group['day_of_year'] <= flowering_days[year]]
    for year, group in weather_df.groupby('year')
    if year in flowering_days
]

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that points at the actual cause instead.
if not filtered_data:
    raise ValueError("No year in the weather data has a matching flowering day.")

filtered_df = pd.concat(filtered_data)

In [None]:
# Compute 7-, 14- and 30-day moving averages of the daily min/max temperature.
# The rolling window is applied within each year separately, so the first
# (window - 1) rows of every year are NaN.
for window in (7, 14, 30):
    for prefix, source in (('min', 'minTa'), ('max', 'maxTa')):
        per_year = filtered_df.groupby('year')[source]
        # Bind the window size as a default argument so the lambda does not
        # depend on the loop variable after the fact.
        filtered_df[f'{prefix}_temp_{window}d'] = per_year.transform(
            lambda temps, w=window: temps.rolling(w).mean()
        )

# Display the resulting frame (notebook cell output).
filtered_df

In [None]:
# Plot the 7-day moving averages of the daily max/min temperature, one pair
# of curves per year, together with each year's flowering day.
plt.figure(figsize=(14, 10))

# Materialise the per-year groups once; groupby yields them in ascending
# year order, which fixes the colour assigned to each year.
yearly_groups = list(filtered_df.groupby('year'))

# Sample every colormap over its full [0, 1] range. Integer inputs are
# interpreted as raw lookup-table indices, which would squeeze all years
# into the first few entries of the map and make them nearly the same colour.
color_positions = np.linspace(0, 1, len(yearly_groups))
colors_max = cm.viridis_r(color_positions)
colors_min = cm.magma_r(color_positions)
colors_day = cm.rainbow_r(color_positions)

for i, (year, group) in enumerate(yearly_groups):
    plt.plot(group['day_of_year'], group['max_temp_7d'], label=f'{year} - Max Temp (7d MA)', color=colors_max[i], alpha=0.4)
    plt.plot(group['day_of_year'], group['min_temp_7d'], label=f'{year} - Min Temp (7d MA)', color=colors_min[i], alpha=0.4)

# Mark the flowering day of exactly the years that were plotted, so the
# colour index always matches the year and can never run past colors_day.
for i, (year, _) in enumerate(yearly_groups):
    plt.axvline(x=flowering_days[year], color=colors_day[i], linestyle='--', label=f'{year}- Flowering Day', alpha=0.5)

# NOTE: labels are set on every artist but no legend is drawn here.
plt.xlabel('Day of Year')
plt.ylabel('Temperature (°C)')
plt.title('Daily Max/Min Temperature with 7 Day Moving Average in Seoul (52 years)')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Plot the 14-day moving averages of the daily max/min temperature, one pair
# of curves per year, together with each year's flowering day.
plt.figure(figsize=(14, 10))

# Materialise the per-year groups once; groupby yields them in ascending
# year order, which fixes the colour assigned to each year.
yearly_groups = list(filtered_df.groupby('year'))

# Sample every colormap over its full [0, 1] range. Integer inputs are
# interpreted as raw lookup-table indices, which would squeeze all years
# into the first few entries of the map and make them nearly the same colour.
color_positions = np.linspace(0, 1, len(yearly_groups))
colors_max = cm.viridis_r(color_positions)
colors_min = cm.magma_r(color_positions)
colors_day = cm.rainbow_r(color_positions)

for i, (year, group) in enumerate(yearly_groups):
    plt.plot(group['day_of_year'], group['max_temp_14d'], label=f'{year} - Max Temp (14d MA)', color=colors_max[i], alpha=0.4)
    plt.plot(group['day_of_year'], group['min_temp_14d'], label=f'{year} - Min Temp (14d MA)', color=colors_min[i], alpha=0.4)

# Mark the flowering day of exactly the years that were plotted, so the
# colour index always matches the year and can never run past colors_day.
for i, (year, _) in enumerate(yearly_groups):
    plt.axvline(x=flowering_days[year], color=colors_day[i], linestyle='--', label=f'{year}- Flowering Day', alpha=0.5)

# NOTE: labels are set on every artist but no legend is drawn here.
plt.xlabel('Day of Year')
plt.ylabel('Temperature (°C)')
plt.title('Daily Max/Min Temperature with 14 Day Moving Average in Seoul (52 years)')
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Plot the 30-day moving averages of the daily max/min temperature, one pair
# of curves per year, together with each year's flowering day.
plt.figure(figsize=(14, 10))

# Materialise the per-year groups once; groupby yields them in ascending
# year order, which fixes the colour assigned to each year.
yearly_groups = list(filtered_df.groupby('year'))

# Sample every colormap over its full [0, 1] range. Integer inputs are
# interpreted as raw lookup-table indices, which would squeeze all years
# into the first few entries of the map and make them nearly the same colour.
color_positions = np.linspace(0, 1, len(yearly_groups))
colors_max = cm.viridis_r(color_positions)
colors_min = cm.magma_r(color_positions)
colors_day = cm.rainbow_r(color_positions)

for i, (year, group) in enumerate(yearly_groups):
    plt.plot(group['day_of_year'], group['max_temp_30d'], label=f'{year} - Max Temp (30d MA)', color=colors_max[i], alpha=0.4)
    plt.plot(group['day_of_year'], group['min_temp_30d'], label=f'{year} - Min Temp (30d MA)', color=colors_min[i], alpha=0.4)

# Mark the flowering day of exactly the years that were plotted, so the
# colour index always matches the year and can never run past colors_day.
for i, (year, _) in enumerate(yearly_groups):
    plt.axvline(x=flowering_days[year], color=colors_day[i], linestyle='--', label=f'{year}- Flowering Day', alpha=0.5)

# NOTE: labels are set on every artist but no legend is drawn here.
plt.xlabel('Day of Year')
plt.ylabel('Temperature (°C)')
plt.title('Daily Max/Min Temperature with 30 Day Moving Average in Seoul (52 years)')
plt.grid(True)
plt.tight_layout()
plt.show()