# 문화생활과 한파 상관관계

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob

# 그래프 폰트
plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False

In [None]:
def preprocessing(data_df) :
    use_cols = ['측정시간', '온도 최소(℃)']
    # 컬럼명 시작 끝 공백 제거
    data_df.columns = data_df.columns.str.strip()

    data_df = data_df[use_cols]

    data_df.columns = data_df.columns.str.strip()

    data_df = data_df.dropna()

    # datetime 이상치 제거
    # 문자열로
    datetime_str = data_df['측정시간'].astype(str).str.strip()

    # _제거
    datetime_str = datetime_str.str.replace('_', ' ', regex=True)
    # 12~14자리 숫자(YYYYMMDDHHMMSS / YYYYMMDDHHMM)만 추출해서 파싱
    dt_num = pd.to_datetime(datetime_str, format='%Y-%m-%d %H:%M:%S', errors='coerce')
    dt_mix = pd.to_datetime(datetime_str, format='mixed', errors='coerce')

    # 포맷 병합
    data_df['측정시간'] = dt_num.fillna(dt_mix)

    # 일시->날짜로 컬럼 추가
    data_df['측정시간'] = pd.to_datetime(data_df['측정시간'])
    data_df['측정시간'] = data_df['측정시간'].dt.date
    
    data_df.columns = ['date', 'min_temp']

    data_df['min_temp'] = pd.to_numeric(data_df['min_temp'], errors='coerce')
    data_df = data_df.dropna(subset=['min_temp'])

    # 이상치 제거
    data_df = data_df[(data_df['min_temp'] >= -30) & (data_df['min_temp'] <= 10)]
    # 최저기온
    data_df = data_df.groupby('date')['min_temp'].min().reset_index()
    
    return data_df

In [None]:
# 한파 데이터 로드 2023
path = "./data/한파/S-DoT_NATURE_2023*.csv"
files = glob.glob(path)
df_list = [pd.read_csv(f, encoding='euc-kr') for f in files]
cold_df1 = pd.concat(df_list, ignore_index=True)

# 한파 데이터 로드 2024
path = "./data/한파/120_서울시 IOT 복합 센서(환경정보)_PUBDATA_2024*.csv"

files = glob.glob(path)

df_list = [pd.read_csv(f, encoding='euc-kr') for f in files]

cold_df2 = pd.concat(df_list, ignore_index=True)

cold_df1 = preprocessing(cold_df1)
cold_df2 = preprocessing(cold_df2)

cold_df1

  df_list = [pd.read_csv(f, encoding='euc-kr') for f in files]


Unnamed: 0,date,min_temp
0,2023-01-01,-21.6
1,2023-01-02,-25.8
2,2023-01-03,-27.4
3,2023-01-04,-24.0
4,2023-01-05,-23.3
...,...,...
140,2023-12-27,-14.1
141,2023-12-28,0.0
142,2023-12-29,0.0
143,2023-12-30,0.0


In [None]:
# 두 dataframe 합치기
cold_df = pd.merge(
    cold_df1,
    cold_df2,
    how='outer',
    on='date',
    suffixes=('_df1', '_df2')
)
# 분리된 컬럼 합치기
cold_df['min_temp'] = cold_df['min_temp_df1'].combine_first(cold_df['min_temp_df2'])
cold_df = cold_df.drop(columns=['min_temp_df1', 'min_temp_df2'])

# 데이터 확인
print(cold_df['date'].min())
print(cold_df['date'].max())

# 중복값 확인
cold_df['date'].duplicated().sum()
cold_df['date'] = pd.to_datetime(cold_df['date'], errors='coerce')

2023-04-03
2024-10-31


In [None]:
cold_df['min_temp'].isna().any()
cold_df['date'].isna().any()

np.False_

In [None]:
cold_df.isna().sum()

date        0
max_temp    0
dtype: int64

In [None]:
# 예술의 전당 문화생활 데이터
path = "./data/문화생활/예술의전당_*.csv"

files = glob.glob(path)

df_list = [pd.read_csv(f) for f in files]

art_center_df = pd.concat(df_list, ignore_index=True)

# 일자 컬럼 데이터 타입 변경
art_center_df['일자'] = pd.to_datetime(art_center_df['일자'], errors='coerce')
art_center_df['일자'] = art_center_df['일자'].dt.date

# 일별 방문 합계
art_center_df = art_center_df.groupby('일자')['합계'].sum().reset_index()
art_center_df.columns = ['date', 'art_center_total_visit']
art_center_df['date'] = pd.to_datetime(art_center_df['date'], errors='coerce')
art_center_df = art_center_df.dropna()
art_center_df

Unnamed: 0,date,art_center_total_visit
0,2023-01-01,6393
1,2023-01-03,4536
2,2023-01-04,6566
3,2023-01-05,4272
4,2023-01-06,5251
...,...,...
940,2025-12-26,3726
941,2025-12-27,7434
942,2025-12-28,5709
943,2025-12-30,5963


In [None]:
cultural_cold = pd.merge(cold_df, art_center_df, on='date', how='inner')

In [None]:
cultural_cold = cultural_cold[(cultural_cold['min_temp'] <= -12)]