In [1]:
import numpy as np
import pandas as pd
#교통사고 데이터(CSV파일) 불러오기
accident_data = pd.read_csv("accident_data.csv",encoding = 'EUC-KR')

In [2]:
accident_data

Unnamed: 0,발생일,발생시간,발생지_시도,발생지_시군구,법정동명,사고건수,사망자수,중상자수,경상자수,부상신고자수
0,2017-01-01,0,서울,강북구,미아동,1,0,1,6,0
1,2017-01-01,0,서울,노원구,상계동,1,0,0,1,0
2,2017-01-01,0,서울,동작구,상도동,1,0,0,1,0
3,2017-01-01,0,서울,서초구,서초동,1,0,0,1,0
4,2017-01-01,0,서울,송파구,잠실동,1,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...
114438,2019-12-31,22,서울,송파구,송파동,1,0,0,1,0
114439,2019-12-31,22,서울,영등포구,대림동,1,0,0,1,0
114440,2019-12-31,22,서울,중랑구,면목동,1,0,0,0,1
114441,2019-12-31,23,서울,강남구,수서동,1,0,0,1,0


In [3]:
#쓰지않는 데이터 열 삭제
del accident_data['발생시간']
del accident_data['발생지_시군구']
del accident_data['발생지_시도']
del accident_data['법정동명']

In [4]:
# '발생일'컬럼을 날짜 데이터로 변환

accident_data['발생일'] = pd.to_datetime(accident_data['발생일'])

In [5]:
#같은 날짜에 발생한 사건은 합치기
accident_data_final = accident_data.groupby('발생일').sum()

# 인덱스 이름 '날짜'로 변환
accident_data_final.index.name = '날짜'
accident_data_final

Unnamed: 0_level_0,사고건수,사망자수,중상자수,경상자수,부상신고자수
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-01,73,2,25,90,10
2017-01-02,82,4,27,92,17
2017-01-03,88,1,25,80,13
2017-01-04,115,1,39,99,25
2017-01-05,103,1,34,111,8
...,...,...,...,...,...
2019-12-27,94,0,27,88,14
2019-12-28,94,0,27,105,10
2019-12-29,78,1,26,87,7
2019-12-30,91,1,19,93,53


In [6]:
# 강수량 데이터(CSV파일) 불러오기
rain_df = pd.read_csv("rain_data.csv",encoding = 'EUC-KR')

# 날짜' 날짜데이터 형식으로 변환
rain_df['날짜'] = pd.to_datetime(rain_df['날짜'])

# 지점 열 삭제
rain_df.drop('지점', axis=1, inplace=True)

# 강수량 열 이름을 비로 변경
rain_df.rename(columns={'강수량(mm)': '비'}, inplace=True)

rain_df.loc[rain_df['비'].notnull(), '비'] = '비'

rain_df

Unnamed: 0,날짜,비
0,2017-01-01,
1,2017-01-02,비
2,2017-01-03,
3,2017-01-04,
4,2017-01-05,
...,...,...
1090,2019-12-27,
1091,2019-12-28,
1092,2019-12-29,비
1093,2019-12-30,비


In [7]:
# 눈일수 데이터(CSV파일) 불러오기
snow_data = pd.read_csv("snow_data.csv",encoding = 'EUC-KR')
snow_df = snow_data

# 연도와 날짜 열을 합치고 날짜 형식으로 변환
snow_df['날짜'] = pd.to_datetime(snow_df['연도'].astype(str) + snow_df['날짜'].astype(str), format='%Y%m%d')

# 기상현상 열 이름을 눈으로 변경
snow_df.rename(columns={'기상현상': '눈'}, inplace=True)

# 열의 순서 변경, 연도, 지점, 일 최심신적설(cm) 열 삭제
snow_df = snow_df[['날짜', '눈']]

# 데이터프레임의 '눈'열 값들을 '눈'으로 변경
snow_df['눈'] = '눈'
snow_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  snow_df['눈'] = '눈'


Unnamed: 0,날짜,눈
0,2017-11-02,눈
1,2017-11-03,눈
2,2017-11-08,눈
3,2017-01-20,눈
4,2017-12-01,눈
...,...,...
72,2019-12-19,눈
73,2019-12-20,눈
74,2019-12-21,눈
75,2019-12-30,눈


In [8]:
# 교통사고 데이터, 눈, 비 데이터프레임 합치기

# 'accident_data_final' 데이터프레임과 'rain_df' 데이터프레임을 날짜를 기준으로 병합
merged_df = pd.merge(accident_data_final, rain_df, on='날짜', how='left')

# 'merged_df'와 'snow_df'를 날짜를 기준으로 병합
merged_df = pd.merge(merged_df, snow_df, on='날짜', how='left')

merged_df

Unnamed: 0,날짜,사고건수,사망자수,중상자수,경상자수,부상신고자수,비,눈
0,2017-01-01,73,2,25,90,10,,
1,2017-01-02,82,4,27,92,17,비,
2,2017-01-03,88,1,25,80,13,,
3,2017-01-04,115,1,39,99,25,,
4,2017-01-05,103,1,34,111,8,,
...,...,...,...,...,...,...,...,...
1091,2019-12-27,94,0,27,88,14,,
1092,2019-12-28,94,0,27,105,10,,
1093,2019-12-29,78,1,26,87,7,비,
1094,2019-12-30,91,1,19,93,53,비,눈


In [18]:
merged_df['비'].fillna('', inplace=True)
merged_df['눈'].fillna('', inplace=True)

merged_df['날씨'] = merged_df['비'] + merged_df['눈']
merged_df

Unnamed: 0,날짜,사고건수,사망자수,중상자수,경상자수,부상신고자수,비,눈,날씨
0,2017-01-01,73,2,25,90,10,,,
1,2017-01-02,82,4,27,92,17,비,,비
2,2017-01-03,88,1,25,80,13,,,
3,2017-01-04,115,1,39,99,25,,,
4,2017-01-05,103,1,34,111,8,,,
...,...,...,...,...,...,...,...,...,...
1091,2019-12-27,94,0,27,88,14,,,
1092,2019-12-28,94,0,27,105,10,,,
1093,2019-12-29,78,1,26,87,7,비,,비
1094,2019-12-30,91,1,19,93,53,비,눈,비눈


In [13]:
snow_count = merged_df[merged_df['날씨'] == '눈'].shape[0]
rain_count = merged_df[merged_df['날씨'] == '비'].shape[0]
snow_rain_count = merged_df[merged_df['날씨'] == '비눈'].shape[0]

print("눈 개수:", snow_count)
print("비 개수:", rain_count)
print("비눈 개수:", snow_rain_count)

눈 개수: 9
비 개수: 344
비눈 개수: 68
