# 📌 연도별 날씨 데이터프레임을 csv 파일로 저장하기

In [None]:
import numpy as np
import pandas as pd

### 1. 2018년도 날씨 데이터프레임으로 묶기

#### 1.1 데이터프레임 불러오기

In [None]:
# Load the seven 2018 source CSVs: precipitation, precipitation type,
# temperature, humidity, wind speed, wind direction and sky condition.
source_paths = (
    "/content/drive/MyDrive/weather_data/2018/청운효자동_강수_201801_201812.csv",
    "/content/drive/MyDrive/weather_data/2018/청운효자동_강수형태_201801_201812.csv",
    "/content/drive/MyDrive/weather_data/2018/청운효자동_기온_201801_201812.csv",
    "/content/drive/MyDrive/weather_data/2018/청운효자동_습도_201801_201812.csv",
    "/content/drive/MyDrive/weather_data/2018/청운효자동_풍속_201801_201812.csv",
    "/content/drive/MyDrive/weather_data/2018/청운효자동_풍향_201801_201812.csv",
    "/content/drive/MyDrive/weather_data/2018/청운효자동_하늘상태_201801_201812.csv",
)
df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky = (
    pd.read_csv(csv_path) for csv_path in source_paths
)

#### 1.2 각 데이터프레임 컬럼명 변경 및 삭제

In [None]:
# Rename each table's value column to a descriptive label.
value_column = "value location:60_127 Start : 20180101 "
for frame, label in (
    (df_rain, "Rainfall(mm)"),
    (df_rain2, "Precipitation type"),
    (df_temp, "Temperature(℃)"),
    (df_hum, "Humidity(%)"),
    (df_wind, "Wind speed (m/s)"),
    (df_wind2, "wind direction"),
    (df_sky, "Sky type"),
):
    frame.rename(columns={value_column: label}, inplace=True)

In [None]:
# Remove the day and hour columns from every table except the rainfall one,
# which keeps them for the merged frame.
for frame in (df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky):
    frame.drop(columns=[" format: day", "hour"], inplace=True)

#### 1.3 하나로 데이터프레임 합치기

In [None]:
# Put the per-variable tables side by side (column-wise concatenation).
frames_to_merge = [df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky]
df_fin = pd.concat(frames_to_merge, axis="columns")

#### 1.4 데이터프레임에 NaN 값 삭제
- "format: day"에 "Start : 20180501 "의 형태의 행이 "hour", "value location:60_127 Start : 20180101"의 컬럼에 NaN 값을 가짐.

#### 1.5 "hour" 컬럼을 int 타입으로 변환



In [None]:
# Keep only the rows that have an "hour" value (the "Start : ..." rows carry NaN
# there). The explicit copy makes df_fin an independent frame, so the column
# assignments that follow do not raise SettingWithCopyWarning.
df_fin = df_fin[df_fin["hour"].notna()].copy()

# Convert "hour" to int with the same scaling every other year in this notebook
# applies; without this step the 2018 output would keep the raw float values.
df_fin["hour"] = (df_fin["hour"] * 0.01).astype(int)

#### 1.6 "Date" 컬럼 생성

In [None]:
# Relabel all columns positionally; the first column becomes "Date".
final_columns = [
    "Date",
    "hour",
    "Rainfall(mm)",
    "Precipitation type",
    "Temperature(℃)",
    "Humidity(%)",
    "Wind speed (m/s)",
    "wind direction",
    "Sky type",
]
df_fin.columns = final_columns

In [None]:
# Build one timestamp per hour of 2018 and store only its calendar date.
# NOTE(review): this assumes df_fin has exactly one row per hour of the year.
hourly_index = pd.date_range(start="2018-01-01", end="2018-12-31 23:00:00", freq="H")
df_fin["Date"] = hourly_index.date

#### 1.7 csv 파일로 저장

In [None]:
# Save the merged 2018 table without writing the index column.
output_path = "/content/drive/MyDrive/weather_data/2018.csv"
df_fin.to_csv(output_path, index=False)

### 2. 2019년도 날씨 데이터프레임으로 묶기

#### 2.1 데이터프레임 불러오기

In [None]:
# Load the seven 2019 source CSVs: precipitation, precipitation type,
# temperature, humidity, wind speed, wind direction and sky condition.
source_paths = (
    "/content/drive/MyDrive/weather_data/2019/청운효자동_강수_201901_201912.csv",
    "/content/drive/MyDrive/weather_data/2019/청운효자동_강수형태_201901_201912.csv",
    "/content/drive/MyDrive/weather_data/2019/청운효자동_기온_201901_201912.csv",
    "/content/drive/MyDrive/weather_data/2019/청운효자동_습도_201901_201912.csv",
    "/content/drive/MyDrive/weather_data/2019/청운효자동_풍속_201901_201912.csv",
    "/content/drive/MyDrive/weather_data/2019/청운효자동_풍향_201901_201912.csv",
    "/content/drive/MyDrive/weather_data/2019/청운효자동_하늘상태_201901_201912.csv",
)
df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky = (
    pd.read_csv(csv_path) for csv_path in source_paths
)

#### 2.2 각 데이터프레임 컬럼명 변경 및 삭제

In [None]:
# Rename each table's value column to a descriptive label.
value_column = "value location:60_127 Start : 20190101 "
for frame, label in (
    (df_rain, "Rainfall(mm)"),
    (df_rain2, "Precipitation type"),
    (df_temp, "Temperature(℃)"),
    (df_hum, "Humidity(%)"),
    (df_wind, "Wind speed (m/s)"),
    (df_wind2, "wind direction"),
    (df_sky, "Sky type"),
):
    frame.rename(columns={value_column: label}, inplace=True)

In [None]:
# Remove the day and hour columns from every table except the rainfall one,
# which keeps them for the merged frame.
for frame in (df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky):
    frame.drop(columns=[" format: day", "hour"], inplace=True)

#### 2.3 하나로 데이터프레임 합치기

In [None]:
# Put the per-variable tables side by side (column-wise concatenation).
frames_to_merge = [df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky]
df_fin = pd.concat(frames_to_merge, axis="columns")

#### 2.4 데이터프레임에 NaN 값 삭제
- " format: day" 컬럼에 "Start : 20190501 " 형태의 값이 들어 있는 행은 "hour", "value location:60_127 Start : 20190101 " 컬럼이 NaN 값을 가짐.

In [None]:
# Keep only the rows that have an "hour" value (the "Start : ..." rows carry NaN
# there). The explicit copy makes df_fin an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df_fin = df_fin[df_fin["hour"].notna()].copy()

#### 2.5 "hour" 컬럼을 int 타입으로 변환

In [None]:
# Scale "hour" by 0.01 and cast it to int. assign() returns a new frame rather
# than writing into the filtered slice, which is what raised
# SettingWithCopyWarning with a plain column assignment.
df_fin = df_fin.assign(hour=(df_fin["hour"] * 0.01).astype(int))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fin["hour"] = (df_fin["hour"] * 0.01).astype(int)


#### 2.6 "Date" 컬럼 생성

In [None]:
# Relabel all columns positionally; the first column becomes "Date".
final_columns = [
    "Date",
    "hour",
    "Rainfall(mm)",
    "Precipitation type",
    "Temperature(℃)",
    "Humidity(%)",
    "Wind speed (m/s)",
    "wind direction",
    "Sky type",
]
df_fin.columns = final_columns

In [None]:
# Build one timestamp per hour of 2019 and store only its calendar date.
# NOTE(review): this assumes df_fin has exactly one row per hour of the year.
hourly_index = pd.date_range(start="2019-01-01", end="2019-12-31 23:00:00", freq="H")
df_fin["Date"] = hourly_index.date

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_fin["Date"] = pd.date_range(start="2019-01-01", end="2019-12-31 23:00:00", freq="H").date


In [None]:
# Show the merged 2019 frame for a visual check of the result.
df_fin

Unnamed: 0,Date,hour,Rainfall(mm),Precipitation type,Temperature(℃),Humidity(%),Wind speed (m/s),wind direction,Sky type
0,2019-01-01,0,0.0,0.0,-4.8,48.0,2.2,296.0,-1.0
1,2019-01-01,1,0.0,0.0,-3.6,43.0,2.5,313.0,-1.0
2,2019-01-01,2,0.0,0.0,-2.4,47.0,3.2,280.0,-1.0
3,2019-01-01,3,0.0,0.0,-1.7,51.0,3.7,285.0,-1.0
4,2019-01-01,4,0.0,0.0,-1.7,50.0,3.0,285.0,-1.0
...,...,...,...,...,...,...,...,...,...
8766,2019-12-31,19,0.0,0.0,-5.0,55.0,0.0,0.0,-1.0
8767,2019-12-31,20,0.0,0.0,-4.6,58.0,1.6,111.0,-1.0
8768,2019-12-31,21,0.0,0.0,-4.5,64.0,2.2,63.0,-1.0
8769,2019-12-31,22,0.0,0.0,-4.4,65.0,0.1,37.0,-1.0


#### 2.7 csv 파일로 저장

In [None]:
# Save the merged 2019 table without writing the index column.
output_path = "/content/drive/MyDrive/weather_data/2019.csv"
df_fin.to_csv(output_path, index=False)

### 3. 2020년도 날씨 데이터프레임으로 묶기

#### 3.1 데이터프레임 불러오기

In [None]:
# Load the seven 2020 source CSVs: precipitation, precipitation type,
# temperature, humidity, wind speed, wind direction and sky condition.
source_paths = (
    "/content/drive/MyDrive/weather_data/2020/청운효자동_강수_202001_202012.csv",
    "/content/drive/MyDrive/weather_data/2020/청운효자동_강수형태_202001_202012.csv",
    "/content/drive/MyDrive/weather_data/2020/청운효자동_기온_202001_202012.csv",
    "/content/drive/MyDrive/weather_data/2020/청운효자동_습도_202001_202012.csv",
    "/content/drive/MyDrive/weather_data/2020/청운효자동_풍속_202001_202012.csv",
    "/content/drive/MyDrive/weather_data/2020/청운효자동_풍향_202001_202012.csv",
    "/content/drive/MyDrive/weather_data/2020/청운효자동_하늘상태_202001_202012.csv",
)
df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky = (
    pd.read_csv(csv_path) for csv_path in source_paths
)

#### 3.2 각 데이터프레임 컬럼명 변경 및 삭제

In [None]:
# Rename each table's value column to a descriptive label.
value_column = "value location:60_127 Start : 20200101 "
for frame, label in (
    (df_rain, "Rainfall(mm)"),
    (df_rain2, "Precipitation type"),
    (df_temp, "Temperature(℃)"),
    (df_hum, "Humidity(%)"),
    (df_wind, "Wind speed (m/s)"),
    (df_wind2, "wind direction"),
    (df_sky, "Sky type"),
):
    frame.rename(columns={value_column: label}, inplace=True)

In [None]:
# Remove the day and hour columns from every table except the rainfall one,
# which keeps them for the merged frame.
for frame in (df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky):
    frame.drop(columns=[" format: day", "hour"], inplace=True)

#### 3.3 하나로 데이터프레임 합치기

In [None]:
# Put the per-variable tables side by side (column-wise concatenation).
frames_to_merge = [df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky]
df_fin = pd.concat(frames_to_merge, axis="columns")

#### 3.4 데이터프레임에 NaN 값 삭제
- " format: day" 컬럼에 "Start : 20200501 " 형태의 값이 들어 있는 행은 "hour", "value location:60_127 Start : 20200101 " 컬럼이 NaN 값을 가짐.

In [None]:
# Keep only the rows that have an "hour" value (the "Start : ..." rows carry NaN
# there). The explicit copy makes df_fin an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df_fin = df_fin[df_fin["hour"].notna()].copy()

#### 3.5 "hour" 컬럼을 int 타입으로 변환

In [None]:
# Scale "hour" by 0.01 and cast it to int. assign() returns a new frame rather
# than writing into the filtered slice, which is what raises
# SettingWithCopyWarning with a plain column assignment.
df_fin = df_fin.assign(hour=(df_fin["hour"] * 0.01).astype(int))

#### 3.6 "Date" 컬럼 생성

In [None]:
# Relabel all columns positionally; the first column becomes "Date".
final_columns = [
    "Date",
    "hour",
    "Rainfall(mm)",
    "Precipitation type",
    "Temperature(℃)",
    "Humidity(%)",
    "Wind speed (m/s)",
    "wind direction",
    "Sky type",
]
df_fin.columns = final_columns

In [None]:
# Build one timestamp per hour of 2020 and store only its calendar date.
# NOTE(review): this assumes df_fin has exactly one row per hour of the year.
hourly_index = pd.date_range(start="2020-01-01", end="2020-12-31 23:00:00", freq="H")
df_fin["Date"] = hourly_index.date

#### 3.7 csv 파일로 저장

In [None]:
# Save the merged 2020 table without writing the index column.
output_path = "/content/drive/MyDrive/weather_data/2020.csv"
df_fin.to_csv(output_path, index=False)

### 4. 2021년도 날씨 데이터프레임으로 묶기

#### 4.1 데이터프레임 불러오기

In [None]:
# Load the seven 2021 source CSVs: precipitation, precipitation type,
# temperature, humidity, wind speed, wind direction and sky condition.
source_paths = (
    "/content/drive/MyDrive/weather_data/2021/청운효자동_강수_202101_202112.csv",
    "/content/drive/MyDrive/weather_data/2021/청운효자동_강수형태_202101_202112.csv",
    "/content/drive/MyDrive/weather_data/2021/청운효자동_기온_202101_202112.csv",
    "/content/drive/MyDrive/weather_data/2021/청운효자동_습도_202101_202112.csv",
    "/content/drive/MyDrive/weather_data/2021/청운효자동_풍속_202101_202112.csv",
    "/content/drive/MyDrive/weather_data/2021/청운효자동_풍향_202101_202112.csv",
    "/content/drive/MyDrive/weather_data/2021/청운효자동_하늘상태_202101_202112.csv",
)
df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky = (
    pd.read_csv(csv_path) for csv_path in source_paths
)

#### 4.2 각 데이터프레임 컬럼명 변경 및 삭제

In [None]:
# Rename each table's value column to a descriptive label.
value_column = "value location:60_127 Start : 20210101 "
for frame, label in (
    (df_rain, "Rainfall(mm)"),
    (df_rain2, "Precipitation type"),
    (df_temp, "Temperature(℃)"),
    (df_hum, "Humidity(%)"),
    (df_wind, "Wind speed (m/s)"),
    (df_wind2, "wind direction"),
    (df_sky, "Sky type"),
):
    frame.rename(columns={value_column: label}, inplace=True)

In [None]:
# Remove the day and hour columns from every table except the rainfall one,
# which keeps them for the merged frame.
for frame in (df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky):
    frame.drop(columns=[" format: day", "hour"], inplace=True)

#### 4.3 하나로 데이터프레임 합치기

In [None]:
# Put the per-variable tables side by side (column-wise concatenation).
frames_to_merge = [df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky]
df_fin = pd.concat(frames_to_merge, axis="columns")

#### 4.4 데이터프레임에 NaN 값 삭제
- " format: day" 컬럼에 "Start : 20210501 " 형태의 값이 들어 있는 행은 "hour", "value location:60_127 Start : 20210101 " 컬럼이 NaN 값을 가짐.

In [None]:
# Keep only the rows that have an "hour" value (the "Start : ..." rows carry NaN
# there). The explicit copy makes df_fin an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df_fin = df_fin[df_fin["hour"].notna()].copy()

#### 4.5 "hour" 컬럼을 int 타입으로 변환

In [None]:
# Scale "hour" by 0.01 and cast it to int. assign() returns a new frame rather
# than writing into the filtered slice, which is what raises
# SettingWithCopyWarning with a plain column assignment.
df_fin = df_fin.assign(hour=(df_fin["hour"] * 0.01).astype(int))

#### 4.6 "Date" 컬럼 생성

In [None]:
# Relabel all columns positionally; the first column becomes "Date".
final_columns = [
    "Date",
    "hour",
    "Rainfall(mm)",
    "Precipitation type",
    "Temperature(℃)",
    "Humidity(%)",
    "Wind speed (m/s)",
    "wind direction",
    "Sky type",
]
df_fin.columns = final_columns

In [None]:
# Build one timestamp per hour of 2021 and store only its calendar date.
# NOTE(review): this assumes df_fin has exactly one row per hour of the year.
hourly_index = pd.date_range(start="2021-01-01", end="2021-12-31 23:00:00", freq="H")
df_fin["Date"] = hourly_index.date

#### 4.7 csv 파일로 저장

In [None]:
# Save the merged 2021 table without writing the index column.
output_path = "/content/drive/MyDrive/weather_data/2021.csv"
df_fin.to_csv(output_path, index=False)

### 5. 2022년도 날씨 데이터프레임으로 묶기

#### 5.1 데이터프레임 불러오기

In [None]:
# Load the six 2022 source CSVs: precipitation, precipitation type,
# temperature, humidity, wind speed and wind direction.
source_paths = (
    "/content/drive/MyDrive/weather_data/2022/청운효자동_강수_202201_202212.csv",
    "/content/drive/MyDrive/weather_data/2022/청운효자동_강수형태_202201_202212.csv",
    "/content/drive/MyDrive/weather_data/2022/청운효자동_기온_202201_202212.csv",
    "/content/drive/MyDrive/weather_data/2022/청운효자동_습도_202201_202212.csv",
    "/content/drive/MyDrive/weather_data/2022/청운효자동_풍속_202201_202212.csv",
    "/content/drive/MyDrive/weather_data/2022/청운효자동_풍향_202201_202212.csv",
)
df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2 = (
    pd.read_csv(csv_path) for csv_path in source_paths
)
# The sky-condition table is intentionally not loaded for 2022; the disabled read is kept for reference.
#df_sky = pd.read_csv("/content/drive/MyDrive/weather_data/2022/청운효자동_하늘상태_202201_202212.csv")

#### 5.2 각 데이터프레임 컬럼명 변경 및 삭제

In [None]:
# Rename each table's value column to a descriptive label.
value_column = "value location:60_127 Start : 20220101 "
for frame, label in (
    (df_rain, "Rainfall(mm)"),
    (df_rain2, "Precipitation type"),
    (df_temp, "Temperature(℃)"),
    (df_hum, "Humidity(%)"),
    (df_wind, "Wind speed (m/s)"),
    (df_wind2, "wind direction"),
):
    frame.rename(columns={value_column: label}, inplace=True)
# NOTE(review): the sky-condition table is skipped for 2022. The disabled call
# below targets a different header, so that file presumably has another layout — confirm.
#df_sky.rename(columns={"value location:59_75  format: day":"Sky type"}, inplace=True)

In [None]:
# Remove the day and hour columns from every table except the rainfall one,
# which keeps them for the merged frame.
for frame in (df_rain2, df_temp, df_hum, df_wind, df_wind2):
    frame.drop(columns=[" format: day", "hour"], inplace=True)
# The sky-condition table is skipped for 2022, so its drop stays disabled.
#df_sky.drop([" format: day", "hour"], axis=1, inplace=True)

#### 5.3 하나로 데이터프레임 합치기

In [None]:
# Put the per-variable tables side by side (column-wise concatenation);
# there is no sky-condition table for 2022.
frames_to_merge = [df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2]
df_fin = pd.concat(frames_to_merge, axis="columns")

#### 5.4 데이터프레임에 NaN 값 삭제
- " format: day" 컬럼에 "Start : 20220501 " 형태의 값이 들어 있는 행은 "hour", "value location:60_127 Start : 20220101 " 컬럼이 NaN 값을 가짐.

In [None]:
# Keep only the rows that have an "hour" value (the "Start : ..." rows carry NaN
# there). The explicit copy makes df_fin an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df_fin = df_fin[df_fin["hour"].notna()].copy()

#### 5.5 "hour" 컬럼을 int 타입으로 변환

In [None]:
# Scale "hour" by 0.01 and cast it to int. assign() returns a new frame rather
# than writing into the filtered slice, which is what raises
# SettingWithCopyWarning with a plain column assignment.
df_fin = df_fin.assign(hour=(df_fin["hour"] * 0.01).astype(int))

#### 5.6 "Date" 컬럼 생성

In [None]:
# Relabel all columns positionally; the first column becomes "Date".
# There is no "Sky type" column for 2022.
final_columns = [
    "Date",
    "hour",
    "Rainfall(mm)",
    "Precipitation type",
    "Temperature(℃)",
    "Humidity(%)",
    "Wind speed (m/s)",
    "wind direction",
]
df_fin.columns = final_columns

In [None]:
# Build one timestamp per hour of 2022 and store only its calendar date.
# NOTE(review): this assumes df_fin has exactly one row per hour of the year.
hourly_index = pd.date_range(start="2022-01-01", end="2022-12-31 23:00:00", freq="H")
df_fin["Date"] = hourly_index.date

#### 5.7 csv 파일로 저장

In [None]:
# Save the merged 2022 table without writing the index column.
output_path = "/content/drive/MyDrive/weather_data/2022.csv"
df_fin.to_csv(output_path, index=False)

### 6. 2023년도 날씨 데이터프레임으로 묶기

#### 6.1 데이터프레임 불러오기

In [None]:
# Load the seven 2023 source CSVs: precipitation, precipitation type,
# temperature, humidity, wind speed, wind direction and sky condition.
source_paths = (
    "/content/drive/MyDrive/weather_data/2023/청운효자동_강수_202301_202312.csv",
    "/content/drive/MyDrive/weather_data/2023/청운효자동_강수형태_202301_202312.csv",
    "/content/drive/MyDrive/weather_data/2023/청운효자동_기온_202301_202312.csv",
    "/content/drive/MyDrive/weather_data/2023/청운효자동_습도_202301_202312.csv",
    "/content/drive/MyDrive/weather_data/2023/청운효자동_풍속_202301_202312.csv",
    "/content/drive/MyDrive/weather_data/2023/청운효자동_풍향_202301_202312.csv",
    "/content/drive/MyDrive/weather_data/2023/청운효자동_하늘상태_202301_202312.csv",
)
df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky = (
    pd.read_csv(csv_path) for csv_path in source_paths
)

#### 6.2 각 데이터프레임 컬럼명 변경 및 삭제

In [None]:
# Rename each table's value column to a descriptive label.
value_column = "value location:60_127 Start : 20230101 "
for frame, label in (
    (df_rain, "Rainfall(mm)"),
    (df_rain2, "Precipitation type"),
    (df_temp, "Temperature(℃)"),
    (df_hum, "Humidity(%)"),
    (df_wind, "Wind speed (m/s)"),
    (df_wind2, "wind direction"),
    (df_sky, "Sky type"),
):
    frame.rename(columns={value_column: label}, inplace=True)

In [None]:
# Remove the day and hour columns from every table except the rainfall one,
# which keeps them for the merged frame.
for frame in (df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky):
    frame.drop(columns=[" format: day", "hour"], inplace=True)

#### 6.3 하나로 데이터프레임 합치기

In [None]:
# Put the per-variable tables side by side (column-wise concatenation).
frames_to_merge = [df_rain, df_rain2, df_temp, df_hum, df_wind, df_wind2, df_sky]
df_fin = pd.concat(frames_to_merge, axis="columns")

#### 6.4 데이터프레임에 NaN 값 삭제
- " format: day" 컬럼에 "Start : 20230501 " 형태의 값이 들어 있는 행은 "hour", "value location:60_127 Start : 20230101 " 컬럼이 NaN 값을 가짐.

In [None]:
# Keep only the rows that have an "hour" value (the "Start : ..." rows carry NaN
# there). The explicit copy makes df_fin an independent frame, so the column
# assignments in the following cells do not raise SettingWithCopyWarning.
df_fin = df_fin[df_fin["hour"].notna()].copy()

#### 6.5 "hour" 컬럼을 int 타입으로 변환

In [None]:
# Scale "hour" by 0.01 and cast it to int. assign() returns a new frame rather
# than writing into the filtered slice, which is what raises
# SettingWithCopyWarning with a plain column assignment.
df_fin = df_fin.assign(hour=(df_fin["hour"] * 0.01).astype(int))

#### 6.6 "Date" 컬럼 생성

In [None]:
# Relabel all columns positionally; the first column becomes "Date".
final_columns = [
    "Date",
    "hour",
    "Rainfall(mm)",
    "Precipitation type",
    "Temperature(℃)",
    "Humidity(%)",
    "Wind speed (m/s)",
    "wind direction",
    "Sky type",
]
df_fin.columns = final_columns

In [None]:
# Build one timestamp per hour of 2023 and store only its calendar date.
# NOTE(review): this assumes df_fin has exactly one row per hour of the year.
hourly_index = pd.date_range(start="2023-01-01", end="2023-12-31 23:00:00", freq="H")
df_fin["Date"] = hourly_index.date

#### 6.7 csv 파일로 저장

In [None]:
# Save the merged 2023 table without writing the index column.
output_path = "/content/drive/MyDrive/weather_data/2023.csv"
df_fin.to_csv(output_path, index=False)