In [1]:
from google.colab import drive 
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import os
import pathlib
import datetime
import numpy as np
import pandas as pd

## ASOS data

In [None]:
def LoadData(path):
  file_list = os.listdir(path)
  df = pd.DataFrame()

  #ASOS 데이터 불러오기
  for i in file_list:
    print(i)
    ASOSdata = pd.read_csv(path+i, encoding='CP949')
    df = pd.concat([df, ASOSdata])
  df = df.sort_values('일시').reset_index(drop=True)

  # 시간, 년도 변수 생성
  df['시간'] = pd.to_datetime(df['일시']).dt.hour
  df['년도'] = pd.to_datetime(df['일시']).dt.year

  #변수 단위 제거하고 이름만 출력
  col_list = df.columns
  col_rename = []
  for i in col_list:
    name = i.strip().split("(")[0]
    col_rename.append(name)
  df.columns = col_rename 
  return df

In [None]:
path = '/content/gdrive/MyDrive/SolarPower/ASOSdata/'
ASOSdata = LoadData(path)

OBS_ASOS_TIM_2019.csv
OBS_ASOS_TIM_2018.csv
OBS_ASOS_TIM_2017.csv
OBS_ASOS_TIM_2020.csv


In [None]:
ASOSdata = ASOSdata[ASOSdata['지점명'].isin(['진주', '강릉'])]
print(ASOSdata.shape)
ASOSdata.head()

(70121, 15)


Unnamed: 0,지점,지점명,일시,기온,강수량,풍속,풍향,습도,해면기압,일조,일사,전운량,시정,시간,년도
1,105,강릉,2017-01-01 00:00,4.3,,4.1,290.0,45.0,1026.8,,,,3870.0,0,2017
2,192,진주,2017-01-01 00:00,-2.4,,0.0,0.0,85.0,1030.4,,,,720.0,0,2017
3,105,강릉,2017-01-01 01:00,4.3,,5.1,290.0,46.0,1027.2,,,,3874.0,1,2017
4,192,진주,2017-01-01 01:00,-3.0,,0.1,0.0,86.0,1030.1,,,,700.0,1,2017
6,105,강릉,2017-01-01 02:00,4.4,,4.4,290.0,44.0,1027.0,,,,4019.0,2,2017


In [None]:
path = '/content/gdrive/MyDrive/SolarPower/ASOS_Mokpo/'
Mokpo = LoadData(path)

KMA_Mokpo_2013.csv
KMA_Mokpo_2014.csv
KMA_Mokpo_2015.csv
KMA_Mokpo_2016.csv
KMA_Mokpo_2017.csv
KMA_Mokpo_2018.csv
KMA_Mokpo_2019.csv


In [None]:
print(Mokpo.shape)
Mokpo.head()

(61343, 15)


Unnamed: 0,지점,지점명,일시,기온,강수량,풍속,풍향,습도,해면기압,일조,일사,전운량,시정,시간,년도
0,165,목포,2013-01-01 00:00,-2.0,,4.1,160.0,93.0,1021.4,,,3.0,2000.0,0,2013
1,165,목포,2013-01-01 01:00,-2.2,,3.7,160.0,95.0,1021.3,,,,,1,2013
2,165,목포,2013-01-01 02:00,-2.4,,3.6,160.0,95.0,1021.2,,,,,2,2013
3,165,목포,2013-01-01 03:00,-2.7,,3.6,160.0,95.0,1020.8,,,7.0,1800.0,3,2013
4,165,목포,2013-01-01 04:00,-2.9,,4.0,160.0,94.0,1020.0,,,7.0,1800.0,4,2013


In [None]:
ASOSdata = pd.concat([ASOSdata, Mokpo])
ASOSdata.shape

(131464, 15)

In [None]:
ASOSdata['일자'] = pd.to_datetime(ASOSdata['일시'].str[:10])
ASOSdata.drop(columns=['지점', '일시'], inplace=True)
ASOSdata

Unnamed: 0,지점명,기온,강수량,풍속,풍향,습도,해면기압,일조,일사,전운량,시정,시간,년도,일자
1,강릉,4.3,,4.1,290.0,45.0,1026.8,,,,3870.0,0,2017,2017-01-01
2,진주,-2.4,,0.0,0.0,85.0,1030.4,,,,720.0,0,2017,2017-01-01
3,강릉,4.3,,5.1,290.0,46.0,1027.2,,,,3874.0,1,2017,2017-01-01
4,진주,-3.0,,0.1,0.0,86.0,1030.1,,,,700.0,1,2017,2017-01-01
6,강릉,4.4,,4.4,290.0,44.0,1027.0,,,,4019.0,2,2017,2017-01-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61338,목포,-2.0,,8.2,360.0,61.0,1034.0,,,5.0,2000.0,19,2019,2019-12-31
61339,목포,-2.8,,4.6,20.0,65.0,1033.9,,,7.0,2000.0,20,2019,2019-12-31
61340,목포,-2.8,,3.3,20.0,65.0,1033.9,,,9.0,2000.0,21,2019,2019-12-31
61341,목포,-2.6,,4.7,20.0,63.0,1033.5,,,9.0,2000.0,22,2019,2019-12-31


## Particulate Matter Data

In [None]:
def LoadPM(name, area):
  PMdata = pd.DataFrame()
  path = '/content/gdrive/MyDrive/SolarPower/PMdata/'
  file_list = os.listdir(path)
  
  #형식에 맞게 데이터 불러오기
  for i in file_list:
    p = pathlib.Path(path+i)
    Extension = p.suffix
    if Extension == '.xlsx':
      data = pd.read_excel(p)
    else:
      try:
        data = pd.read_csv(p, encoding = 'CP949')
      except:
        data = pd.read_csv(p, encoding = 'utf8')
    data = data[data[name].isin(area)]
    PMdata = pd.concat([PMdata, data]).reset_index(drop=True)

  # 일시 컬럼 수정
  PMdata['시간'] = PMdata['측정일시'].astype(str).str[-2:]
  PMdata['시간'] = PMdata['시간'].astype(int)
  PMdata['일자'] = PMdata['측정일시'].astype(str).str[:-2]
  PMdata['년도'] = PMdata['측정일시'].astype(str).str[:4]

  PMdata.sort_values(['일자'], inplace=True)
  return PMdata

In [None]:
PMdata = LoadPM('지역',['경남 진주시', '강원 강릉시', '전남 목포시'])
PMdata.to_csv('/content/gdrive/MyDrive/SolarPower/PMdata.csv', index = None)
PMdata

Unnamed: 0,지역,측정소코드,측정소명,측정일시,SO2,CO,O3,NO2,PM10,PM25,주소,망,시간,일자,년도
216168,경남 진주시,238133,상대동,2013010101,0.006,0.6,0.009,0.030,45.0,,경남 진주시 상대동,,1,20130101,2013
218349,전남 목포시,336111,용당동,2013010122,0.010,0.8,0.009,0.038,133.0,,전남 목포시 용당2동,,22,20130101,2013
218350,전남 목포시,336111,용당동,2013010123,0.010,0.6,0.010,0.031,134.0,,전남 목포시 용당2동,,23,20130101,2013
218351,전남 목포시,336111,용당동,2013010124,0.011,0.6,0.015,0.024,135.0,,전남 목포시 용당2동,,24,20130101,2013
211869,경남 진주시,238131,상봉동,2013010122,0.010,1.2,0.007,0.050,148.0,,경남 진주시 상봉동,,22,20130101,2013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208106,경남 진주시,238132,대안동,2020123103,0.002,0.3,0.038,0.002,16.0,3.0,경남 진주시 진주대로 1052,도시대기,3,20201231,2020
208107,경남 진주시,238132,대안동,2020123104,0.002,0.3,0.038,0.002,8.0,2.0,경남 진주시 진주대로 1052,도시대기,4,20201231,2020
208108,경남 진주시,238132,대안동,2020123105,0.002,0.3,0.037,0.003,7.0,2.0,경남 진주시 진주대로 1052,도시대기,5,20201231,2020
210359,전남 목포시,336111,용당동,2020123124,0.001,0.4,0.030,0.005,20.0,6.0,전남 목포시 동부로 31번길 20(용당1동 주민센터 옥상),도시대기,24,20201231,2020


In [None]:
for i in ['경남 진주시', '강원 강릉시', '전남 목포시']:
  data = PMdata[PMdata['지역'] == i ]
  area = data['측정소명'].unique()
  print(f'{i} : {data.shape}, 측정소 : {area}')

경남 진주시 : (211848, 15), 측정소 : ['상대동' '상봉동' '대안동' '상대동(진주)' '정촌면']
강원 강릉시 : (70128, 15), 측정소 : ['옥천동']
전남 목포시 : (134424, 15), 측정소 : ['용당동' '부흥동']


In [None]:
Jinju_df = PMdata[PMdata['지역'] == '경남 진주시']
for i in ['상봉동','상대동(진주)','대안동', '상대동', '정촌면']:
  j = Jinju_df[Jinju_df['측정소명'] == i]
  print(f'{i} : {j.shape}')

상봉동 : (70128, 15)
상대동(진주) : (26304, 15)
대안동 : (70128, 15)
상대동 : (43824, 15)
정촌면 : (1464, 15)


In [None]:
Mokpo_df = PMdata[PMdata['지역'] == '전남 목포시']
for i in ['용당동', '부흥동']:
  j = Mokpo_df[Mokpo_df['측정소명'] == i]
  print(f'{i} : {j.shape}')

용당동 : (70128, 15)
부흥동 : (64296, 15)


In [None]:
PMdata = PMdata[PMdata['측정소명'].isin(['대안동','옥천동','용당동'])]
PMdata = PMdata[['지역','일자','년도','시간','PM10','PM25']]
PMdata['지점명'] = PMdata['지역'].str[-3:-1]
PMdata.reset_index(drop=True, inplace=True)
PMdata

Unnamed: 0,지역,일자,년도,시간,PM10,PM25,지점명
0,전남 목포시,20130101,2013,22,133.0,,목포
1,전남 목포시,20130101,2013,23,134.0,,목포
2,전남 목포시,20130101,2013,24,135.0,,목포
3,강원 강릉시,20130101,2013,2,28.0,,강릉
4,경남 진주시,20130101,2013,24,140.0,,진주
...,...,...,...,...,...,...,...
210379,경남 진주시,20201231,2020,3,16.0,3.0,진주
210380,경남 진주시,20201231,2020,4,8.0,2.0,진주
210381,경남 진주시,20201231,2020,5,7.0,2.0,진주
210382,전남 목포시,20201231,2020,24,20.0,6.0,목포


## Mapping

In [None]:
print(f'ASOS : {ASOSdata.shape}, PM : {PMdata.shape}')

ASOS : (131464, 14), PM : (210384, 7)


In [None]:
PMdata['시간'].unique()

array([22, 23, 24,  2, 21, 20, 19, 18, 17, 16,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,  1])

In [None]:
for i in range(len(PMdata)): #매핑하기 위해서 시간이 24인 값은 임의로 0으로 변경
  if PMdata['시간'][i] == 24:
    PMdata.at[i,'시간'] = 0
PMdata['시간'].unique()

array([22, 23,  0,  2, 21, 20, 19, 18, 17, 16,  3,  4,  5,  6,  7,  8,  9,
       10, 11, 12, 13, 14, 15,  1])

In [None]:
PMdata['일자'] = pd.to_datetime(PMdata['일자'], format="%Y%M%d").dt.strftime("%Y-%M-%d")
PMdata['일자'] = pd.to_datetime(PMdata['일자'])
PMdata.head()

Unnamed: 0,지역,일자,년도,시간,PM10,PM25,지점명
0,전남 목포시,2013-01-01,2013,22,133.0,,목포
1,전남 목포시,2013-01-01,2013,23,134.0,,목포
2,전남 목포시,2013-01-01,2013,0,135.0,,목포
3,강원 강릉시,2013-01-01,2013,2,28.0,,강릉
4,경남 진주시,2013-01-01,2013,0,140.0,,진주


In [None]:
ASOSdata.head()

Unnamed: 0,지점명,기온,강수량,풍속,풍향,습도,해면기압,일조,일사,전운량,시정,시간,년도,일자
1,강릉,4.3,,4.1,290.0,45.0,1026.8,,,,3870.0,0,2017,2017-01-01
2,진주,-2.4,,0.0,0.0,85.0,1030.4,,,,720.0,0,2017,2017-01-01
3,강릉,4.3,,5.1,290.0,46.0,1027.2,,,,3874.0,1,2017,2017-01-01
4,진주,-3.0,,0.1,0.0,86.0,1030.1,,,,700.0,1,2017,2017-01-01
6,강릉,4.4,,4.4,290.0,44.0,1027.0,,,,4019.0,2,2017,2017-01-01


In [None]:
feature_df = pd.merge(ASOSdata, PMdata, on=['지점명','일자','시간','년도'], how='left')
feature_df

Unnamed: 0,지점명,기온,강수량,풍속,풍향,습도,해면기압,일조,일사,전운량,시정,시간,년도,일자,지역,PM10,PM25
0,강릉,4.3,,4.1,290.0,45.0,1026.8,,,,3870.0,0,2017,2017-01-01,강원 강릉시,58.0,46.0
1,진주,-2.4,,0.0,0.0,85.0,1030.4,,,,720.0,0,2017,2017-01-01,경남 진주시,79.0,
2,강릉,4.3,,5.1,290.0,46.0,1027.2,,,,3874.0,1,2017,2017-01-01,강원 강릉시,63.0,51.0
3,진주,-3.0,,0.1,0.0,86.0,1030.1,,,,700.0,1,2017,2017-01-01,경남 진주시,58.0,
4,강릉,4.4,,4.4,290.0,44.0,1027.0,,,,4019.0,2,2017,2017-01-01,강원 강릉시,63.0,57.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131459,목포,-2.0,,8.2,360.0,61.0,1034.0,,,5.0,2000.0,19,2019,2019-12-31,전남 목포시,17.0,7.0
131460,목포,-2.8,,4.6,20.0,65.0,1033.9,,,7.0,2000.0,20,2019,2019-12-31,전남 목포시,13.0,6.0
131461,목포,-2.8,,3.3,20.0,65.0,1033.9,,,9.0,2000.0,21,2019,2019-12-31,전남 목포시,17.0,8.0
131462,목포,-2.6,,4.7,20.0,63.0,1033.5,,,9.0,2000.0,22,2019,2019-12-31,전남 목포시,13.0,5.0


In [None]:
feature_df.to_csv('/content/gdrive/MyDrive/SolarPower/FeatureData.csv', index = None)