In [19]:
import os
import pandas as pd
from datetime import datetime
import requests
import dotenv


# Load variables from a local .env file into the process environment so the
# API key is read from configuration instead of being written in the notebook.
dotenv.load_dotenv()
API_KEY = os.getenv("API_KEY")  # None when the variable is not set

# Request timestamp as YYYYMMDDHHMM. The day field (%d) was missing, which
# produced a 10-digit year-month-hour-minute string instead of a full timestamp.
# NOTE(review): datetime.now() is the machine's local time; the API presumably
# expects KST — confirm when running outside Korea.
tm = datetime.now().strftime("%Y%m%d%H%M")
stn = 0  # surface observation station code (not referenced by the cells visible here)

In [20]:
class CreateDataFrameFromText:
    """
    Download a plain-text API response and parse it into a pandas DataFrame.

    - The payload is not a well-formed table, so every data row is tokenised
      on whitespace with ``str.split()``.
    - When no explicit header is supplied, column names are extracted from
      the ``#``-prefixed comment lines of the response.
    - Known exceptional cases in the payload are patched before parsing.
    """

    def __init__(self, url, header=None, timeout=30):
        """
        Args:
            url: Full request URL, including query parameters.
            header: Optional list of column names. When falsy, the names are
                derived from the response comments instead.
            timeout: Seconds to wait for the server before giving up.
        """
        self.url = url
        self.header = header
        self.timeout = timeout

    def get_response(self):
        """
        Fetch ``self.url`` and return the response body as text.

        Raises:
            requests.HTTPError: If the server answers with an error status,
                so an error page is never parsed as if it were data.
        """
        # A timeout keeps the notebook from hanging forever on a stalled server.
        response = requests.get(self.url, timeout=self.timeout)
        response.raise_for_status()
        return response.text

    def get_data(self, txt):
        """
        Parse the data rows of the payload.

        Every non-blank line that does not start with ``#`` is treated as a
        data row; comment lines (including any leading banner or trailing
        footer) and blank lines are skipped wherever they appear.

        Args:
            txt: Raw response text.

        Returns:
            A list of rows, each a list of whitespace-separated string tokens.
        """
        # Exceptional case: fold " Gun" into the preceding token so that it
        # does not become a separate column when splitting on whitespace.
        # NOTE(review): presumably targets English station names ending in
        # " Gun" — confirm against the actual payload.
        txt = txt.replace(" Gun", "gun")

        rows = []
        for raw_line in txt.split("\n"):
            line = raw_line.strip()
            # Filtering by content (instead of slicing off the last line)
            # keeps the final data row when the payload has no footer line.
            if not line or line.startswith("#"):
                continue
            rows.append(line.split())
        return rows

    def get_header_data(self, txt):
        """
        Extract column names from the comment lines of the payload.

        A line contributes a name when it starts with ``#`` and contains a
        ``:``; the name is the last whitespace-separated token before the
        first ``:``.

        Args:
            txt: Raw response text.

        Returns:
            A list of column names in the order they appear.
        """
        return [
            line.split(":")[0].split()[-1]
            for line in txt.strip().split("\n")
            if line.startswith("#") and ":" in line
        ]

    def check_data(self, header, columns):
        """
        Verify that every parsed row has as many fields as the header.

        Prints the header and, for the first mismatching row, the row itself
        and its length.

        Args:
            header: List of column names.
            columns: List of parsed rows (each a list of tokens).

        Returns:
            True when all rows match the header length, otherwise False.
        """
        expected = len(header)
        print(f"header {header}, len: {expected}")
        for row in columns:
            if len(row) != expected:
                print(f"column len: {row},{len(row)}")
                return False
        return True

    def create_dataframe(self):
        """
        Fetch the payload, parse it, and build the DataFrame.

        Returns:
            A DataFrame whose values are the raw string tokens of each row.

        Raises:
            ValueError: If any row's field count differs from the header's.
        """
        txt = self.get_response()
        data = self.get_data(txt)

        # An explicitly supplied header wins over the one found in comments.
        header = self.header if self.header else self.get_header_data(txt)

        if not self.check_data(header, data):
            raise ValueError("header와 columns의 길이가 다릅니다.")

        return pd.DataFrame(data, columns=header)

In [22]:
# Surface observation station information request; the query string carries
# the request time and the auth key read from the environment.
url_location = f"https://apihub.kma.go.kr/api/typ01/url/stn_inf.php?inf=SFC&tm={tm}&help=1&authKey={API_KEY}"

# Column names applied, in order, to the whitespace-split fields of each row.
header_location = [
    "STN_ID", "LON", "LAT", "STN_SP", "HT",
    "HT_PA", "HT_TA", "HT_WD", "HT_RN", "STN_CD",
    "STN_KO", "STN_EN", "FCT_ID", "LAW_ID", "BASIN",
]

# Passing an explicit header bypasses header extraction from the comments.
df = CreateDataFrameFromText(url=url_location, header=header_location).create_dataframe()

print(df.head(5))

header ['STN_ID', 'LON', 'LAT', 'STN_SP', 'HT', 'HT_PA', 'HT_TA', 'HT_WD', 'HT_RN', 'STN_CD', 'STN_KO', 'STN_EN', 'FCT_ID', 'LAW_ID', 'BASIN'], len: 15
  STN_ID           LON          LAT STN_SP      HT   HT_PA HT_TA  HT_WD HT_RN  \
0     90  128.56473000  38.25085000  35100   17.53   18.73  1.70  10.00  1.40   
1     93  127.75443000  37.94738000  31201   95.78   96.78  1.50  10.00  1.40   
2     95  127.30420000  38.14787000  31110  155.48  156.98  1.80  13.00  1.50   
3     98  127.06070000  37.90188000  22200  115.62  116.74  1.70  10.00  1.00   
4     99  126.76648000  37.88589000  22300   30.59   31.99  1.70  10.00  1.00   

  STN_CD STN_KO        STN_EN    FCT_ID      LAW_ID BASIN  
0     90     속초        Sokcho  11D20401  5182033035  ----  
1    101    북춘천  Bukchuncheon  11D10301  5111025024  ----  
2    101     철원      Cheorwon  11D10101  5178025624  ----  
3    119    동두천   Dongducheon  11B20401  4125010300  ----  
4    119     파주          Paju  11B20305  4148025025  ----  


In [None]:
# Read the station table from location.csv (resolved against the current
# working directory) and preview its first rows.
# NOTE(review): no cell visible here writes ./location.csv — confirm where the
# file comes from so the notebook survives Restart & Run All.
location_df = pd.read_csv(filepath_or_buffer="./location.csv")

print(location_df.head(5))

   STN_ID        LON       LAT  STN_SP      HT   HT_PA  HT_TA  HT_WD  HT_RN  \
0      90  128.56473  38.25085   35100   17.53   18.73    1.7   10.0    1.4   
1      93  127.75443  37.94738   31201   95.78   96.78    1.5   10.0    1.4   
2      95  127.30420  38.14787   31110  155.48  156.98    1.8   13.0    1.5   
3      98  127.06070  37.90188   22200  115.62  116.74    1.7   10.0    1.0   
4      99  126.76648  37.88589   22300   30.59   31.99    1.7   10.0    1.0   

   STN_CD STN_KO        STN_EN    FCT_ID      LAW_ID BASIN  
0      90     속초        Sokcho  11D20401  5182033035  ----  
1     101    북춘천  Bukchuncheon  11D10301  5111025024  ----  
2     101     철원      Cheorwon  11D10101  5178025624  ----  
3     119    동두천   Dongducheon  11B20401  4125010300  ----  
4     119     파주          Paju  11B20305  4148025025  ----  


In [2]:
# Subset of the station columns that is kept for the exported table.
selected_columns = [
    "STN_ID",
    "LON",
    "LAT",
    "STN_KO",
    "STN_EN",
    "FCT_ID",
]
df_selected = location_df[selected_columns]

print(df_selected.head(5))

   STN_ID        LON       LAT STN_KO        STN_EN    FCT_ID
0      90  128.56473  38.25085     속초        Sokcho  11D20401
1      93  127.75443  37.94738    북춘천  Bukchuncheon  11D10301
2      95  127.30420  38.14787     철원      Cheorwon  11D10101
3      98  127.06070  37.90188    동두천   Dongducheon  11B20401
4      99  126.76648  37.88589     파주          Paju  11B20305


In [4]:
# Lower-case column names for the exported file, in the same order as the
# columns selected above.
# NOTE(review): "std_id" looks like a typo for "stn_id" (the source column is
# STN_ID) — confirm with consumers of the exported CSV before renaming.
headers = [
    "std_id",
    "lon",
    "lat",
    "stn_ko",
    "stn_en",
    "fct_id",
]

# Relabel the columns in place; the row data is left unchanged.
df_selected.columns = headers

print(df_selected.head(5))

# Write the table without the index column.
df_selected.to_csv(path_or_buf="./../../data/common/observation_location.csv", index=False)

   std_id        lon       lat stn_ko        stn_en    fct_id
0      90  128.56473  38.25085     속초        Sokcho  11D20401
1      93  127.75443  37.94738    북춘천  Bukchuncheon  11D10301
2      95  127.30420  38.14787     철원      Cheorwon  11D10101
3      98  127.06070  37.90188    동두천   Dongducheon  11B20401
4      99  126.76648  37.88589     파주          Paju  11B20305
