In [36]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import os

In [39]:
# Run from the project root so relative output paths (e.g. "obsData/") resolve there.
# The default is the original author's machine-specific location; set the
# FORECAST_CLOUD_DIR environment variable to point somewhere else on another machine.
PROJECT_DIR = os.environ.get(
    "FORECAST_CLOUD_DIR",
    "c:\\Users\\astro\\Documents\\Python_Project\\ForecastCloudinJapan",
)
os.chdir(PROJECT_DIR)

In [6]:
def make_soup(url:str, timeout:float=30.0) -> "BeautifulSoup":
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built from the raw response bytes with the
        built-in ``html.parser``.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.RequestException: Any other network failure, including a timeout.
    """
    res = requests.get(url, timeout=timeout)
    # Fail here rather than parsing an error page and breaking later on a missing table.
    res.raise_for_status()
    return BeautifulSoup(res.content,"html.parser")

In [43]:
# Date settings for the notebook.
# NOTE(review): year/month/day are not referenced by the cells visible here; the
# Scrape call further down passes its own literal date — confirm they are still needed.
year = 2023
month = 1
day = 1
# Endpoint on data.jma.go.jp; Scrape appends the station/date query string to it.
base_url = "https://www.data.jma.go.jp/obd/stats/etrn/view/hourly_s1.php"

# Fetches the bare endpoint, without any query parameters.
soup = make_soup(base_url)

In [46]:
class Scrape:
    """Scrape one day of hourly observations from a station page and export it as CSV.

    The constructor builds the page URL from the station identifiers and the
    date, then downloads and parses it immediately through ``make_soup``.

    Attributes:
        prec_no, block_no: Station identifiers placed in the query string.
        year, month, day: Date placed in the query string.
        url: Full URL that was requested.
        soup: Parsed page returned by ``make_soup``.
        cols: Column labels applied to the scraped cells, in cell order.
        astype_cols: Mapping of column label to the dtype ``ret_df`` casts it to.
    """

    # Labels for the <td> cells of one data row, in order.
    COLUMNS = (
        "時間","現地気圧","海面気圧","降水量","気温",
        "露点温度","蒸気圧","湿度","風速",
        "風向","日照時間","全天日射量","降雪","積雪",
        "天気","雲量","視程",
    )
    # Columns converted by ret_df(); every other column keeps its scraped values.
    DTYPES = {
        "時間":int, "現地気圧":float, "海面気圧":float,
        "気温":float, "露点温度":float,
        "蒸気圧":float, "湿度":int, "風速":float,
        "日照時間":float, "全天日射量":float, "視程":float,
    }

    def __init__(self,base_url:str,year:int,month:int,day:int,prec_no=44,block_no=47662):
        """Build the request URL and fetch the page.

        Args:
            base_url: Endpoint the query string is appended to.
            year, month, day: Date to request.
            prec_no, block_no: Station identifiers to request.
        """
        self.prec_no = prec_no
        self.block_no = block_no
        self.year = year
        self.month = month
        self.day = day
        self.url = base_url+f"?prec_no={prec_no}&block_no={block_no}&year={year}&month={month}&day={day}&view=p1"
        self.soup = make_soup(self.url)
        # Per-instance copies so callers can still adjust them without touching the class.
        self.cols = list(self.COLUMNS)
        self.astype_cols = dict(self.DTYPES)

    def get_raw(self,hour:int) -> list:
        """Return the cell texts of one table row.

        Args:
            hour: Zero-based row offset; 0 selects the third ``<tr>`` child of
                ``#tablefix1`` (the first two are presumably header rows).

        Returns:
            The text of every ``<td>`` in the selected row.

        Raises:
            IndexError: The page has no such row.
        """
        rows = self.soup.select(f"#tablefix1 > tr:nth-child({3+hour})")
        if not rows:
            # Same exception type as indexing an empty result, but with context.
            raise IndexError(f"no row for hour index {hour} in #tablefix1 at {self.url}")
        return [cell.text for cell in rows[0].find_all("td")]

    def get_all_raw(self) -> list:
        """Return the cell texts for row offsets 0 through 23, one list per row."""
        return [self.get_raw(hour) for hour in range(24)]

    def ret_df(self) -> pd.DataFrame:
        """Build a typed DataFrame from the 24 scraped rows.

        Cells equal to ``"--"`` or the empty string are replaced with ``0``
        before the columns listed in ``astype_cols`` are cast.

        NOTE(review): replacing blanks with 0 makes "not observed" and a real
        zero indistinguishable in the output — confirm this is intended.

        Raises:
            ValueError: A cell in a cast column is not parseable as a number.
        """
        # dtype=object keeps the cells as plain Python strings so the 0
        # replacement below does not depend on pandas' string dtype inference.
        scraped = pd.DataFrame(data=self.get_all_raw(),columns=self.cols,dtype=object)
        return scraped.replace({"--":0,"":0}).astype(self.astype_cols)

    def write2csv(self,path="obsData/"):
        """Write the day's table to ``<path>/<prec>_<block>/<year>/<month>/<day>/``.

        Args:
            path: Root output directory. It is joined with ``os.path.join``, so
                a trailing separator is optional.
        """
        # Build the frame first so a scraping failure leaves no empty directories.
        table = self.ret_df()

        station = f"{self.prec_no}_{self.block_no}"
        out_dir = os.path.join(path,station,str(self.year),str(self.month),str(self.day))
        os.makedirs(out_dir,exist_ok=True)

        file_name = f"{self.prec_no}_{self.block_no}_{self.year}_{self.month}_{self.day}.csv"
        table.to_csv(os.path.join(out_dir,file_name),index=False)

-----------

In [47]:
# Download and parse the page for the default station on 2022-01-01.
# NOTE(review): the configuration cell sets year = 2023, but this call hard-codes
# 2022 — confirm which date is intended.
sc = Scrape(base_url, year=2022, month=1, day=1)

In [48]:
# Export the scraped day under the default output root, relative to the working directory.
sc.write2csv(path="obsData/")