<a href="https://colab.research.google.com/github/HB-enpitsu/garakuta/blob/main/covid19_ehime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
from urllib.parse import urljoin

In [3]:
import pandas as pd

In [4]:
# Make DataFrame.plot() draw with plotly instead of the default backend
pd.set_option("plotting.backend", "plotly")

# 定点医療機関からの患者報告数

## CSVのURLを抽出

In [5]:
# Web page that is downloaded and parsed below to locate the CSV download link
url = "https://www.pref.ehime.jp/h25500/kansen/covid19.html"

In [6]:
# Download the page. requests has no default timeout, so without one an
# unresponsive server would make this cell hang forever.
r = requests.get(url, timeout=30)
# Stop here on an HTTP error status instead of parsing an error page
r.raise_for_status()

# Parse the raw response bytes with the standard-library HTML parser
soup = BeautifulSoup(r.content, "html.parser")

In [7]:
# Extract the URL of the CSV with the trend in COVID-19 patients per sentinel
# medical institution: the first <a> directly inside a div.section whose href
# ends in ".csv".
csv_anchor = soup.select_one('div.section > a[href$=".csv"]')

# select_one returns None when nothing matches; fail with a clear message
# instead of an AttributeError on None.
if csv_anchor is None:
    raise ValueError(f"No CSV link found on {url}; the page layout may have changed")

href = csv_anchor.get("href")

In [8]:
# Resolve the (possibly relative) href against the page URL to get an absolute URL
link = urljoin(base=url, url=href)

In [9]:
def complete_year(se, year=2023):
    """Turn "month/day" strings into full dates by filling in the year.

    The rows are assumed to be in chronological order. The first row is
    assigned ``year``; each time the month number drops from one row to the
    next (e.g. 12 -> 1), the year is advanced by one for that row and every
    row after it.

    Parameters
    ----------
    se : pandas.Series
        Strings of the form "M/D".
    year : int, default 2023
        Year assigned to the first row.

    Returns
    -------
    pandas.Series
        Datetime values carrying the same index as ``se``.

    Raises
    ------
    ValueError
        If a month or day part cannot be converted to an integer.
    """
    # Nothing to parse: splitting an empty Series yields no columns, so the
    # column assignment below would fail.
    if se.empty:
        return pd.Series(index=se.index, dtype="datetime64[ns]")

    # Split each date into integer month and day columns
    df = se.str.split("/", expand=True).astype(int)
    df.columns = ["month", "day"]

    # A negative month-to-month difference marks a year rollover. diff() is
    # NaN on the first row and NaN < 0 is False, so the first row is never
    # counted and a single-row input works.
    rollover = df["month"].diff() < 0

    # Each rollover seen so far adds one to the starting year
    df["year"] = year + rollover.cumsum()

    # Assemble datetimes from the year/month/day columns
    return pd.to_datetime(df[["year", "month", "day"]])

## CSVの日付から年を補完

In [10]:
# Patient counts only: skip the first line of the file and read just the
# first ten columns. The file is decoded as cp932.
df0 = pd.read_csv(
    link,
    skiprows=1,
    usecols=range(10),
    encoding="cp932",
)

In [11]:
# Take the part after the "-" in each "日付" string, fill in the year
# (the first row is 2022) and store the result back as datetimes.
df0["日付"] = complete_year(
    df0["日付"].str.split("-").str.get(1),
    year=2022,
)

In [12]:
# Use the date as the index and discard the "週" (week) column
df1 = df0.set_index("日付").drop(columns=["週"])

## グラフ作成

In [13]:
# Draw the chart from df1 using the plotting backend configured near the top
# of the notebook (plotly)
df1.plot()

In [14]:
# Save the date-indexed table to covid19.csv in the current working directory
df1.to_csv("covid19.csv")

In [15]:
# Show the resulting table as the cell output
df1

Unnamed: 0_level_0,四国中央,西条,今治,松山市,中予,八幡浜,宇和島,県内合計
日付,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2022-10-02,23,152,24,79,22,16,42,358
2022-10-09,16,238,34,69,20,16,23,416
2022-10-16,11,217,42,42,28,19,32,391
2022-10-23,25,330,23,38,25,5,19,465
2022-10-30,33,425,44,49,28,5,42,626
2022-11-06,83,439,60,95,41,24,30,772
2022-11-13,77,385,46,120,56,34,34,752
2022-11-20,106,316,67,172,76,32,47,816
2022-11-27,70,279,64,190,102,30,95,830
2022-12-04,99,259,76,230,161,100,98,1023
