In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from IPython.display import display
from datetime import datetime

# District name -> code sent as the "schLocale" query parameter.
# Insertion order is the order in which districts are fetched.
regions = {
    "남구": "5",
    "부산진구": "8",
    "해운대구": "18",
    "동래구": "7",
    "사하구": "12",
    "북구": "10",
    "사상구": "11",
}

# Fixed reference date for the survey (chosen to represent Q4 2020).
target_date = "2020-10-15"

# Data collection function
def fetch_region_data(region_name, region_code, date, timeout=10):
    """Scrape one district's price table for a given date.

    Requests the price page on www.busan.go.kr, locates the
    ``<table class="boardList">`` element and converts each data row into a
    list.

    Args:
        region_name: District label stored in the last field of every
            returned row.
        region_code: Value sent as the ``schLocale`` query parameter.
        date: Value sent as the ``schDate`` query parameter; also stored in
            the first field of every returned row.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            raises instead of blocking forever.

    Returns:
        A list of rows shaped
        ``[date, item_name, spec, lowest_price, highest_price, avg_price,
        region_name]``. ``lowest_price`` and ``highest_price`` are ints;
        ``avg_price`` is left as the text scraped from the page. The list is
        empty when the table is not present in the response.

    Raises:
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    url = "https://www.busan.go.kr/depart/abnecessity01"
    params = {
        "isMart": "true",
        "kind": "",
        "schDate": date,
        "schCode": "0",
        "schLocale": region_code,
        "schMart": "0",
    }

    response = requests.get(url, params=params, timeout=timeout)
    # Force UTF-8 decoding rather than relying on requests' charset guess.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", class_="boardList")
    if table is None:
        return []

    region_data = []
    for row in table.find_all("tr"):
        cells = [td.get_text(strip=True) for td in row.find_all("td")]
        # Rows with fewer than five <td> cells (e.g. header rows) are skipped.
        if len(cells) < 5:
            continue

        item_name, spec, lowest_raw, highest_raw, avg_price = cells[:5]
        try:
            # Prices may carry thousands separators such as "1,500".
            lowest_price = int(lowest_raw.replace(",", ""))
            highest_price = int(highest_raw.replace(",", ""))
        except ValueError:
            # A non-numeric price cell means the row is unusable; skip it.
            continue

        region_data.append(
            [date, item_name, spec, lowest_price, highest_price, avg_price, region_name]
        )
    return region_data

# Collect the rows of every district for the fixed target date.
all_data = []
for region_name, region_code in regions.items():
    all_data += fetch_region_data(region_name, region_code, target_date)

# Build the DataFrame.
columns = ["날짜", "품목", "규격", "최저가", "최고가", "금주평균가", "지역"]
df = pd.DataFrame(all_data, columns=columns)

# Per item, select the row with the smallest "최저가" and the row with the
# largest "최고가".
by_item = df.groupby("품목")
cheapest_rows = df.loc[by_item["최저가"].idxmin()]
priciest_rows = df.loc[by_item["최고가"].idxmax()]

# Keep the needed columns and rename the district column so the two frames
# can be merged without a name clash.
min_df = (
    cheapest_rows[["품목", "규격", "최저가", "지역", "금주평균가"]]
    .reset_index(drop=True)
    .rename(columns={"지역": "최저가_지역"})
)
max_df = (
    priciest_rows[["품목", "최고가", "지역"]]
    .reset_index(drop=True)
    .rename(columns={"지역": "최고가_지역"})
)

# Merge on the item name, then derive the price gap and its ratio to the
# lowest price (formatted as a percentage string).
merged_df = min_df.merge(max_df, on="품목")
price_gap = merged_df["최고가"] - merged_df["최저가"]
merged_df["가격차이"] = price_gap
gap_ratio = price_gap / merged_df["최저가"] * 100
merged_df["차이비율(%)"] = gap_ratio.round(2).astype(str) + "%"

# Add the date column.
merged_df["날짜"] = target_date

# Arrange the columns and show the result.
output_columns = [
    "날짜",
    "품목",
    "규격",
    "최저가",
    "최저가_지역",
    "최고가",
    "최고가_지역",
    "가격차이",
    "차이비율(%)",
    "금주평균가",
]
merged_df = merged_df[output_columns]
display(merged_df)

# Save (optional).
merged_df.to_csv("2020_total_4q.csv", index=False, encoding="utf-8-sig")

Unnamed: 0,날짜,품목,규격,최저가,최저가_지역,최고가,최고가_지역,가격차이,차이비율(%),금주평균가
0,2020-10-15,가루비누,"세탁용세제(LG테크),가루형,3.0kg",5175,사상구,14850,해운대구,9675,186.96%,8550
1,2020-10-15,간장,오복왕표 0.9ℓ 1병,5200,북구,11900,사상구,6700,128.85%,5775
2,2020-10-15,갈치,60cm정도 500g 1마리(냉동),5000,북구,18000,북구,13000,260.0%,11500
3,2020-10-15,고등어,30㎝정도 500g 1마리,2683,해운대구,5990,사하구,3307,123.26%,3637
4,2020-10-15,달걀,오경슈퍼란60g정도 10개,2650,북구,3290,부산진구,640,24.15%,2815
5,2020-10-15,닭고기,육계 1.0㎏,4280,동래구,9680,사상구,5400,126.17%,6190
6,2020-10-15,대파,1.0㎏,3490,사상구,6458,남구,2968,85.04%,4233
7,2020-10-15,돼지고기,삼겹살 500g,8900,동래구,16950,부산진구,8050,90.45%,9400
8,2020-10-15,두부,500g 판두부(국산포장두부 420g) 1모,1500,북구,4879,해운대구,3379,225.27%,3016
9,2020-10-15,라면,신라면 120g 1봉지,676,남구,3380,사상구,2704,400.0%,676
