In [1]:
import warnings

import pandas as pd

In [2]:
# Raw CSV column header -> shorter column name used throughout this notebook.
_column_renames = [
    ("일시", "날짜"),                # date
    ("평균기온(°C)", "평균 기온"),    # mean temperature
    ("최저기온(°C)", "최저 기온"),    # minimum temperature
    ("최고기온(°C)", "최고 기온"),    # maximum temperature
    ("일강수량(mm)", "강수량"),       # daily precipitation
    ("평균 풍속(m/s)", "풍속"),       # mean wind speed
    ("평균 상대습도(%)", "습도"),     # mean relative humidity
    ("일 최심적설(cm)", "적설량"),    # daily maximum snow depth
]
rename_dict = dict(_column_renames)

In [3]:
# Read the CP949-encoded raw weather CSV, then shorten the column names via rename_dict.
df = pd.read_csv("./weather_csv/weather_raw_data.csv", encoding="cp949")
df = df.rename(columns=rename_dict)

In [None]:
# Parse the date column and coerce the measurement columns to numeric.
# With errors="coerce", unparseable values become NaT / NaN instead of raising.
numeric_columns = ["평균 기온", "최저 기온", "최고 기온", "강수량", "풍속", "습도", "적설량"]

df["날짜"] = pd.to_datetime(df["날짜"], errors="coerce")
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors="coerce")

In [5]:
# Missing-value handling: fill missing precipitation (강수량) and
# snow depth (적설량) with 0; other columns keep their NaNs.
df = df.fillna({"강수량": 0, "적설량": 0})

In [6]:
# Preview the first rows of the frame after the cleaning steps above.
df.head()

Unnamed: 0,지점,지점명,날짜,평균 기온,최저 기온,최고 기온,강수량,풍속,습도,적설량
0,90,속초,2020-01-01,0.8,-3.4,4.8,0.0,2.4,36.1,0.0
1,90,속초,2020-01-02,2.8,-2.1,6.6,0.0,2.3,38.6,0.0
2,90,속초,2020-01-03,4.7,0.8,8.5,0.0,2.2,28.9,0.0
3,90,속초,2020-01-04,4.0,1.1,8.1,0.0,2.2,30.1,0.0
4,90,속초,2020-01-05,3.1,-0.6,8.8,0.0,1.4,43.3,0.0


In [8]:
# Stations grouped by the region label they belong to. The grouping is
# flattened below into the station-name -> region lookup (region_map).
_stations_by_region = {
    # Gangwon
    "강원도": [
        "강릉", "대관령", "동해", "북강릉", "북춘천", "삼척", "속초", "영월",
        "원주", "인제", "정선군", "철원", "춘천", "태백", "홍천",
    ],
    # Gyeonggi
    "경기": ["동두천", "수원", "양평", "이천", "파주"],
    # Gyeongnam
    "경남": [
        "거제", "거창", "김해시", "남해", "밀양", "북창원", "산청", "양산시",
        "의령군", "진주", "창원", "통영", "함양군", "합천",
    ],
    # Gyeongbuk
    "경북": [
        "경주시", "구미", "문경", "봉화", "상주", "안동", "영덕", "영주",
        "영천", "울릉도", "울진", "의성", "청송군", "포항",
    ],
    # Metropolitan and special cities
    "광주": ["광주"],
    "대구": ["대구"],
    "대전": ["대전"],
    "부산": ["부산", "북부산"],
    "서울": ["서울"],
    "세종": ["세종"],
    "울산": ["울산"],
    "인천": ["강화", "백령도", "인천"],
    # Jeonnam
    "전남": [
        "강진군", "고흥", "광양시", "목포", "무안", "보성군", "순천", "여수",
        "영광군", "완도", "장흥", "주암", "진도군", "해남", "흑산도",
    ],
    # Jeonbuk
    "전북": [
        "고창", "고창군", "군산", "남원", "부안", "순창군", "임실", "장수",
        "전주", "정읍",
    ],
    # Jeju
    "제주도": ["고산", "서귀포", "성산", "제주"],
    # Chungnam
    "충남": ["금산", "보령", "부여", "서산", "천안", "홍성"],
    # Chungbuk
    "충북": ["보은", "서청주", "제천", "청주", "추풍령", "충주"],
}

# Invert the grouping: one entry per station, valued by its region label.
region_map = {
    station: region
    for region, stations in _stations_by_region.items()
    for station in stations
}


In [9]:
# Map each station name (지점명) to its region label (지역) via region_map.
df["지역"] = df["지점명"].map(region_map)

# Series.map yields NaN for any station name missing from region_map, and the
# station columns are dropped in a later cell. Report unmapped names here,
# while they can still be identified, instead of leaving silent NaN regions.
unmapped_stations = df.loc[df["지역"].isna(), "지점명"].dropna().unique()
if len(unmapped_stations) > 0:
    warnings.warn(
        f"{len(unmapped_stations)} station name(s) missing from region_map: "
        f"{list(unmapped_stations)}"
    )

In [10]:
# Preview the first rows to check the newly added 지역 (region) column.
df.head()

Unnamed: 0,지점,지점명,날짜,평균 기온,최저 기온,최고 기온,강수량,풍속,습도,적설량,지역
0,90,속초,2020-01-01,0.8,-3.4,4.8,0.0,2.4,36.1,0.0,강원도
1,90,속초,2020-01-02,2.8,-2.1,6.6,0.0,2.3,38.6,0.0,강원도
2,90,속초,2020-01-03,4.7,0.8,8.5,0.0,2.2,28.9,0.0,강원도
3,90,속초,2020-01-04,4.0,1.1,8.1,0.0,2.2,30.1,0.0,강원도
4,90,속초,2020-01-05,3.1,-0.6,8.8,0.0,1.4,43.3,0.0,강원도


In [11]:
# Drop the station code (지점) and station name (지점명) columns,
# then move the region column (지역) to the front.
df = df.drop(columns=["지점", "지점명"])
cols = ["지역"] + df.columns.drop("지역").tolist()
df = df[cols]

In [12]:
# Preview the first rows after dropping the station columns and moving 지역 to the front.
df.head()

Unnamed: 0,지역,날짜,평균 기온,최저 기온,최고 기온,강수량,풍속,습도,적설량
0,강원도,2020-01-01,0.8,-3.4,4.8,0.0,2.4,36.1,0.0
1,강원도,2020-01-02,2.8,-2.1,6.6,0.0,2.3,38.6,0.0
2,강원도,2020-01-03,4.7,0.8,8.5,0.0,2.2,28.9,0.0
3,강원도,2020-01-04,4.0,1.1,8.1,0.0,2.2,30.1,0.0
4,강원도,2020-01-05,3.1,-0.6,8.8,0.0,1.4,43.3,0.0


In [13]:
# Inspect the last rows of the frame.
df.tail()

Unnamed: 0,지역,날짜,평균 기온,최저 기온,최고 기온,강수량,풍속,습도,적설량
174831,부산,2024-12-27,1.9,-2.5,6.4,0.0,1.8,41.3,0.0
174832,부산,2024-12-28,1.3,-2.0,5.4,0.0,1.8,47.1,0.0
174833,부산,2024-12-29,1.6,-4.8,9.6,0.0,1.2,58.3,0.0
174834,부산,2024-12-30,6.0,-1.6,12.6,0.0,1.3,61.1,0.0
174835,부산,2024-12-31,5.2,-3.3,9.9,0.0,1.9,33.4,0.0
