In [2]:
import pandas as pd

# 파일 경로
train_path = "/Users/eunzinri/Downloads/next-day-air-temperature-forecast-challenge/train_dataset.csv"
station_path = "/Users/eunzinri/Downloads/next-day-air-temperature-forecast-challenge/station_info.csv"

# CSV 파일 불러오기
train_df = pd.read_csv(train_path)
station_df = pd.read_csv(station_path)

# 데이터프레임 크기와 간단한 정보 출력
train_df.info(), station_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13132 entries, 0 to 13131
Columns: 342 entries, id to target
dtypes: float64(338), int64(2), object(2)
memory usage: 34.3+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   지점                9 non-null      int64  
 1   시작일               9 non-null      object 
 2   종료일               1 non-null      object 
 3   지점명               9 non-null      object 
 4   위도                9 non-null      float64
 5   경도                9 non-null      float64
 6   노장해발고도(m)         9 non-null      float64
 7   기압계(관측장비지상높이(m))  9 non-null      float64
 8   기온계(관측장비지상높이(m))  9 non-null      float64
 9   풍속계(관측장비지상높이(m))  9 non-null      float64
 10  강우계(관측장비지상높이(m))  9 non-null      float64
dtypes: float64(7), int64(1), object(3)
memory usage: 924.0+ bytes


(None, None)

In [4]:
import numpy as np

def preprocess_data(train_df, station_df):
    df = train_df.copy()
    
    # 1. -9999를 NaN으로 변환
    df.replace(-9999, np.nan, inplace=True)

    # 2. 날짜 분리 (month, day)
    df[['month', 'day']] = df['date'].str.split('-', expand=True).astype(int)
    df.drop(columns=['date'], inplace=True)

    # 3. 결측치 처리
    # - 자연적인 결측치로 보이는 건 0으로
    for col in df.columns:
        if 'sunshine_duration' in col or 'snow_depth' in col:
            df[col].fillna(0, inplace=True)
    
    # - 나머지는 평균값으로 (또는 향후 보간 가능)
    df.fillna(df.mean(numeric_only=True), inplace=True)

    # 4. 관측소 정보 병합
    station_df_renamed = station_df.rename(columns={
        '지점': 'station',
        '위도': 'latitude',
        '경도': 'longitude',
        '노장해발고도(m)': 'altitude'
    })[['station', 'latitude', 'longitude', 'altitude']]

    df = df.merge(station_df_renamed, on='station', how='left')

    # 5. 범주형 처리
    df = pd.get_dummies(df, columns=['station_name'], drop_first=True)

    # 6. 불필요 컬럼 제거
    df.drop(columns=['id'], inplace=True)

    return df

# 전처리 적용
processed_df = preprocess_data(train_df, station_df)
processed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13132 entries, 0 to 13131
Columns: 349 entries, station to station_name_인천
dtypes: bool(5), float64(341), int64(3)
memory usage: 34.5 MB


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(0, inplace=True)


In [26]:
processed_df.head

<bound method NDFrame.head of        station  cloud_cover_0  cloud_cover_1  cloud_cover_10  cloud_cover_11  \
0           98            0.0            0.0             9.0             0.0   
1           98            0.0            0.0             0.0             0.0   
2           98            0.0            0.0             0.0             0.0   
3           98            0.0            0.0             2.0             0.0   
4           98            0.0            0.0             0.0             0.0   
...        ...            ...            ...             ...             ...   
13127      203            2.0            4.0             0.0             0.0   
13128      203            0.0            0.0             0.0             0.0   
13129      203            4.0            1.0             0.0             0.0   
13130      203            9.0            9.0             0.0             0.0   
13131      203            6.0            3.0             1.0             2.0   

       cl

In [36]:
processed_df.dtypes

station               int64
cloud_cover_0       float64
cloud_cover_1       float64
cloud_cover_10      float64
cloud_cover_11      float64
                     ...   
station_name_동두천       bool
station_name_서울        bool
station_name_양평        bool
station_name_이천        bool
station_name_인천        bool
Length: 349, dtype: object

In [40]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.0-py3-none-macosx_12_0_arm64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.0-py3-none-macosx_12_0_arm64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-3.0.0


In [42]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

# 전처리된 데이터 불러온 후 float32로 변환
df = processed_df

X = df.drop(columns=['target'])
y = df['target']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBRegressor(
    n_estimators=200,
    learning_rate=0.05,
    max_depth=6,
    random_state=42,
    tree_method='hist'
)

model.fit(X_train, y_train)
y_pred = model.predict(X_val)

rmse = mean_squared_error(y_val, y_pred, squared=False)
print(f'✅ 검증 RMSE: {rmse:.4f} °C')

✅ 검증 RMSE: 1.5064 °C




In [52]:
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.03,
    max_depth=8,
    random_state=42,
    tree_method='exact'
)

model.fit(X_train, y_train)
y_pred = model.predict(X_val)

rmse = mean_squared_error(y_val, y_pred, squared=False)
print(f'✅ 검증 RMSE: {rmse:.4f} °C')

✅ 검증 RMSE: 1.4610 °C




In [56]:
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import numpy as np
from xgboost import XGBRegressor

def run_kfold_xgb(X, y, n_splits=10):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    rmses = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
        print(f"🌱 Fold {fold + 1}")
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        model = XGBRegressor(
            n_estimators=300,
            learning_rate=0.03,
            max_depth=8,
            random_state=42,
            tree_method="hist"
        )
        model.fit(X_train, y_train)
        preds = model.predict(X_val)
        rmse = mean_squared_error(y_val, preds, squared=False)
        print(f"  📏 RMSE: {rmse:.4f}")
        rmses.append(rmse)

    print(f"\n✅ 평균 RMSE: {np.mean(rmses):.4f} ± {np.std(rmses):.4f}")
    return rmses

run_kfold_xgb(X, y)


🌱 Fold 1




  📏 RMSE: 1.4307
🌱 Fold 2




  📏 RMSE: 1.4608
🌱 Fold 3




  📏 RMSE: 1.4782
🌱 Fold 4




  📏 RMSE: 1.4307
🌱 Fold 5




  📏 RMSE: 1.4427
🌱 Fold 6




  📏 RMSE: 1.4287
🌱 Fold 7




  📏 RMSE: 1.4384
🌱 Fold 8




  📏 RMSE: 1.4281
🌱 Fold 9




  📏 RMSE: 1.3588
🌱 Fold 10
  📏 RMSE: 1.4092

✅ 평균 RMSE: 1.4306 ± 0.0300




[1.4306703259824431,
 1.4607670595603344,
 1.4782299193393587,
 1.430660599042766,
 1.4427151492529406,
 1.4286525903657676,
 1.4383742295955755,
 1.4280609553825783,
 1.358849479984743,
 1.4092150595239676]

In [58]:
final_model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.03,
    max_depth=8,
    random_state=42,
    tree_method="hist"
)

# 💥 전부 다 학습에 씀
final_model.fit(X, y)

In [76]:
test_path = "/Users/eunzinri/Downloads/next-day-air-temperature-forecast-challenge/test_dataset.csv"
test_df = pd.read_csv(test_path)

In [78]:
processed_df = preprocess_data(test_df, station_df)
processed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6570 entries, 0 to 6569
Columns: 344 entries, station to station_name_파주
dtypes: bool(1), float64(340), int64(3)
memory usage: 17.2 MB


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(0, inplace=True)


In [80]:
processed_df.head()

Unnamed: 0,station,cloud_cover_0,cloud_cover_1,cloud_cover_10,cloud_cover_11,cloud_cover_12,cloud_cover_13,cloud_cover_14,cloud_cover_15,cloud_cover_16,...,wind_speed_7,wind_speed_8,wind_speed_9,climatology_temp,month,day,latitude,longitude,altitude,station_name_파주
0,99,0.0,0.0,0.0,1.0,3.0,6.0,9.0,10.0,9.0,...,0.9,1.0,2.0,23.964286,6,26,37.8859,126.7665,30.59,True
1,119,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1,0.1,0.3,-2.776786,1,11,37.2575,126.983,39.81,False
2,119,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.1,0.1,0.3,-2.776786,1,11,37.2723,126.9853,34.84,False
3,119,5.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.7,1.4,1.6,2.1,2,23,37.2575,126.983,39.81,False
4,119,5.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.7,1.4,1.6,2.1,2,23,37.2723,126.9853,34.84,False


In [74]:
prediction = final_model.predict(df)

ValueError: feature_names mismatch: ['station', 'cloud_cover_0', 'cloud_cover_1', 'cloud_cover_10', 'cloud_cover_11', 'cloud_cover_12', 'cloud_cover_13', 'cloud_cover_14', 'cloud_cover_15', 'cloud_cover_16', 'cloud_cover_17', 'cloud_cover_18', 'cloud_cover_19', 'cloud_cover_2', 'cloud_cover_20', 'cloud_cover_21', 'cloud_cover_22', 'cloud_cover_23', 'cloud_cover_3', 'cloud_cover_4', 'cloud_cover_5', 'cloud_cover_6', 'cloud_cover_7', 'cloud_cover_8', 'cloud_cover_9', 'dew_point_0', 'dew_point_1', 'dew_point_10', 'dew_point_11', 'dew_point_12', 'dew_point_13', 'dew_point_14', 'dew_point_15', 'dew_point_16', 'dew_point_17', 'dew_point_18', 'dew_point_19', 'dew_point_2', 'dew_point_20', 'dew_point_21', 'dew_point_22', 'dew_point_23', 'dew_point_3', 'dew_point_4', 'dew_point_5', 'dew_point_6', 'dew_point_7', 'dew_point_8', 'dew_point_9', 'humidity_0', 'humidity_1', 'humidity_10', 'humidity_11', 'humidity_12', 'humidity_13', 'humidity_14', 'humidity_15', 'humidity_16', 'humidity_17', 'humidity_18', 'humidity_19', 'humidity_2', 'humidity_20', 'humidity_21', 'humidity_22', 'humidity_23', 'humidity_3', 'humidity_4', 'humidity_5', 'humidity_6', 'humidity_7', 'humidity_8', 'humidity_9', 'local_pressure_0', 'local_pressure_1', 'local_pressure_10', 'local_pressure_11', 'local_pressure_12', 'local_pressure_13', 'local_pressure_14', 'local_pressure_15', 'local_pressure_16', 'local_pressure_17', 'local_pressure_18', 'local_pressure_19', 'local_pressure_2', 'local_pressure_20', 'local_pressure_21', 'local_pressure_22', 'local_pressure_23', 'local_pressure_3', 'local_pressure_4', 'local_pressure_5', 'local_pressure_6', 'local_pressure_7', 'local_pressure_8', 'local_pressure_9', 'min_cloud_height_0', 'min_cloud_height_1', 'min_cloud_height_10', 'min_cloud_height_11', 'min_cloud_height_12', 'min_cloud_height_13', 'min_cloud_height_14', 'min_cloud_height_15', 'min_cloud_height_16', 'min_cloud_height_17', 'min_cloud_height_18', 'min_cloud_height_19', 'min_cloud_height_2', 'min_cloud_height_20', 'min_cloud_height_21', 'min_cloud_height_22', 'min_cloud_height_23', 'min_cloud_height_3', 'min_cloud_height_4', 'min_cloud_height_5', 'min_cloud_height_6', 'min_cloud_height_7', 'min_cloud_height_8', 'min_cloud_height_9', 'precipitation_0', 'precipitation_1', 'precipitation_10', 'precipitation_11', 'precipitation_12', 'precipitation_13', 'precipitation_14', 'precipitation_15', 'precipitation_16', 'precipitation_17', 'precipitation_18', 'precipitation_19', 'precipitation_2', 'precipitation_20', 'precipitation_21', 'precipitation_22', 'precipitation_23', 'precipitation_3', 'precipitation_4', 'precipitation_5', 'precipitation_6', 'precipitation_7', 'precipitation_8', 'precipitation_9', 'sea_level_pressure_0', 'sea_level_pressure_1', 'sea_level_pressure_10', 'sea_level_pressure_11', 'sea_level_pressure_12', 'sea_level_pressure_13', 'sea_level_pressure_14', 'sea_level_pressure_15', 'sea_level_pressure_16', 'sea_level_pressure_17', 'sea_level_pressure_18', 'sea_level_pressure_19', 'sea_level_pressure_2', 'sea_level_pressure_20', 'sea_level_pressure_21', 'sea_level_pressure_22', 'sea_level_pressure_23', 'sea_level_pressure_3', 'sea_level_pressure_4', 'sea_level_pressure_5', 'sea_level_pressure_6', 'sea_level_pressure_7', 'sea_level_pressure_8', 'sea_level_pressure_9', 'snow_depth_0', 'snow_depth_1', 'snow_depth_10', 'snow_depth_11', 'snow_depth_12', 'snow_depth_13', 'snow_depth_14', 'snow_depth_15', 'snow_depth_16', 'snow_depth_17', 'snow_depth_18', 'snow_depth_19', 'snow_depth_2', 'snow_depth_20', 'snow_depth_21', 'snow_depth_22', 'snow_depth_23', 'snow_depth_3', 'snow_depth_4', 'snow_depth_5', 'snow_depth_6', 'snow_depth_7', 'snow_depth_8', 'snow_depth_9', 'sunshine_duration_0', 'sunshine_duration_1', 'sunshine_duration_10', 'sunshine_duration_11', 'sunshine_duration_12', 'sunshine_duration_13', 'sunshine_duration_14', 'sunshine_duration_15', 'sunshine_duration_16', 'sunshine_duration_17', 'sunshine_duration_18', 'sunshine_duration_19', 'sunshine_duration_2', 'sunshine_duration_20', 'sunshine_duration_21', 'sunshine_duration_22', 'sunshine_duration_23', 'sunshine_duration_3', 'sunshine_duration_4', 'sunshine_duration_5', 'sunshine_duration_6', 'sunshine_duration_7', 'sunshine_duration_8', 'sunshine_duration_9', 'surface_temp_0', 'surface_temp_1', 'surface_temp_10', 'surface_temp_11', 'surface_temp_12', 'surface_temp_13', 'surface_temp_14', 'surface_temp_15', 'surface_temp_16', 'surface_temp_17', 'surface_temp_18', 'surface_temp_19', 'surface_temp_2', 'surface_temp_20', 'surface_temp_21', 'surface_temp_22', 'surface_temp_23', 'surface_temp_3', 'surface_temp_4', 'surface_temp_5', 'surface_temp_6', 'surface_temp_7', 'surface_temp_8', 'surface_temp_9', 'vapor_pressure_0', 'vapor_pressure_1', 'vapor_pressure_10', 'vapor_pressure_11', 'vapor_pressure_12', 'vapor_pressure_13', 'vapor_pressure_14', 'vapor_pressure_15', 'vapor_pressure_16', 'vapor_pressure_17', 'vapor_pressure_18', 'vapor_pressure_19', 'vapor_pressure_2', 'vapor_pressure_20', 'vapor_pressure_21', 'vapor_pressure_22', 'vapor_pressure_23', 'vapor_pressure_3', 'vapor_pressure_4', 'vapor_pressure_5', 'vapor_pressure_6', 'vapor_pressure_7', 'vapor_pressure_8', 'vapor_pressure_9', 'visibility_0', 'visibility_1', 'visibility_10', 'visibility_11', 'visibility_12', 'visibility_13', 'visibility_14', 'visibility_15', 'visibility_16', 'visibility_17', 'visibility_18', 'visibility_19', 'visibility_2', 'visibility_20', 'visibility_21', 'visibility_22', 'visibility_23', 'visibility_3', 'visibility_4', 'visibility_5', 'visibility_6', 'visibility_7', 'visibility_8', 'visibility_9', 'wind_direction_0', 'wind_direction_1', 'wind_direction_10', 'wind_direction_11', 'wind_direction_12', 'wind_direction_13', 'wind_direction_14', 'wind_direction_15', 'wind_direction_16', 'wind_direction_17', 'wind_direction_18', 'wind_direction_19', 'wind_direction_2', 'wind_direction_20', 'wind_direction_21', 'wind_direction_22', 'wind_direction_23', 'wind_direction_3', 'wind_direction_4', 'wind_direction_5', 'wind_direction_6', 'wind_direction_7', 'wind_direction_8', 'wind_direction_9', 'wind_speed_0', 'wind_speed_1', 'wind_speed_10', 'wind_speed_11', 'wind_speed_12', 'wind_speed_13', 'wind_speed_14', 'wind_speed_15', 'wind_speed_16', 'wind_speed_17', 'wind_speed_18', 'wind_speed_19', 'wind_speed_2', 'wind_speed_20', 'wind_speed_21', 'wind_speed_22', 'wind_speed_23', 'wind_speed_3', 'wind_speed_4', 'wind_speed_5', 'wind_speed_6', 'wind_speed_7', 'wind_speed_8', 'wind_speed_9', 'climatology_temp', 'month', 'day', 'latitude', 'longitude', 'altitude', 'station_name_동두천', 'station_name_서울', 'station_name_양평', 'station_name_이천', 'station_name_인천'] ['station', 'cloud_cover_0', 'cloud_cover_1', 'cloud_cover_10', 'cloud_cover_11', 'cloud_cover_12', 'cloud_cover_13', 'cloud_cover_14', 'cloud_cover_15', 'cloud_cover_16', 'cloud_cover_17', 'cloud_cover_18', 'cloud_cover_19', 'cloud_cover_2', 'cloud_cover_20', 'cloud_cover_21', 'cloud_cover_22', 'cloud_cover_23', 'cloud_cover_3', 'cloud_cover_4', 'cloud_cover_5', 'cloud_cover_6', 'cloud_cover_7', 'cloud_cover_8', 'cloud_cover_9', 'dew_point_0', 'dew_point_1', 'dew_point_10', 'dew_point_11', 'dew_point_12', 'dew_point_13', 'dew_point_14', 'dew_point_15', 'dew_point_16', 'dew_point_17', 'dew_point_18', 'dew_point_19', 'dew_point_2', 'dew_point_20', 'dew_point_21', 'dew_point_22', 'dew_point_23', 'dew_point_3', 'dew_point_4', 'dew_point_5', 'dew_point_6', 'dew_point_7', 'dew_point_8', 'dew_point_9', 'humidity_0', 'humidity_1', 'humidity_10', 'humidity_11', 'humidity_12', 'humidity_13', 'humidity_14', 'humidity_15', 'humidity_16', 'humidity_17', 'humidity_18', 'humidity_19', 'humidity_2', 'humidity_20', 'humidity_21', 'humidity_22', 'humidity_23', 'humidity_3', 'humidity_4', 'humidity_5', 'humidity_6', 'humidity_7', 'humidity_8', 'humidity_9', 'local_pressure_0', 'local_pressure_1', 'local_pressure_10', 'local_pressure_11', 'local_pressure_12', 'local_pressure_13', 'local_pressure_14', 'local_pressure_15', 'local_pressure_16', 'local_pressure_17', 'local_pressure_18', 'local_pressure_19', 'local_pressure_2', 'local_pressure_20', 'local_pressure_21', 'local_pressure_22', 'local_pressure_23', 'local_pressure_3', 'local_pressure_4', 'local_pressure_5', 'local_pressure_6', 'local_pressure_7', 'local_pressure_8', 'local_pressure_9', 'min_cloud_height_0', 'min_cloud_height_1', 'min_cloud_height_10', 'min_cloud_height_11', 'min_cloud_height_12', 'min_cloud_height_13', 'min_cloud_height_14', 'min_cloud_height_15', 'min_cloud_height_16', 'min_cloud_height_17', 'min_cloud_height_18', 'min_cloud_height_19', 'min_cloud_height_2', 'min_cloud_height_20', 'min_cloud_height_21', 'min_cloud_height_22', 'min_cloud_height_23', 'min_cloud_height_3', 'min_cloud_height_4', 'min_cloud_height_5', 'min_cloud_height_6', 'min_cloud_height_7', 'min_cloud_height_8', 'min_cloud_height_9', 'precipitation_0', 'precipitation_1', 'precipitation_10', 'precipitation_11', 'precipitation_12', 'precipitation_13', 'precipitation_14', 'precipitation_15', 'precipitation_16', 'precipitation_17', 'precipitation_18', 'precipitation_19', 'precipitation_2', 'precipitation_20', 'precipitation_21', 'precipitation_22', 'precipitation_23', 'precipitation_3', 'precipitation_4', 'precipitation_5', 'precipitation_6', 'precipitation_7', 'precipitation_8', 'precipitation_9', 'sea_level_pressure_0', 'sea_level_pressure_1', 'sea_level_pressure_10', 'sea_level_pressure_11', 'sea_level_pressure_12', 'sea_level_pressure_13', 'sea_level_pressure_14', 'sea_level_pressure_15', 'sea_level_pressure_16', 'sea_level_pressure_17', 'sea_level_pressure_18', 'sea_level_pressure_19', 'sea_level_pressure_2', 'sea_level_pressure_20', 'sea_level_pressure_21', 'sea_level_pressure_22', 'sea_level_pressure_23', 'sea_level_pressure_3', 'sea_level_pressure_4', 'sea_level_pressure_5', 'sea_level_pressure_6', 'sea_level_pressure_7', 'sea_level_pressure_8', 'sea_level_pressure_9', 'snow_depth_0', 'snow_depth_1', 'snow_depth_10', 'snow_depth_11', 'snow_depth_12', 'snow_depth_13', 'snow_depth_14', 'snow_depth_15', 'snow_depth_16', 'snow_depth_17', 'snow_depth_18', 'snow_depth_19', 'snow_depth_2', 'snow_depth_20', 'snow_depth_21', 'snow_depth_22', 'snow_depth_23', 'snow_depth_3', 'snow_depth_4', 'snow_depth_5', 'snow_depth_6', 'snow_depth_7', 'snow_depth_8', 'snow_depth_9', 'sunshine_duration_0', 'sunshine_duration_1', 'sunshine_duration_10', 'sunshine_duration_11', 'sunshine_duration_12', 'sunshine_duration_13', 'sunshine_duration_14', 'sunshine_duration_15', 'sunshine_duration_16', 'sunshine_duration_17', 'sunshine_duration_18', 'sunshine_duration_19', 'sunshine_duration_2', 'sunshine_duration_20', 'sunshine_duration_21', 'sunshine_duration_22', 'sunshine_duration_23', 'sunshine_duration_3', 'sunshine_duration_4', 'sunshine_duration_5', 'sunshine_duration_6', 'sunshine_duration_7', 'sunshine_duration_8', 'sunshine_duration_9', 'surface_temp_0', 'surface_temp_1', 'surface_temp_10', 'surface_temp_11', 'surface_temp_12', 'surface_temp_13', 'surface_temp_14', 'surface_temp_15', 'surface_temp_16', 'surface_temp_17', 'surface_temp_18', 'surface_temp_19', 'surface_temp_2', 'surface_temp_20', 'surface_temp_21', 'surface_temp_22', 'surface_temp_23', 'surface_temp_3', 'surface_temp_4', 'surface_temp_5', 'surface_temp_6', 'surface_temp_7', 'surface_temp_8', 'surface_temp_9', 'vapor_pressure_0', 'vapor_pressure_1', 'vapor_pressure_10', 'vapor_pressure_11', 'vapor_pressure_12', 'vapor_pressure_13', 'vapor_pressure_14', 'vapor_pressure_15', 'vapor_pressure_16', 'vapor_pressure_17', 'vapor_pressure_18', 'vapor_pressure_19', 'vapor_pressure_2', 'vapor_pressure_20', 'vapor_pressure_21', 'vapor_pressure_22', 'vapor_pressure_23', 'vapor_pressure_3', 'vapor_pressure_4', 'vapor_pressure_5', 'vapor_pressure_6', 'vapor_pressure_7', 'vapor_pressure_8', 'vapor_pressure_9', 'visibility_0', 'visibility_1', 'visibility_10', 'visibility_11', 'visibility_12', 'visibility_13', 'visibility_14', 'visibility_15', 'visibility_16', 'visibility_17', 'visibility_18', 'visibility_19', 'visibility_2', 'visibility_20', 'visibility_21', 'visibility_22', 'visibility_23', 'visibility_3', 'visibility_4', 'visibility_5', 'visibility_6', 'visibility_7', 'visibility_8', 'visibility_9', 'wind_direction_0', 'wind_direction_1', 'wind_direction_10', 'wind_direction_11', 'wind_direction_12', 'wind_direction_13', 'wind_direction_14', 'wind_direction_15', 'wind_direction_16', 'wind_direction_17', 'wind_direction_18', 'wind_direction_19', 'wind_direction_2', 'wind_direction_20', 'wind_direction_21', 'wind_direction_22', 'wind_direction_23', 'wind_direction_3', 'wind_direction_4', 'wind_direction_5', 'wind_direction_6', 'wind_direction_7', 'wind_direction_8', 'wind_direction_9', 'wind_speed_0', 'wind_speed_1', 'wind_speed_10', 'wind_speed_11', 'wind_speed_12', 'wind_speed_13', 'wind_speed_14', 'wind_speed_15', 'wind_speed_16', 'wind_speed_17', 'wind_speed_18', 'wind_speed_19', 'wind_speed_2', 'wind_speed_20', 'wind_speed_21', 'wind_speed_22', 'wind_speed_23', 'wind_speed_3', 'wind_speed_4', 'wind_speed_5', 'wind_speed_6', 'wind_speed_7', 'wind_speed_8', 'wind_speed_9', 'climatology_temp', 'month', 'day', 'latitude', 'longitude', 'altitude', 'station_name_파주']
expected station_name_인천, station_name_동두천, station_name_양평, station_name_서울, station_name_이천 in input data
training data did not have the following fields: station_name_파주