In [2]:
# 모듈 로딩
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib as plt
import seaborn as sns
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV

In [3]:
# Load the dataset: read the file at DATA_PATH as CSV into dataDF.
# NOTE(review): the path has no file extension — confirm it points at the
# intended CSV file.
DATA_PATH = './clean_data'
dataDF = pd.read_csv(DATA_PATH)

### train_test 나누기

In [4]:
# Display the loaded DataFrame for a visual check.
dataDF

Unnamed: 0,c_temp_pv,k_rpm_pv,n_temp_pv,scale_pv,s_temp_pv
0,69.6,189.0,67.2,3.01,67.1
1,69.8,189.0,67.2,3.01,67.0
2,69.7,189.0,67.9,3.08,65.9
3,69.7,189.0,67.8,3.08,65.9
4,69.7,189.0,67.8,3.08,65.9
...,...,...,...,...,...
26927,70.3,185.0,66.7,3.01,67.7
26928,70.4,185.0,66.8,3.03,67.3
26929,70.4,185.0,66.7,3.03,67.2
26930,70.4,185.0,66.7,3.06,66.9


In [5]:
# Separate the regression target column (scale_pv) from the remaining
# columns, which are used as the model inputs.
feature = dataDF.drop('scale_pv', axis='columns')
target = dataDF['scale_pv']

In [6]:
# Display the target Series (the scale_pv column) for a visual check.
target

0        3.01
1        3.01
2        3.08
3        3.08
4        3.08
         ... 
26927    3.01
26928    3.03
26929    3.03
26930    3.06
26931    3.05
Name: scale_pv, Length: 26932, dtype: float64

In [7]:
# Display the feature frame (dataDF without scale_pv) for a visual check.
feature

Unnamed: 0,c_temp_pv,k_rpm_pv,n_temp_pv,s_temp_pv
0,69.6,189.0,67.2,67.1
1,69.8,189.0,67.2,67.0
2,69.7,189.0,67.9,65.9
3,69.7,189.0,67.8,65.9
4,69.7,189.0,67.8,65.9
...,...,...,...,...
26927,70.3,185.0,66.7,67.7
26928,70.4,185.0,66.8,67.3
26929,70.4,185.0,66.7,67.2
26930,70.4,185.0,66.7,66.9


In [8]:
# Split features and target into train and test parts, holding out 20% of
# the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    feature,
    target,
    test_size=0.2,
    random_state=77,
)

### feature 데이터 스케일링
- StandardScaler, MinMaxScaler, RobustScaler 세 가지 스케일러를 각각 적용해 결과를 비교

In [29]:
### Scale the features, then fit and evaluate an XGBRegressor on them
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

def _pick_scaler(scaler_type):
    """Return a new, unfitted scaler instance for the given scaler name."""
    if scaler_type == 'standard':
        return StandardScaler()
    if scaler_type == 'minmax':
        return MinMaxScaler()
    if scaler_type == 'robust':
        return RobustScaler()
    raise ValueError("Invalid scaler_type. Choose from 'standard', 'minmax', or 'robust'.")


def _notebook_global(name):
    """Look up a notebook-level variable, raising NameError if it is missing."""
    try:
        return globals()[name]
    except KeyError:
        raise NameError(f"name '{name}' is not defined") from None


def run_model(X_train, X_test, scaler_type='standard', y_train=None, y_test=None):
    """Scale the features, fit an XGBRegressor and report test-set metrics.

    The scaler is fitted on the training features only; the test features are
    transformed with that fitted scaler. A default ``XGBRegressor()`` is then
    trained on the scaled training features and evaluated on the scaled test
    features. The metrics are printed and also returned.

    Parameters
    ----------
    X_train, X_test
        Training and test feature matrices.
    scaler_type : {'standard', 'minmax', 'robust'}
        Which scaler to apply (StandardScaler, MinMaxScaler or RobustScaler).
    y_train, y_test : optional
        Training and test targets. When omitted, the notebook-level
        variables ``y_train`` / ``y_test`` are used, which keeps existing
        ``run_model(X_train, X_test, scaler_type=...)`` calls working.
        Passing them explicitly is preferred because it removes the
        dependency on hidden notebook state.

    Returns
    -------
    dict
        ``{'scaler_type', 'MAE', 'MAPE_percent', 'R2'}`` where
        ``MAPE_percent`` is the MAPE multiplied by 100 (the same value that
        is printed) and ``R2`` is the value of the model's ``score`` method
        on the test set. In a notebook, assign the result or end the call
        with ``;`` if the returned dict should not be displayed.

    Raises
    ------
    ValueError
        If ``scaler_type`` is not one of the supported names.
    NameError
        If a target is omitted and the matching notebook-level variable
        does not exist.
    """
    # Validate the scaler name first so a typo fails before any fitting.
    scaler = _pick_scaler(scaler_type)

    # Backward compatibility: fall back to the notebook-level targets when
    # the caller did not pass them.
    if y_train is None:
        y_train = _notebook_global('y_train')
    if y_test is None:
        y_test = _notebook_global('y_test')

    # Fit on the training features only; reuse the fitted scaler for the test set.
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = XGBRegressor()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    mae = mean_absolute_error(y_test, y_pred)
    mape_percent = mean_absolute_percentage_error(y_test, y_pred) * 100
    r2 = model.score(X_test_scaled, y_test)

    print('*'*100)
    print(f'{scaler_type}')
    print(f'MAE => {mae}     MAPE => {mape_percent}    R2 => {r2}')
    print('*'*100)

    return {
        'scaler_type': scaler_type,
        'MAE': mae,
        'MAPE_percent': mape_percent,
        'R2': r2,
    }


In [30]:
# Run the same evaluation once per supported scaler.
for scaler_name in ('standard', 'minmax', 'robust'):
    run_model(X_train, X_test, scaler_type=scaler_name)

****************************************************************************************************
standard
MAE => 0.019765094157293814     MAPE => 0.6513736110969696    R2 => 0.2626733762119068
****************************************************************************************************
****************************************************************************************************
minmax
MAE => 0.01976039852838425     MAPE => 0.6512170578077816    R2 => 0.2628043722806831
****************************************************************************************************
****************************************************************************************************
robust
MAE => 0.019761168975787585     MAPE => 0.6512452561036146    R2 => 0.2627617232631735
****************************************************************************************************
