In [47]:
import platform
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

if platform.system() == "Darwin":           # MAC인 경우
    plt.rc('font', family ="AppleGothic")
else:
    plt.rc('font',family='Malgun Gothic')

#멧플로립에서 음수를 표시
plt.rc('axes',unicode_minus=False)

#워닝 메시지
import warnings
warnings.filterwarnings('ignore')

In [35]:
df = pd.read_csv("../해커톤/s_woo.csv", encoding='euc-kr')

X=df[['위안화원_시', '코스피시가', '달러위안화_시', 'WTI', '달러인덱스', '콜금리', '엔원_시']]
y=df['달러원종가']

In [36]:
X['위안화원_시'].mean()


174.98499095840867

In [37]:
X[X.index==3074]

Unnamed: 0,위안화원_시,코스피시가,달러위안화_시,WTI,달러인덱스,콜금리,엔원_시
3074,199.46,2331.330078,7.0873,79.73,127.17,2.55,9.8682


In [38]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

pipeline_1 = Pipeline([
    ('scaler', MinMaxScaler()),  # Min-Max scaler
    ('regressor', LinearRegression())  # Linear regression model
])

pipeline_1.fit(X_train,y_train)
y_pred=pipeline_1.predict(X_test)

coefficients = pipeline_1['regressor'].coef_
intercept = pipeline_1['regressor'].intercept_
mse= mean_squared_error(y_test,y_pred)
rmse=np.sqrt(mse)

print(f"MSE : {mse:.3f}, RMSE:{rmse:.3f}")
print(f"Variance score:{r2_score(y_test,y_pred):.3f}")
print("Coefficients (Weights):", coefficients)
print("Intercept (Bias):", intercept)

MSE : 50.915, RMSE:7.136
Variance score:0.991
Coefficients (Weights): [263.78029438   4.47231796 226.56583951  -0.83465662   1.69398467
   7.68538659  -2.98140162]
Intercept (Bias): 960.8086423643653


In [39]:
coef=pd.Series(data=np.round(pipeline_1['regressor'].coef_,2), index=X.columns)
coef

위안화원_시     263.78
코스피시가        4.47
달러위안화_시    226.57
WTI         -0.83
달러인덱스        1.69
콜금리          7.69
엔원_시        -2.98
dtype: float64

In [40]:

result = pd.DataFrame(data={'Y':y_test, 'Y_pred(LR)':y_pred,'diff(LR)':np.round(np.abs(y_test-y_pred),2)})
result.sort_values(by=['diff(LR)'], ascending=False)


Unnamed: 0,Y,Y_pred(LR),diff(LR)
102,1235.61,1197.922810,37.69
218,1164.50,1127.224313,37.28
93,1194.25,1162.984105,31.27
410,1107.15,1076.186835,30.96
416,1179.85,1149.767720,30.08
...,...,...,...
2813,1163.87,1163.817212,0.05
279,1125.35,1125.401075,0.05
1102,1024.75,1024.771914,0.02
70,1110.50,1110.477748,0.02


### 의사결정나무 기반 회귀 알고리즘 

In [62]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
# 이전 코드와 동일한 부분 (데이터 로딩 및 모델 리스트 생성)

df_1 = df.copy
X = df_1([['위안화원_시', '코스피시가', '달러위안화_시', 'WTI', '달러인덱스', '콜금리', '엔원_시']])
y=df_1(['달러원종가'])
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [69]:

forest_model = RandomForestRegressor(n_estimators=100, random_state=42)
lgb_model = LGBMRegressor(n_estimators=100, random_state=42)
xgb_model = XGBRegressor(n_estimators=100, random_state=42)

def printRegressorResult(y_test, y_pred):
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mape = np.mean(np.abs((np.array(y_test) - np.array(y_pred)) / np.array(y_test))) * 100
    print('MSE: {0:.3f}, RMSE: {1:.3f}, R-squared: {2:.3f}, MAE: {3:.3f}, MAPE: {4:.3f}%'.format(mse, rmse, r2, mae, mape))


In [70]:

model_list = [forest_model, lgb_model, xgb_model]
    

for model in model_list:
    model.fit(X_train, y_train)  # 모델 학습
    y_preds = model.predict(X_test)
    print('{0}모델'.format(model.__class__.__name__))
    printRegressorResult(y_test, y_preds)



RandomForestRegressor모델
MSE: 12.631, RMSE: 3.554, R-squared: 0.974, MAE: 1.374, MAPE: 1.916%


ValueError: y should be a 1d array, got an array of shape (2654, 8) instead.