<a href="https://colab.research.google.com/github/LDJ5098/machineLearning_class/blob/main/%EC%86%90%EC%8B%A4%ED%95%A8%EC%88%98(%ED%9A%8C%EA%B7%80).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Huber loss definition
def huber_loss(y_true, y_pred, delta=1.0):
    """Return the mean Huber loss between targets and predictions.

    A residual whose magnitude is at most ``delta`` contributes the quadratic
    term ``0.5 * r**2``; a larger residual contributes the linear term
    ``delta * (|r| - 0.5 * delta)``.

    Args:
        y_true: Array-like of ground-truth values (list, tuple, ndarray, ...).
        y_pred: Array-like of predictions, compared with ``y_true``
            element-wise by position.
        delta: Strictly positive threshold separating the quadratic and
            linear regimes.

    Returns:
        The mean of the element-wise losses as a NumPy float scalar.

    Raises:
        ValueError: If ``delta`` is not strictly positive.
    """
    # A non-positive delta would make every residual take the linear branch
    # and yield zero or negative "losses", so reject it up front.
    if np.any(np.asarray(delta) <= 0):
        raise ValueError("delta must be strictly positive")

    # Coerce to float arrays so plain Python sequences are accepted too;
    # subtracting two lists would otherwise raise TypeError.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    error = np.abs(y_true - y_pred)
    quadratic = 0.5 * error**2
    linear = delta * (error - 0.5 * delta)
    loss = np.where(error <= delta, quadratic, linear)
    return np.mean(loss)

# Load the perch feature table from the remote CSV and keep it as a NumPy array
df = pd.read_csv('http://bit.ly/perch_csv_data')
perch_full = df.to_numpy()

# Regression targets; there must be one entry per row of perch_full,
# otherwise train_test_split rejects the pair.
perch_weight = np.array(
    [
        5.9, 32.0, 40.0, 51.5, 70.0, 100.0, 78.0, 80.0,
        85.0, 85.0, 110.0, 115.0, 125.0, 130.0, 120.0, 120.0,
        130.0, 135.0, 110.0, 130.0, 150.0, 145.0, 150.0, 170.0,
        225.0, 145.0, 188.0, 180.0, 197.0, 218.0, 300.0, 260.0,
        265.0, 250.0, 250.0, 300.0, 320.0, 514.0, 556.0, 840.0,
        685.0, 700.0, 700.0, 690.0, 900.0, 650.0, 820.0, 850.0,
        900.0, 1015.0, 820.0, 1100.0, 1000.0, 1100.0, 1000.0, 1000.0,
    ]
)

# Split into train and test sets; the fixed seed keeps the split reproducible
train_input, test_input, train_target, test_target = train_test_split(
    perch_full,
    perch_weight,
    random_state=42,
)

# Add degree-2 polynomial features (no bias column). The transformer is
# fitted on the training inputs only and then reused for the test inputs.
poly = PolynomialFeatures(degree=2, include_bias=False)
train_poly = poly.fit_transform(train_input)
test_poly = poly.transform(test_input)

# Standardize with statistics learned from the training features only
ss = StandardScaler()
train_scaled = ss.fit_transform(train_poly)
test_scaled = ss.transform(test_poly)

# Fit the linear regression model on the scaled training features
lr = LinearRegression().fit(train_scaled, train_target)

# Predict on both splits
train_pred, test_pred = lr.predict(train_scaled), lr.predict(test_scaled)

# Compute MSE, MAE and Huber loss for the train and test splits
mse_train, mse_test = (
    mean_squared_error(train_target, train_pred),
    mean_squared_error(test_target, test_pred),
)

mae_train, mae_test = (
    mean_absolute_error(train_target, train_pred),
    mean_absolute_error(test_target, test_pred),
)

# delta (the Huber threshold) is set to 1.0 for both splits.
# NOTE(review): with delta=1.0 the Huber loss lies between MAE - 0.5 and MAE;
# saved output showing a much smaller Huber value was presumably produced
# with a smaller delta - re-run the cell to refresh it.
huber_train, huber_test = (
    huber_loss(train_target, train_pred, delta=1.0),
    huber_loss(test_target, test_pred, delta=1.0),
)

# Report every metric, one per line (R^2 comes from the model's own score())
print(
    f"Train R^2: {lr.score(train_scaled, train_target)}",
    f"Test R^2: {lr.score(test_scaled, test_target)}",
    f"Train MSE: {mse_train}",
    f"Test MSE: {mse_test}",
    f"Train MAE: {mae_train}",
    f"Test MAE: {mae_test}",
    f"Train Huber Loss: {huber_train}",
    f"Test Huber Loss: {huber_test}",
    sep="\n",
)


Train R^2: 0.9903183436982124
Test R^2: 0.9714559911594131
Train MSE: 1194.7149661275623
Test MSE: 2865.036247770619
Train MAE: 24.145442859243975
Test MAE: 32.33717946825578
Train Huber Loss: 4.809088571848796
Test Huber Loss: 6.44743595877741
