In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# -----------------------------
# 1) 데이터 불러오기
# -----------------------------
df = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/boston.csv")   # 업로드한 파일 경로
print(df.head())

   Unnamed: 0  MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  \
0           0  8.3252      41.0  6.984127   1.023810       322.0  2.555556   
1           1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842   
2           2  7.2574      52.0  8.288136   1.073446       496.0  2.802260   
3           3  5.6431      52.0  5.817352   1.073059       558.0  2.547945   
4           4  3.8462      52.0  6.281853   1.081081       565.0  2.181467   

   Latitude  Longitude  PRICE  
0     37.88    -122.23  4.526  
1     37.86    -122.22  3.585  
2     37.85    -122.24  3.521  
3     37.85    -122.25  3.413  
4     37.85    -122.25  3.422  


In [2]:
X = df.drop(columns=["PRICE"])   # 입력 변수들
y = df["PRICE"]                  # 타깃: 주택가격

# 학습/테스트 분할
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [3]:
lr = LinearRegression()
dt = DecisionTreeRegressor(random_state=42)
rf = RandomForestRegressor(n_estimators=200, random_state=42)

# -----------------------------
# 3) 모델 학습
# -----------------------------
lr.fit(X_train, y_train)
dt.fit(X_train, y_train)
rf.fit(X_train, y_train)

# -----------------------------
# 4) 모델 예측
# -----------------------------
lr_pred = lr.predict(X_test)
dt_pred = dt.predict(X_test)
rf_pred = rf.predict(X_test)

In [5]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

def evaluate(y_true, y_pred):
    try:
        rmse = mean_squared_error(y_true, y_pred, squared=False)  # 신버전
    except TypeError:
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))        # 구버전 대안
    r2 = r2_score(y_true, y_pred)
    return rmse, r2


lr_rmse, lr_r2 = evaluate(y_test, lr_pred)
dt_rmse, dt_r2 = evaluate(y_test, dt_pred)
rf_rmse, rf_r2 = evaluate(y_test, rf_pred)

print("=== Boston Housing Regression Results ===")
print(f"Linear Regression : RMSE={lr_rmse:.4f}, R2={lr_r2:.4f}")
print(f"Decision Tree     : RMSE={dt_rmse:.4f}, R2={dt_r2:.4f}")
print(f"Random Forest     : RMSE={rf_rmse:.4f}, R2={rf_r2:.4f}")

=== Boston Housing Regression Results ===
Linear Regression : RMSE=0.7448, R2=0.5767
Decision Tree     : RMSE=0.6941, R2=0.6323
Random Forest     : RMSE=0.4899, R2=0.8168
