In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
import pandas as pd

# Load the California housing dataset. The features go into a DataFrame so
# the column names are kept; the target is used as returned by scikit-learn.
house_value = fetch_california_housing()
X = pd.DataFrame(house_value.data, columns=house_value.feature_names)
y = house_value.target

# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=420)

# The split frames still carry the row labels they had in X. Give each a
# fresh 0..n-1 index so later inspection and plotting are not confusing.
# reset_index(drop=True) returns new frames instead of mutating them through
# a loop alias, and discards the old labels rather than adding them as a column.
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)


### 訓練線性回歸模型並檢視係數

In [None]:
# Fit a linear regression on the training split, then predict on the
# held-out test features.
reg = LinearRegression()
reg.fit(X_train, y_train)
yhat = reg.predict(X_test)

# Keep the fitted parameters under their own names.
intercept = reg.intercept_
coef = reg.coef_

# One row per feature with its fitted coefficient. The bare expression on the
# last line lets the notebook render the table.
compare = pd.DataFrame({"Feature": X_train.columns}).assign(Coefficient=coef)
compare


### 模型評估：均方誤差 (MSE)

In [None]:

from sklearn.metrics import mean_squared_error

# Mean squared error between the held-out targets and the model's predictions,
# reported to four decimal places.
mse = mean_squared_error(y_true=y_test, y_pred=yhat)
print(f"均方誤差 (MSE): {mse:.4f}")


## 3. 預測結果視覺化

In [None]:

import matplotlib.pyplot as plt

# Figure 1: actual vs. predicted values. Points on the dashed diagonal are
# predictions that match the actual value exactly.
fig_fit, ax_fit = plt.subplots(figsize=(10, 5))
ax_fit.scatter(y_test, yhat, color="blue", alpha=0.5, label="Predicted vs Actual")
actual_lo, actual_hi = y_test.min(), y_test.max()
ax_fit.plot([actual_lo, actual_hi], [actual_lo, actual_hi], "r--", lw=2, label="Ideal Fit")
ax_fit.set_xlabel("Actual House Value")
ax_fit.set_ylabel("Predicted House Value")
ax_fit.set_title("Actual vs. Predicted House Value")
ax_fit.legend()
plt.show()

# Figure 2: residual plot. Residuals are actual minus predicted, drawn
# against the predicted value with a dashed reference line at zero.
residuals = y_test - yhat

fig_resid, ax_resid = plt.subplots(figsize=(10, 5))
ax_resid.scatter(yhat, residuals, color="purple", alpha=0.5)
ax_resid.axhline(y=0, color="r", linestyle="--", lw=2)
ax_resid.set_xlabel("Predicted House Value")
ax_resid.set_ylabel("Residuals")
ax_resid.set_title("Residual Plot")
plt.show()
