# カリフォルニア住宅価格データセット
- さまざまな項目からブロック内の**住宅価格の中央値**を予想する
    - MedInc 所得中央値
    - HouseAge 平均築年数
    - AveRooms 平均部屋数
    - AveBedrms 平均ベッドルーム数
    - Population 人口
    - AveOccup 平均世帯人数
    - Latitude 緯度
    - Longitude 経度




In [4]:
# Fetch the California housing dataset. The returned object's `data`,
# `target` and `feature_names` attributes are what the following cells use.
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

In [15]:
# Bind the feature matrix and the target values, then inspect the feature
# names and the dimensions of the feature matrix.
X, y = housing.data, housing.target
print(housing.feature_names)
print(X.shape)

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
(20640, 8)


In [25]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.model_selection import train_test_split

In [18]:
# Hold out a test subset. test_size=0.25 spells out scikit-learn's documented
# default, and the fixed random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

In [22]:
# Baseline: plain linear regression with default settings.
# score() is the estimator's default regression metric (R^2 per the
# scikit-learn documentation); it is reported on both subsets so the
# train/test gap is visible.
mdl = LinearRegression()
mdl.fit(X_train, y_train)
train_score = mdl.score(X_train, y_train)
test_score = mdl.score(X_test, y_test)
print("訓練スコア", train_score)
print("テストスコア", test_score)

訓練スコア 0.6102859678113064
テストスコア 0.592986928576003


In [26]:
# Same evaluation with Ridge regression (default hyperparameters), to compare
# against the unregularised linear model.
mdl = Ridge().fit(X_train, y_train)
for label, features, targets in (
    ("訓練スコア", X_train, y_train),
    ("テストスコア", X_test, y_test),
):
    print(label, mdl.score(features, targets))


訓練スコア 0.6102859256477542
テストスコア 0.592984667157651


In [27]:
# Same evaluation with Lasso regression (default hyperparameters).
# NOTE(review): the recorded run scores far below LinearRegression/Ridge here;
# presumably the default regularisation strength is too strong for these
# unscaled features -- confirm, and consider tuning alpha.
mdl = Lasso()
mdl.fit(X_train, y_train)
scores = {
    "訓練スコア": mdl.score(X_train, y_train),
    "テストスコア": mdl.score(X_test, y_test),
}
for label, value in scores.items():
    print(label, value)

訓練スコア 0.28628419168471997
テストスコア 0.28887951253541144


In [36]:
from sklearn.tree import DecisionTreeRegressor

# Sweep the maximum tree depth (2 through 14) and print train/test scores for
# each depth, so the point where the train score keeps rising while the test
# score stops improving (overfitting) can be read off the table.
for max_depth in range(2, 15):
    # random_state is fixed because scikit-learn permutes the candidate
    # features at every split and breaks ties between equally good splits at
    # random; without a seed the printed scores can differ between runs.
    # The seed matches the one used for the train/test split.
    mdl = DecisionTreeRegressor(max_depth=max_depth, random_state=1)
    mdl.fit(X_train, y_train)
    train_score = mdl.score(X_train, y_train)
    test_score = mdl.score(X_test, y_test)
    print("{}\ttrain: {}\ttest: {}".format(max_depth, train_score, test_score))


2	train: 0.4476762800567027	test: 0.4402888155161535
3	train: 0.5369734207973751	test: 0.5159577576963043
4	train: 0.587096279828152	test: 0.5671365525573329
5	train: 0.6400092316603436	test: 0.6140595840918428
6	train: 0.6878557970200256	test: 0.6486166438088263
7	train: 0.7270116809292556	test: 0.676460532598538
8	train: 0.7727933187019169	test: 0.6903889618782366
9	train: 0.8116979133551927	test: 0.7059451750558865
10	train: 0.8448027053913012	test: 0.6888636431339225
11	train: 0.8757604962296521	test: 0.6768157400327564
12	train: 0.9022152513931876	test: 0.6595698003602006
13	train: 0.9272956900092948	test: 0.6611120089209137
14	train: 0.9479729729930784	test: 0.6323467437884467
