## 回帰木

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [3]:
# 訓練用とテスト用のデータに分割
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [4]:
# 訓練用データを用いたモデルの訓練
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X_train, y_train)

DecisionTreeRegressor(random_state=0)

In [5]:
# テスト用データを用いた結果の予測
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2) # 有効桁数を2桁に設定
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


In [6]:
# モデルの評価
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)

0.922905874177941

## 重回帰分析

In [7]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [8]:
# 訓練用とテスト用のデータに分割
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [9]:
# 訓練用データを用いたモデルの訓練
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train, y_train)

LinearRegression()

In [10]:
# テスト用データを用いた結果の予測
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2) # 有効桁数を2桁に設定
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


In [12]:
# モデルの評価
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)

0.9325315554761303

## 多項式回帰

In [13]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [14]:
# 訓練用とテスト用のデータに分割
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [15]:
# 訓練用データを用いたモデルの訓練
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)
regressor = LinearRegression()
regressor.fit(X_poly, y_train)

LinearRegression()

In [16]:
# テスト用データを用いた結果の予測
y_pred = regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2) # 有効桁数を2桁に設定
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[433.94 431.23]
 [457.9  460.01]
 [460.52 461.14]
 ...
 [469.53 473.26]
 [438.27 438.  ]
 [461.67 463.28]]


In [17]:
# モデルの評価
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)

0.9458193033689702

## ランダムフォレスト（回帰）

In [18]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [19]:
# 訓練用とテスト用のデータに分割
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [20]:
# 訓練用データを用いたモデルの訓練
from sklearn.ensemble import RandomForestRegressor

regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X_train, y_train)

RandomForestRegressor(n_estimators=10, random_state=0)

In [21]:
# テスト用データを用いた結果の予測
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2) # 有効桁数を2桁に設定
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


In [22]:
# モデルの評価
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)

0.9615908334363876

## サポートベクトル回帰(SVR)

In [23]:
dataset = pd.read_csv('Data.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [24]:
y = y.reshape(len(y), 1)

In [25]:
# 訓練用とテスト用のデータに分割
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1/3, random_state=0)

In [26]:
# フィーチャースケーリング
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
sc_y = StandardScaler()
X_train = sc_X.fit_transform(X_train)
y_train = sc_y.fit_transform(y_train)

In [27]:
# 訓練用データを用いたモデルの訓練
from sklearn.svm import SVR

regressor = SVR(kernel='rbf')
regressor.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)


SVR()

In [28]:
# テスト用データを用いた結果の予測
y_pred = sc_y.inverse_transform(regressor.predict(sc_X.transform(X_test)).reshape(-1, 1))
np.set_printoptions(precision=2) # 有効桁数を2桁に設定
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[434.56 431.23]
 [457.95 460.01]
 [461.37 461.14]
 ...
 [449.   452.7 ]
 [466.45 468.45]
 [456.13 454.47]]


In [29]:
# モデルの評価
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)

0.9475212026373167