In [None]:
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install japanize-matplotlib

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
import matplotlib.pyplot as plt
import numpy as np
import japanize_matplotlib

## 残差二乗和の計算

In [None]:
# Sample data: X holds the inputs, y the observed values (nine pairs).
X = [25,   60,   75,    95,    40,   75,   160,   130,   110]
y = [1800, 2500, 19200, 9500, 3200, 8000, 18000, 22000, 14000]

# Four candidate lines y = a*x + b, each given by slope 'a' and intercept 'b'.
parameter1 = {'a': 0, 'b': 12000}  # y = 0x + 12000
parameter2 = {'a': 20, 'b': 0}  # y = 20x
parameter3 = {'a': -80, 'b': 15000}  # y = -80x + 15000
parameter4 = {'a': 150, 'b': -1500}  # y = 150x - 1500

candidate_lines = (parameter1, parameter2, parameter3, parameter4)

# Print the residual sum of squares of every candidate line.
for index, line in enumerate(candidate_lines):
  slope, intercept = line['a'], line['b']

  # residual = observed - predicted; square each residual and total them.
  sum_error = sum(
      (observed - (slope * x_value + intercept)) ** 2
      for x_value, observed in zip(X, y)
  )

  print(f'parameter{index + 1}: {sum_error}')

## scikit-learn による単回帰・重回帰分析の実践

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression


# Nine samples, each with an area, a distance and a price.
area =     [25,   60,   75,    95,   40,   75,   160,   130,   110]
distance = [15,   30,   3,     15,   10,   20,   10,    5,     15]
price =    [1800, 2500, 19200, 9500, 3200, 8000, 18000, 22000, 14000]

df = pd.DataFrame({'price': price, 'area': area, 'distance': distance})
print(df)

y = df['price']  # target variable
X1 = df[['area']]  # explanatory variable for the simple regression
X2 = df[['area', 'distance']]  # explanatory variables for the multiple regression

# Simple regression: y = w1 * area + w0.
# fit() computes the parameters that minimise the residual sum of squares.
model1 = LinearRegression().fit(X1, y)
print('回帰係数=', model1.coef_)  # w1
print('切片=', model1.intercept_)  # w0

# Multiple regression: y = w1 * area + w2 * distance + w0.
model2 = LinearRegression().fit(X2, y)

print('回帰係数=', model2.coef_)  # w1, w2
print('切片=', model2.intercept_)  # w0

## 様々な回帰式でパラメータを求める

In [None]:
# Ten (X, y) sample points used by the polynomial fits in the cells below.
X = [0,    0.04, 0.26, 0.3,  0.32, 0.54, 0.6,  0.76, 0.84, 0.94]
y = [4.94, 5.06, 5.04, 4.91, 4.89, 4.33, 4.15, 4.33, 4.65, 5.77]

In [None]:
# Open the figure first and then draw on it: calling plt.figure() after
# plt.scatter() would create a second, empty figure.
plt.figure()
plt.scatter(X, y)
plt.show()

In [None]:
def generate_powed_data_set(x, pow_number):
  """Build a matrix whose k-th column holds the inputs raised to the k-th power.

  Args:
    x: Sequence of input values; its length sets the number of rows.
    pow_number: Highest exponent. Columns cover exponents 1..pow_number, so
      it must be at least 1 (otherwise there is nothing to concatenate).

  Returns:
    A 2-D NumPy array of shape (len(x), pow_number).
  """
  # Column vector of the inputs, so that each power is one matrix column.
  column = np.array(x).reshape(len(x), 1)
  powers = [column ** exponent for exponent in range(1, pow_number + 1)]
  return np.concatenate(powers, axis=1)


def calculate_lr_param(x, data_Y, pow_number):
  """Fit a linear regression on the powers of x and print the fitted parameters.

  Args:
    x: Sequence of input values.
    data_Y: Target values, one per entry of x.
    pow_number: Highest power of x used as a feature (features are x^1..x^pow_number).

  Returns:
    None. The coefficients and the intercept are printed on one line.
  """
  features = generate_powed_data_set(x, pow_number)
  model = LinearRegression().fit(features, data_Y)
  print(model.coef_, model.intercept_)

In [None]:
# Fit using x^1 only, i.e. a straight line.
calculate_lr_param(X, y, 1)

In [None]:
# Fit using powers of x up to 2 (quadratic).
calculate_lr_param(X, y, 2)

In [None]:
# Fit using powers of x up to 3 (cubic).
calculate_lr_param(X, y, 3)

In [None]:
# Fit using powers of x up to 9: nine coefficients plus the intercept,
# i.e. as many parameters as there are sample points (ten).
calculate_lr_param(X, y, 9)

## リッジ・ラッソ回帰実践


In [None]:
# Same ten sample points as in the polynomial-fit section, restated for this section.
X = [0,    0.04, 0.26, 0.3,  0.32, 0.54, 0.6,  0.76, 0.84, 0.94]
y = [4.94, 5.06, 5.04, 4.91, 4.89, 4.33, 4.15, 4.33, 4.65, 5.77]

In [None]:
# Start from the raw (x, y) pairs and append the 2nd..9th powers of x as columns.
df = pd.DataFrame({'x': X, 'y': y})
for degree in range(2, 10):
  df['x^{}'.format(degree)] = df['x'] ** degree

# Features ordered from the highest power down to x; the target stays a one-column frame.
x_columns = ['x^9', 'x^8', 'x^7', 'x^6', 'x^5', 'x^4', 'x^3', 'x^2', 'x']
y_columns = ['y']
data_x, data_y = df[x_columns], df[y_columns]

# Create each estimator and learn its parameters on the same data:
# plain least squares first, then the three regularised variants (alpha=0.001).
lr = LinearRegression().fit(data_x, data_y)
lasso = Lasso(alpha=0.001).fit(data_x, data_y)
ridge = Ridge(alpha=0.001).fit(data_x, data_y)
elastic = ElasticNet(alpha=0.001).fit(data_x, data_y)

# Print coefficients and intercept of every model, in fitting order.
report = (
    ('lr_coef: {}', lr.coef_),
    ('lr_intercept: {}', lr.intercept_),
    ('lasso_coef: {}', lasso.coef_),
    ('lasso_intercept: {}', lasso.intercept_),
    ('ridge_coef: {}', ridge.coef_),
    ('ridge_intercept: {}', ridge.intercept_),
    ('elastic_coef: {}', elastic.coef_),
    ('elastic_intercept: {}', elastic.intercept_),
)
for template, value in report:
  print(template.format(value))

## MLP

In [None]:
from sklearn.neural_network import MLPRegressor
import numpy as np

In [None]:
# Toy data: two input features per sample (memory, HDD) and one target value.
X_memory = [4, 8, 8, 12, 16]
X_hdd = [128, 512, 256, 1024, 512]
y = [5, 12, 10, 20, 23]

# Stack the two feature lists as columns -> X has shape (5, 2), y has shape (5,).
X = np.array([X_memory, X_hdd]).T
y = np.array(y)

print(X.shape, y.shape)

# Two hidden layers with 10 units each. random_state fixes the random weight
# initialisation so the printed weights are the same on every run.
model = MLPRegressor(hidden_layer_sizes=(10, 10), max_iter=1000, random_state=123)
model.fit(X, y)

# Learned weight matrices, one per layer.
print(model.coefs_)

## PyCaret

In [None]:
# Install the PyCaret release that was current when the book was written.
# %pip (unlike !pip) installs into the environment of the running kernel.
%pip install pycaret==3.0.4


# To install the latest release instead, run:
# %pip install pycaret

In [None]:
import pycaret


# Show which PyCaret version is installed (displayed as the cell's output).
pycaret.__version__

In [None]:
from pycaret.datasets import get_data


# Fetch the dataset named 'insurance' through PyCaret's get_data helper and
# keep the result in `data` for the experiment set up in the next cell.
data = get_data('insurance')

In [None]:
# NOTE(review): the wildcard import is presumably what brings setup() (used
# below) and compare_models() (used in the next cell) into scope.
from pycaret.regression import *


# Initialise the regression experiment on `data` with 'charges' as the target
# column. session_id is PyCaret's random seed, fixed here for reproducible runs.
s = setup(data, target ='charges', session_id=123)

In [None]:
# Compare the baseline models; the return value (per the PyCaret docs, the
# top-ranked model) is kept in `best`.
best = compare_models()

In [None]:
# Display the object returned by compare_models().
best