# 実践データ科学入門 2020年度木曜4限

# 第2回 その3 線形回帰 with Iris

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt

import numpy as np
from sklearn import datasets
import pandas as pd

# Load the Iris dataset bundled with scikit-learn
iris = datasets.load_iris()

In [None]:
# The measurements are stored in the `data` attribute as a numpy.ndarray.
# Print the container type first, then the values, one item per line.
print(type(iris.data), iris.data, sep='\n')

In [None]:
# Show the shape of iris.data as (rows, columns)
print(iris.data.shape)

150 x 4 の二次元配列（行列）として格納されている

In [None]:
# Names of the features (one per column of iris.data)
print(iris.feature_names)

In [None]:
# The iris species of every sample is stored in the `target` attribute,
# also as a numpy.ndarray. Print the container type, then the values.
print(type(iris.target), iris.target, sep='\n')

あやめの種類は 0, 1, 2 として数字で表されている  
どの数字がどの種に対応するかを調べる

In [None]:
# Species names; the position of each name in this array is the numeric code used in iris.target
print(iris.target_names)

0 が setosa，1 が versicolor，2 が virginica

In [None]:
# List every numeric species code next to the species name it stands for.
for code, species_name in enumerate(iris.target_names):
    print('%1d = %s' % (code, species_name))

In [None]:
# Build a pandas DataFrame with one column per feature, then append the
# numeric species code as a final `species` column.
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(species=iris.target)
df

In [None]:
# Show the first 5 instances (rows) of the data frame
df.head()

In [None]:
# Show the last 5 instances (rows) of the data frame
df.tail()

In [None]:
# Split the feature matrix into one 1-D array per measurement (values in cm).
# The columns of iris.data are, in order: sepal length, sepal width,
# petal length, petal width. Transposing turns each column into a row, so
# unpacking yields the four columns as SL, SW, PL and PW.
SL, SW, PL, PW = iris.data.T

# Numeric species code of every sample (a class label, not a length).
Sp = iris.target

# Print the arrays in the order SL, SW, PL, PW, Sp.
for values in (SL, SW, PL, PW, Sp):
    print(values)

In [None]:
# Scatter plot of petal length (PL) against sepal length (SL) for all samples.
fig = plt.figure()
ax = fig.add_subplot()
# Label the axes so the figure can be read on its own; the labels and font
# size match the per-species regression plot later in the notebook.
ax.set_xlabel('SL', fontsize=18)
ax.set_ylabel('PL', fontsize=18)
ax.scatter(SL, PL)

In [None]:
# Scatter plot of petal length (PL) against sepal length (SL), with a separate
# marker and legend entry for each species.
fig = plt.figure()
ax = fig.add_subplot()
# Label the axes so the figure can be read on its own; the labels and font
# size match the per-species regression plot later in the notebook.
ax.set_xlabel('SL', fontsize=18)
ax.set_ylabel('PL', fontsize=18)
# The position in this tuple is the numeric species code stored in Sp.
for code, (species_name, marker) in enumerate(
        (('setosa', 'o'), ('versicolor', '^'), ('virginica', 'x'))):
    member = Sp == code  # boolean mask selecting the samples of this species
    ax.scatter(SL[member], PL[member], marker=marker, s=50, label=species_name)
ax.legend()

In [None]:
# Simple linear regression: model petal length (PL) as a*SL + b over all samples.
from sklearn import linear_model

# fit() is given a 2-D feature matrix, so the 1-D SL array is reshaped into a
# single column.
X = SL.reshape(-1, 1)
Y = PL
clf = linear_model.LinearRegression()
clf.fit(X, Y)

a = clf.coef_[0]    # slope of the fitted line
b = clf.intercept_  # intercept of the fitted line
print(a, b)

fig = plt.figure()
ax = fig.add_subplot()
# Label the axes so the figure can be read on its own; the labels and font
# size match the per-species regression plot later in the notebook.
ax.set_xlabel('SL', fontsize=18)
ax.set_ylabel('PL', fontsize=18)
# The position in this tuple is the numeric species code stored in Sp.
for code, (species_name, marker) in enumerate(
        (('setosa', 'o'), ('versicolor', '^'), ('virginica', 'x'))):
    member = Sp == code  # boolean mask selecting the samples of this species
    ax.scatter(SL[member], PL[member], marker=marker, s=50, label=species_name)

# Draw the fitted line over the plotted range of sepal lengths and annotate
# the figure with the fitted coefficients.
x = np.arange(4.0, 8.0, 0.1)
ax.plot(x, a*x+b, linewidth=3, c='k', label='$ax+b$')
ax.text(5.3, 7.3, '$a$=%f, $b$=%f'%(a, b))
ax.legend()

In [None]:
# Coefficient of determination (R^2), evaluated on the same data the model was fitted to
print(clf.score(X, Y))

In [None]:
# Per-species simple linear regression: fit PL = a*SL + b separately for each
# species. The numeric suffix of every name is the species code
# (0 = setosa, 1 = versicolor, 2 = virginica).

# setosa
X0 = SL[Sp == 0].reshape(-1, 1)  # single-column 2-D feature matrix for fit()
Y0 = PL[Sp == 0]
clf0 = linear_model.LinearRegression()
clf0.fit(X0, Y0)
a0, b0 = clf0.coef_[0], clf0.intercept_

# versicolor
X1 = SL[Sp == 1].reshape(-1, 1)
Y1 = PL[Sp == 1]
clf1 = linear_model.LinearRegression()
clf1.fit(X1, Y1)
a1, b1 = clf1.coef_[0], clf1.intercept_

# virginica
X2 = SL[Sp == 2].reshape(-1, 1)
Y2 = PL[Sp == 2]
clf2 = linear_model.LinearRegression()
clf2.fit(X2, Y2)
a2, b2 = clf2.coef_[0], clf2.intercept_


x = np.arange(4.0, 8.0, 0.1)
fig = plt.figure()
ax = fig.add_subplot()
ax.scatter(SL[Sp == 0], PL[Sp == 0], marker='o', s=50, label='setosa')
ax.scatter(SL[Sp == 1], PL[Sp == 1], marker='^', s=50, label='versicolor')
ax.scatter(SL[Sp == 2], PL[Sp == 2], marker='x', s=50, label='virginica')
# Each fitted line gets its own label (previously all three were '$ax+b$'),
# drawn in the colour passed explicitly for that species.
ax.plot(x, a0*x+b0, linewidth=3, c='tab:blue', label='$a_0x+b_0$')
ax.plot(x, a1*x+b1, linewidth=3, c='tab:orange', label='$a_1x+b_1$')
ax.plot(x, a2*x+b2, linewidth=3, c='tab:green', label='$a_2x+b_2$')
# The label= arguments above only appear once a legend is drawn; the original
# cell never drew one.
ax.legend()

ax.set_xlabel('SL', fontsize=18)
ax.set_ylabel('PL', fontsize=18)
ax.tick_params(labelsize=12)

In [None]:
# Slopes of the three per-species regression lines, one per line
# (setosa, versicolor, virginica).
print(a0, a1, a2, sep='\n')

In [None]:
# R^2 of each per-species model, evaluated on the data it was fitted to.
for model, features, response in ((clf0, X0, Y0), (clf1, X1, Y1), (clf2, X2, Y2)):
    print(model.score(features, response))

## 重線形回帰

In [None]:
# 3-D scatter plot of sepal length (SL), sepal width (SW) and petal length (PL),
# with a separate marker and legend entry for each species.
# NOTE(review): Axes3D is not referenced by name below; presumably it is imported
# so that older Matplotlib versions register the '3d' projection -- confirm.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# The position in this tuple is the numeric species code stored in Sp.
for code, (species_name, marker) in enumerate(
        (('setosa', 'o'), ('versicolor', '^'), ('virginica', 'x'))):
    member = Sp == code  # boolean mask selecting the samples of this species
    ax.scatter3D(SL[member], SW[member], PL[member],
                 marker=marker, s=50, label=species_name)
ax.set_xlabel('SL', fontsize=12)
ax.set_ylabel('SW', fontsize=12)
ax.set_zlabel('PL', fontsize=12)
plt.legend()

In [None]:
# Multiple linear regression: model petal length (PL) as a1*SL + a2*SW + b
# using all samples.
XM = iris.data[:, :2]  # first two columns: sepal length and sepal width
YM = PL
clfM = linear_model.LinearRegression()
clfM.fit(XM, YM)

aM1, aM2 = clfM.coef_  # one coefficient per column of XM
bM = clfM.intercept_
print(aM1, aM2, bM)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# The position in this tuple is the numeric species code stored in Sp.
for code, (species_name, marker) in enumerate(
        (('setosa', 'o'), ('versicolor', '^'), ('virginica', 'x'))):
    member = Sp == code  # boolean mask selecting the samples of this species
    ax.scatter3D(SL[member], SW[member], PL[member],
                 marker=marker, s=50, label=species_name)
ax.set_xlabel('SL', fontsize=12)
ax.set_ylabel('SW', fontsize=12)
ax.set_zlabel('PL', fontsize=12)

# Evaluate the fitted plane on a regular (SL, SW) grid and draw it as a wireframe.
x, y = np.meshgrid(np.arange(4.0, 8.0, 0.25), np.arange(2.0, 4.5, 0.25))
z = aM1*x + aM2*y + bM
ax.plot_wireframe(x, y, z, linewidth=1, label='$a_1x+a_2y+b$')
plt.legend()

In [None]:
# Per-species multiple regression: fit PL = a1*SL + a2*SW + b separately for
# each species. In the names below the first digit after "M" is the species
# code (0 = setosa, 1 = versicolor, 2 = virginica).

# setosa
XM0, YM0 = XM[Sp == 0], PL[Sp == 0]
clfM0 = linear_model.LinearRegression()
clfM0.fit(XM0, YM0)
(aM01, aM02), bM0 = clfM0.coef_, clfM0.intercept_
print(aM01, aM02, bM0)

# versicolor
XM1, YM1 = XM[Sp == 1], PL[Sp == 1]
clfM1 = linear_model.LinearRegression()
clfM1.fit(XM1, YM1)
(aM11, aM12), bM1 = clfM1.coef_, clfM1.intercept_
print(aM11, aM12, bM1)

# virginica
XM2, YM2 = XM[Sp == 2], PL[Sp == 2]
clfM2 = linear_model.LinearRegression()
clfM2.fit(XM2, YM2)
(aM21, aM22), bM2 = clfM2.coef_, clfM2.intercept_
print(aM21, aM22, bM2)


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
# The position in this tuple is the numeric species code stored in Sp.
for code, (species_name, marker) in enumerate(
        (('setosa', 'o'), ('versicolor', '^'), ('virginica', 'x'))):
    member = Sp == code  # boolean mask selecting the samples of this species
    ax.scatter3D(SL[member], SW[member], PL[member],
                 marker=marker, s=50, label=species_name)
ax.set_xlabel('SL', fontsize=12)
ax.set_ylabel('SW', fontsize=12)
ax.set_zlabel('PL', fontsize=12)

# Regular (SL, SW) grid on which each fitted plane is evaluated.
x, y = np.meshgrid(np.arange(4.0, 8.0, 0.25), np.arange(2.0, 4.5, 0.25))

# One wireframe per species, in species-code order, each in its own colour.
for coef_sl, coef_sw, offset, legend_text, colour in (
        (aM01, aM02, bM0, '$a_{01}x+a_{02}y+b_0$', 'tab:blue'),
        (aM11, aM12, bM1, '$a_{11}x+a_{12}y+b_1$', 'tab:orange'),
        (aM21, aM22, bM2, '$a_{21}x+a_{22}y+b_2$', 'tab:green')):
    z = coef_sl*x + coef_sw*y + offset
    ax.plot_wireframe(x, y, z, linewidth=1, label=legend_text, color=colour)
plt.legend()

<h3><div style="text-align: right;">以上</div></h3>