In [1]:
# モジュールのインポート
import numpy as np
import pandas as pd
import sklearn as sk
import seaborn as sb

scikit-learnのデータセット"Iris"の特徴量の可視化をする。

1. ヒストグラム
1. 3Dグラフ
1. 探索的データ解析(EDA)
1. 等高線図

In [2]:
# データセットの表示
import sklearn.datasets
iris_set = sklearn.datasets.load_iris()

print("class:{}".format(np.unique(iris_set.target)) )
pd.DataFrame(np.column_stack((iris_set.data, iris_set.target.reshape(-1,1))), columns=np.append(iris_set.feature_names,'class') )

class:[0 1 2]


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0
5,5.4,3.9,1.7,0.4,0.0
6,4.6,3.4,1.4,0.3,0.0
7,5.0,3.4,1.5,0.2,0.0
8,4.4,2.9,1.4,0.2,0.0
9,4.9,3.1,1.5,0.1,0.0


##### ヒストグラム

In [3]:
%matplotlib notebook

import matplotlib.pyplot as plt

fig_num = len(iris_set.feature_names)
fig = plt.figure(figsize=(5,5))
for i, name in enumerate(iris_set.feature_names):
    ax = fig.add_subplot(fig_num,1,i+1)
    ax.hist(iris_set.data[:,i], bins=50)
    ax.set_title(name)
    
fig.tight_layout()
fig.show()

<IPython.core.display.Javascript object>

##### 3Dグラフ

In [4]:
%matplotlib notebook

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(5,5))
ax = Axes3D(fig)
for i in np.unique(iris_set.target):
    mask = iris_set.target == i
    ax.plot(iris_set.data[mask,0], iris_set.data[mask,1], iris_set.data[mask,2], 'o', label=str(i))

ax.set_xlabel(iris_set.feature_names[0])
ax.set_ylabel(iris_set.feature_names[1])
ax.set_zlabel(iris_set.feature_names[2])
plt.title("features-label map")

plt.legend()
plt.show()

<IPython.core.display.Javascript object>

##### 探索的データ解析(EDA)

In [5]:
%matplotlib notebook

import matplotlib.pyplot as plt

df = pd.DataFrame(iris_set.data, columns=iris_set.feature_names)
sb.set(style='whitegrid', context='notebook')
sb.pairplot(df, size=2.5)
plt.show()

<IPython.core.display.Javascript object>

##### 等高線図

In [6]:
%matplotlib notebook

import matplotlib.pyplot as plt
from matplotlib.mlab import griddata

fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(1,1,1)

feat1_dat = iris_set.data[:,0]
feat2_dat = iris_set.data[:,1]
feat1_range = np.linspace(np.min(feat1_dat), np.max(feat1_dat))
feat2_range = np.linspace(np.min(feat2_dat), np.max(feat2_dat))
interp_vals = griddata(feat1_dat, feat2_dat, iris_set.target, feat1_range, feat2_range, interp='linear')
sc = plt.contourf(feat1_range, feat2_range, interp_vals, 
                  cmap=plt.cm.rainbow, vmin=-np.min(interp_vals), vmax=np.max(interp_vals))

fig.colorbar(sc)
fig.gca().set_aspect('equal', adjustable='box')        

ax.set_xlabel(iris_set.feature_names[0])
ax.set_ylabel(iris_set.feature_names[1])
fig.show()

<IPython.core.display.Javascript object>