In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset shipped with scikit-learn; later cells read its
# "data" (feature matrix) and "feature_names" entries.
dataset = load_iris()

In [3]:
# Wrap the raw feature matrix in a DataFrame, labelling each column with the
# corresponding feature name.
iris_df = pd.DataFrame(
    dataset["data"],
    columns=dataset["feature_names"],
)

In [4]:
# Preview the feature table (the rendered output elides the middle rows).
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [5]:
# Empty frame whose columns are the iris feature names.
basic_statistics = pd.DataFrame(columns=dataset["feature_names"])

In [6]:
# Build the summary table with a single vectorised aggregation instead of
# concatenating one row at a time onto an empty frame. The row-by-row version
# appended duplicate rows whenever this cell was re-run, and concatenating with
# an empty object-dtype frame is deprecated in pandas >= 2.1 (the result may
# become object dtype, in which case a later .round() would silently do nothing).
#
# Rows follow the order of `labels`; "std" is the sample standard deviation
# (ddof=1), the same default as DataFrame.std().
labels = ["max", "min", "mean", "median", "std"]
basic_statistics = iris_df.agg(labels)

In [7]:
# Show the summary table: one row per statistic, one column per feature.
basic_statistics

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
max,7.9,4.4,6.9,2.5
min,4.3,2.0,1.0,0.1
mean,5.843333,3.057333,3.758,1.199333
median,5.8,3.0,4.35,1.3
std,0.828066,0.435866,1.765298,0.762238


In [8]:
# Persist the summary statistics, rounded to 4 decimal places, as CSV.
basic_statistics.round(decimals=4).to_csv(path_or_buf="basic_statistics.csv")

In [9]:
# pandas' cov() returns the unbiased (N-1 normalised) covariance.
# numpy's cov() can compute either the biased, N-normalised form (bias=1)
# or the unbiased form (bias=0).
covariance = iris_df.cov()

In [10]:
# Show the feature-by-feature covariance matrix.
covariance

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),0.685694,-0.042434,1.274315,0.516271
sepal width (cm),-0.042434,0.189979,-0.329656,-0.121639
petal length (cm),1.274315,-0.329656,3.116278,1.295609
petal width (cm),0.516271,-0.121639,1.295609,0.581006


In [11]:
# Persist the covariance matrix, rounded to 4 decimal places, as CSV.
covariance.round(decimals=4).to_csv(path_or_buf="covariance.csv")

In [12]:
# For good measure, also compute the biased (N-normalised) covariance.
# rowvar=False treats each column as a variable and each row as an observation.
covariance_spec = pd.DataFrame(
    np.cov(iris_df.to_numpy(), rowvar=False, bias=True),
    index=dataset["feature_names"],
    columns=dataset["feature_names"],
)

In [13]:
# Show the biased (N-normalised) covariance matrix.
covariance_spec

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),0.681122,-0.042151,1.26582,0.512829
sepal width (cm),-0.042151,0.188713,-0.327459,-0.120828
petal length (cm),1.26582,-0.327459,3.095503,1.286972
petal width (cm),0.512829,-0.120828,1.286972,0.577133


In [14]:
# Pairwise Pearson correlation coefficients between the feature columns
# (Pearson is the pandas default, spelled out here for the reader).
correlationcoefficient = iris_df.corr(method="pearson")

In [15]:
# Show the feature-by-feature correlation matrix.
correlationcoefficient

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),1.0,-0.11757,0.871754,0.817941
sepal width (cm),-0.11757,1.0,-0.42844,-0.366126
petal length (cm),0.871754,-0.42844,1.0,0.962865
petal width (cm),0.817941,-0.366126,0.962865,1.0


In [16]:
# Persist the correlation matrix, rounded to 4 decimal places, as CSV.
correlationcoefficient.round(decimals=4).to_csv(
    path_or_buf="correlationcoefficient.csv"
)