# Throw the dice

In [1]:
import numpy as np

In [2]:
# Roll a six-sided die ten times (faces 1..6, uniform, with replacement by default).
np.random.choice(np.arange(1, 7), size=10)

array([6, 3, 4, 1, 2, 1, 6, 5, 3, 2])

In [3]:
# Ten rolls again, this time spelling out the sampling mode explicitly.
np.random.choice(np.arange(1, 7), size=10, replace=True)  # replace=True is the default

array([6, 4, 5, 1, 3, 6, 4, 6, 5, 1])

In [4]:
# Draw five faces without replacement, so no face can appear twice.
np.random.choice(np.arange(1, 7), size=5, replace=False)

array([6, 1, 3, 2, 5])

In [5]:
# Loaded die: faces 4-6 carry three times the weight of faces 1-3.
weights = np.array([1, 1, 1, 3, 3, 3])
p = weights / weights.sum()  # normalise the weights into probabilities summing to 1
np.random.choice(np.arange(1, 7), size=10, p=p)

array([5, 6, 4, 3, 4, 2, 5, 5, 4, 3])

# Expected value and Variance

In [6]:
import numpy as np

#### 正規分布

np.random.normal(loc = 0.0, scale = 1.0, size = None)

オプションのlocは期待値，scaleは標準偏差（分散ではない），sizeは出力配列の形状（整数を渡せばサンプル数）

#### 標準正規分布

np.random.randn(d0, d1, d2, ...)

標準正規分布は，期待値0, 分散1の正規分布．(d0, d1, d2, ...) のサイズの配列に，標準正規分布からのサンプルを格納

#### 一様分布

np.random.uniform(low = 0.0, high = 1.0, size = None)

オプションのlowは下限（含む），highは上限（含まない），sizeは出力配列の形状（整数を渡せばサンプル数）．サンプルは半開区間 $[\mathrm{low}, \mathrm{high})$ 上の一様分布に従う

#### 区間[0, 1)上の一様分布

np.random.rand(d0, d1, d2, ...)

(d0, d1, d2, ...) のサイズの配列に，半開区間[0, 1)上の一様分布からのサンプルを格納

In [7]:
# Draw 100 samples from N(1, 2^2); `scale` is the standard deviation, not the variance.
# NOTE(review): no seed is set in the cells shown, so x differs on every run.
x = np.random.normal(loc=1, scale=2, size=100)

In [8]:
# Sample mean of x, computed with the ndarray method.
x.mean()   # Same as "np.mean(x)"

0.8052190926508561

In [9]:
# Sample mean of x, computed with the NumPy function.
np.mean(x) # Same as "x.mean()"

0.8052190926508561

In [10]:
# Standard deviation of x via the ndarray method (NumPy's default ddof=0 divides by N).
x.std()    # Same as "np.std(x)"

2.043235024619204

In [11]:
# Standard deviation of x via the NumPy function (NumPy's default ddof=0 divides by N).
np.std(x)  # Same as "x.std()"

2.043235024619204

$| x - E[x] | \leq \mathrm{sd}(x)$ となるデータの割合を求めます．

In [12]:
# Share of samples within one standard deviation of the sample mean.
# The comparison yields booleans, and their mean is the fraction of True values.
centre, spread = x.mean(), x.std()
within_1sd = np.abs(x - centre) <= spread
within_1sd.mean()

0.69

$| x - E[x] | \leq 2 \times \mathrm{sd}(x) $となるデータの割合を求めます．

In [13]:
# Share of samples within two standard deviations of the sample mean.
# The comparison yields booleans, and their mean is the fraction of True values.
centre, spread = x.mean(), x.std()
within_2sd = np.abs(x - centre) <= 2 * spread
within_2sd.mean()

0.96

# Quantile

In [14]:
import scipy as sp
from scipy.stats import norm

標準正規分布 $N(0, 1)$ の上側 $\alpha$ 点を $z_{\alpha}$ と表す．

sp.stats.norm.ppf

を使うと正規分布の分位点（Quantile）$z_{\alpha}$を計算できる．

$N(0, 1)$の0.7点

In [15]:
# 0.7 quantile of the standard normal; loc=0 and scale=1 are the defaults, written out here.
sp.stats.norm(loc=0, scale=1).ppf(0.7)

0.5244005127080407

$N(1, 2^2)$ の0.7点

In [16]:
# 0.7 quantile of N(1, 2^2): freeze the distribution parameters first, then ask for the quantile.
sp.stats.norm(loc=1, scale=2).ppf(0.7)

2.0488010254160813

$N(0, 1)$の上側0.05点

In [17]:
# The upper-alpha point z_alpha is the lower (1 - alpha) quantile of N(0, 1).
alpha = 0.05
lower_tail_prob = 1 - alpha
sp.stats.norm.ppf(lower_tail_prob)

1.6448536269514722

# Covariance and Correlation coefficient

In [18]:
import numpy as np
from sklearn.datasets import load_iris

In [19]:
# Load the iris dataset and report the size of its data matrix.
iris = load_iris()
np.shape(iris.data)    # (number of data, dimension)

(150, 4)

In [20]:
# Variance-covariance matrix of the features (np.cov treats each row as a variable, so the data matrix is transposed)

In [21]:
# np.cov expects one variable per row, so hand it the transposed data matrix.
features_by_row = iris.data.T
np.cov(features_by_row)

array([[ 0.68569351, -0.042434  ,  1.27431544,  0.51627069],
       [-0.042434  ,  0.18997942, -0.32965638, -0.12163937],
       [ 1.27431544, -0.32965638,  3.11627785,  1.2956094 ],
       [ 0.51627069, -0.12163937,  1.2956094 ,  0.58100626]])

In [22]:
# Correlation coefficient matrix of the features (np.corrcoef treats each row as a variable, so the data matrix is transposed)

In [23]:
# np.corrcoef expects one variable per row, so hand it the transposed data matrix.
features_by_row = iris.data.T
np.corrcoef(features_by_row)

array([[ 1.        , -0.11756978,  0.87175378,  0.81794113],
       [-0.11756978,  1.        , -0.4284401 , -0.36612593],
       [ 0.87175378, -0.4284401 ,  1.        ,  0.96286543],
       [ 0.81794113, -0.36612593,  0.96286543,  1.        ]])