# Throw the dice

In [1]:
import numpy as np

In [2]:
# Roll a fair six-sided die 10 times: draw 10 values from the faces 1..6.
np.random.choice(np.arange(1, 7), size=10)

array([5, 5, 5, 6, 3, 5, 6, 3, 1, 1])

In [3]:
# Same draw with "replace" spelled out; "replace=True" is already the default,
# so a face may appear more than once.
np.random.choice(np.arange(1, 7), size=10, replace=True)

array([1, 2, 4, 6, 2, 4, 4, 5, 2, 6])

In [4]:
# Sampling without replacement: the 5 drawn faces are all distinct.
np.random.choice(np.arange(1, 7), size=5, replace=False)

array([5, 6, 1, 4, 3])

In [5]:
# Loaded die: faces 4, 5 and 6 are three times as likely as faces 1, 2 and 3.
weights = np.array([1, 1, 1, 3, 3, 3])
p = weights / weights.sum()  # normalise the weights so the probabilities sum to 1
np.random.choice(np.arange(1, 7), size=10, p=p)

array([5, 4, 4, 5, 6, 6, 6, 5, 6, 5])

# Expected value and Variance

In [6]:
import numpy as np

#### 正規分布

np.random.normal(loc = 0.0, scale = 1.0, size = None)

オプションのlocは期待値，scaleは標準偏差，sizeはサンプル数

#### 標準正規分布

np.random.randn(d0, d1, d2, ...)

標準正規分布は，期待値0, 分散1の正規分布．(d0, d1, d2, ...) のサイズの配列に，標準正規分布からのサンプルを格納

#### 一様分布

np.random.uniform(low = 0.0, high = 1.0, size = None)

オプションのlowは下限（含む），highは上限（含まない），sizeはサンプル数．サンプルは半開区間[low, high)から得られる

#### 区間[0, 1)上の一様分布

np.random.rand(d0, d1, d2, ...)

(d0, d1, d2, ...) のサイズの配列に，区間[0, 1)上の一様分布からのサンプルを格納

In [7]:
# Draw 100 samples from a normal distribution with mean 1 and standard deviation 2.
x = np.random.normal(loc=1, scale=2, size=100)

In [8]:
# Sample mean of x, computed with the ndarray method.
x.mean()   # Same as "np.mean(x)"

0.6011411838055563

In [9]:
# Sample mean of x, computed with the NumPy function.
np.mean(x) # Same as "x.mean()"

0.6011411838055563

In [10]:
# Standard deviation of x, computed with the ndarray method
# (NumPy's documented default is ddof=0, i.e. the population formula).
x.std()    # Same as "np.std(x)"

2.1108227719457693

In [11]:
# Standard deviation of x, computed with the NumPy function
# (NumPy's documented default is ddof=0, i.e. the population formula).
np.std(x)  # Same as "x.std()"

2.1108227719457693

$| x - E[x] | \leq \mathrm{sd}(x)$ となるデータの割合を求めます．

In [12]:
# Fraction of samples within one standard deviation of the sample mean:
# the mean of a boolean mask is the proportion of True entries.
(np.abs(x - x.mean()) <= x.std()).mean()

0.64

$| x - E[x] | \leq 2 \times \mathrm{sd}(x) $となるデータの割合を求めます．

In [13]:
# Fraction of samples within two standard deviations of the sample mean:
# the mean of a boolean mask is the proportion of True entries.
(np.abs(x - x.mean()) <= 2 * x.std()).mean()

0.98

# Quantile

In [14]:
import scipy as sp
from scipy.stats import norm

  return f(*args, **kwds)


標準正規分布 $N(0, 1)$ の上側 $\alpha$ 点を $z_{\alpha}$ と表す．

sp.stats.norm.ppf

を使うと正規分布の分位点（Quantile）を計算できる．`ppf(q)` は下側 $q$ 点を返すので，上側 $\alpha$ 点 $z_{\alpha}$ は `ppf(1 - alpha)` で求める．

$N(0, 1)$の0.7点

In [15]:
# 0.7 quantile of N(0, 1). "norm" (imported from scipy.stats) is the same
# object as "sp.stats.norm", so this is the same ppf call.
norm.ppf(0.7)

0.5244005127080407

$N(1, 2^2)$ の0.7点

In [16]:
# 0.7 quantile of N(1, 2^2): loc is the mean, scale is the standard deviation.
# "norm" (imported from scipy.stats) is the same object as "sp.stats.norm".
norm.ppf(0.7, loc=1, scale=2)

2.0488010254160813

$N(0, 1)$の上側0.05点

In [17]:
# Upper 0.05 point of N(0, 1): the upper-alpha point is the lower (1 - alpha) quantile.
alpha = 0.05  # upper-tail probability
norm.ppf(1 - alpha)

1.6448536269514722

# Covariance and Correlation coefficient

In [18]:
import numpy as np
from sklearn.datasets import load_iris

  return f(*args, **kwds)
  return f(*args, **kwds)


In [19]:
# Load the Iris dataset through scikit-learn's load_iris and show the shape
# of its data matrix.
iris = load_iris()
np.shape(iris.data)  # (number of samples, number of features)

(150, 4)

In [20]:
# Variance-covariance matrix (np.cov treats rows as variables by default, so the samples-by-features data matrix must be transposed, or rowvar=False passed)

In [21]:
# Covariance matrix of the features. rowvar=False tells np.cov that each
# column is a variable, which is equivalent to passing iris.data.T.
np.cov(iris.data, rowvar=False)

array([[ 0.68569351, -0.03926846,  1.27368233,  0.5169038 ],
       [-0.03926846,  0.18800403, -0.32171275, -0.11798121],
       [ 1.27368233, -0.32171275,  3.11317942,  1.29638747],
       [ 0.5169038 , -0.11798121,  1.29638747,  0.58241432]])

In [22]:
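# Correlation coefficient matrix (np.corrcoef treats rows as variables by default, so the samples-by-features data matrix must be transposed, or rowvar=False passed)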

In [23]:
# Correlation matrix of the features. rowvar=False tells np.corrcoef that
# each column is a variable, which is equivalent to passing iris.data.T.
np.corrcoef(iris.data, rowvar=False)

array([[ 1.        , -0.10936925,  0.87175416,  0.81795363],
       [-0.10936925,  1.        , -0.4205161 , -0.35654409],
       [ 0.87175416, -0.4205161 ,  1.        ,  0.9627571 ],
       [ 0.81795363, -0.35654409,  0.9627571 ,  1.        ]])