# 第10回勉強会

## NumPy ndarray:多次元配列

NumPyとは・・・科学技術計算やデータ分析のためのPythonパッケージ<br><br>
NumPyの特徴
1. ベクトル演算やブロードキャストを提供する高速でメモリ効率のよい多次元配列の実装である「ndarray」<br>
1. 高速に動作し、呼び出す際にループ記法を必要としない標準的な数学関数<br>
1. ディスクへの配列の読み書きに加え、メモリマップ機能を提供する入出力
1. 行列計算、乱数生成、フーリエ変換の各機能

In [76]:
import numpy as np
from numpy.random import *
import pandas as pd

In [77]:
# ndarray: NumPy's N-dimensional array object, built here from a nested list
# (two rows, three columns).
data = np.array([[0.9526, -0.246, -0.8856], [0.5639, 0.2379, 0.9104]])
data

array([[ 0.9526, -0.246 , -0.8856],
       [ 0.5639,  0.2379,  0.9104]])

In [78]:
# 多次元配列の計算
data * 10

array([[ 9.526, -2.46 , -8.856],
       [ 5.639,  2.379,  9.104]])

In [79]:
# Element data type (dtype) of an ndarray: the same values stored once as
# 64-bit floats and once as 32-bit integers.
arr1 = np.array([1, 2, 3], dtype="float64")
arr2 = np.array([1, 2, 3], dtype="int32")

print(arr1, arr2, sep="\n")

[1. 2. 3.]
[1 2 3]


Python 3系ではint型とlong型が統合されており、int型は任意精度で、メモリの許す限りいくらでも大きな値を扱える。一方、NumPyのint32は32ビット固定長のため、範囲外の値を渡すと下のセルのようにエラーになる。

In [80]:
# A built-in Python int has arbitrary precision, so this large value is stored as-is.
i = int(21474836472147483647) 
print(i)
# np.int32 is a fixed-width 32-bit integer: the same value does not fit, and
# this call raised OverflowError when the notebook was run (see the output below).
npi_error = np.int32(21474836472147483647)
print(npi_error)


21474836472147483647


OverflowError: Python int too large to convert to C long

NumPyでは要素ごとの処理のためにループを書く必要が無い（配列同士の演算は要素ごとに適用される）

In [81]:
# Element-wise arithmetic between two ndarrays of the same shape (here arr * arr
# squares every element) -- no explicit Python loop is needed.
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr)
print("--------")
print(arr*arr)

[[1 2 3]
 [4 5 6]]
--------
[[ 1  4  9]
 [16 25 36]]


In [82]:
# インデックス参照
arr1 = np.arange(10)
print(arr1)
print("-------")
print(arr1[5])

[0 1 2 3 4 5 6 7 8 9]
-------
5


In [83]:
# スライス
arr2 = np.arange(10)
print(arr2)

[0 1 2 3 4 5 6 7 8 9]


In [84]:
arr2_slice = arr2[5:8]
print(arr2_slice)

[5 6 7]


In [85]:
# arr2_slice is a view onto arr2, not a copy, so this assignment also writes
# through to the underlying arr2.
arr2_slice[1] = 12345
print(arr2_slice)

[    5 12345     7]


In [86]:
print(arr2)

[    0     1     2     3     4     5 12345     7     8     9]


In [87]:
# Fancy indexing: index with a list (or array) of integers to pick rows in
# exactly the order given.
arr = np.empty((8, 4))
for i in range(arr.shape[0]):
    arr[i, :] = i  # fill the whole of row i with the value i
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [88]:
# Negative indices count from the end: -3, -5, -7 select rows 5, 3 and 1 of the 8 rows.
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [89]:
arr = np.arange(32).reshape((8,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [90]:
# Passing two index lists pairs them up element-wise: this picks the elements at
# (1, 0), (5, 3), (7, 1) and (2, 2), giving a 1-D result rather than a sub-matrix.
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [91]:
# 転置行列、行と列の入れ替え
arr1 = np.arange(15).reshape((3, 5))

In [92]:
arr1

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [93]:
arr1.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

## ユニバーサル関数：すべての配列要素への関数適用

In [94]:
# sqrt 平方根
arr = np.arange(10)
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [95]:
# exp 指数
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [96]:
# randn
x = randn(8)
y = randn(8)
print(x)
print(y)

[-1.63174464  1.71721814 -0.22598039 -1.14254809 -0.18148492  1.0919751
 -1.51566314  0.17378101]
[-0.43880066 -1.45887488 -0.36264214  0.24891066 -0.47395406  0.55707714
 -0.10673934  0.95551398]


In [97]:
# Element-wise maximum: for each position, take the larger of x[i] and y[i].
np.maximum(x, y)

array([-0.43880066,  1.71721814, -0.22598039,  0.24891066, -0.18148492,
        1.0919751 , -0.10673934,  0.95551398])

## ndarrayを用いたデータ処理

In [98]:
points = np.arange(-5, 5, 0.01) # evenly spaced values from -5 (inclusive) up to 5 (exclusive) in steps of 0.01
points

array([-5.0000000e+00, -4.9900000e+00, -4.9800000e+00, -4.9700000e+00,
       -4.9600000e+00, -4.9500000e+00, -4.9400000e+00, -4.9300000e+00,
       -4.9200000e+00, -4.9100000e+00, -4.9000000e+00, -4.8900000e+00,
       -4.8800000e+00, -4.8700000e+00, -4.8600000e+00, -4.8500000e+00,
       -4.8400000e+00, -4.8300000e+00, -4.8200000e+00, -4.8100000e+00,
       -4.8000000e+00, -4.7900000e+00, -4.7800000e+00, -4.7700000e+00,
       -4.7600000e+00, -4.7500000e+00, -4.7400000e+00, -4.7300000e+00,
       -4.7200000e+00, -4.7100000e+00, -4.7000000e+00, -4.6900000e+00,
       -4.6800000e+00, -4.6700000e+00, -4.6600000e+00, -4.6500000e+00,
       -4.6400000e+00, -4.6300000e+00, -4.6200000e+00, -4.6100000e+00,
       -4.6000000e+00, -4.5900000e+00, -4.5800000e+00, -4.5700000e+00,
       -4.5600000e+00, -4.5500000e+00, -4.5400000e+00, -4.5300000e+00,
       -4.5200000e+00, -4.5100000e+00, -4.5000000e+00, -4.4900000e+00,
       -4.4800000e+00, -4.4700000e+00, -4.4600000e+00, -4.4500000e+00,
      

In [99]:
# meshgrid x, yの各座標の要素列から格子座標を作成する
xs, ys = np.meshgrid(points, points)
xs

array([[-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       ...,
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99],
       [-5.  , -4.99, -4.98, ...,  4.97,  4.98,  4.99]])

In [100]:
ys

array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],
       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
       ...,
       [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],
       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],
       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])

In [101]:
# Data processing with ndarray: evaluate sqrt(x^2 + y^2) at every grid point
# in one vectorized expression.
import matplotlib.pyplot as plt
z = np.sqrt(xs ** 2 + ys ** 2)
z

array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
        7.06400028],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       ...,
       [7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
        7.04279774],
       [7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
        7.04985815],
       [7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
        7.05692568]])

In [102]:
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x2130f38e898>

In [103]:
# Raw string so the backslash reaches matplotlib's mathtext untouched; the
# command for a square root is \sqrt (the original spelled it "\sqart").
# NOTE(review): in a notebook this probably has to run in the same cell as
# plt.imshow(...) for the title to land on that image -- confirm.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

<matplotlib.text.Text at 0x2130f33d780>

In [104]:
# condがTrueの場合はxarrをFalseの場合はyarrを取る
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

In [105]:
# Pure-Python version: zip() walks the three arrays in lockstep and, for each
# position, keeps x when the condition c is true and y otherwise.
# The comprehension keeps x, y and c local, so the notebook-level x and y are untouched.
result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
result

[1.1, 2.2, 1.3, 1.4, 2.5]

In [106]:
# np.where ある配列を基にして別の配列を作る
arr = randn(4, 4)
arr

array([[ 1.47166652, -1.28293205, -0.810321  ,  2.21998788],
       [-0.11336744, -0.24803763,  0.47574117,  0.21116104],
       [ 1.35544653,  0.30635568, -0.85376029,  0.58325508],
       [-2.32775101,  0.54308637,  1.12307055,  1.55831561]])

In [107]:
np.where(arr > 0, 2, -2) # 2 where the element is > 0, otherwise (zero or negative) -2

array([[ 2, -2, -2,  2],
       [-2, -2,  2,  2],
       [ 2,  2, -2,  2],
       [-2,  2,  2,  2]])

In [108]:
# 数学関数
arr1 = randn(5,4)
arr1

array([[-0.34196398, -0.736489  ,  1.9913926 , -1.33029434],
       [-0.56529659, -1.02038024,  1.81943582,  0.5119229 ],
       [-1.50917694, -2.01308908, -0.22861218, -0.81810818],
       [ 0.40139749, -0.41898938,  0.53759444, -0.79424376],
       [ 1.38267181,  1.02431334,  0.02050506,  0.9062655 ]])

In [109]:
# 平均
arr1.mean()

-0.059057234871524054

In [110]:
# 合計
arr1.sum()

-1.1811446974304811

In [111]:
# 真偽値の配列関数
bools1 = np.array([False, False, True, False])
print("bools1:{}".format(bools1))
bools2 = np.array([False, False, False, False])
print("bools2:{}".format(bools2))
bools3 = np.array([True, True, True, True])
print("bools3:{}".format(bools3))

bools1:[False False  True False]
bools2:[False False False False]
bools3:[ True  True  True  True]


In [112]:
# Trueが1つでもある場合はTrue
print("bools1:{}".format(bools1.any()))
print("bools2:{}".format(bools2.any()))
print("bools3:{}".format(bools3.any()))

bools1:True
bools2:False
bools3:True


In [113]:
# すべてTrueの場合はTrue
print("bools1:{}".format(bools1.all()))
print("bools2:{}".format(bools2.all()))
print("bools3:{}".format(bools3.all()))

bools1:False
bools2:False
bools3:True


## ソート

In [114]:
arr = randn(8)
arr

array([ 0.69286289, -0.30440589, -0.55141613, -0.21188306, -0.46536337,
        0.55724723, -1.19218243,  1.5091594 ])

In [115]:
# ndarray.sort() sorts the array in place, so arr itself is reordered (ascending).
arr.sort()
arr

array([-1.19218243, -0.55141613, -0.46536337, -0.30440589, -0.21188306,
        0.55724723,  0.69286289,  1.5091594 ])

## 集合関数

In [116]:
# 重複なしの配列にする
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

## ndarrayの保存

In [117]:
# バイナリ形式
arr = np.arange(10)
np.save("10_some_array", arr)

## 行列計算

In [118]:
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[6, 23], [-1, 7], [8, 9]])

In [119]:
x

array([[1, 2, 3],
       [4, 5, 6]])

In [120]:
y

array([[ 6, 23],
       [-1,  7],
       [ 8,  9]])

In [121]:
x.dot(y)

array([[ 28,  64],
       [ 67, 181]])

In [122]:
x1 = np.array([[1,2], [3, 4]])
y1 = np.array([[1, 2], [3, 4]])
x1.dot(y1)

array([[ 7, 10],
       [15, 22]])