In [1]:
import numpy as np

- `np.random.randn(2, 3)` 方法解释：    
    返回一个形状为 2×3 的二维数组，数组中的元素是服从标准正态（高斯）分布的随机数

In [2]:
# Draw a 2x3 array of random samples. No seed is set in this notebook,
# so the values differ on every run.
data = np.random.randn(2,3)
data

array([[ 0.53456489, -0.96597529, -0.4890685 ],
       [ 0.55580565, -2.61125166, -1.47628983]])

对数组进行一些数学运算：乘法、加法（均按元素逐个计算）。

In [3]:
data * 10

array([[  5.34564894,  -9.65975291,  -4.89068501],
       [  5.55805652, -26.11251664, -14.76289828]])

In [4]:
data + data

array([[ 1.06912979, -1.93195058, -0.978137  ],
       [ 1.1116113 , -5.22250333, -2.95257966]])

shape查看维度大小，dtype表示数组中的数据类型。

In [5]:
data.shape

(2, 3)

In [6]:
data.dtype

dtype('float64')

## 1 创建n维数组

In [7]:
# np.array accepts any sequence (for example a list) and builds an ndarray from it.
data1 = [5,6.5,3,0,80]
arr1 = np.array(data1)
arr1

array([ 5. ,  6.5,  3. ,  0. , 80. ])

In [8]:
# A nested sequence is converted into a multi-dimensional array.
data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [9]:
# data2 is a list of lists, so arr2 has two dimensions; check with ndim and shape.
arr2.ndim

2

In [10]:
arr2.shape

(2, 4)

In [11]:
# Unless a dtype is given explicitly, np.array picks a suitable type and stores it in dtype.
arr1.dtype

dtype('float64')

In [12]:
arr2.dtype

dtype('int32')

其他可以创建数组的函数。

In [13]:
# Other constructors: zeros, ones, empty.
# A single integer gives a 1-D array of that length.
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [14]:
# Pass a tuple to specify the shape of a multi-dimensional array.
np.zeros((3,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [15]:
# Create an array with the same shape as arr2 in which every element is 0.
np.zeros_like(arr2)

array([[0, 0, 0, 0],
       [0, 0, 0, 0]])

In [16]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [17]:
np.ones((2,3,4))

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

In [18]:
np.ones_like(arr2)

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

In [19]:
# np.empty does not guarantee an all-zero array; it may return arbitrary garbage values.
np.empty((2,3,2))

array([[[4.94e-322, 0.00e+000],
        [0.00e+000, 0.00e+000],
        [0.00e+000, 0.00e+000]],

       [[0.00e+000, 0.00e+000],
        [0.00e+000, 0.00e+000],
        [0.00e+000, 0.00e+000]]])

In [20]:
# np.full builds an array of the given shape with every element set to fill_value.
np.full((2,3),fill_value=5)

array([[5, 5, 5],
       [5, 5, 5]])

In [21]:
# np.full_like takes its shape from arr2 and sets every element to fill_value.
np.full_like(arr2,fill_value=6)

array([[6, 6, 6, 6],
       [6, 6, 6, 6]])

## 2 ndarray中元素的数据类型

In [22]:
# The dtype argument sets the element type explicitly at creation time.
# Note: this rebinds arr1 and arr2, which earlier cells used for other arrays.
arr1 = np.array([1,2,3],dtype=np.float64)
arr2 = np.array([1,2,3],dtype=np.int64)

In [23]:
arr1.dtype

dtype('float64')

In [24]:
arr2.dtype

dtype('int64')

In [25]:
# astype converts an array to another dtype.
float_arr2 = arr2.astype(np.float64)
float_arr2.dtype

dtype('float64')

In [26]:
# Converting float to int loses precision; this float array is the input for that demonstration.
arr = np.array([3.7,-12.3,2.6,8.6,10.1])
arr

array([  3.7, -12.3,   2.6,   8.6,  10.1])

In [27]:
arr.dtype

dtype('float64')

In [28]:
# The fractional part is discarded rather than rounded (3.7 -> 3, -12.3 -> -12).
int_arr = arr.astype(np.int64)
int_arr

array([  3, -12,   2,   8,  10], dtype=int64)

In [29]:
# Numbers stored as text can be turned into real numeric values.
# The byte-string dtype is spelled np.bytes_ here: the old np.string_ alias
# was removed in NumPy 2.0 and raises AttributeError there. The resulting
# array is the same fixed-width byte-string array (dtype '|S5').
numerical_strs = np.array(['1.25','48','-10.1'],dtype=np.bytes_)
numerical_strs

array([b'1.25', b'48', b'-10.1'], dtype='|S5')

In [30]:
# astype parses the byte strings into float64 values.
numericals = numerical_strs.astype(np.float64)
numericals

array([  1.25,  48.  , -10.1 ])

In [31]:
# np.arange(10) yields the integers 0 through 9.
int_array = np.arange(10)
int_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [32]:
# The dtype of another array can be passed to astype directly.
int_array = np.arange(10)
calibers = np.array([.22,.270,.357],dtype=np.float64)
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [33]:
# Arithmetic between arrays and scalars.
# Operations between equal-shape arrays are applied element by element.
arr = np.array([[1,2,3],[4,5,6]])
print(arr + arr)
print(arr - arr)
print(arr * arr)

[[ 2  4  6]
 [ 8 10 12]]
[[0 0 0]
 [0 0 0]]
[[ 1  4  9]
 [16 25 36]]


In [34]:
1 / arr # division: the scalar is divided by every element

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [35]:
arr ** 0.5 # square root of every element

array([[1.        , 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [36]:
# Basic indexing and slicing.
# arr[4:6] selects indices 4 and 5; the end index is excluded.
arr = np.arange(10)
arr[4:6]

array([4, 5])

In [37]:
# Unlike a list, a slice here is a reference to the original data;
# assigning a value through it updates every selected element.
arr[4:7] = 16
arr

array([ 0,  1,  2,  3, 16, 16, 16,  7,  8,  9])

In [38]:
# arr_slice covers arr[5], arr[6], arr[7], so arr_slice[1] refers to arr[6].
arr_slice = arr[5:8]
arr_slice[1] = 123456
arr# the value of arr[6] has changed

array([     0,      1,      2,      3,     16,     16, 123456,      7,
            8,      9])

In [39]:
# Assigning to arr_slice[:] writes into every element the slice refers to.
arr_slice[:] = 64
arr# the values of arr[5:8] have changed

array([ 0,  1,  2,  3, 16, 64, 64, 64,  8,  9])

In [40]:
# Indexing a 2-D array with a single integer returns that whole row as a 1-D array.
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d[2]

array([7, 8, 9])

In [41]:
arr2d[0][2]# equivalent to arr2d[0,2]: row index first, then column index

3

In [42]:
# Two 2x3 blocks stacked along the first axis.
arr3d = np.array([[[1, 2, 3],
                   [4, 5, 6]],
                  [[7, 8, 9],
                   [10, 11, 12]]]) # 3-D array
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [43]:
# .copy() makes old_values independent of arr3d, so editing one does not affect the other.
old_values = arr3d[0].copy()
old_values

array([[1, 2, 3],
       [4, 5, 6]])

In [44]:
# Modify the copy only.
old_values[0,1] = 1234
old_values

array([[   1, 1234,    3],
       [   4,    5,    6]])

In [45]:
# arr3d is unchanged, because old_values is a copy rather than a view.
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [46]:
# Assigning a scalar to arr3d[0] fills the entire first 2x3 block.
arr3d[0] = 42
arr3d

array([[[42, 42, 42],
        [42, 42, 42]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [47]:
# An array can be assigned as well: this writes old_values (including the 1234 edit) into arr3d[0].
arr3d[0] = old_values
arr3d

array([[[   1, 1234,    3],
        [   4,    5,    6]],

       [[   7,    8,    9],
        [  10,   11,   12]]])

In [48]:
arr3d[1, 0, 2] # one index per axis, in axis order (x, y, z)

9

In [50]:
# Slice indexing
arr[1:6]

array([ 1,  2,  3, 16, 64])

In [51]:
arr2d[:2] # the first 2 rows

array([[1, 2, 3],
       [4, 5, 6]])

In [52]:
arr2d[:2, 1:] # 2-D array made of the first 2 rows and every column except column 0

array([[2, 3],
       [5, 6]])

In [53]:
# Combining an integer index with a slice drops the indexed axis, giving 1-D results.
print(arr2d[1, :2])
print(arr2d[2, :1])

[4 5]
[7]


In [54]:
arr2d[:, :1] # a bare : selects the whole axis, so this is column 0 of every row

array([[1],
       [4],
       [7]])

In [55]:
arr2d[:2, 1:] = 0 # the assignment applies to every selected element
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

In [56]:
# Boolean indexing.
# names has 7 entries, one for each row of the 7x4 data array.
# data is drawn without a seed, so its values differ on every run.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
print(names)
print(data)

['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
[[-0.28839899 -1.61488802  0.47600954  0.2191229 ]
 [-0.031779    0.02368679 -0.8422292   0.52148839]
 [-0.85907112 -0.74487811 -0.83004476  1.38618299]
 [ 0.00456712 -0.13881616 -1.32529605 -1.24452512]
 [ 0.48547162 -0.15080318  0.02629649 -0.67678315]
 [-0.13963276 -0.68682398 -1.50266794 -0.15894092]
 [ 0.08954553  0.46229266  0.52304087  0.23830636]]


In [57]:
names == 'Bob' # returns a boolean array

array([ True, False, False,  True, False, False, False])

In [58]:
data[names == 'Bob'] # the boolean values are matched against rows: True keeps the row, False drops it.

array([[-0.28839899, -1.61488802,  0.47600954,  0.2191229 ],
       [ 0.00456712, -0.13881616, -1.32529605, -1.24452512]])

In [59]:
data[names == 'Bob', 2:] # drop the first 2 columns

array([[ 0.47600954,  0.2191229 ],
       [-1.32529605, -1.24452512]])

In [60]:
data[names == 'Bob', 3] # keep only the column at index 3 (the fourth column)

array([ 0.2191229 , -1.24452512])

In [61]:
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [62]:
data[~(names != 'Bob')]# ~ performs logical negation of the boolean array.

array([[-0.28839899, -1.61488802,  0.47600954,  0.2191229 ],
       [ 0.00456712, -0.13881616, -1.32529605, -1.24452512]])

In [64]:
# Combine conditions with | (logical or).
# Each comparison is parenthesised because | binds more tightly than ==.
mask = (names == 'Bob') | (names == 'Will')
data[mask]

array([[-0.28839899, -1.61488802,  0.47600954,  0.2191229 ],
       [-0.85907112, -0.74487811, -0.83004476,  1.38618299],
       [ 0.00456712, -0.13881616, -1.32529605, -1.24452512],
       [ 0.48547162, -0.15080318,  0.02629649, -0.67678315]])

In [65]:
data[data < 0] = 0 # set every value below 0 to 0
data

array([[0.        , 0.        , 0.47600954, 0.2191229 ],
       [0.        , 0.02368679, 0.        , 0.52148839],
       [0.        , 0.        , 0.        , 1.38618299],
       [0.00456712, 0.        , 0.        , 0.        ],
       [0.48547162, 0.        , 0.02629649, 0.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.08954553, 0.46229266, 0.52304087, 0.23830636]])

In [66]:
# Set every row whose name is not 'Joe' to 7.
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.02368679, 0.        , 0.52148839],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.        , 0.        , 0.        ],
       [0.08954553, 0.46229266, 0.52304087, 0.23830636]])

In [68]:
# Fancy indexing
# Set up an 8x4 float array in which every entry of row i equals i.
arr = np.empty((8, 4))
for i in range(len(arr)):
    arr[i, :] = i
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [70]:
arr[2,3]# select a single element (row 2, column 3)

2.0

In [71]:
arr[[2,3]] # select two rows (rows 2 and 3); note the extra pair of square brackets

array([[2., 2., 2., 2.],
       [3., 3., 3., 3.]])

In [72]:
arr[[-3,-5,-7]]# negative indices work too; they count from the end

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [73]:
arr = np.arange(32).reshape((8, 4)) # reshape the 32 values into 8 rows of 4 columns
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [74]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]] # the index lists are paired: this picks elements (1, 0), (5, 3), (7, 1), (2, 2).

array([ 4, 23, 29, 10])

In [75]:
# This is the way to select a rectangular sub-array: pick the rows first,
# then pick the columns using a full slice over the rows.
arr[[1,5,7,2]][:,[0,3,1,2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [76]:
# np.ix_ converts two 1-D integer arrays into an indexer that selects a rectangular region.
arr[np.ix_([1, 5, 7, 2], [0, 3, 1, 2])] 

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [77]:
# Important: unlike slicing, fancy indexing always copies the data into a new array.

In [78]:
# arr_slice is produced by fancy indexing, so it holds a copy of the selected data.
arr_slice = arr[[1,5,7,2]][:,[0,3,1,2]]
arr_slice

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [79]:
# Overwrite the first two rows of the copy.
arr_slice[:2] = 123

In [80]:
# arr is unaffected by the write to the fancy-indexed copy.
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [81]:
arr_slice

array([[123, 123, 123, 123],
       [123, 123, 123, 123],
       [ 28,  31,  29,  30],
       [  8,  11,   9,  10]])

In [82]:
# By contrast, a plain slice refers to the data of arr itself.
arr_slice = arr[:3]
arr_slice

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [83]:
# Write through the slice; row 1 of the slice is row 1 of arr.
arr_slice[1] = 123

In [84]:
# Row 1 of arr itself now holds 123: the write through the slice reached the original.
arr

array([[  0,   1,   2,   3],
       [123, 123, 123, 123],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15],
       [ 16,  17,  18,  19],
       [ 20,  21,  22,  23],
       [ 24,  25,  26,  27],
       [ 28,  29,  30,  31]])

In [85]:
arr_slice

array([[  0,   1,   2,   3],
       [123, 123, 123, 123],
       [  8,   9,  10,  11]])

In [87]:
# Selecting rows with a list of indices is fancy indexing, so arr_123 is a copy.
arr_123 = arr[[1,2,3]]
arr_123

array([[123, 123, 123, 123],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15]])

In [88]:
# Replace the first row of the copy.
arr_123[0] = [4,5,6,7]
arr_123

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [89]:
# arr still holds 123 in row 1: the change to the copy did not propagate.
arr

array([[  0,   1,   2,   3],
       [123, 123, 123, 123],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15],
       [ 16,  17,  18,  19],
       [ 20,  21,  22,  23],
       [ 24,  25,  26,  27],
       [ 28,  29,  30,  31]])

In [90]:
# Transposing arrays and swapping axes.
# .T of the 3x5 array is its 5x3 transpose.
arr = np.arange(15).reshape((3,5))
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [91]:
arr = np.random.randn(6, 3)
np.dot(arr.T, arr) # matrix product: (3 rows x 6 cols) times (6 rows x 3 cols) gives 3 rows x 3 cols

array([[ 5.65320193,  4.47088391, -3.71040096],
       [ 4.47088391, 11.8815606 , -5.30414816],
       [-3.71040096, -5.30414816,  4.03893754]])

In [92]:
# A 3-D array of shape (2, 2, 4) used for the axis-permutation examples.
arr = np.arange(16).reshape((2, 2, 4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [93]:
arr.transpose((1, 0, 2)) # swaps the first two axes: new[y, x, z] = old[x, y, z]

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [94]:
arr.swapaxes(1, 2) # exchange axes 1 and 2

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])