## NumPy基础：数组和矢量运算

In [1]:
import numpy as np

# Build a 1-D ndarray from a plain Python list. Because the list mixes
# integers with a float, the resulting array holds floating-point values.
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1

array([ 6. ,  7.5,  8. ,  0. ,  1. ])

In [4]:
# A nested sequence (for example a list of equal-length lists) is
# converted into a multi-dimensional array.
data2 = [
    [1, 2, 3, 4],
    [5, 6, 6, 8],
]
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 6, 8]])

In [7]:
# Number of dimensions (axes) of the array
arr2.ndim

2

In [9]:
# Shape of the array: its size along each dimension
arr2.shape

(2, 4)

In [5]:
# Inspect the element data type of the array.
# The attribute is evaluated directly (instead of through the IPython `?`
# help prefix) so that the dtype itself is displayed as the cell result.
arr2.dtype

## 数组创建函数

In [10]:
# Create an array filled with zeros (here a 1-D array of length 6)
np.zeros(6)

array([ 0.,  0.,  0.,  0.,  0.,  0.])

In [3]:
# Build a 3x6 array of zeros from a shape tuple and display only its first row
np.zeros((3,6))[0]

array([ 0.,  0.,  0.,  0.,  0.,  0.])

In [12]:
# np.empty allocates the array without initializing it, so the values
# displayed are arbitrary memory contents rather than zeros
np.empty((2,3,2))

array([[[  1.19431687e-311,   3.16202013e-322],
        [  0.00000000e+000,   0.00000000e+000],
        [  1.33511969e-306,   4.75730381e-038]],

       [[  2.65008003e-032,   1.73686922e-047],
        [  1.53579185e-051,   7.78791181e-071],
        [  3.44269070e+175,   2.93054526e-057]]])

In [13]:
# Create an N x N identity matrix (ones on the diagonal, zeros elsewhere)
np.eye(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [14]:
# np.identity(n) builds the same n x n identity matrix as np.eye(n)
np.identity(5)

array([[ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]])

In [15]:
# Array-valued counterpart of Python's range: the integers 0 through 14
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [17]:
# ndarray element types include float, int, complex, bool, object,
# byte strings and unicode strings. The dtype can be requested
# explicitly when the array is created.
arr3 = np.array([1, 2, 3, 4], dtype=np.int64)
arr3

array([1, 2, 3, 4], dtype=int64)

In [18]:
# Explicitly convert an array to another dtype with ndarray.astype;
# the converted array is bound to a separate name
float_arr = arr3.astype(np.float64)
float_arr 

array([ 1.,  2.,  3.,  4.])

In [20]:
# Array of numeric text stored as fixed-width byte strings.
# np.bytes_ is used because the np.string_ alias does not exist in
# NumPy 2.0; the resulting dtype is unchanged ('S4': the longest entry
# is 4 bytes long).
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
numeric_strings

array([b'1.25', b'-9.6', b'42'], 
      dtype='|S4')

In [21]:
# An array of numeric strings can also be converted to numeric form with astype
numeric_strings.astype(float)

array([  1.25,  -9.6 ,  42.  ])

In [23]:
# Wrap a single Python integer; NumPy chooses the integer dtype itself
# NOTE(review): the int32 shown in the output is presumably platform-dependent — confirm before relying on it
int_array = np.array(10)
int_array.dtype

dtype('int32')

In [24]:
# Small float64 array; its dtype is reused as a cast target further down
calibes = np.array(
    [0.22, 0.27, 0.358, 0.38, 0.44, 0.55],
    dtype=np.float64,
)
calibes

array([ 0.22 ,  0.27 ,  0.358,  0.38 ,  0.44 ,  0.55 ])

In [27]:
# Cast one array to the dtype of another by passing that array's
# dtype attribute to astype
tran_array = int_array.astype(calibes.dtype)
tran_array.dtype

dtype('float64')

## 基本的索引和切片

In [39]:
# Integers 0..9, used by the indexing and slicing examples in this section
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [40]:
# Indexing is zero-based in Python, so index 5 is the sixth element
arr[5]

5

In [41]:
# A slice A:B covers indices A through B-1 (the end is exclusive),
# so 5:6 yields a one-element array rather than a scalar
arr[5:6]

array([5])

In [42]:
# Assigning a scalar to a slice writes that value into every selected position
arr[5:8]=12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [43]:
# Keep a reference to the slice; the following cells write through it and then inspect arr
arr_slice = arr[5:8]
arr_slice

array([12, 12, 12])

In [45]:
# The slice is a view, not a copy: writing through arr_slice changes arr itself
arr_slice[1] = 12345
arr

array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])

In [46]:
# A bare [:] addresses every element of the view, so all three positions in arr become 64
arr_slice[:] = 64
arr

array([ 0,  1,  2,  3,  4, 64, 64, 64,  8,  9])

## 二维数组

In [58]:
# 3x3 example matrix for the two-dimensional indexing cells
arr2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
arr2d.ndim

2

In [59]:
# A single index on a 2-D array selects a whole row as a 1-D array
arr2d[2]

array([7, 8, 9])

In [60]:
# Chained indexing: take row 0 first, then element 2 of that row
arr2d[0][2]

3

In [61]:
# Comma-separated indices select the same element in a single indexing step
arr2d[0,2]

3

## 三维数组

In [3]:
# Pay attention to the bracket nesting: two blocks, each holding
# two rows of three values.
arr3d = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[7, 8, 9], [10, 11, 12]],
])
arr3d.shape

(2, 2, 3)

In [4]:
# Display the full 3-D array
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [5]:
# One index on the 3-D array returns a 2-D sub-array (here 2x3)
arr3d[0]

array([[1, 2, 3],
       [4, 5, 6]])

In [6]:
# Two indices return a 1-D array
arr3d[0,0]

array([1, 2, 3])

In [7]:
# Three indices return a single element
arr3d[0,0,0]

1

In [10]:
# Block 0, row 1, column 2
arr3d[0,1,2]

6

In [14]:
# Slices and integers can be mixed: for the first two blocks and every row, take column 1
arr3d[:2,:,1]

array([[ 2,  5],
       [ 8, 11]])

## 布尔型索引

In [28]:
# Seven labels with repeats, used to build boolean masks in this section
names = np.array([
    'Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe',
])

In [26]:
# Generate normally distributed random numbers with numpy.random.randn (7 rows, 4 columns)
# NOTE(review): no seed is set in the visible cells, so the values differ on every run
data = np.random.randn(7,4)
data

array([[-2.17351949, -0.30476106, -0.68369297,  0.17130619],
       [ 0.42615293,  0.00559692, -0.25807861,  0.63802164],
       [ 1.27559996,  0.77339471,  0.97012019, -0.81635756],
       [-0.47286603, -1.3285012 , -1.67151115, -1.47748413],
       [ 0.5522546 , -0.13732568,  0.79188639, -0.76183544],
       [-0.53424972, -1.70265297, -0.91636565, -0.11183847],
       [-0.35148601, -0.25724484, -1.15232654,  0.34267421]])

In [29]:
# Element-wise comparison produces a boolean array with one entry per name
names == 'Bob'

array([ True, False, False,  True, False, False, False], dtype=bool)

In [30]:
# A boolean array used as an index keeps the rows where the mask is True
data[names=='Bob']

array([[-2.17351949, -0.30476106, -0.68369297,  0.17130619],
       [-0.47286603, -1.3285012 , -1.67151115, -1.47748413]])

In [31]:
# Boolean row selection combined with a column slice (first two columns)
data[names=='Bob',:2]

array([[-2.17351949, -0.30476106],
       [-0.47286603, -1.3285012 ]])

## &（与），|（或）

In [34]:
# Combine conditions with | (or). Each comparison must be wrapped in
# parentheses because | binds more tightly than ==.
mask = (names == 'Bob')|(names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False], dtype=bool)

In [35]:
# A boolean mask with the same shape as data selects the matching elements as a flat 1-D array
data[data<0]

array([-2.17351949, -0.30476106, -0.68369297, -0.25807861, -0.81635756,
       -0.47286603, -1.3285012 , -1.67151115, -1.47748413, -0.13732568,
       -0.76183544, -0.53424972, -1.70265297, -0.91636565, -0.11183847,
       -0.35148601, -0.25724484, -1.15232654])

## 花式索引

In [6]:
# Allocate an 8x4 array without initializing it; every row is filled in by the next cell
arr = np.empty((8,4))

In [7]:
# Fill each row with its own row index so that the row selections
# that follow are easy to recognize in the output.
for i in range(8):
    arr[i, :] = i
arr

array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])

In [40]:
# To select a subset of rows in a particular order, pass a list of integer row indices
arr[[4,3,0,6]]

array([[ 4.,  4.,  4.,  4.],
       [ 3.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  0.],
       [ 6.,  6.,  6.,  6.]])

In [43]:
# Negative indices select rows counting from the end
arr[[-1,-3]]

array([[ 7.,  7.,  7.,  7.],
       [ 5.,  5.,  5.,  5.]])

In [47]:
# The integers 0..31 laid out as 8 rows of 4 columns
arr = np.arange(32).reshape(8, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [46]:
# With two index lists the elements are picked pairwise: positions (1, 0), (5, 3), (7, 1), (2, 2)
arr[[1,5,7,2],[0,3,1,2]]

array([ 4, 23, 29, 10])

In [49]:
# Select rows 1, 5, 7, 2 first; the trailing [:, [0, 3, 1, 2]] then only reorders the columns
arr[[1,5,7,2]][:,[0,3,1,2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [50]:
# Row selection alone, for comparison with the column-reordered result
arr[[1,5,7,2]]

array([[ 4,  5,  6,  7],
       [20, 21, 22, 23],
       [28, 29, 30, 31],
       [ 8,  9, 10, 11]])