In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels as sm
import jieba 

In [2]:
# Build the 3x3 grid 1..9, then zero the block made of the first two rows and
# the last two columns using a single slice assignment.
arr2d = np.arange(1, 10).reshape(3, 3)
arr2d[:2, 1:] = 0
arr2d

array([[1, 0, 0],
       [4, 0, 0],
       [7, 8, 9]])

## 布尔型索引

In [3]:
# Seven names (with repeats) and a 7x4 array of random values; the boolean
# indexing examples below treat each name as the label of the matching row.
# NOTE(review): no random seed is set in the visible cells, so a re-run will
# presumably not reproduce the numbers recorded in the outputs - confirm.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [4]:
# Display the random array created in the previous cell.
data

array([[ 0.24816488,  1.56906989, -2.78259712,  0.14306042],
       [ 2.00867545,  0.33778786,  1.31145625, -1.49872456],
       [-0.63665746,  0.39563813,  0.91491747, -1.70203313],
       [-0.19207795,  0.30838855, -0.52556376, -0.83899108],
       [ 1.64574205, -1.163656  , -1.43328377, -0.67138254],
       [-0.7120986 ,  0.79630807,  0.52794831, -0.10954858],
       [ 1.25623966, -0.5288925 ,  0.72125604, -1.72007659]])

In [5]:
# Like arithmetic operations, comparisons on arrays (such as ==) are vectorized:
# the result is one boolean per element of names.
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [6]:
# A boolean array can be used to index another array; here it keeps the rows
# of data whose corresponding name is 'Bob'.
data[names == 'Bob']

array([[ 0.24816488,  1.56906989, -2.78259712,  0.14306042],
       [-0.19207795,  0.30838855, -0.52556376, -0.83899108]])

In [7]:
# The boolean array must have the same length as the axis it indexes.
# Boolean arrays can also be mixed with slices and integers (or integer
# sequences); here the 'Bob' rows are combined with a column slice.
data[names == 'Bob', 2:]

array([[-2.78259712,  0.14306042],
       [-0.52556376, -0.83899108]])

In [8]:
# Boolean row mask combined with a single integer column index.
data[names == 'Bob', 3]

array([ 0.14306042, -0.83899108])

In [9]:
# != is applied element-wise as well, giving the complement of names == 'Bob'.
names != 'Bob'

array([False,  True,  True, False,  True,  True,  True])

In [10]:
# ~ inverts the boolean array, so this selects every row that is not 'Bob'.
data[~(names == 'Bob')]

array([[ 2.00867545,  0.33778786,  1.31145625, -1.49872456],
       [-0.63665746,  0.39563813,  0.91491747, -1.70203313],
       [ 1.64574205, -1.163656  , -1.43328377, -0.67138254],
       [-0.7120986 ,  0.79630807,  0.52794831, -0.10954858],
       [ 1.25623966, -0.5288925 ,  0.72125604, -1.72007659]])

In [11]:
# Keep the comparison result in a variable, then negate it with ~ to select
# the rows that do not belong to 'Bob'.
cond = (names == 'Bob')
data[~cond]

array([[ 2.00867545,  0.33778786,  1.31145625, -1.49872456],
       [-0.63665746,  0.39563813,  0.91491747, -1.70203313],
       [ 1.64574205, -1.163656  , -1.43328377, -0.67138254],
       [-0.7120986 ,  0.79630807,  0.52794831, -0.10954858],
       [ 1.25623966, -0.5288925 ,  0.72125604, -1.72007659]])

In [12]:
# Selecting two of the three names means combining several boolean conditions;
# use the boolean arithmetic operators & (and) and | (or) to do so.
mask = (names == 'Bob') | (names == 'Will')
mask

array([ True, False,  True,  True,  True, False, False])

In [13]:
# Rows of data whose name is 'Bob' or 'Will', selected with the combined mask.
data[mask]

array([[ 0.24816488,  1.56906989, -2.78259712,  0.14306042],
       [-0.63665746,  0.39563813,  0.91491747, -1.70203313],
       [-0.19207795,  0.30838855, -0.52556376, -0.83899108],
       [ 1.64574205, -1.163656  , -1.43328377, -0.67138254]])

In [14]:
# Selecting data with a boolean index always creates a copy of the data, even
# when the returned array is unchanged. Note: the Python keywords `and` and `or`
# do not work with boolean arrays; use & and | instead.
# Assigning through a boolean array is a common technique. To set every
# negative value in data to 0, we only need:
data[data < 0] = 0
data

array([[0.24816488, 1.56906989, 0.        , 0.14306042],
       [2.00867545, 0.33778786, 1.31145625, 0.        ],
       [0.        , 0.39563813, 0.91491747, 0.        ],
       [0.        , 0.30838855, 0.        , 0.        ],
       [1.64574205, 0.        , 0.        , 0.        ],
       [0.        , 0.79630807, 0.52794831, 0.        ],
       [1.25623966, 0.        , 0.72125604, 0.        ]])

In [15]:
# Setting whole rows or columns through a 1-D boolean array is just as easy:
# every row whose name is not 'Joe' is overwritten with 7.
data[names != 'Joe'] = 7
data

array([[7.        , 7.        , 7.        , 7.        ],
       [2.00867545, 0.33778786, 1.31145625, 0.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [7.        , 7.        , 7.        , 7.        ],
       [0.        , 0.79630807, 0.52794831, 0.        ],
       [1.25623966, 0.        , 0.72125604, 0.        ]])

## 花式索引

花式索引（Fancy indexing）是一个NumPy术语，它指的是利用整数数组进行索引。假设我们有一个8×4数组：

In [16]:
# Allocate an uninitialised 8x4 float array, then fill each row with its own
# row number by broadcasting a column vector across the four columns.
arr = np.empty((8, 4))
arr[:] = np.arange(8)[:, np.newaxis]
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [17]:
# To select a subset of rows in a particular order, pass a list or ndarray of
# integers that gives the desired order.
arr[[4, 3, 0, 6]]

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [18]:
# Negative indices select rows counting from the end.
arr[[-3, -5, -7]]

array([[5., 5., 5., 5.],
       [3., 3., 3., 3.],
       [1., 1., 1., 1.]])

In [19]:
# Passing several index arrays at once behaves a little differently: the
# result is a 1-D array with one element per (row, column) index pair.
# Build an 8x4 array holding 0..31 to demonstrate this in the next cell.
arr = np.arange(32).reshape(8, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [20]:
# The two index lists are paired up, selecting elements (1, 0), (5, 3), (7, 1)
# and (2, 2).
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [21]:
# Pick rows 1, 5, 7, 2 first, then reorder the columns of that result as
# 0, 3, 1, 2, which yields a rectangular block instead of paired elements.
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

## 数组转置和轴对换

转置是重塑的一种特殊形式，它返回的是源数据的视图（不会进行任何复制操作）。数组不仅有transpose方法，还有一个特殊的T属性：

参考：https://blog.csdn.net/m0_37192554/article/details/84062201

In [22]:
# A 3x5 array holding 0..14, used to demonstrate the T attribute.
arr = np.arange(15).reshape(3, 5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [23]:
# The T attribute gives the transpose: rows and columns trade places.
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [24]:
# Transposing comes up often in matrix computations, for example when using
# np.dot to compute an inner matrix product. Start from a random 6x3 array.
arr = np.random.randn(6, 3)
arr

array([[ 0.7819725 ,  0.14109135, -0.82440286],
       [ 0.8510915 ,  0.39770143,  0.0489323 ],
       [-0.57584936, -0.72117935, -0.92853343],
       [-0.20908203, -0.10443312,  1.14719673],
       [ 0.10230878, -0.05484414, -1.21321075],
       [-0.37023749,  0.04640661,  0.68217931]])

In [25]:
# Matrix product of the transpose of arr with arr itself.
np.dot(arr.T, arr)

array([[ 1.8586984 ,  0.86314312, -0.6848678 ],
       [ 0.86314312,  0.71424058,  0.55117326],
       [-0.6848678 ,  0.55117326,  4.79751804]])

In [26]:
# A three-dimensional array (2 x 2 x 4) holding 0..15, used for the
# axis-permutation examples.
arr = np.arange(16).reshape(2, 2, 4)
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [27]:
# transpose takes a tuple of axis numbers describing how to permute the axes.
# With (1, 0, 2) the first and second axes trade places and the last axis is
# left alone. A plain .T is just a special case of this kind of axis swapping.
arr.transpose((1, 0, 2))

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [28]:
# Show arr once more before applying swapaxes in the next cell; swapaxes takes
# a pair of axis numbers.
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [29]:
# Swap axes 1 and 2. Like transposing, swapaxes returns a view on the source
# data rather than a copy.
arr.swapaxes(1, 2)

array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

## 通用函数：快速的元素级数组函数

通用函数（即ufunc）是一种对ndarray中的数据执行元素级运算的函数。你可以将其看做简单函数（接受一个或多个标量值，并产生一个或多个标量值）的矢量化包装器。

In [30]:
# Many ufuncs are simple element-wise transformations, such as sqrt and exp.
# Use the integers 0..9 as input for the examples.
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [31]:
# Element-wise square root.
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [32]:
# Element-wise exponential: e raised to the power of each element.
np.exp(arr)


array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [33]:
# First of two random length-8 arrays used as operands for a binary ufunc.
x = np.random.randn(8)
x

array([ 1.22743803, -0.55325081,  0.61231261,  2.21571249,  0.21513745,
        1.29129912, -1.10755628, -1.34097526])

In [35]:
# Second random length-8 array, compared element by element with x below.
y = np.random.randn(8)
y

array([ 0.77111423,  0.1971259 , -0.27185397, -0.73339379, -1.05043599,
       -0.32552115, -0.13458288,  0.9298041 ])

In [36]:
# maximum computes the element-wise maximum of x and y.
np.maximum(x, y)

array([ 1.22743803,  0.1971259 ,  0.61231261,  2.21571249,  0.21513745,
        1.29129912, -0.13458288,  0.9298041 ])

In [38]:
# Seven random values scaled by 5, so the samples have non-trivial integer
# parts for the modf example.
arr = 5 * np.random.randn(7)
arr

array([-0.67748036, 12.80332866,  4.13215524, -1.97319789, -2.72763997,
       -5.35413757,  3.25623961])

In [39]:
# modf returns two arrays: the fractional parts and the integral parts of arr.
# Both keep the sign of the input, as the recorded outputs show.
remainder, whole_part = np.modf(arr)
remainder

array([-0.67748036,  0.80332866,  0.13215524, -0.97319789, -0.72763997,
       -0.35413757,  0.25623961])

In [40]:
# Integral parts returned by np.modf in the previous cell.
whole_part


array([-0., 12.,  4., -1., -2., -5.,  3.])