In [2]:
import pandas as pd
import numpy as np

In [3]:
# Seeded generator so the sampled values are the same on every run.
rand = np.random.RandomState(seed=42)
# Ten integers drawn from [0, 100); equivalent to rand.randint(100, size=10).
x = rand.randint(low=0, high=100, size=10)
x

array([51, 92, 14, 71, 60, 20, 82, 86, 74, 74])

In [4]:
# Pick out three elements one at a time with ordinary scalar indexing.
x[2], x[3], x[7]

(14, 71, 86)

In [5]:
ind = [2, 3, 7] # equivalent to ind = np.array([2, 3, 7])
# Fancy indexing: a list of indices selects all of those elements at once.
x[ind]

array([14, 71, 86])

In [5]:
# A 2x2 array of positions to look up in x.
ind = np.array([[3, 7],
                [4, 5]])

In [6]:
x[ind]
# the result takes the shape of the index array: a 2x2 index gives a 2x2 result

array([[71, 86],
       [60, 20]])

# Fancy indexing in multiple dimensions

In [21]:
# 3x4 grid holding 0..11, filled row by row.
x = np.reshape(np.arange(12), (3, 4))
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [25]:
row = np.array([0, 1, 2])
col = np.array([2, 1, 3])

x[row, col] # the two index arrays are paired element-wise: we get (0, 2), (1, 1), (2, 3)

array([ 2,  5, 11])

In [26]:
x[row[:, np.newaxis], col]
# row becomes a (3, 1) column and broadcasts against col,
# so each output row keeps one row index and runs through every col index:
# 02 01 03
# 12 11 13
# 22 21 23

array([[ 2,  1,  3],
       [ 6,  5,  7],
       [10,  9, 11]])

In [27]:
x[row, col[:, np.newaxis]]
# now col is the (3, 1) column,
# so each output row keeps one col index and runs through every row index:
# 02 12 22
# 01 11 21
# 03 13 23

array([[ 2,  6, 10],
       [ 1,  5,  9],
       [ 3,  7, 11]])

# Combined indexing

In [35]:
# Show x again for reference before combining index types.
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [33]:
x[2, [2, 0, 2]] # a scalar row index combined with fancy column indices: we get (2, 2), (2, 0), (2, 2)

array([10,  8, 10])

In [34]:
x[1:, [2, 0, 1]]
# the slice keeps rows 1 and 2; the fancy index picks columns 2, 0, 1 in that order:
# 12 10 11
# 22 20 21

array([[ 6,  4,  5],
       [10,  8,  9]])

In [38]:
# Recall the row and col index arrays defined earlier.
row, col

(array([0, 1, 2]), array([2, 1, 3]))

In [39]:
# Boolean column selector: keep columns 0 and 2, drop columns 1 and 3.
mask = np.array([True, False, True, False])
mask

array([ True, False,  True, False])

In [40]:
x[row[:, np.newaxis], mask]
# columns 1 and 3 are skipped because mask is False at those positions
# 00 02
# 10 12
# 20 22

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10]])

# Modify values with fancy indexing

In [9]:
# Positions that the following cells read and overwrite through fancy indexing.
i = np.array([2, 1, 4, 8])
# Working array holding 0..9.
x = np.arange(0, 10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [10]:
# Read the elements at the positions stored in i.
x[i]

array([2, 1, 4, 8])

In [11]:
x[i] = 99  # every position listed in i is replaced with 99
x

array([ 0, 99, 99,  3, 99,  5,  6,  7, 99,  9])

In [12]:
x[i] += 10  # in-place arithmetic also works through a fancy index
x

array([  0, 109, 109,   3, 109,   5,   6,   7, 109,   9])

In [13]:
x[[0, 0]] = [4, 6]
x
# index 0 is listed twice, so only x[0] changes and it ends up as 6 (the last value assigned wins)

array([  6, 109, 109,   3, 109,   5,   6,   7, 109,   9])

In [14]:
# Ten float zeros to accumulate into.
x = np.zeros(shape=10)
# Index list in which the value k appears k times (1 once, 2 twice, 3 three times, 4 four times).
i = [k for k in range(1, 5) for _ in range(k)]
x

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [58]:
x[i]+=1 # a repeated index is only incremented once
x

array([0., 1., 1., 1., 1., 0., 0., 0., 0., 0.])

In [59]:
np.add.at(x, i, 1) # each index is incremented once for every time it appears in i
x

array([0., 2., 3., 4., 5., 0., 0., 0., 0., 0.])

# Sorting arrays

In [61]:
x = np.array([5, 4, 32, 1])
np.sort(x)  # returns a sorted copy

array([ 1,  4,  5, 32])

In [62]:
x # np.sort left the original array unchanged

array([ 5,  4, 32,  1])

In [63]:
x.sort()  # the array method sorts in place
x # this time the original array has changed

array([ 1,  4,  5, 32])

In [8]:
x = np.array([5, 4, 32, 1])
# Positions of the elements of x in ascending order of value.
i = x.argsort()

print('the indices after sorting:', i)
# Indexing x with those positions yields the sorted values.
print('the array after sorting:', x[i])

the indices after sorting: [3 1 0 2]
the array after sorting: [ 1  4  5 32]


In [71]:
# The teacher names the seeded generator either 'rng' or 'rand'; note that they are the same thing.
rand = np.random.RandomState(seed=42)
# Integers drawn from [0, 10) -- 10 itself is excluded -- arranged as a 4x6 array.
x = rand.randint(low=0, high=10, size=(4, 6))
x

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [72]:
# axis=0 sorts along the row axis (top to bottom), so each column is sorted independently
np.sort(x, axis=0)

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [73]:
# axis=1 sorts along the column axis (left to right), so each row is sorted independently
np.sort(x, axis=1)

array([[3, 4, 6, 6, 7, 9],
       [2, 3, 4, 6, 7, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 5, 9]])

# Partial sorting: partitioning

In [74]:
# Descending multiples of 11 from 99 down to 11.
x = np.arange(99, 0, -11)
x

array([99, 88, 77, 66, 55, 44, 33, 22, 11])

In [76]:
np.partition(x, 3)
# the 3 smallest values end up to the left of index 3 and the remaining values to its right;
# the order within each side is arbitrary

array([22, 11, 33, 44, 55, 88, 66, 77, 99])

In [15]:
# Re-seed so this section starts from the same 4x6 array of integers in [0, 10).
rand = np.random.RandomState(seed=42)
x = rand.randint(low=0, high=10, size=(4, 6))
x

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

In [18]:
np.partition(x, 2, axis=0) # within each column, the two smallest values are moved to the top

array([[2, 1, 4, 0, 1, 5],
       [5, 2, 5, 4, 3, 7],
       [6, 3, 7, 4, 6, 7],
       [7, 6, 7, 4, 9, 9]])

In [19]:
np.partition(x, 2, axis=1) # within each row, the two smallest values are moved to the left

array([[3, 4, 6, 7, 6, 9],
       [2, 3, 4, 7, 6, 7],
       [1, 2, 4, 5, 7, 7],
       [0, 1, 4, 5, 9, 5]])

In [20]:
x # np.partition returned new arrays; x itself is unchanged

array([[6, 3, 7, 4, 6, 9],
       [2, 6, 7, 4, 3, 7],
       [7, 2, 5, 4, 1, 7],
       [5, 1, 4, 0, 9, 5]])

# Structured data

In [2]:
# Field layout for the structured array: one (field name, type code) pair per field.
datatype = [
    ('name', 'U10'),   # unicode string of up to 10 characters
    ('age', 'i4'),     # 4-byte integer
    ('weight', 'f4'),  # 4-byte float
]
datatype

[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')]

In [3]:
# One tuple per record, with fields ordered as (name, age, weight).
values = [
    ('rex', 9, 81.0),
    ('fido', 3, 27.0),
    ('bob', 45, 85.5),
]
values

[('rex', 9, 81.0), ('fido', 3, 27.0), ('bob', 45, 85.5)]

In [4]:
# Build a structured array: each tuple in values becomes one record with the fields listed in datatype.
f = np.array(values, dtype=datatype)
f

array([('rex',  9, 81. ), ('fido',  3, 27. ), ('bob', 45, 85.5)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f4')])

In [5]:
# An integer index returns one whole record.
f[0]

('rex', 9, 81.)

In [6]:
# The name field of the last record.
f[-1]['name']

'bob'

In [7]:
# Boolean mask on the age field, then the names of the matching records.
f[f['age'] > 30]['name']

array(['bob'], dtype='<U10')

Actually, I don't know why we would use structured arrays here. Isn't a pandas DataFrame a better choice?

# Record array

In [11]:
# View the structured array as a record array so that fields can also be read as attributes.
data_rec = f.view(np.recarray)

In [12]:
# Same records and dtype as f, now shown as a rec.array.
data_rec

rec.array([('rex',  9, 81. ), ('fido',  3, 27. ), ('bob', 45, 85.5)],
          dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f4')])

In [14]:
print(data_rec.age)
print(data_rec['age'])
# attribute access and key access return the same field

[ 9  3 45]
[ 9  3 45]


In [15]:
# Time three ways of reading the age field:
# key access on the structured array, key access on the record array,
# and attribute access on the record array.
%timeit f['age']
%timeit data_rec['age']
%timeit data_rec.age

111 ns ± 9.85 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
2.04 µs ± 179 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
3.01 µs ± 305 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
