                    chapter4: Numpy Basics: Arrays and Vectorized Computation

# the numpy ndarray: A multidimensional Array Object

In [1]:
import numpy as np

In [2]:
data = np.array([[1,2,3],[4,5,6]])

In [3]:
data

array([[1, 2, 3],
       [4, 5, 6]])

In [4]:
data*10

array([[10, 20, 30],
       [40, 50, 60]])

In [5]:
data + data

array([[ 2,  4,  6],
       [ 8, 10, 12]])

In [6]:
data.shape

(2, 3)

In [7]:
data.dtype

dtype('int64')

In [8]:
data = [1,2,3]

In [11]:
arr1 = np.array(data)

In [12]:
arr1

array([1, 2, 3])

In [13]:
data = [[1,2,3],[1,2,3]]

In [14]:
arr1 = np.array(data)

In [16]:
arr1

array([[1, 2, 3],
       [1, 2, 3]])

In [17]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [18]:
np.zeros((3,6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [19]:
np.empty((2,3,2))

array([[[0.00000000e+000, 2.32210854e-322],
        [0.00000000e+000, 0.00000000e+000],
        [9.76118064e-313, 1.16095484e-028]],

       [[1.14568603e+243, 4.25117084e-096],
        [9.80058441e+252, 1.23971686e+224],
        [1.33872411e+253, 1.28959373e+295]]])

In [20]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## Data Types for ndarrays

In [23]:
arr1.dtype

dtype('int64')

In [24]:
arr1=np.array([12,13,13],dtype = np.float64)

In [25]:
arr1

array([12., 13., 13.])

In [26]:
arr1 = arr1.astype(np.int32)

In [27]:
arr1

array([12, 13, 13], dtype=int32)

In [28]:
zero_uint32 = np.zeros(8,dtype = "u4")

In [29]:
zero_uint32

array([0, 0, 0, 0, 0, 0, 0, 0], dtype=uint32)

In [30]:
arr1.astype(np.int32)

array([12, 13, 13], dtype=int32)

In [31]:
arr = np.array([[1,2],[3,4]])

In [32]:
arr

array([[1, 2],
       [3, 4]])

In [35]:
arr1 = np.array([[3,4],[1,2]])

In [37]:
arr > arr1

array([[False, False],
       [ True,  True]])

## basic indexing and slicing

In [39]:
arr = np.arange(10)

In [40]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [41]:
arr[1:]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [42]:
arr[:]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [43]:
arr[:1]

array([0])

In [45]:
array = arr[1:2]

In [46]:
array

array([1])

In [47]:
array[0] = 10

In [48]:
arr

array([ 0, 10,  2,  3,  4,  5,  6,  7,  8,  9])

In [49]:
array = arr[1:3].copy()

In [50]:
array

array([10,  2])

In [51]:
array[0] = 1

In [52]:
array

array([1, 2])

In [53]:
arr

array([ 0, 10,  2,  3,  4,  5,  6,  7,  8,  9])

In [55]:
arr2d = np.array([[1,2,3],[4,5,6]])

In [56]:
arr2d

array([[1, 2, 3],
       [4, 5, 6]])

In [58]:
arr2d[1,:2]

array([4, 5])

In [59]:
arr2d[0,1:]

array([2, 3])

In [60]:
arr2d[0] = 0

In [61]:
arr2d

array([[0, 0, 0],
       [4, 5, 6]])

## bolean indexing

In [66]:
names = np.array(["x","y","z","x"])

In [67]:
names

array(['x', 'y', 'z', 'x'], dtype='<U1')

In [76]:
data = np.array([[1,2,3],[4,5,6],[7,8,9],[1,2,4]])

In [77]:
data

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [1, 2, 4]])

In [78]:
data[names == "x"]

array([[1, 2, 3],
       [1, 2, 4]])

In [79]:
data[names == "x"]

array([[1, 2, 3],
       [1, 2, 4]])

In [81]:
data[names == "x",0]

array([1, 1])

In [82]:
data[names == "x",1:]

array([[2, 3],
       [2, 4]])

In [84]:
mask = (names == "x") | (names == "y")

In [85]:
mask

array([ True,  True, False,  True])

In [86]:
data[mask]

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 4]])

## Fancy Indexing

In [91]:
arr = np.zeros([3,2])

In [92]:
arr

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [94]:
for i in range(3):
    arr[i] = i

In [95]:
arr

array([[0., 0.],
       [1., 1.],
       [2., 2.]])

In [97]:
arr[[1,2]]

array([[1., 1.],
       [2., 2.]])

In [99]:
arr[[0,1]]

array([[0., 0.],
       [1., 1.]])

In [100]:
arr = np.arange(32).reshape(8,4)

In [101]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [102]:
arr[0][1]

1

In [113]:
arr[[1,2,3],[1,2,3]]

array([ 5, 10, 15])

In [114]:
arr[[3],[2]]

array([14])

In [115]:
arr2 = arr[[1,2,3]]

In [116]:
arr2

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [117]:
arr2 = 0

In [118]:
arr2

0

In [119]:
arr2 = arr[[1,2,3]]

In [120]:
arr2

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [121]:
arr2[0] = 1


In [122]:
arr2

array([[ 1,  1,  1,  1],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [123]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [124]:
array[array>0] = 1

In [125]:
array

array([0, 1])

In [126]:
arr[arr>0]  = 1

In [127]:
arr

array([[0, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

## transposing arrays and swapping Axes

In [130]:
arr = np.arange(15).reshape(3,5)

In [131]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [134]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [135]:
np.dot(arr,arr.T)

array([[ 30,  80, 130],
       [ 80, 255, 430],
       [130, 430, 730]])

In [136]:
arr @ arr.T

array([[ 30,  80, 130],
       [ 80, 255, 430],
       [130, 430, 730]])

In [137]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [138]:
arr.swapaxes(0,1)

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

# 4.2 Pseudorandom number generation

In [140]:
samples = np.random.standard_normal(size=(4,4))

In [141]:
samples

array([[ 0.53059693, -0.45576666, -0.4980529 ,  0.77535788],
       [ 0.57424752, -0.73176472,  0.10826043,  0.92124459],
       [ 1.72494891, -0.43682375, -0.51230832,  0.59478396],
       [-0.74759382,  0.80568415,  2.20163378, -0.48081304]])

In [142]:
rng = np.random.default_rng(seed = 12345)

In [143]:
rng

Generator(PCG64) at 0x7FB2F927B820

In [144]:
data = rng.standard_normal((2,3))

In [145]:
data

array([[-1.42382504,  1.26372846, -0.87066174],
       [-0.25917323, -0.07534331, -0.74088465]])

# 4.3 Universal Functions: Fast Element-Wise Array Functions

In [148]:
arr = np.arange(10)

In [149]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [150]:
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ])

In [151]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03])

In [152]:
x = rng.standard_normal(8)

In [153]:
y = rng.standard_normal(8)

In [154]:
x

array([-1.3677927 ,  0.6488928 ,  0.36105811, -1.95286306,  2.34740965,
        0.96849691, -0.75938718,  0.90219827])

In [155]:
y

array([-0.46695317, -0.06068952,  0.78884434, -1.25666813,  0.57585751,
        1.39897899,  1.32229806, -0.29969852])

In [156]:
np.maximum(x, y)

array([-0.46695317,  0.6488928 ,  0.78884434, -1.25666813,  2.34740965,
        1.39897899,  1.32229806,  0.90219827])

In [157]:
arr = rng.standard_normal(7) * 5

In [158]:
arr

array([ 4.51459671, -8.10791367, -0.7909463 ,  2.24741966, -6.71800536,
       -0.40843795,  8.62369966])

In [159]:
remainder, whole_part = np.modf(arr)

In [160]:
remainder

array([ 0.51459671, -0.10791367, -0.7909463 ,  0.24741966, -0.71800536,
       -0.40843795,  0.62369966])

In [161]:
whole_part

array([ 4., -8., -0.,  2., -6., -0.,  8.])

In [162]:
arr

array([ 4.51459671, -8.10791367, -0.7909463 ,  2.24741966, -6.71800536,
       -0.40843795,  8.62369966])

In [163]:
np.add(arr,1)

array([ 5.51459671, -7.10791367,  0.2090537 ,  3.24741966, -5.71800536,
        0.59156205,  9.62369966])

In [164]:
arr

array([ 4.51459671, -8.10791367, -0.7909463 ,  2.24741966, -6.71800536,
       -0.40843795,  8.62369966])

In [165]:
arr = np.add(arr,1)

In [166]:
arr

array([ 5.51459671, -7.10791367,  0.2090537 ,  3.24741966, -5.71800536,
        0.59156205,  9.62369966])

In [168]:
out = np.zeros_like(arr)

In [169]:
out

array([0., 0., 0., 0., 0., 0., 0.])

In [170]:
np.add(arr,1,out=out)

array([ 6.51459671, -6.10791367,  1.2090537 ,  4.24741966, -4.71800536,
        1.59156205, 10.62369966])

In [171]:
arr

array([ 5.51459671, -7.10791367,  0.2090537 ,  3.24741966, -5.71800536,
        0.59156205,  9.62369966])

In [172]:
out

array([ 6.51459671, -6.10791367,  1.2090537 ,  4.24741966, -4.71800536,
        1.59156205, 10.62369966])

# 4.4 Array_Oriented Programming with Arrays

In [175]:
points = np.arange(10)

In [176]:
points

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [177]:
xs, ys = np.meshgrid(points,points)

In [178]:
ys

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
       [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
       [6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
       [7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
       [8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
       [9, 9, 9, 9, 9, 9, 9, 9, 9, 9]])

In [179]:
xs

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

In [180]:
z = np.sqrt(xs ** 2 + ys ** 2)

In [181]:
z

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  4.        ,
         5.        ,  6.        ,  7.        ,  8.        ,  9.        ],
       [ 1.        ,  1.41421356,  2.23606798,  3.16227766,  4.12310563,
         5.09901951,  6.08276253,  7.07106781,  8.06225775,  9.05538514],
       [ 2.        ,  2.23606798,  2.82842712,  3.60555128,  4.47213595,
         5.38516481,  6.32455532,  7.28010989,  8.24621125,  9.21954446],
       [ 3.        ,  3.16227766,  3.60555128,  4.24264069,  5.        ,
         5.83095189,  6.70820393,  7.61577311,  8.54400375,  9.48683298],
       [ 4.        ,  4.12310563,  4.47213595,  5.        ,  5.65685425,
         6.40312424,  7.21110255,  8.06225775,  8.94427191,  9.8488578 ],
       [ 5.        ,  5.09901951,  5.38516481,  5.83095189,  6.40312424,
         7.07106781,  7.81024968,  8.60232527,  9.43398113, 10.29563014],
       [ 6.        ,  6.08276253,  6.32455532,  6.70820393,  7.21110255,
         7.81024968,  8.48528137,  9.21954446

## Expressing Conditional Logic as Array Operations

In [184]:
arr1 = np.array([1,2,3])

In [186]:
arr2 = np.array([4,5,6])

In [190]:
cond = np.array([True,False,True,])

In [188]:
zip(arr1,arr2,cond)

<zip at 0x7fb2d80781c0>

In [192]:
result = np.where(cond,arr1,arr2)

In [193]:
type(result)

numpy.ndarray

In [194]:
print(result)

[1 5 3]


In [195]:
result

array([1, 5, 3])

In [196]:
arr = rng.standard_normal((4,4))

In [197]:
np.where(arr > 0, 2, -2)

array([[ 2,  2,  2, -2],
       [-2, -2,  2,  2],
       [-2, -2,  2,  2],
       [-2,  2, -2, -2]])

## Mathematical and Statistical Methods

In [201]:
arr = rng.standard_normal((2,2))

In [202]:
arr

array([[-1.10821447,  0.13595685],
       [ 1.34707776,  0.06114402]])

In [203]:
arr.sum()

0.4359641687307288

In [204]:
arr.mean()

0.1089910421826822

In [205]:
np.mean(arr)

0.1089910421826822

In [206]:
arr.mean(axis=1)

array([-0.48612881,  0.70411089])

In [207]:
arr.sum(axis=0)

array([0.2388633 , 0.19710087])

In [208]:
arr = np.arange(5)

In [209]:
arr

array([0, 1, 2, 3, 4])

In [211]:
arr.cumsum()

array([ 0,  1,  3,  6, 10])

In [212]:
arr = np.array([[1,2],[3,4]])

In [213]:
arr

array([[1, 2],
       [3, 4]])

In [214]:
arr.cumsum(axis=0)

array([[1, 2],
       [4, 6]])

## Methods For Boolean Arrays

In [217]:
arr = rng.standard_normal(10)

In [218]:
arr

array([ 0.0709146 ,  0.43365454,  0.27748366,  0.53025239,  0.53672097,
        0.61835001, -0.79501746,  0.30003095, -1.60270159,  0.26679883])

In [219]:
(arr > 0).sum()

8

In [220]:
(arr < 0).sum()

2

In [221]:
arr.any()

True

In [222]:
arr.all()

True

In [223]:
arr = arr[arr>0]

In [224]:
arr

array([0.0709146 , 0.43365454, 0.27748366, 0.53025239, 0.53672097,
       0.61835001, 0.30003095, 0.26679883])

In [225]:
arr.all()

True

## Sorting 

In [228]:
arr = rng.standard_normal(6)

In [229]:
arr

array([-1.26162378, -0.07127081,  0.47404973, -0.41485376,  0.0977165 ,
       -1.64041784])

In [230]:
arr.sort()

In [231]:
arr

array([-1.64041784, -1.26162378, -0.41485376, -0.07127081,  0.0977165 ,
        0.47404973])

## Unique and other Set Logic

In [235]:

arr = np.array(["x","y","x"])

In [236]:
arr

array(['x', 'y', 'x'], dtype='<U1')

In [237]:
np.unique(arr)

array(['x', 'y'], dtype='<U1')

In [238]:
np.in1d(arr,"x")

array([ True, False,  True])

# 4.5 File Input and Output with Arrays

In [242]:
arr = np.arange(10)

In [249]:
np.save("/Users/yuxichen/online eduction/python_data_analysis/random_file/some",arr)

In [250]:
np.load("some.npy")

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [251]:
np.savez("/Users/yuxichen/online eduction/python_data_analysis/random_file/archive",a=arr, b=arr)

In [253]:
arch = np.load("/Users/yuxichen/online eduction/python_data_analysis/random_file/archive.npz")

In [254]:
arch["a"]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [255]:
arr = arch["b"]

In [256]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [257]:
arr.dot(arr.T)

285

# 4.7 Example: Random Walks

In [260]:
nwalks = 5000

In [261]:
nsteps = 1000

In [271]:
draws = rng.integers(0,2, size=(nwalks,nsteps))

In [272]:
draws

array([[0, 0, 0, ..., 0, 1, 0],
       [1, 0, 1, ..., 1, 0, 1],
       [0, 1, 0, ..., 0, 1, 1],
       ...,
       [0, 1, 1, ..., 0, 1, 0],
       [0, 1, 1, ..., 0, 1, 1],
       [0, 1, 1, ..., 1, 0, 1]])

In [273]:
steps = np.where(draws > 0, 1, -1)

In [274]:
steps

array([[-1, -1, -1, ..., -1,  1, -1],
       [ 1, -1,  1, ...,  1, -1,  1],
       [-1,  1, -1, ..., -1,  1,  1],
       ...,
       [-1,  1,  1, ..., -1,  1, -1],
       [-1,  1,  1, ..., -1,  1,  1],
       [-1,  1,  1, ...,  1, -1,  1]])

In [276]:
walk = steps.cumsum(axis=1)

In [277]:
walk

array([[ -1,  -2,  -3, ..., -22, -21, -22],
       [  1,   0,   1, ..., -14, -15, -14],
       [ -1,   0,  -1, ..., -16, -15, -14],
       ...,
       [ -1,   0,   1, ..., -12, -11, -12],
       [ -1,   0,   1, ...,  12,  13,  14],
       [ -1,   0,   1, ...,  62,  61,  62]])

In [278]:
walk.max()

137

In [279]:
walk.min()

-126

In [282]:
hit30 = (np.abs(walk) >= 30).any(axis=1)

In [283]:
hit30

array([ True, False, False, ..., False, False,  True])

In [284]:
hit30.sum()

3332

In [286]:
(np.abs(walk) >= 30).argmax(axis=1).mean()

337.1012