### import numpy

In [1]:
import numpy as np

### check python and numpy version

In [2]:
import platform
print('Python version: ' + platform.python_version())
print('Numpy version: ' + np.__version__)

Python version: 3.6.6
Numpy version: 1.15.0


# 1. Create Arrays

### create an array from an iterable

In [3]:
arr = np.array(range(10))
print(arr)

arr = np.array([1,2,3,4,5])
print(arr)

[0 1 2 3 4 5 6 7 8 9]
[1 2 3 4 5]


### create an array in a specified data type

In [4]:
arr = np.array([[1,2,3], [4,5,6]], dtype='i2')
print(arr)

[[1 2 3]
 [4 5 6]]


### create an aray of evenly spaced values within a specified interval

In [5]:
# np.arange(start, stop, step)
arr = np.arange(0, 20, 2)  
print(arr)

[ 0  2  4  6  8 10 12 14 16 18]


### create an array of evenly spaced numbers in a specified interval

In [6]:
# np.linspace(start, stop, num_of_elements, endpoint=True, retstep=False) 
arr = np.linspace(0, 10, 20)
print(arr)

# exclude endpoint and return setp size
arr, step = np.linspace(0, 10, 20, endpoint=False, retstep=True)
print(arr)
print(step)

[ 0.          0.52631579  1.05263158  1.57894737  2.10526316  2.63157895
  3.15789474  3.68421053  4.21052632  4.73684211  5.26315789  5.78947368
  6.31578947  6.84210526  7.36842105  7.89473684  8.42105263  8.94736842
  9.47368421 10.        ]
[0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5.  5.5 6.  6.5 7.  7.5 8.  8.5
 9.  9.5]
0.5


### create an array of random values in a given shape

In [7]:
arr = np.random.rand(3, 3)
print(arr)

[[0.38731151 0.03590239 0.81195451]
 [0.20791333 0.65271402 0.13483703]
 [0.31101729 0.29904341 0.84136213]]


### create an array of zeros in a given shape 

In [8]:
zeros = np.zeros((2,3), dtype='i4')
print(zeros)

[[0 0 0]
 [0 0 0]]


### create an array of zeros with the same shape and data type as a given array

In [9]:
zeros = np.zeros_like(arr)
print(zeros)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### create an array of ones in a given shape 

In [10]:
ones = np.ones((2,3), dtype=np.int32)
print(ones)

[[1 1 1]
 [1 1 1]]


### create an array of ones with the same shape and data type as a given array

In [11]:
ones = np.ones_like(arr, dtype=np.int16)
print(ones)

[[1 1 1]
 [1 1 1]
 [1 1 1]]


### create an array of arbitrary values in a given shape 

In [12]:
empty = np.empty((2,3))
print(empty)

[[0. 0. 0.]
 [0. 0. 0.]]


### create an array of arbitrary values with the same shape and data type as a given array

In [13]:
empty = np.empty_like(arr)
print(empty)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### create an array of constant values in a given shape  

In [14]:
p = np.full((2,3), 5, dtype=np.int32)
print(p)

[[5 5 5]
 [5 5 5]]


### create an array of constant values with the same shape and data type as a given array

In [15]:
p = np.full_like(arr, 5)
print(p)

[[5. 5. 5.]
 [5. 5. 5.]
 [5. 5. 5.]]


### create an array by repetition

In [16]:
# repeat each element of an array by a specified number of times
# np.repeat(iterable, reps, axis=None)
arr = [0, 1, 2]
print(np.repeat(arr, 3))    # or np.repeat(range(3), 3)

# repeat along a specified axis with specified number of repetitions
arr = [[1,2], [3,4]]
print(np.repeat(arr, [1,2], axis=0))

[0 0 0 1 1 1 2 2 2]
[[1 2]
 [3 4]
 [3 4]]


In [17]:
# repeat an array by a specified number of times
arr = [0, 1, 2]
print(np.tile(arr, 3))

# repeat along specified axes
print(np.tile(arr, (2,2)))

[0 1 2 0 1 2 0 1 2]
[[0 1 2 0 1 2]
 [0 1 2 0 1 2]]


### create an identity matrix with a given diagonal size

In [18]:
identity_matrix = np.eye(3)
print(identity_matrix)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [19]:
identity_matrix = np.identity(3)
print(identity_matrix)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


### create an identity matrix with a diagonal offset

In [20]:
identity_matrix = np.eye(5, k=1)    # positive number means upper offset by the number
print(identity_matrix)

[[0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0.]]


In [21]:
identity_matrix = np.eye(5, k=-2)   # negative number means lower offset by the number
print(identity_matrix)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]]


# 2. Inspect Arrays

In [22]:
arr = np.array([[1,2,3], [4,5,6]], dtype=np.int64)

### inspect general information of an array

In [23]:
print(np.info(arr))

class:  ndarray
shape:  (2, 3)
strides:  (24, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x2594a4b3480
byteorder:  little
byteswap:  False
type: int64
None


### inspect the data type of an array

In [24]:
print(arr.dtype)

int64


### inspect the dimension of an array

In [25]:
print(arr.shape)

(2, 3)


### inspect length of an array

In [26]:
print(len(arr))

2


### inspect the number of dimensions of an array

In [27]:
print(arr.ndim)

2


### inspect the number of elements in an array

In [28]:
print(arr.size)

6


### inspect the number of bytes of each element in an array

In [29]:
print(arr.itemsize)

8


# 3. Numpy Data Types

### A list of Numpy Data Types

In [30]:
import pandas as pd
dtypes = pd.DataFrame(
    {
        'Type': ['int8', 'uint8', 'int16', 'uint16', 'int32', 'uint32', 'int64', 'uint64', 'float16', 'float32', 'float64', 'float128', 'complex64', 'complex128', 'bool', 'object', 'string_', 'unicode_'],
        'Type Code': ['i1', 'u1', 'i2', 'u2', 'i4', 'u4', 'i8', 'u8', 'f2', 'f4 or f', 'f8 or d', 'f16 or g', 'c8', 'c16', '', 'O', 'S', 'U']
    }
)

dtypes

Unnamed: 0,Type,Type Code
0,int8,i1
1,uint8,u1
2,int16,i2
3,uint16,u2
4,int32,i4
5,uint32,u4
6,int64,i8
7,uint64,u8
8,float16,f2
9,float32,f4 or f


### convert data type

In [31]:
arr1 = arr.astype('i8')  
print(arr1.dtype)

int64


In [32]:
arr2 = arr.astype(np.float32)
print(arr2.dtype)

float32


### string data type

In [33]:
# set the max length of the string using S + length, such as 'S10'
# any string longer than the max length will be truncated
s = np.array(['abc', 'defg'], dtype='S10')

In [34]:
# 'np.string_' will set the length of the longest string in the array as the max length 
s = np.array([1.96,2.0,8], dtype=np.string_)
print(s.dtype)

|S4


# 4. Sampling Methods

### set seed

In [35]:
np.random.seed(123)

### set random state which is separate from the global seed

In [36]:
rs = np.random.RandomState(321)
rs.rand(10)

array([0.88594794, 0.07791236, 0.97964616, 0.24767146, 0.75288472,
       0.52667564, 0.90755375, 0.8840703 , 0.08926896, 0.5173446 ])

### generate a random sample from interval [0, 1) in a given shape

In [37]:
# generate a random scalar
print(np.random.rand())      

0.6964691855978616


In [38]:
# generate a 1-D array
print(np.random.rand(3,))           

[0.28613933 0.22685145 0.55131477]


In [39]:
# generate a 2-D array
print(np.random.rand(3,3))          

[[0.71946897 0.42310646 0.9807642 ]
 [0.68482974 0.4809319  0.39211752]
 [0.34317802 0.72904971 0.43857224]]


### generate a sample from the standard normal distribution (with mean equal to o and variance 1)

In [40]:
print(np.random.randn(3,3))

[[-0.14337247 -0.6191909  -0.76943347]
 [ 0.57674602  0.12652592 -1.30148897]
 [ 2.20742744  0.52274247  0.46564476]]


### generate an array of random integers in a given interval [low, high)

In [41]:
# np.ranodm.randint(low, high, size, dtype)
print(np.random.randint(1, 10, 3, 'i8'))

[5 7 2]


### generate an array of random floating-point numbers in the interval [0.0, 1.0)

In [42]:
# the following methods are the same as np.random.rand()
print(np.random.random_sample(10))
print(np.random.random(10))
print(np.random.ranf(10))
print(np.random.sample(10))

[0.65472131 0.37380143 0.23451288 0.98799529 0.76599595 0.77700444
 0.02798196 0.17390652 0.15408224 0.07708648]
[0.8898657  0.7503787  0.69340324 0.51176338 0.46426806 0.56843069
 0.30254945 0.49730879 0.68326291 0.91669867]
[0.10892895 0.49549179 0.23283593 0.43686066 0.75154299 0.48089213
 0.79772841 0.28270293 0.43341824 0.00975735]
[0.34079598 0.68927201 0.86936929 0.26780382 0.45674792 0.26828131
 0.8370528  0.27051466 0.53006201 0.17537266]


### # generate a random sample in a given 1-D array

In [43]:
# np.random.choice(iterable_or_int, size, replace=True, p=weights)
print(np.random.choice(range(3), 10, replace=True, p=[0.1, 0.8, 0.1]))

[1 1 1 1 1 1 1 2 2 1]


In [44]:
print(np.random.choice(3, 10))

[1 0 1 2 2 0 1 1 1 0]


In [45]:
print(np.random.choice([1,2,3], 10))

[2 2 1 3 2 3 1 2 1 3]


### shuffle the array in place

In [46]:
arr = np.array(range(10))
print(arr)

[0 1 2 3 4 5 6 7 8 9]


In [47]:
np.random.shuffle(arr)
print(arr)

[1 2 8 5 4 0 6 7 9 3]


### generate a permutation of an array

In [48]:
arr = np.array(range(10))
print('The initial array: ', arr)
print('A permutation of the array: ', np.random.permutation(arr))

The initial array:  [0 1 2 3 4 5 6 7 8 9]
A permutation of the array:  [3 6 2 4 5 9 1 8 0 7]


# 5. Mathematical Functions

In [49]:
arr = np.random.rand(5,5)

### element-wise addition, subtraction, multiplication and division

In [50]:
print(arr + 10)
print(arr - 10)
print(arr * 10)
print(arr / 10)

[[10.87350227 10.02942373 10.55204372 10.2402475  10.88480501]
 [10.46023842 10.19317033 10.2936925  10.81792751 10.55948738]
 [10.67792545 10.80912668 10.86857215 10.41799246 10.05893816]
 [10.478459   10.52115943 10.58063202 10.309199   10.91988263]
 [10.6553475  10.34924138 10.54109404 10.4490534  10.28232096]]
[[-9.12649773 -9.97057627 -9.44795628 -9.7597525  -9.11519499]
 [-9.53976158 -9.80682967 -9.7063075  -9.18207249 -9.44051262]
 [-9.32207455 -9.19087332 -9.13142785 -9.58200754 -9.94106184]
 [-9.521541   -9.47884057 -9.41936798 -9.690801   -9.08011737]
 [-9.3446525  -9.65075862 -9.45890596 -9.5509466  -9.71767904]]
[[8.73502266 0.29423734 5.52043722 2.40247504 8.84805011]
 [4.60238424 1.9317033  2.93692504 8.17927508 5.59487376]
 [6.77925447 8.09126678 8.68572154 4.17992456 0.58938156]
 [4.78459005 5.2115943  5.8063202  3.09198998 9.19882625]
 [6.55347503 3.49241385 5.41094045 4.49053396 2.82320961]]
[[0.08735023 0.00294237 0.05520437 0.02402475 0.0884805 ]
 [0.04602384 0.0193

### element-wise exponentiation

In [51]:
print(np.exp(arr))

[[2.39528511 1.02986089 1.73679893 1.27156383 2.42251198]
 [1.58445171 1.2130894  1.34137137 2.26579912 1.74977529]
 [1.96978706 2.2459457  2.38350515 1.51890922 1.06070964]
 [1.61358596 1.68397897 1.7871676  1.36233345 2.50899588]
 [1.92581163 1.41799143 1.71788528 1.56682832 1.32620431]]


### element-wise logorithm

In [52]:
# natural log
print(np.log(arr))      

[[-0.13524456 -3.52595367 -0.59412803 -1.42608562 -0.12238798]
 [-0.77601061 -1.64418294 -1.22522197 -0.20098157 -0.58073432]
 [-0.38871796 -0.21179979 -0.14090462 -0.87229189 -2.83126659]
 [-0.73718475 -0.65169928 -0.54363808 -1.1737702  -0.0835092 ]
 [-0.42258965 -1.05199195 -0.61416218 -0.80061348 -1.2647107 ]]


In [53]:
# base 2
print(np.log2(arr))     

[[-0.19511665 -5.08687587 -0.85714556 -2.05740665 -0.17656854]
 [-1.11954666 -2.37205457 -1.76762165 -0.28995511 -0.83782252]
 [-0.56080147 -0.3055625  -0.20328239 -1.25845119 -4.08465428]
 [-1.06353278 -0.94020332 -0.78430396 -1.69339245 -0.12047831]
 [-0.60966799 -1.51770357 -0.88604873 -1.15504109 -1.82459185]]


In [54]:
# base 10
print(np.log10(arr))    

[[-0.05873596 -1.53130222 -0.25802652 -0.61934112 -0.05315243]
 [-0.33701713 -0.71405958 -0.53210714 -0.08728519 -0.25220971]
 [-0.16881806 -0.09198348 -0.0611941  -0.37883156 -1.22960346]
 [-0.32015527 -0.2830294  -0.23609902 -0.50976192 -0.03626758]
 [-0.18352835 -0.4568743  -0.26672725 -0.34770202 -0.54925688]]


### element-wise square root

In [55]:
print(np.sqrt(arr))

[[0.93461343 0.17153348 0.74299645 0.49015049 0.94064075]
 [0.67840874 0.43951147 0.54193404 0.90439345 0.74798889]
 [0.82336228 0.89951469 0.93197219 0.64652336 0.24277182]
 [0.69170731 0.72191373 0.76199214 0.55605665 0.95910512]
 [0.80953536 0.59096648 0.73559095 0.67011446 0.53133884]]


### element-wise sine and cosine

In [56]:
print(np.sin(arr))
print(np.cos(arr))

[[0.7665826  0.02941949 0.52442846 0.23794303 0.77379149]
 [0.44416174 0.19197122 0.28948859 0.72973036 0.5307518 ]
 [0.62717856 0.72368474 0.76340745 0.40592657 0.05890404]
 [0.46041177 0.49788598 0.54855249 0.30429572 0.79553051]
 [0.60943479 0.34218509 0.51507406 0.43411297 0.27858547]]
[[0.64214571 0.99956715 0.85145452 0.97127911 0.63344039]
 [0.89594662 0.98140056 0.95718147 0.68373503 0.8475273 ]
 [0.77887551 0.69013071 0.64591723 0.9139057  0.99826365]
 [0.88770547 0.8672425  0.83611612 0.95257762 0.60591354]
 [0.7928362  0.93963257 0.85714568 0.90085844 0.96041144]]


### sum along a specified axis

In [57]:
# sum along the row
print(np.sum(arr, axis=0))    

[3.14547265 1.90212156 2.83603444 2.23441986 2.70543413]


In [58]:
# sum along the column
print(np.sum(arr, axis=1))    

[2.58002224 2.32451614 2.83255489 2.80933208 2.27705729]


### compute the min and max along a specified axis

In [59]:
# calculate min along the row
print(np.min(arr, axis=0))

[0.46023842 0.02942373 0.2936925  0.2402475  0.05893816]


In [60]:
# calculate max along the column
print(np.max(arr, axis=1))    

[0.88480501 0.81792751 0.86857215 0.91988263 0.6553475 ]


In [61]:
# if axis not specified, calculate the max/min value of all elements
print(np.max(arr))
print(np.min(arr))

0.919882625254582
0.029423733551983133


### compute the indices of the min and max along a specified axis

In [62]:
# along the row
print(np.argmin(arr, axis=0))
print(np.argmax(arr, axis=0))

[1 0 1 0 2]
[0 2 2 1 3]


In [63]:
# along the column
print(np.argmin(arr, axis=1))
print(np.argmax(arr, axis=1))

[1 1 4 3 4]
[4 3 2 4 0]


In [64]:
# if axis not specified, return the index of the flattened array
print(np.argmin(arr))
print(np.argmax(arr))

1
19


### compute element-wise min and max of two arrays

In [65]:
arr1 = np.array([1, 3, 5, 7, 9])
arr2 = np.array([0, 4, 3, 8, 7])
print(np.maximum(arr1, arr2))
print(np.minimum(arr1, arr2))

[1 4 5 8 9]
[0 3 3 7 7]


### split fractional and integral parts of a floating-point array

In [66]:
arr1 = np.random.rand(10) * 10
re, intg = np.modf(arr1)
print('fractional: ', re)
print('integral: ', intg)

fractional:  [0.95876525 0.63480983 0.15191279 0.17618677 0.52041829 0.32778859
 0.7311902  0.67888445 0.87478322 0.4941493 ]
integral:  [2. 5. 7. 5. 3. 6. 8. 1. 9. 3.]


### compute the mean

In [67]:
# compute the overall mean
print(np.mean(arr))

0.5129393054604379


In [68]:
# compute the mean along the row
print(np.mean(arr, axis=0))   

[0.62909453 0.38042431 0.56720689 0.44688397 0.54108683]


In [69]:
# compute the mean along the column
print(np.mean(arr, axis=1)) 

[0.51600445 0.46490323 0.56651098 0.56186642 0.45541146]


### compute the median

In [70]:
# compute the overall median
print(np.median(arr))

0.52115942968839


In [71]:
# compute the median along the row
print(np.median(arr, axis=0)) 

[0.6553475  0.34924138 0.55204372 0.41799246 0.55948738]


In [72]:
# compute the median along the column
print(np.median(arr, axis=1))

[0.55204372 0.46023842 0.67792545 0.52115943 0.4490534 ]


### compute the percentile

In [73]:
arr1 = np.random.rand(100)
# compute 5, 65, and 95 percentiles of the array
print(np.percentile(arr1, [5, 65, 95]))

[0.05596961 0.58954837 0.92058256]


### compute the standard deviation & variance

In [74]:
# compute the overall standard deviation
print(np.std(arr))

0.25305273176784365


In [75]:
# compute the standard deviation along the row
print(np.std(arr, axis=0))

[0.15100481 0.26938074 0.18260514 0.20008424 0.33508033]


In [76]:
# compute the standard deviation along the column
print(np.std(arr, axis=1))

[0.33997649 0.21759869 0.29739013 0.20049061 0.13310804]


In [77]:
# compute the overall variance
print(np.var(arr))

0.06403568505516823


In [78]:
# compute the variance along the row
print(np.var(arr, axis=0))

[0.02280245 0.07256599 0.03334464 0.0400337  0.11227883]


In [79]:
# compute the variance along the column
print(np.var(arr, axis=1))

[0.11558401 0.04734919 0.08844089 0.04019649 0.01771775]


### compute the covariance & correlation

In [80]:
arr = np.random.rand(5,8)

In [81]:
print(np.cov(arr))

[[ 0.05402285  0.03916784  0.03506723  0.0367739  -0.02741259]
 [ 0.03916784  0.03992086  0.01853696  0.04299328 -0.02678781]
 [ 0.03506723  0.01853696  0.06217596  0.01094847  0.00254219]
 [ 0.0367739   0.04299328  0.01094847  0.11372942 -0.02614117]
 [-0.02741259 -0.02678781  0.00254219 -0.02614117  0.06564368]]


In [82]:
print(np.corrcoef(arr[:,0], arr[:,1]))

[[ 1.         -0.54127501]
 [-0.54127501  1.        ]]


### compute cumulative sum & product

In [83]:
# calculate the cumulative sums along the row
print(np.cumsum(arr, axis=0))    

[[0.40939041 0.37251939 0.33997017 0.81257008 0.52820553 0.55382711
  0.11720684 0.78460482]
 [0.8439766  0.44974212 0.5217817  1.33358096 1.0075397  0.8673696
  0.21687605 1.38115748]
 [1.09214649 1.05408228 0.82689535 2.2308859  1.58955502 1.57986202
  0.90049152 2.32777838]
 [2.00537774 1.10533719 1.20721083 2.79474244 1.72212877 2.15499731
  1.22305927 3.31170594]
 [2.58104356 1.73506483 1.60822311 3.5393725  2.12331469 2.92047243
  2.20549051 3.47982271]]


In [84]:
# calculate the cumulative sums along the column
print(np.cumsum(arr, axis=1))    

[[0.40939041 0.7819098  1.12187997 1.93445006 2.46265559 3.01648269
  3.13368953 3.91829435]
 [0.43458619 0.51180891 0.69362044 1.21463132 1.69396549 2.00750798
  2.10717719 2.70372985]
 [0.24816989 0.85251005 1.1576237  2.05492864 2.63694396 3.34943639
  4.03305187 4.97967276]
 [0.91323126 0.96448617 1.34480165 1.90865819 2.04123193 2.61636722
  2.93893496 3.92286253]
 [0.57566582 1.20539345 1.60640573 2.35103579 2.75222172 3.51769684
  4.50012808 4.66824484]]


In [85]:
# calculate the cumulative product along the row
print(np.cumprod(arr, axis=0))   

[[4.09390413e-01 3.72519391e-01 3.39970171e-01 8.12570084e-01
  5.28205526e-01 5.53827107e-01 1.17206838e-01 7.84604818e-01]
 [1.77915418e-01 2.87669636e-02 6.18104970e-02 4.23357851e-01
  2.53186958e-01 1.73648331e-01 1.16819128e-02 4.68058094e-01]
 [4.41532491e-02 1.73850315e-02 1.88592261e-02 3.79881090e-01
  1.47358690e-01 1.23723121e-01 7.98593639e-03 4.43073573e-01]
 [4.03221272e-02 8.91068216e-04 7.17245569e-03 2.14198438e-01
  1.95358928e-02 7.11575323e-02 2.57600551e-03 4.35952301e-01]
 [2.32120703e-02 5.61130279e-04 2.87624278e-03 1.59498596e-01
  7.83752521e-03 5.44693209e-02 2.53074828e-03 7.32908915e-02]]


In [86]:
# calculate the cumulative product along the column
print(np.cumprod(arr, axis=1))  

[[4.09390413e-01 1.52505867e-01 5.18474458e-02 4.21296834e-02
  2.22531316e-02 1.23243875e-02 1.44450249e-03 1.13336361e-03]
 [4.34586185e-01 3.35599307e-02 6.10158235e-03 3.17899076e-03
  1.52379890e-03 4.77775703e-04 4.76195265e-05 2.84075554e-05]
 [2.48169887e-01 1.49979030e-01 4.57606489e-02 4.10612562e-02
  2.38982805e-02 1.70273439e-02 1.16401558e-02 1.10188147e-02]
 [9.13231256e-01 4.68075854e-02 1.78016494e-02 1.00375765e-02
  1.33071906e-03 7.65343488e-04 2.46875125e-04 2.42907240e-04]
 [5.75665818e-01 3.62512674e-01 1.45372033e-01 1.08248386e-01
  4.34277288e-02 3.32428461e-02 3.26588104e-02 5.49049362e-03]]


### element-wise comparison

In [87]:
arr1 = np.array([1,2,3,4,5])
arr2 = np.array([5,4,3,2,1])

In [88]:
# return an array of bools
print(arr1 == arr2)    
print(arr1 < 3)

[False False  True False False]
[ True  True False False False]


# 6. Slicing & Indexing

In [89]:
arr = np.array(range(100)).reshape((10,10))

### select an element by row and column indices

In [90]:
print(arr[5][5])
# or 
print(arr[5,5])

55
55


### indexing with slicing

In [91]:
print(arr[1:3, 4:6])

[[14 15]
 [24 25]]


In [92]:
# ellipsis slicing: auto-complete the dimensions
arr = np.array(range(16)).reshape(2,2,2,2)
# equivalent to arr[0,:,:,:]
print(arr[0, ...])    

[[[0 1]
  [2 3]]

 [[4 5]
  [6 7]]]


### assign a scalar to a slice by broadcasting

In [93]:
arr[1:3,:] = 100    # or simply arr[1:3]
arr[:,8:] = 100
print(arr)

[[[[  0   1]
   [  2   3]]

  [[  4   5]
   [  6   7]]]


 [[[100 100]
   [100 100]]

  [[100 100]
   [100 100]]]]


### boolean indexing

In [94]:
arr1 = np.arange(25).reshape((5,5))
bools = np.array([True, True, False, True, False])
print(arr1[bools])

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [15 16 17 18 19]]


In [95]:
# negate the condition
print(arr1[~bools])    

[[10 11 12 13 14]
 [20 21 22 23 24]]


In [96]:
arr2 = np.array([1,2,3,4,5])
# multiple conditions
print(arr1[(arr2<2) | (arr2>4)])    

[[ 0  1  2  3  4]
 [20 21 22 23 24]]


### fancy indexing

In [97]:
arr = np.random.rand(10,10)

In [98]:
# select arr[3,3], arr[1,2], arr[2,1]
print(arr[[3,1,2], [3,2,1]])       

[0.71525681 0.76243827 0.63775096]


In [99]:
# select rows 3,1,2 and columns 6,4,8 
print(arr[[3,1,2]][:, [6,4,8]])    

[[0.28141752 0.62828149 0.07751835]
 [0.67523336 0.35299237 0.45959692]
 [0.53945097 0.83891327 0.56634674]]


### dimension inference

In [100]:
# dimension inference using any negative number (usually -1)
arr = np.array(range(16)).reshape((4,-1))
print(arr.shape)

(4, 4)


# 7. Sort Arrays

In [101]:
arr = np.random.rand(5,5)

### sort an array along a specified axis

In [102]:
# sort along the row and return a copy
print(np.sort(arr, axis=0))   

[[0.11702945 0.17913374 0.06753657 0.02945699 0.05396756]
 [0.13630925 0.18249983 0.41165849 0.12910116 0.31981183]
 [0.36056504 0.51866089 0.42053876 0.56232065 0.37163583]
 [0.44498496 0.62847058 0.42945962 0.70682921 0.43148605]
 [0.7740198  0.74238635 0.55147061 0.86890785 0.75572196]]


In [103]:
# sort along the row in place
arr.sort(axis=0)
print(arr)

[[0.11702945 0.17913374 0.06753657 0.02945699 0.05396756]
 [0.13630925 0.18249983 0.41165849 0.12910116 0.31981183]
 [0.36056504 0.51866089 0.42053876 0.56232065 0.37163583]
 [0.44498496 0.62847058 0.42945962 0.70682921 0.43148605]
 [0.7740198  0.74238635 0.55147061 0.86890785 0.75572196]]


In [104]:
# sort along the column and return a copy
print(np.sort(arr, axis=1))    

[[0.02945699 0.05396756 0.06753657 0.11702945 0.17913374]
 [0.12910116 0.13630925 0.18249983 0.31981183 0.41165849]
 [0.36056504 0.37163583 0.42053876 0.51866089 0.56232065]
 [0.42945962 0.43148605 0.44498496 0.62847058 0.70682921]
 [0.55147061 0.74238635 0.75572196 0.7740198  0.86890785]]


In [105]:
# sort along the column in place
arr.sort(axis=1)    
print(arr)

[[0.02945699 0.05396756 0.06753657 0.11702945 0.17913374]
 [0.12910116 0.13630925 0.18249983 0.31981183 0.41165849]
 [0.36056504 0.37163583 0.42053876 0.51866089 0.56232065]
 [0.42945962 0.43148605 0.44498496 0.62847058 0.70682921]
 [0.55147061 0.74238635 0.75572196 0.7740198  0.86890785]]


### compute the indices that would sort an array along a specified axis

In [106]:
# along the row
print(np.argsort(arr, axis=0))

[[0 0 0 0 0]
 [1 1 1 1 1]
 [2 2 2 2 2]
 [3 3 3 3 3]
 [4 4 4 4 4]]


In [107]:
# along the column
print(np.argsort(arr, axis=1))

[[0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]
 [0 1 2 3 4]]


In [108]:
# if axis=None, return the indices of a flattened array
print(np.argsort(arr, axis=None))

[ 0  1  2  3  5  6  4  7  8 10 11  9 12 15 16 17 13 20 14 18 19 21 22 23
 24]


# 8. Copy Arrays

In [109]:
arr = np.array([1,2,3])

In [110]:
# the following methods are all deep copy
arr1 = np.copy(arr)
# or 
arr1 = arr.copy()
# or 
arr1 = np.array(arr, copy=True)

# 9. Manipulate Arrays

In [111]:
arr = np.random.rand(3,4)

### transpose an array

In [112]:
# the following methods return a copy
print(arr.T)
# or 
print(np.transpose(arr))
# or
print(arr.transpose())

[[0.98050237 0.47783535 0.42333087]
 [0.15655755 0.06774467 0.58119017]
 [0.2042632  0.52412791 0.75228461]
 [0.4837251  0.82371056 0.39554438]]
[[0.98050237 0.47783535 0.42333087]
 [0.15655755 0.06774467 0.58119017]
 [0.2042632  0.52412791 0.75228461]
 [0.4837251  0.82371056 0.39554438]]
[[0.98050237 0.47783535 0.42333087]
 [0.15655755 0.06774467 0.58119017]
 [0.2042632  0.52412791 0.75228461]
 [0.4837251  0.82371056 0.39554438]]


### transpose of a high dimensional array with specified order of axes

In [113]:
arr1 = np.arange(16).reshape((2,2,4))
print(arr1)

arr1.transpose((1,0,2))
print(arr1)

[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]
[[[ 0  1  2  3]
  [ 4  5  6  7]]

 [[ 8  9 10 11]
  [12 13 14 15]]]


### swap axes

In [114]:
arr1 = np.arange(16).reshape((2,2,4))
print(arr1.swapaxes(1,2))

[[[ 0  4]
  [ 1  5]
  [ 2  6]
  [ 3  7]]

 [[ 8 12]
  [ 9 13]
  [10 14]
  [11 15]]]


### change the shape of an array

In [115]:
# change the shape of an array and return a copy
arr.reshape((2,6))

array([[0.98050237, 0.15655755, 0.2042632 , 0.4837251 , 0.47783535,
        0.06774467],
       [0.52412791, 0.82371056, 0.42333087, 0.58119017, 0.75228461,
        0.39554438]])

In [116]:
# change the shape of an array in place
arr.resize((2,6))

### flatten an array

In [117]:
# return a copy
arr.flatten()    

array([0.98050237, 0.15655755, 0.2042632 , 0.4837251 , 0.47783535,
       0.06774467, 0.52412791, 0.82371056, 0.42333087, 0.58119017,
       0.75228461, 0.39554438])

In [118]:
# return a view
# change any element in the view will also change the initial array
arr.ravel()      

array([0.98050237, 0.15655755, 0.2042632 , 0.4837251 , 0.47783535,
       0.06774467, 0.52412791, 0.82371056, 0.42333087, 0.58119017,
       0.75228461, 0.39554438])

### append elements to an array

In [119]:
arr = np.array([1,2,3])

In [120]:
# append a scalar and return a copy
arr1 = np.append(arr, 4)    
print(arr1)

[1 2 3 4]


In [121]:
# append an array and return a copy
arr2 = np.append(arr, [4,5,6])    
print(arr2)

[1 2 3 4 5 6]


### insert elements into an array

In [122]:
# np.insert(array, position, element)

# insert a scalar at a certain position
arr3 = np.insert(arr, 0, 100)    
print(arr3)

[100   1   2   3]


In [123]:
# insert multiple values at a certain position
arr3 = np.insert(arr, 0, [1,2,3])    
print(arr3)

[1 2 3 1 2 3]


### delete elements from an array

In [124]:
# remove the element at position 0
arr4 = np.delete(arr, 0)    
print(arr4)

[2 3]


In [125]:
# remove the element at multiple positions
arr4 = np.delete(arr, [0,2])    
print(arr4)

[2]


# 10. Combine & Split Arrays

In [126]:
arr1 = np.array([[1,2,3,4], [1,2,3,4]])
arr2 = np.array([[5,6,7,8], [5,6,7,8]])

### np.concatenate((a, b), axis=0)

In [127]:
# concat along the row
cat = np.concatenate((arr1, arr2), axis=0)        
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


In [128]:
# concat along the column
cat = np.concatenate((arr1, arr2), axis=1)    
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### np.vstack((a, b)) 
### np.r_[a, b]

In [129]:
# stack arrays vertically
cat = np.vstack((arr1, arr2))
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


In [130]:
# stack arrays vertically
cat = np.r_[arr1, arr2]
print(cat)

[[1 2 3 4]
 [1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]]


### np.hstack((a, b))
### np.c_[a, b]

In [131]:
# stack arrays horizontally
cat = np.hstack((arr1, arr2))
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


In [132]:
# stack arrays horizontally
cat = np.c_[arr1, arr2]
print(cat)

[[1 2 3 4 5 6 7 8]
 [1 2 3 4 5 6 7 8]]


### split the array 

In [133]:
arr = np.random.rand(6,6)

In [134]:
# split the array vertically into n evenly spaced chunks
arr1 = np.vsplit(arr, 2)
print(arr1)

[array([[0.46583576, 0.79078103, 0.80486584, 0.16677372, 0.77011669,
        0.4693528 ],
       [0.92542108, 0.33079165, 0.30494205, 0.4842225 , 0.92490669,
        0.14508927],
       [0.68015706, 0.80614361, 0.86728799, 0.08910079, 0.67553447,
        0.61240133]]), array([[0.65205696, 0.05760322, 0.38582245, 0.73322903, 0.13585084,
        0.56708195],
       [0.02315444, 0.0174567 , 0.47275939, 0.84410265, 0.88709809,
        0.30063103],
       [0.938766  , 0.58832418, 0.09871477, 0.09634253, 0.56561105,
        0.75673868]])]


In [135]:
# split the array horizontally into n evenly spaced chunks
arr2 = np.hsplit(arr, 2)
print(arr2)

[array([[0.46583576, 0.79078103, 0.80486584],
       [0.92542108, 0.33079165, 0.30494205],
       [0.68015706, 0.80614361, 0.86728799],
       [0.65205696, 0.05760322, 0.38582245],
       [0.02315444, 0.0174567 , 0.47275939],
       [0.938766  , 0.58832418, 0.09871477]]), array([[0.16677372, 0.77011669, 0.4693528 ],
       [0.4842225 , 0.92490669, 0.14508927],
       [0.08910079, 0.67553447, 0.61240133],
       [0.73322903, 0.13585084, 0.56708195],
       [0.84410265, 0.88709809, 0.30063103],
       [0.09634253, 0.56561105, 0.75673868]])]


# 11. Set Operations

### select the unique elements from an array

In [136]:
arr = np.array([1,1,2,2,3,3,4,5,6])
print(np.unique(arr))

[1 2 3 4 5 6]


In [137]:
# return the number of times each unique item appears
arr = np.array([1,1,2,2,3,3,4,5,6])
uniques, counts = np.unique(arr, return_counts=True)
print(uniques)
print(counts)

[1 2 3 4 5 6]
[2 2 2 1 1 1]


### compute the intersection & union of two arrays

In [138]:
arr1 = np.array([1,2,3,4,5])
arr2 = np.array([3,4,5,6,7])

In [139]:
# intersection
print(np.intersect1d(arr1, arr2))

[3 4 5]


In [140]:
# union
print(np.union1d(arr1, arr2))

[1 2 3 4 5 6 7]


### compute whether each element of an array is contained in another

In [141]:
print(np.in1d(arr1, arr2))

[False False  True  True  True]


### compute elements in an array that are not in another

In [142]:
print(np.setdiff1d(arr1, arr2))

[1 2]


### compute elements in either of two arrays, but not both

In [143]:
print(np.setxor1d(arr1, arr2))

[1 2 6 7]


# 12. Linear Algebra

In [144]:
arr1 = np.random.rand(5,5)
arr2 = np.random.rand(5,5)

### matrix multiplication

In [145]:
print(arr1.dot(arr2))
# or
print(np.dot(arr1, arr2))
# or
print(arr1 @ arr2)

[[1.3581868  1.46441563 1.19209611 0.89773731 1.60179497]
 [0.56358014 0.83568932 1.18301454 0.75605881 0.52659165]
 [0.92256749 1.27395063 1.23998614 0.69739933 0.92671882]
 [1.03267022 1.21261791 1.32158279 1.0658026  0.97818617]
 [1.50364141 1.29867193 1.37876804 1.45507017 1.61602468]]
[[1.3581868  1.46441563 1.19209611 0.89773731 1.60179497]
 [0.56358014 0.83568932 1.18301454 0.75605881 0.52659165]
 [0.92256749 1.27395063 1.23998614 0.69739933 0.92671882]
 [1.03267022 1.21261791 1.32158279 1.0658026  0.97818617]
 [1.50364141 1.29867193 1.37876804 1.45507017 1.61602468]]
[[1.3581868  1.46441563 1.19209611 0.89773731 1.60179497]
 [0.56358014 0.83568932 1.18301454 0.75605881 0.52659165]
 [0.92256749 1.27395063 1.23998614 0.69739933 0.92671882]
 [1.03267022 1.21261791 1.32158279 1.0658026  0.97818617]
 [1.50364141 1.29867193 1.37876804 1.45507017 1.61602468]]


### QR factorization 

In [146]:
arr = np.random.rand(5,5)

q, r = np.linalg.qr(arr)
print(q)
print(r)

[[-0.25287668  0.30545383 -0.55272407 -0.29581988 -0.67062512]
 [-0.48099961  0.00141258 -0.28186215  0.82874225  0.04875842]
 [-0.50781203  0.07251941 -0.30689915 -0.43871175  0.67097898]
 [-0.63735568 -0.44515031  0.51750961 -0.17480425 -0.31184325]
 [-0.20148804  0.83861678  0.50303733  0.05150099  0.02062923]]
[[-1.53849172 -1.66087161 -1.49768089 -1.27228594 -1.09470342]
 [ 0.          0.48899031  0.00748865  0.25548478 -0.08787503]
 [ 0.          0.         -0.33168525  0.12486023  0.15008394]
 [ 0.          0.          0.          0.49344399  0.35456689]
 [ 0.          0.          0.          0.         -0.06545384]]


### singular value decomposition (SVD)

In [147]:
arr = np.random.rand(5,5)

u, s, v = np.linalg.svd(arr)
print(u)
print(s)
print(v)

[[-0.40123652  0.54157832  0.23882432  0.18481774 -0.67417173]
 [-0.53100041  0.14296127  0.5042882  -0.45694397  0.48424815]
 [-0.45329735  0.0587894  -0.24175051  0.73793897  0.43366818]
 [-0.1369932  -0.73362897  0.55127679  0.29157957 -0.23258687]
 [-0.57689011 -0.38024641 -0.57123284 -0.35703227 -0.26235752]]
[3.24071282 0.68778979 0.61047035 0.33995851 0.08280762]
[[-0.52203044 -0.52538728 -0.29500579 -0.35381414 -0.48912122]
 [ 0.35320653  0.32257134 -0.39710067  0.31955863 -0.71511273]
 [-0.10946927 -0.27475568 -0.66725135  0.53142119  0.4299918 ]
 [ 0.11025276  0.41503989 -0.55581819 -0.66530984  0.2530122 ]
 [ 0.76065115 -0.61005503 -0.03353243 -0.21829377  0.0215893 ]]


### compute eigen values

In [148]:
arr = np.random.rand(5,5)
print(np.linalg.eigvals(arr))

[ 2.48252317 -0.54820284 -0.15449115  0.23474108  0.49072808]


### eigen value decomposition

In [149]:
arr = np.random.rand(5,5)

w, v = np.linalg.eig(arr)
print(w)    # eigen values
print(v)    # eigen vectors

[ 2.85678022 -0.67311818  0.37430308  0.1524331  -0.07643027]
[[-0.51659662 -0.34584088 -0.20903914 -0.63480457  0.40895699]
 [-0.30321413 -0.47803637 -0.16018686  0.44600815 -0.14974489]
 [-0.18776892  0.80515254 -0.54781893  0.5915851  -0.75333954]
 [-0.59588912  0.05627917  0.74907085 -0.16711394 -0.09776297]
 [-0.50084739 -0.0209132   0.2634959  -0.14212629  0.48296231]]


### compute the trace & determinant

In [150]:
# notice this is not a function in linalg!!!
print(np.trace(arr))    

2.633967955576982


In [151]:
print(np.linalg.det(arr))

0.008385639582796341


### calculate the inverse/psedo-inverse of a matrix

In [152]:
arr = np.random.rand(5,5)

In [153]:
# compute the inverse of a matrix
print(np.linalg.inv(arr))

[[ 0.84155331  1.63190796 -3.65455408  0.95608408  0.83765065]
 [-3.18926553 -0.95721688 -0.29098143  1.34658583  1.97491048]
 [ 2.54845699 -0.70737531  2.88513149 -1.6584853  -1.60508904]
 [ 0.71353305 -0.71433099  4.67989952 -2.88642909 -1.12130872]
 [ 0.09916897  1.71046651 -3.37899161  2.25991998 -0.1927277 ]]


In [154]:
# compute the psudo-inverse of a matrix
print(np.linalg.pinv(arr))

[[ 0.84155331  1.63190796 -3.65455408  0.95608408  0.83765065]
 [-3.18926553 -0.95721688 -0.29098143  1.34658583  1.97491048]
 [ 2.54845699 -0.70737531  2.88513149 -1.6584853  -1.60508904]
 [ 0.71353305 -0.71433099  4.67989952 -2.88642909 -1.12130872]
 [ 0.09916897  1.71046651 -3.37899161  2.25991998 -0.1927277 ]]


### solve a linear system

In [155]:
# solve a linear system in closed form
y = [1,2,3,4,5]
print(np.linalg.solve(arr, y))

[ 1.15429656  9.28425211 -4.87028554 -3.8276903   1.45916855]


In [156]:
# calculate the least-squares solution of a linear system
y = [1,2,3,4,5]
solution, residuals, rank, singular = np.linalg.lstsq(arr, y)
print(solution)
print(residuals)
print(rank)
print(singular)

[ 1.15429656  9.28425211 -4.87028554 -3.8276903   1.45916855]
[]
5
[2.85050906 1.01863125 0.77007681 0.21087503 0.11001589]


  This is separate from the ipykernel package so we can avoid doing imports until
