# Advanced NumPy

In [1]:
import pickle

import numpy as np

## Non-NumPy mathematical operation

In [2]:
2 ** 1000

10715086071862673209484250490600018105614048117055336074437503883703510511249361224931983788156958581275946729175531468251871452856923140435984577574698574803934567774824230985421074605062371141877954182153046474983581941267398767559165543946077062914571196477686542167660429831652624386837205668069376

## Vs. NumPy operation (fixed-width `int64` overflows instead of growing)

In [3]:
np.int64(2) ** 1000

0

# 1) Array: the base NumPy Structure

In [4]:
arr = np.array([1,2,3])
arr

array([1, 2, 3])

In [5]:
len(arr)

3

In [6]:
arr[1]

2

In [7]:
type(arr[1])

numpy.int64

In [8]:
arr.dtype

dtype('int64')

In [9]:
arr32 = np.array([1,2,3], dtype=np.int32)
arr32

array([1, 2, 3], dtype=int32)

In [10]:
arr * arr

array([1, 4, 9])

### Multiply 1M random numbers X 1M random numbers

In [11]:
v1 = np.random.rand(1000000)
v2 = np.random.rand(1000000)

In [12]:
%time v1 * v2

CPU times: user 13.8 ms, sys: 3.76 ms, total: 17.6 ms
Wall time: 34.2 ms


array([0.00573748, 0.58262433, 0.89184807, ..., 0.01857637, 0.46911229,
       0.12909404])

### Dot Product

In [13]:
# In this case: (1x1 + 2x2 +3x3)
np.dot(arr, arr)

14

In [14]:
arr @ arr

14

## Matrix manipulation

In [15]:
mat = np.array([[1,2,3], [4,5,6], [7,8,9]])
mat

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [16]:
v = np.arange(12)
v

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [17]:
v.reshape((4, 3))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [18]:
mat = np.arange(12).reshape((4, 3))
mat

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [19]:
mat.shape

(4, 3)

In [20]:
mat2 = mat.reshape((3, 4))
mat2

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [21]:
# replace a value in mat; mat2 was created by reshaping mat, so the change shows up in mat2 as well
mat[1,2] = 17

In [22]:
mat2

array([[ 0,  1,  2,  3],
       [ 4, 17,  6,  7],
       [ 8,  9, 10, 11]])

# 2) Slicing

In [23]:
nums = [1,2,3,4,5]
nums[2:4]

[3, 4]

In [24]:
v = np.arange(1, 6)
v[2:4]

array([3, 4])

In [25]:
arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [26]:
# slice out 1st row
arr[0]

array([0, 1, 2, 3])

In [27]:
# slice: 2nd row 2nd column
arr[1, 1]

5

In [28]:
# slice: 2nd column
arr[:, 1]

array([1, 5, 9])

In [29]:
arr[:,1].reshape((3, 1))

array([[1],
       [5],
       [9]])

In [30]:
# slice: from 2nd row, from 3rd column
arr[1:, 2:]

array([[ 6,  7],
       [10, 11]])

In [31]:
# slicing to set values
arr[1:, 2:] = 7
arr

array([[0, 1, 2, 3],
       [4, 5, 7, 7],
       [8, 9, 7, 7]])

# 3) Boolean Indexing: selecting data by logic

In [32]:
arr = np.arange(3)
arr

array([0, 1, 2])

In [33]:
arr[np.array([True, False, True])]

array([0, 2])

In [34]:
arr >= 1

array([False,  True,  True])

In [35]:
arr[arr>=1]

array([1, 2])

In [36]:
arr = np.arange(10)

In [37]:
# all elements bigger than 2 AND smaller than 7
arr[(arr>2)&(arr<7)]

array([3, 4, 5, 6])

In [38]:
# all elements bigger than 7 OR smaller than 2
arr[(arr>7)|(arr<2)]

array([0, 1, 8, 9])

In [39]:
# tilde sign to negate a condition
arr[~(arr>7)]

array([0, 1, 2, 3, 4, 5, 6, 7])

## Ex: Values that are more than 1.5x Standard Deviation from Mean

In [40]:
mat = np.random.rand(5, 5)
mat

array([[0.23376234, 0.39153266, 0.63809181, 0.51407882, 0.3875599 ],
       [0.22225696, 0.62439761, 0.80768164, 0.03369826, 0.23764498],
       [0.70022541, 0.60049929, 0.42386511, 0.2347914 , 0.94242604],
       [0.26210226, 0.86859559, 0.24768218, 0.73730224, 0.35849846],
       [0.43419031, 0.96115601, 0.61674496, 0.95908703, 0.20927519]])

In [41]:
# all the values that are more than one and a half standard deviations from the mean
mat[np.abs(mat - mat.mean()) > 1.5*mat.std()]

array([0.03369826, 0.94242604, 0.96115601, 0.95908703])

In [42]:
np.abs(mat - mat.mean())

array([[0.27212352, 0.11435319, 0.13220595, 0.00819296, 0.11832596],
       [0.2836289 , 0.11851175, 0.30179578, 0.4721876 , 0.26824088],
       [0.19433955, 0.09461343, 0.08202075, 0.27109445, 0.43654018],
       [0.2437836 , 0.36270973, 0.25820368, 0.23141638, 0.1473874 ],
       [0.07169555, 0.45527015, 0.1108591 , 0.45320117, 0.29661067]])

In [43]:
# boolean mask: True where the absolute deviation from the mean exceeds 1.5 times the standard deviation (mat.std())
np.abs(mat - mat.mean()) > 1.5*mat.std()

array([[False, False, False, False, False],
       [False, False, False,  True, False],
       [False, False, False, False,  True],
       [False, False, False, False, False],
       [False,  True, False,  True, False]])

In [44]:
mat[np.abs(mat - mat.mean()) > 1.5*mat.std()]

array([0.03369826, 0.94242604, 0.96115601, 0.95908703])

In [45]:
# Normalize to the mean: overwrite the outliers with the matrix mean
mat[np.abs(mat - mat.mean()) > 1.5*mat.std()] = mat.mean()

# 4) Broadcasting

![numpy_broadcasting](img/numpy_broadcasting.png)

### Broadcasting math operations to all elements of a vector

In [46]:
arr = np.arange(3)
arr + 4

array([4, 5, 6])

In [47]:
arr / 7

array([0.        , 0.14285714, 0.28571429])

In [48]:
arr ** 2

array([0, 1, 4])

### Combining Vector and Matrix

In [49]:
mat = np.arange(9).reshape((3, 3))
vec = np.arange(3)

In [50]:
mat

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [51]:
vec

array([0, 1, 2])

In [52]:
mat + vec

array([[ 0,  2,  4],
       [ 3,  5,  7],
       [ 6,  8, 10]])

### Combining 2 Vectors with different shapes

In [53]:
v1 = np.arange(3)
v2 = np.arange(3).reshape((3, 1))

In [54]:
v1

array([0, 1, 2])

In [55]:
v2

array([[0],
       [1],
       [2]])

In [56]:
v2.shape

(3, 1)

In [57]:
v1 + v2

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [58]:
# deliberate error
# np.arange(3) + np.arange(4)

# 5) Array Operations

In [59]:
v = np.arange(12).reshape((4, 3))
v

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [60]:
#directory of all available methods
#dir(v)

In [61]:
# transpose matrix method
v.T

array([[ 0,  3,  6,  9],
       [ 1,  4,  7, 10],
       [ 2,  5,  8, 11]])

In [62]:
# at least one element True?
v.any()

True

In [63]:
# all elements are True?
v.all()

False

In [64]:
v

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [65]:
v.prod()

0

In [66]:
# sums of rows
v.sum(axis=1)

array([ 3, 12, 21, 30])

In [67]:
# sums of columns
v.sum(axis=0)

array([18, 22, 26])

In [68]:
v1 = v.copy()

In [69]:
v1[0,0] = 1000
v

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [70]:
v1

array([[1000,    1,    2],
       [   3,    4,    5],
       [   6,    7,    8],
       [   9,   10,   11]])

In [71]:
v1.prod()

39916800000

# 6) Serialization
 `dump` and `load` serialize to and de-serialize from a file-like object. <br> 
 `dumps` and `loads` do the same with bytes. <br> 
 The `s` variants, which work with bytes, are useful when you'd like to send arrays over the network, save them to a database, and in many other scenarios. 

In [72]:
data = v.dumps()
data # shows sequence of bytes

b'\x80\x02cnumpy.core.multiarray\n_reconstruct\nq\x00cnumpy\nndarray\nq\x01K\x00\x85q\x02c_codecs\nencode\nq\x03X\x01\x00\x00\x00bq\x04X\x06\x00\x00\x00latin1q\x05\x86q\x06Rq\x07\x87q\x08Rq\t(K\x01K\x04K\x03\x86q\ncnumpy\ndtype\nq\x0bX\x02\x00\x00\x00i8q\x0cK\x00K\x01\x87q\rRq\x0e(K\x03X\x01\x00\x00\x00<q\x0fNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00tq\x10b\x89h\x03X`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x00\x00\x00\x00\x07\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\t\x00\x00\x00\x00\x00\x00\x00\n\x00\x00\x00\x00\x00\x00\x00\x0b\x00\x00\x00\x00\x00\x00\x00q\x11h\x05\x86q\x12Rq\x13tq\x14b.'

In [73]:
# np.loads is deprecated since NumPy 1.15 and has been removed from later releases.
# ndarray.dumps() produces a pickle byte string, so pickle.loads is the supported
# way to turn it back into an array.
# NOTE: unpickling can execute arbitrary code -- only load bytes you trust.
v2 = pickle.loads(data)
v2 # re-reads into np arrays

  """Entry point for launching an IPython kernel.


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

# 7) ufuncs

In [74]:
np.sin(np.pi/2)

1.0

In [75]:
v = np.arange(-3, 3)
np.sin(v)

array([-0.14112001, -0.90929743, -0.84147098,  0.        ,  0.84147098,
        0.90929743])

### Ex function: return 0 if negative, return number otherwise

In [76]:
def noneg(n):
    """Return 0 for a negative scalar, otherwise return the value unchanged.

    Relies on the truth value of ``n < 0``, so it is meant for a single
    number rather than a whole array.
    """
    return 0 if n < 0 else n

In [77]:
noneg(7)

7

In [78]:
noneg(-3)

0

In [79]:
# deliberate error: function won't work on vector
#noneg(v)

### Overcoming error with ufunc

In [80]:
# apply np.vectorize as a decorator: a decorator is a function that takes a function as its argument and returns a function.
@np.vectorize
def noneg(n):
    """Element-wise clamp: a negative element becomes 0, any other element is kept."""
    return 0 if n < 0 else n

In [81]:
# the decorated version above is equivalent to defining the plain function
# and then wrapping it with np.vectorize by hand
def noneg(n):
    """Return 0 for a negative value, otherwise return the value unchanged."""
    return 0 if n < 0 else n
noneg = np.vectorize(noneg)

In [82]:
noneg(3)

array(3)

In [83]:
noneg(3).shape

()

In [84]:
noneg(v)

array([0, 0, 0, 0, 1, 2])

In [85]:
nv = np.array([-1, np.nan, 1])
np.sin(nv)

array([-0.84147098,         nan,  0.84147098])

In [86]:
# deliberate error: the noneg function will give an error on NaN
#noneg(nv)

## np.nan is a very negative type

In [87]:
np.nan > 0

False

In [88]:
np.nan < 0

False

In [89]:
np.nan == np.nan

False

### Fixing the ufunc to work with NaN

In [90]:
@np.vectorize
def noneg(n):
    """Clamp negatives to 0 while letting NaN (and non-negative values) through.

    Negatives are replaced by the integer literal 0, not by a zero of the
    input's own type.
    """
    # NaN is checked first so it is never compared against 0
    if np.isnan(n) or n >= 0:
        return n
    return 0

In [91]:
# deliberate error
#noneg(nv)

In [92]:
@np.vectorize
def noneg(n):
    """Clamp negatives to a zero of the element's own type; keep NaN and non-negatives.

    Building the zero from the element's class (instead of the int literal 0)
    keeps the replacement value in the same numeric type as the input element.
    """
    # NaN is checked first so it is never compared against 0
    if np.isnan(n) or n >= 0:
        return n
    return type(n)(0)

In [93]:
noneg(nv)

array([ 0., nan,  1.])

In [94]:
@np.vectorize
def isneg(n):
    """Element-wise test for 'is a negative number'; NaN counts as not negative."""
    if np.isnan(n):
        return False
    return n < 0

In [95]:
nv[isneg(nv)] = 0

In [96]:
nv

array([ 0., nan,  1.])