# Numpy

- A multi-dimensional array library
    - you can store 1D, 2D, 3D, 4D... arrays


#### Lists vs Numpy:
- Lists are very slow
    - Numpy is fast because it uses fixed types
- Lists store extra data alongside every value: size (4 bytes), reference count (8 bytes (long)), object type (long), and object value (long) — so it takes 28 bytes in total to store one value in a list
    - Numpy stores only the value itself in a fixed-size type such as int32 (4 bytes); the default integer type depends on the platform, and it can be changed to 16 or 8 bits manually
- Numpy uses fewer bytes of memory, so the data is easier and faster to read
- In Numpy, there's no type checking when iterating through objects
- List elements are scattered around in memory. So, a list essentially contains pointers to the values that are intended to be members of the list, and jumping around memory to follow them is not very fast.
    - Numpy utilizes contiguous memory
    - Benefits of contiguous memory include:
        - SIMD vector processing (single instruction multiple data)
        - Effective cache utilization
- We can do insertion, deletion, appending, concatenation etc. in both lists and Numpy, but Numpy can do more.

#### Applications of Numpy:
- Mathematics (MATLAB replacement)
- Plotting (Matplotlib)
- Backend of Pandas, Connect 4, Digital Photography etc
- Machine learning (directly and indirectly (through tensor libraries))


In [1]:
import numpy as np

## The basics

In [14]:
# Build arrays from Python lists; the dtype is inferred unless given explicitly.

# 1-D array of integers
a = np.array([1, 2, 3])

# 2-D array of floats (2 rows, 3 columns)
b = np.array([
    [9.0, 8.0, 7.0],
    [6.0, 5.0, 4.0],
])

# mixed int/float input forced to 16-bit integers
c = np.array([1, 2.0, 3], dtype='int16')

# the same input stored as strings
d = np.array([1, 2.0, 3], dtype='str')

# show every array under its own label
for label, arr in (('a', a), ('b', b), ('c', c), ('d', d)):
    print(label + ':\n', arr)

a:
 [1 2 3]
b:
 [[9. 8. 7.]
 [6. 5. 4.]]
c:
 [1 2 3]
d:
 ['1' '2.0' '3']


In [15]:
# number of dimensions (axes) of each array

for name, arr in (('a', a), ('b', b), ('c', c)):
    print(name + ':', arr.ndim)

a: 1
b: 2
c: 1


In [9]:
# get shape: a tuple holding the length of each axis
# (rows, columns) for a 2-D array; a 1-D array has a single entry

print('a:', a.shape)
print('b:', b.shape)

a: (3,)
b: (2, 3)


In [16]:
# Get type: dtype is the data type shared by every element of the array

print('a:', a.dtype) # inferred from integer input
print('b:', b.dtype) # inferred from float input
print('c:', c.dtype) # the dtype that was requested when c was created
print('d:', d.dtype) # string dtype

a: int64
b: float64
c: int16
d: <U3


In [22]:
# get size

print('a:', a.itemsize) # itemsize gives the memory size (in bytes) of one item in the array
print('b:', b.itemsize)
print('c:', c.itemsize)
print('d:', d.itemsize)

print('a:', a.size) # size gives the number of elements

# total memory taken by the elements = number of elements * bytes per element
print('total size of a:', a.size * a.itemsize)
print('nbytes (total size):', a.nbytes) # nbytes reports that same total directly

a: 8
b: 8
c: 2
d: 12
a: 3
total size of a: 24
nbytes (total size): 24


## Accessing/changing specific elements, rows, columns, etc

In [24]:
# A 2 x 7 array to index into.
a = np.array([
    [1, 2, 3, 4, 5, 6, 7],
    [8, 9, 10, 11, 12, 13, 14],
])

# A single element is addressed as [row, column]; both indices are 0-based.
row, col = 1, 5
print(a[row, col])  # 13
print(a[row, -2])   # 13 again: a negative index counts back from the end


13
13


In [25]:
# get specific row

print(a[0, :]) # row index 0 (the first row), with ':' selecting all the columns

[1 2 3 4 5 6 7]


In [26]:
# get specific column

print(a[:, 2]) # ':' selects all the rows; column index 2 is the 3rd column

[ 3 10]


In [27]:
# getting a little more fancy [startindex:endindex:stepsize]
# (endindex itself is not included)

print(a[0, 1:6:2]) # first row, columns 1, 3 and 5

[2 4 6]


In [30]:
# altering elements (assignments modify the array in place)

a[1,5] = 20 # replace a single element
print(a)

a[:,2] = 5 # every specified element becomes 5 (here: the whole 3rd column)
print(a)

a[:,2] = [100,200] # respective: one value for each row of the 3rd column
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 20 14]]
[[ 1  2  5  4  5  6  7]
 [ 8  9  5 11 12 20 14]]
[[  1   2 100   4   5   6   7]
 [  8   9 200  11  12  20  14]]


#### 3D example

In [38]:
h = np.array([[[1,2], [3,4]], [[5,6], [7,8]]]) # two 2x2 blocks -> shape (2, 2, 2)
print(h)
print(h[0,1,1]) # first block, second row, second column -> 4
print(h[:,1,:]) # the second row of every block

h[:,1,:] = [[9,9], [8,8]] # assignment should have the same shape as the selected slice, here (2, 2)
print('\nh:\n', h)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
4
[[3 4]
 [7 8]]

h:
 [[[1 2]
  [9 9]]

 [[5 6]
  [8 8]]]


#### Initializing all sorts of arrays

In [43]:
# Arrays filled with zeros: pass a length for 1-D, or a shape tuple otherwise.

a = np.zeros(5)             # 1-D, five elements
b = np.zeros((2, 3))        # 2-D, 2 rows x 3 columns
c = np.zeros((2, 3, 3, 2))  # 4-D

for zero_array in (a, b, c):
    print(zero_array)

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[[[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]]


 [[[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]

  [[0. 0.]
   [0. 0.]
   [0. 0.]]]]


In [46]:
# all 1s

a = np.ones((4,2,2), dtype="int32") # shape (4, 2, 2); dtype asks for 32-bit integers instead of floats
print(a)

[[[1 1]
  [1 1]]

 [[1 1]
  [1 1]]

 [[1 1]
  [1 1]]

 [[1 1]
  [1 1]]]


In [47]:
# any other number

a = np.full((2,2), 99) # full(shape, number): a 2x2 array in which every element is 99
print(a)

[[99 99]
 [99 99]]


In [49]:
# full like: fill a new array that copies the shape of an existing one

j = np.array([[1,2,3,4,5,6,7], [8,9,10,11,12,13,14]])

t = np.full_like(j, 4) # shape (and dtype) come from j; here the same result as np.full(j.shape, 4)
print(t)

[[4 4 4 4 4 4 4]
 [4 4 4 4 4 4 4]]


In [55]:
# random decimal numbers (uniformly distributed between 0 and 1)

i = np.random.rand(2,4,3) # rand takes the dimensions as separate arguments
print(i)

p = np.random.random_sample(j.shape) # random_sample takes the shape as a single tuple
print('\n\n', p)

[[[0.58968325 0.6472798  0.12923995]
  [0.21045758 0.51877959 0.26417158]
  [0.83291705 0.43166203 0.00438546]
  [0.66164743 0.04576099 0.92854254]]

 [[0.48455293 0.88673045 0.55684389]
  [0.62814183 0.90036529 0.78152751]
  [0.20112054 0.49270769 0.22649105]
  [0.90786112 0.438601   0.9303546 ]]]


 [[0.73183621 0.87629915 0.4480396  0.54183679 0.49729587 0.49462735
  0.27976394]
 [0.58260497 0.26458464 0.58644358 0.44381474 0.860813   0.85720642
  0.83851797]]


In [58]:
# random integers

o = np.random.randint(7, size=(3,3)) # one bound: values from 0 up to, but not including, 7
print(o)

o = np.random.randint(-3, 7, size=(3,3)) # two bounds: values from -3 up to, but not including, 7
print(o)

[[3 3 4]
 [0 1 6]
 [3 6 1]]
[[-2  6  0]
 [ 3 -1  4]
 [ 0  0  6]]


In [60]:
# identity matrix

v = np.identity(3) # 3x3: ones on the main diagonal, zeros everywhere else
print(v)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [61]:
# repeat an array
a = np.array([[1,2,3]]) # a single row, shape (1, 3)
q = np.repeat(a, 3, axis=0) # axis=0: the row is repeated 3 times, giving shape (3, 3)
print(q)
q = np.repeat(a, 3) # no axis given: the array is flattened and each element is repeated 3 times (1-D result)
print('\n\n\n', q)

[[1 2 3]
 [1 2 3]
 [1 2 3]]



 [1 1 1 2 2 2 3 3 3]


In [65]:
# challenge: a 5x5 frame of ones around a 3x3 block of zeros with a 9 in its centre

# outer frame: all ones
result = np.ones((5, 5))
print(result)

# 3x3 interior: all zeros, then mark the middle cell
z = np.zeros((3, 3))
print(z)
centre = z.shape[0] // 2
z[centre, centre] = 9
print(z)

# drop the interior into rows 1-3 / columns 1-3 of the frame
interior = slice(1, 4)
result[interior, interior] = z
print(result)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]
[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


## Mathematics

In [73]:
# Arithmetic with a scalar is applied to every element and returns a new array;
# `a` itself is never modified by these expressions.
a = np.array([1, 2, 3, 4])
for value in (a, a + 2, a - 2, a * 2, a / 2, a ** 2):
    print(value)

# Arithmetic between two arrays of the same shape works element by element.
b = np.array([1, 0, 1, 0])
print(a + b)

# NumPy's math functions are applied element by element as well.
print(np.sin(a))

[1 2 3 4]
[3 4 5 6]
[-1  0  1  2]
[2 4 6 8]
[0.5 1.  1.5 2. ]
[ 1  4  9 16]
[2 2 4 4]
[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]


### Linear algebra

In [75]:
# matrix multiplication

a = np.ones((2,3)) # 2x3
print(a)
b = np.full((3,2), 2) # 3x2
print(b)

print(np.matmul(a,b)) # mxn rule: (m x n) times (n x p) gives (m x p), so the inner dimensions (here 3) must match

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]
[[6. 6.]
 [6. 6.]]


In [77]:
# determinant

q = np.identity(3) # determinant of identity matrix is 1
print(np.linalg.det(q)) # the result is returned as a float

1.0


In [None]:
## reference docs -> https://numpy.org/doc/stable/reference/routines.linalg.html

# determinant
# trace
# singular value decomposition
# eigenvalues
# matrix norm
# inverse etc

### Statistics

In [79]:
stats = np.array([[1,2,3], [4,5,6]]) # sample data: 2 rows x 3 columns
print(stats)

[[1 2 3]
 [4 5 6]]


In [85]:
print(np.min(stats)) # smallest value in the whole array
print(np.max(stats)) # largest value in the whole array
print(np.min(stats, axis=1)) # gives the min of each row, whereas axis=0 would give the min of each column
print(np.sum(stats)) # sum all elements. axis can be used here too.

1
6
[1 4]
21


#### Reorganizing arrays

In [88]:
before = np.array([[1,2,3,4], [5,6,7,8]]) # shape (2, 4): 8 elements
print(before)

# reshape accepts any shape whose dimensions multiply to the same element count (8)
after = before.reshape((4,2))
print(after)
after = before.reshape((8,1))
print(after)
after = before.reshape((2,2,2))
print(after)
# deliberate failure: 2 * 3 = 6 does not match the 8 elements, so this raises ValueError
after = before.reshape((2,3)) # mismatch shape
print(after) # never reached because the line above raises

[[1 2 3 4]
 [5 6 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


ValueError: cannot reshape array of size 8 into shape (2,3)

In [91]:
# Stack 1-D vectors on top of each other: each vector becomes one row.

v1 = np.array([1, 2, 3, 4])
v2 = np.array([5, 6, 7, 8])

two_rows = np.vstack((v1, v2))
print(two_rows)

# Any number of equal-length vectors can be stacked, in any order.
five_rows = np.vstack((v1, v2, v2, v2, v1))
print(five_rows)

[[1 2 3 4]
 [5 6 7 8]]
[[1 2 3 4]
 [5 6 7 8]
 [5 6 7 8]
 [5 6 7 8]
 [1 2 3 4]]


In [93]:
# horizontal stacking vectors (shown here with 2-D arrays)

h1 = np.ones((2,4))
h2 = np.zeros((2,2))

# the arrays are joined side by side, so they need the same number of rows
print(np.hstack((h1,h2)))

[[1. 1. 1. 1. 0. 0.]
 [1. 1. 1. 1. 0. 0.]]


# Miscellaneous

In [None]:
# load from file
# have a file with values separated with comma

filedata = np.genfromtxt('data.txt', delimiter=',') # values are parsed as floats by default
# print(filedata)
filedata1 = filedata.astype('int32') # astype returns a converted copy; filedata itself is unchanged
print(filedata1)

In [None]:
# boolean masking and advanced indexing

# an element-wise comparison gives a boolean array of the same shape
print(filedata > 50)
# indexing with that mask keeps only the elements where it is True (1-D result)
print(filedata[filedata > 50])

# index with a list to pick several positions at once;
# a[1,2,8] would be read as one index per axis and raise IndexError on a 1-D array
a = np.array([1,2,3,4,5,6,7,8,9])
print(a[[1,2,8]]) # -> [2 3 9]


# along axis 0 (per column when filedata is 2-D): is any value / are all values > 50?
print(np.any(filedata > 50, axis=0))
print(np.all(filedata > 50, axis=0))


# combine conditions with & (and) and invert with ~ (not);
# each comparison needs its own parentheses because & binds tighter than > and <
print(((filedata > 50) & (filedata < 100)))
print(~((filedata > 50) & (filedata < 100)))