# Numpy Python tutorial 
* **Why do we use NumPy rather than lists?**
  ```
  Because NumPy is much faster than lists, for two reasons:
  1. NumPy uses fixed types (fewer bytes to read per element)
  2. NumPy uses contiguous memory
  ```
  <br>
  
* **Applications of Numpy**
  ```
  1. Mathematics (MATLAB replacement)
  2. Plotting (Matplotlib)
  3. Backend (Pandas, Connect 4, Digital Photography)
  4. Machine Learning (Tensors)
  ```

# Load in NumPy

In [2]:
import numpy as np

## The basics

In [3]:
a = np.array([1,2,3])
print(a) # 1-dimensional array
# or type `a` on its own line instead of print(a)

[1 2 3]


In [4]:
b = np.array([[9.0,8.0,7.0],[6.0,5.0,4.0]]) # 2-dimensional array
print(b)

[[9. 8. 7.]
 [6. 5. 4.]]


In [5]:
# Get the number of dimensions of an array
print(a.ndim)
print(b.ndim)

1
2


In [6]:
# Get the shape of an array (the length of each dimension)
print(a.shape)
print(b.shape)

(3,)
(2, 3)


In [7]:
# Get the element type (dtype)
# NOTE: the default integer dtype depends on the platform and NumPy version,
# so `a` may report int64 instead of int32 on your machine
print(a.dtype)
print(b.dtype)

int32
float64


In [8]:
# We can specify the data type for the array
c = np.array([4,5,6], dtype = 'int16')
print(c.dtype)

int16


In [9]:
# Get the size in bytes of a single element (itemsize)
print(a.itemsize)
print(b.itemsize)
print(c.itemsize)

4
8
2


In [10]:
# Get total size (number of elements * size of each element)
# a.size returns the number of elements in the array
print(a.size * a.itemsize)
print(b.size * b.itemsize)
print(c.size * c.itemsize)

# OR

print(a.nbytes)
print(b.nbytes)
print(c.nbytes)

12
48
6
12
48
6


## Accessing/Changing specific elements, rows, columns, etc

In [11]:
a = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 13 14]]


In [12]:
# Get a specific element [r,c]
# For example we want to get the number '13'
a[1,5] # 1 is for second row , 5 is for 6th element 

13

In [13]:
# Get a specific row
a[0, :] # 0 is for the first row , : is for all the elements in that row

array([1, 2, 3, 4, 5, 6, 7])

In [14]:
# Get a specific column
a[:, 2] # : is for all the rows , 2 is for the third column

array([ 3, 10])

In [15]:
# Getting a little more fancy [startindex:endindex:stepsize]
a[0, 1:6:2] 

array([2, 4, 6])

In [16]:
# Change the 13 to 20
a[1,5] = 20
print(a)

[[ 1  2  3  4  5  6  7]
 [ 8  9 10 11 12 20 14]]


In [17]:
# Change the entire row or column
a[1, :] = 1
print(a)

[[1 2 3 4 5 6 7]
 [1 1 1 1 1 1 1]]


## 3D Example

In [18]:
b = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(b)

[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


In [19]:
# Get specific element (work outside in)
b[0,1,1] 

4

In [20]:
# Replace 
b[:,1,:] = [[9,9],[8,8]]
b

array([[[1, 2],
        [9, 9]],

       [[5, 6],
        [8, 8]]])

## Initializing Different Types of Arrays

In [21]:
# All 0s matrix
np.zeros((3,3))

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [22]:
# All 1s matrix
c = np.ones((2,2), dtype='int32') # change its data type from float to integer 32

print(c)
c[1,1].dtype

[[1 1]
 [1 1]]


dtype('int32')

In [23]:
# Any other number 
np.full((2,2), 44)

array([[44, 44],
       [44, 44]])

In [24]:
# Any other number (full_like)
np.full_like(a,4) # creates a matrix of numbers using the shape of another matrix

array([[4, 4, 4, 4, 4, 4, 4],
       [4, 4, 4, 4, 4, 4, 4]])

In [25]:
# Random decimal numbers
np.random.rand(4,2) 

array([[0.66174954, 0.49760073],
       [0.0229995 , 0.28430969],
       [0.10964983, 0.914179  ],
       [0.71730099, 0.82969609]])

In [26]:
# Random integer values
np.random.randint(10, size=(4,4)) # 10 is the exclusive upper bound: values are drawn from 0 to 9

array([[7, 6, 4, 6],
       [7, 4, 7, 7],
       [0, 7, 5, 1],
       [8, 2, 1, 4]])

In [27]:
# The Identity matrix
np.identity(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [28]:
# Repeat the array
arr1 = np.array([1,2,3]) # 1D
r1 = np.repeat(arr1,3)

arr2 = np.array([[1,2,3]]) # 2D
r2 = np.repeat(arr2,3,axis=0)

print(r1)
print(r2)

[1 1 1 2 2 2 3 3 3]
[[1 2 3]
 [1 2 3]
 [1 2 3]]


In [29]:
# Challenge
output = np.ones((5,5),dtype='int32')
mid = np.zeros((3,3))
mid[1,1] = 9
output[1:4, 1:4] = mid

print(output)

[[1 1 1 1 1]
 [1 0 0 0 1]
 [1 0 9 0 1]
 [1 0 0 0 1]
 [1 1 1 1 1]]


## Be careful when copying arrays !!!

In [30]:
a = np.array([1,2,3])
b = a
b[0] = 100 # This will also change the a array

print(a)
print(b)

[100   2   3]
[100   2   3]


In [31]:
# Instead we should do this:
c = np.array([1,2,3])
d = c.copy()
d[0] = 100

print(c)
print(d)

[1 2 3]
[100   2   3]


## Mathematics

In [32]:
a = np.array([1,2,3,4])
b = np.array([1,0,1,0])

# Arithmetic with a scalar is applied to every element of the array.
# The results are not named `sum` or `pow`: those names would shadow
# Python's built-in functions for the rest of the session.
added = a + 2
print(added)

sub = a - 2
print(sub)

mul = a * 2
print(mul)

squared = a ** 2
print(squared)

c = a + b # element wise
print(c)

[3 4 5 6]
[-1  0  1  2]
[2 4 6 8]
[ 1  4  9 16]
[2 2 4 4]


In [33]:
# Take sin
np.sin(a)

array([ 0.84147098,  0.90929743,  0.14112001, -0.7568025 ])

In [34]:
# Take cos
np.cos(b)

array([0.54030231, 1.        , 0.54030231, 1.        ])

## Linear algebra

In [35]:
a = np.ones((2,3))
print(a)

b = np.full((3,4), 2)
print(b) 

# For matrix multiplication, the number of columns of the first matrix must equal
# the number of rows of the second matrix: (2,3) x (3,4) -> (2,4)
# a * b is element-wise, so it will not work here (shapes (2,3) and (3,4) do not match)

np.matmul(a,b)

[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2 2 2]
 [2 2 2 2]
 [2 2 2 2]]


array([[6., 6., 6., 6.],
       [6., 6., 6., 6.]])

In [36]:
# Find the determinant
c = np.identity(3)
np.linalg.det(c) # determinant of an identity matrix is one

1.0

**For more information about linear algebra in NumPy, see the documentation:**
<https://numpy.org/doc/stable/reference/routines.linalg.html>

## Statistics

In [37]:
stats = np.array([[1,2,3],[4,5,6]])
print(stats)

print(np.min(stats))
print(np.max(stats))
print(np.sum(stats))

[[1 2 3]
 [4 5 6]]
1
6
21


## Reorganizing arrays

In [38]:
# Reshaping arrays
# The new shape must hold the same number of elements as the old one (2*4 == 8*1)
before = np.array([[1,2,3,4],[5,6,7,8]])
print(before)

after = before.reshape((8,1))
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]


In [39]:
# Vertically stacking vectors
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

np.vstack([v1,v2,v1,v2])

array([[1, 2, 3, 4],
       [5, 6, 7, 8],
       [1, 2, 3, 4],
       [5, 6, 7, 8]])

In [40]:
# Horizontal stacking vectors
h1 = np.ones((2,4))
h2 = np.zeros((2,2))

print(h1)
print(h2)
np.hstack((h1,h2))

[[1. 1. 1. 1.]
 [1. 1. 1. 1.]]
[[0. 0.]
 [0. 0.]]


array([[1., 1., 1., 1., 0., 0.],
       [1., 1., 1., 1., 0., 0.]])

## Miscellaneous
### Load data from file

In [48]:
# Load data from a file as a numpy array
filedata = np.genfromtxt('data.txt', delimiter=',') # load the file from directory
filedata = filedata.astype('int32') # Change data type from float to integer
filedata

array([[  1,  13,  21,  11, 196,  75,   4,   3,  34,   6,   7,   8,   0,
          1,   2,   3,   4,   5],
       [  3,  42,  12,  33, 766,  75,   4,  55,   6,   4,   3,   4,   5,
          6,   7,   0,  11,  12],
       [  1,  22,  33,  11, 999,  11,   2,   1,  78,   0,   1,   2,   9,
          8,   7,   1,  76,  88]])

## Boolean Masking and Advanced Indexing

In [54]:
filedata > 50 # Element-wise comparison: True where the value is greater than 50, False otherwise

array([[False, False, False, False,  True,  True, False, False, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True,  True, False,  True, False,
        False, False, False, False, False, False, False, False, False],
       [False, False, False, False,  True, False, False, False,  True,
        False, False, False, False, False, False, False,  True,  True]])

In [55]:
filedata[filedata > 50] # Boolean mask: returns the values (not the indexes) of all elements greater than 50

array([196,  75, 766,  75,  55, 999,  78,  76,  88])

In [56]:
# You can index with a list in NumPy
a = np.array([1,2,3,4,5,6,7,8,9])
a[[1,2,8]]

array([2, 3, 9])

In [57]:
np.any(filedata > 50, axis = 0) # True for each column that has at least one value greater than 50

array([False, False, False, False,  True,  True, False,  True,  True,
       False, False, False, False, False, False, False,  True,  True])

In [58]:
np.all(filedata > 50, axis = 0) # Returns true if the entire column is greater than 50

array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False])

In [60]:
# Combine masks with & and negate with ~: True where the value is NOT strictly between 50 and 100
~((filedata > 50) & (filedata < 100))

array([[ True,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False, False]])

In [68]:
quiz = np.array([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20],[21,22,23,24,25],[26,27,28,29,30]])
# NOTE: the row slice -3:0 is empty, because the start (third row from the end) already
# lies past the stop (row 0), so the result has shape (0, 2).
# To select the last three rows, omit the stop: quiz[-3:, 3:]
quiz[-3:0, 3:]

array([], shape=(0, 2), dtype=int32)