# Numpy Mastering


*** What is numPy? ***

It is a multidimensional array library that only accept elements of the same type and is used to compute complex task more efficiently than traditional list.

*** Applications of Numpy: ***

-Mathematics
-Plotting (with Matplotlib)
-Backend (Pandas, Connect 4, Digital Photography)
-Machine Learning 





## Basic commands

In [1]:
import numpy as np

In [15]:
a = np.array([1,2,3,4]) # 1D
print("1D:\n", a)

b= np.array([
    [3,4,5,6,8,90], 
    [3,0,5,1,-3,-6]
]) # 2D
print("2D:\n", b)

d = np.array([
    [[1,2,3,4,5],[0,9,8,7,6],[5,4,6,3,7]],
    [[1,2,3,4,5],[0,9,8,7,6],[5,4,6,3,7]],
    [[1,2,3,4,5],[0,9,8,7,6],[5,4,6,3,7]],
    [[1,2,3,4,5],[0,9,8,7,6],[5,4,6,3,7]]
]) # 3D
print("3D:\n", d)

e = np.array(
    [c*2,c-5,c/2]
) # 4D
print("4D:\n", e)


1D:
 [1 2 3 4]
2D:
 [[ 3  4  5  6  8 90]
 [ 3  0  5  1 -3 -6]]
3D:
 [[[1 2 3 4 5]
  [0 9 8 7 6]
  [5 4 6 3 7]]

 [[1 2 3 4 5]
  [0 9 8 7 6]
  [5 4 6 3 7]]

 [[1 2 3 4 5]
  [0 9 8 7 6]
  [5 4 6 3 7]]

 [[1 2 3 4 5]
  [0 9 8 7 6]
  [5 4 6 3 7]]]
4D:
 [[[[ 2.   4.   6. ]
   [ 4.  16.   0. ]]

  [[ 2.  10.  18. ]
   [-4.  18.   6. ]]]


 [[[-4.  -3.  -2. ]
   [-3.   3.  -5. ]]

  [[-4.   0.   4. ]
   [-7.   4.  -2. ]]]


 [[[ 0.5  1.   1.5]
   [ 1.   4.   0. ]]

  [[ 0.5  2.5  4.5]
   [-1.   4.5  1.5]]]]


In [20]:
#  get the dimension
print("a.ndim:",a.ndim )
print("b.ndim:", b.ndim )
print("d.ndim:", d.ndim )
print("e.ndim:", e.ndim )

a.ndim: 1
b.ndim: 2
d.ndim: 3
e.ndim: 4


In [21]:
#Get shape (like length for 1D, rows and columns for 2D, length, width and height for 3D)
print("a.shape:",a.shape)
print("b.shape:", b.shape)
print("d.shape:", d.shape)
print("e.shape:", e.shape)

a.shape: (4,)
b.shape: (2, 6)
d.shape: (4, 3, 5)
e.shape: (3, 2, 2, 3)


In [30]:
#Floats are usually larger in bit/byte size compared to integers
c = np.array([1,2,3,4], dtype='int16')
print("int")
print(c.dtype)
c2 = np.array([1.2,2.,3.,4.])
print('\nfloat')
print(c2.dtype)


int
int16

float
float64


In [29]:
#Get itemsize (itemsize stands for number of bytes)
print(a.itemsize)
print(b.itemsize)
print(c.itemsize)
print(d.itemsize)
print(e.itemsize)

8
8
2
8
8


In [35]:
#Get size (gets total number of elements in the numpy array)
print(e.size)
print(e.size)

#Get total itemsize (total bytes in the numpy array)
print(e.size * e.itemsize)
print(e.nbytes)  #.nbytes work too

36
36
288
288


## Accessing data (upto2Darrays)

In [51]:
print(b)
print(b.shape)

print("b[1, 5]:\n\t", b[1, 5])
print("\nb[:, 2:3]:\n", b[:, 2:3])
print("\nb[0, 0:2]:\n\t", b[0, 0:2])


[[ 3  4  5  6  8 90]
 [ 3  0  5  1 -3 -6]]
(2, 6)
b[1, 5]:
	 -6

b[:, 2:3]:
 [[5]
 [5]]

b[0, 0:2]:
	 [3 4]


In [67]:
print(d.shape)

print(d[0:1:4])
print('--------')
print(d[0:1:4][0:2, 2:])
print('--------')
b2 = np.array([[[1,2], [3,4]], [[5,6], [7,8]]])
print(b2[0,1, 1])   #to get number 4
print("-----")
print(b2[0, 1, :]) # get [3,4]

(4, 3, 5)
[[[1 2 3 4 5]
  [0 9 8 7 6]
  [5 4 6 3 7]]]
--------
[[[5 4 6 3 7]]]
--------
4
-----
[3 4]


In [72]:
# get [0.0, 4.0 ]form e (4D)
print(e)
print('-------')
print(e[1,1, 0, 1:])

[[[[ 2.   4.   6. ]
   [ 4.  16.   0. ]]

  [[ 2.  10.  18. ]
   [-4.  18.   6. ]]]


 [[[-4.  -3.  -2. ]
   [-3.   3.  -5. ]]

  [[-4.   0.   4. ]
   [-7.   4.  -2. ]]]


 [[[ 0.5  1.   1.5]
   [ 1.   4.   0. ]]

  [[ 0.5  2.5  4.5]
   [-1.   4.5  1.5]]]]
-------
[0. 4.]


## Changing data

In [78]:
print(b)
print('-----')
#Changing element, 
b[1, 3] = 7890
print(b)
print('------')

#Changing rows
b[0, 1:3] = 5
print(b)
print('-----')

#If you wanna specify what each element will become instead of both becoming the same value
b[0, 1:3] = [6, 7]
print(b)
print('------')

#Changing columns
b[:, 1] = [1, 2]
print(b)

[[ 3  1  7  6  8 90]
 [ 3  2  5 20 -3 20]]
-----
[[   3    1    7    6    8   90]
 [   3    2    5 7890   -3   20]]
------
[[   3    5    5    6    8   90]
 [   3    2    5 7890   -3   20]]
-----
[[   3    6    7    6    8   90]
 [   3    2    5 7890   -3   20]]
------
[[   3    1    7    6    8   90]
 [   3    2    5 7890   -3   20]]


## initialising different arrays

In [84]:
#Making an all 0s matrix
zeros = np.zeros(5)
print(zeros)
print('--------')
zeros2 = np.zeros([3,6])
print(zeros2)
print('---------')
zeros3 = np.zeros([3,4, 2])
print(zeros3)

[0. 0. 0. 0. 0.]
--------
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
---------
[[[0. 0.]
  [0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]
  [0. 0.]]]


In [85]:
#Making an all 1s matrix
ones = np.ones(5)
print(ones)
print('--------')
ones2 = np.ones([3,6])
print(ones2)
print('---------')
ones3 = np.ones([3,4, 2])
print(ones3)

[1. 1. 1. 1. 1.]
--------
[[1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1.]]
---------
[[[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]

 [[1. 1.]
  [1. 1.]
  [1. 1.]
  [1. 1.]]]


In [89]:
#Making an all of any other number. '.full()' function takes 2 parameters, the shape, and the number 
#you want to fill the elements with
ez = np.full((2,2,5), 1000, dtype="float32")
print(ez)

[[[1000. 1000. 1000. 1000. 1000.]
  [1000. 1000. 1000. 1000. 1000.]]

 [[1000. 1000. 1000. 1000. 1000.]
  [1000. 1000. 1000. 1000. 1000.]]]


In [None]:
#Making an all of any other number using 'full_like()', which allows you to take in other array variables as its parameter
#if you put 2D array 'f' in shape parameter it will create a new array with the shape of array 'f', but only takes the shape,
#without its elements in it

g = np.full_like(e, 4)  #g = np.full(f.shape, 4) works similarly too
print(g)

[[[[4. 4. 4.]
   [4. 4. 4.]]

  [[4. 4. 4.]
   [4. 4. 4.]]]


 [[[4. 4. 4.]
   [4. 4. 4.]]

  [[4. 4. 4.]
   [4. 4. 4.]]]


 [[[4. 4. 4.]
   [4. 4. 4.]]

  [[4. 4. 4.]
   [4. 4. 4.]]]]


In [99]:
#Making a random decimal numbers matrix
print(np.random.rand(1))
print('------')
print(np.random.rand(3,3))
print('------')
print(np.random.rand(4, 2,2))

[0.65522075]
------
[[0.85319756 0.68781921 0.95864412]
 [0.82404396 0.47887257 0.03524913]
 [0.31364764 0.84101558 0.53128285]]
------
[[[0.65940879 0.57778617]
  [0.57253274 0.03495534]]

 [[0.25596554 0.88996355]
  [0.99339788 0.53432732]]

 [[0.35744082 0.76322236]
  [0.19555706 0.10975256]]

 [[0.58474183 0.27388692]
  [0.90987899 0.90622996]]]


In [100]:
#need use 'random_sample' command in order to take in array variables
print(np.random.random_sample(e.shape))

[[[[0.98061209 0.51337711 0.58712703]
   [0.62097643 0.13088567 0.6470676 ]]

  [[0.22519778 0.30549274 0.2500233 ]
   [0.9346499  0.66111147 0.21568258]]]


 [[[0.21323027 0.41050053 0.93541595]
   [0.43408254 0.85787991 0.27079606]]

  [[0.24861324 0.35233429 0.23111692]
   [0.88324908 0.46711072 0.05794826]]]


 [[[0.04397571 0.09151232 0.47157486]
   [0.69041544 0.18882628 0.02967939]]

  [[0.19020215 0.95941918 0.0749578 ]
   [0.59532705 0.87897356 0.94473956]]]]


In [106]:
#Making a random integer numbers matrix
#first parameter sets range of the random integers. So 7 means range from 0 to 7.
#second parameter sets the shape, but the command is size here dk why but it works
print(np.random.randint(27, size=(3,4)))
print("--------")
print(np.random.randint(3, size=(2,4,2,4)))

[[15  6 24 14]
 [ 9 20 24 25]
 [21  5  5 24]]
--------
[[[[0 0 1 0]
   [0 2 2 0]]

  [[2 0 1 2]
   [0 2 0 0]]

  [[2 1 1 1]
   [2 0 2 0]]

  [[1 1 2 2]
   [2 0 1 1]]]


 [[[0 0 1 2]
   [2 2 0 2]]

  [[0 1 2 1]
   [2 2 0 1]]

  [[0 0 1 2]
   [2 2 1 2]]

  [[0 0 1 0]
   [0 0 0 1]]]]


In [107]:
# Making the identity matrix (An identity matrix is a square matrix in which all the elements of principal 
#diagonals are one, and all other elements are zeros)
print(np.identity(3))

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [117]:
#Repeating an array ('.repeat' has 3 parameters, the array, number of times to repeat, around which axis)
print(b)
print("-------")
print(np.repeat(d, 2, axis=1))

[[   3    1    7    6    8   90]
 [   3    2    5 7890   -3   20]]
-------
[[[1 2 3 4 5]
  [1 2 3 4 5]
  [0 9 8 7 6]
  [0 9 8 7 6]
  [5 4 6 3 7]
  [5 4 6 3 7]]

 [[1 2 3 4 5]
  [1 2 3 4 5]
  [0 9 8 7 6]
  [0 9 8 7 6]
  [5 4 6 3 7]
  [5 4 6 3 7]]

 [[1 2 3 4 5]
  [1 2 3 4 5]
  [0 9 8 7 6]
  [0 9 8 7 6]
  [5 4 6 3 7]
  [5 4 6 3 7]]

 [[1 2 3 4 5]
  [1 2 3 4 5]
  [0 9 8 7 6]
  [0 9 8 7 6]
  [5 4 6 3 7]
  [5 4 6 3 7]]]


## Problem

In [120]:
#Initialise this array pattern (try your best not to hardcode)
#[[1. 1. 1. 1. 1.]
# [1. 0. 0. 0. 1.]
# [1. 0. 9. 0. 1.]
# [1. 0. 0. 0. 1.]
# [1. 1. 1. 1. 1.]]

p1 = np.ones([5,5])
p1[1:4,1:4]=0
p1[2,2]=9
print(p1)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


In [122]:
#  how to copy array
p2 = p1.copy()
print(p2)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


## Basic mathematic

In [127]:
#Addition of the elements
print(a + 2)
print('-------')

#Subtraction of the elements
print(a - 2)
print('-------')

#Multiplication of the elements
print(a * 2)
print('-------')

#Squaring of the elements
print(a ** 2)  #This means (a * 2^2)
print('-------')

#Division of the elements
print(a / 2)
print('-------')

#Add/Subtract/Multiply/Divide 2 arrays together
b = np.array([1,0,1,0])
print(a + b)
print('-------')

#Trigonometry-ing the elements
print(np.sin(a))
print('-------')
print(np.cos(a))
print('-------')
print(np.tan(a))
print('-------')

print(np.log(a))
print('-------')
print(np.log10(a))
print('-------')
print(np.log2(d))

[3 4 5 6]
-------
[-1  0  1  2]
-------
[2 4 6 8]
-------
[ 1  4  9 16]
-------
[0.5 1.  1.5 2. ]
-------
[2 2 4 4]
-------
[ 0.84147098  0.90929743  0.14112001 -0.7568025 ]
-------
[ 0.54030231 -0.41614684 -0.9899925  -0.65364362]
-------
[ 1.55740772 -2.18503986 -0.14254654  1.15782128]
-------
[0.         0.69314718 1.09861229 1.38629436]
-------
[0.         0.30103    0.47712125 0.60205999]
-------
[[[0.         1.         1.5849625  2.         2.32192809]
  [      -inf 3.169925   3.         2.80735492 2.5849625 ]
  [2.32192809 2.         2.5849625  1.5849625  2.80735492]]

 [[0.         1.         1.5849625  2.         2.32192809]
  [      -inf 3.169925   3.         2.80735492 2.5849625 ]
  [2.32192809 2.         2.5849625  1.5849625  2.80735492]]

 [[0.         1.         1.5849625  2.         2.32192809]
  [      -inf 3.169925   3.         2.80735492 2.5849625 ]
  [2.32192809 2.         2.5849625  1.5849625  2.80735492]]

 [[0.         1.         1.5849625  2.         2.32192809

  print(np.log2(d))


## Reorganising array

In [147]:
before = np.array([[1,2,3,4], [5,6,7,8]])

print(before)

#This array's shape is now (4,2)
print(before.shape)
print('------\n')

#We can change its shape to however we want ((8,1), (4,2), (2,2,2) etc...) using '.reshape()'
#As long as the number of values fit. So shapes like ((2,3) or (3,3,3)) won't work and you'll get an error
after = before.reshape(8,1)
after2 = before.reshape(2,2,2)
print(after)
print('------')
print(after2)
print('------\n')

#Vertically stacking matrices
#Note that the arrays you are stacking must share the same column number or else it won't work
#E.g. if v2 is [5,6,7,8,9], then you can't stack it on top of v1
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])
print(np.vstack([v1,v2,v1,v2])) 
print('------\n')

#Horizontally stacking matrices
#Likewise, #Note that the arrays you are stacking must share the same row number or else it won't work
#E.g. if h2 is np.zeros((3,2)), then you can't stack it beside h2
h1 = np.ones((2,4))
h2 = np.zeros((2,2))
print(np.hstack([h1,h2]))

[[1 2 3 4]
 [5 6 7 8]]
(2, 4)
------

[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]
------
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]
------

[[1 2 3 4]
 [5 6 7 8]
 [1 2 3 4]
 [5 6 7 8]]
------

[[1. 1. 1. 1. 0. 0.]
 [1. 1. 1. 1. 0. 0.]]


## Load file

In [138]:
file1 = np.genfromtxt('data.txt', delimiter=',');
print(file1)
print('------')
file2 = np.genfromtxt('data.txt')
print(file2)

#Notice that all the data is in floats (as they have '.' at the end of the values)
#To solve that, we redefine the data types from floats (float32/64/...) to ints (int8/16/32...) using the
#'.astype' function

file2.astype('int32')

[[  1.  13.  21.  11. 196.  75.   4.   3.  34.   6.   7.   8.   0.   1.
    2.   3.   4.   5.]
 [  3.  42.  12.  33. 766.  75.   4.  55.   6.   4.   3.   4.   5.   6.
    7.   0.  11.  12.]
 [  1.  22.  33.  11. 999.  11.   2.   1.  78.   0.   1.   2.   9.   8.
    7.   1.  76.  88.]]
------
[nan nan nan]


  file2.astype('int32')


array([-2147483648, -2147483648, -2147483648], dtype=int32)

## Boolean masking advanced indexing

In [145]:
filedata =  np.genfromtxt('data.txt', delimiter=',')
print(filedata)

[[  1.  13.  21.  11. 196.  75.   4.   3.  34.   6.   7.   8.   0.   1.
    2.   3.   4.   5.]
 [  3.  42.  12.  33. 766.  75.   4.  55.   6.   4.   3.   4.   5.   6.
    7.   0.  11.  12.]
 [  1.  22.  33.  11. 999.  11.   2.   1.  78.   0.   1.   2.   9.   8.
    7.   1.  76.  88.]]


In [141]:
#Print out filedata array with Boolean Masking (indicates with True or False if the value is larger than
#50)
print(filedata > 50)
print('------')

#Print out filedata values that have values > 50
print(filedata[filedata > 50])

[[False False False False  True  True False False False False False False
  False False False False False False]
 [False False False False  True  True False  True False False False False
  False False False False False False]
 [False False False False  True False False False  True False False False
  False False False False  True  True]]
------
[196.  75. 766.  75.  55. 999.  78.  76.  88.]


In [144]:
#Checks that in every column, and printing a boolean value (True/False), if ANY of the values meet the condition
#e.g. > 50
print(np.any(filedata > 50, axis=0)) # for column
print('-----')
print(np.any(filedata > 50, axis=1)) # for rows

[False False False False  True  True False  True  True False False False
 False False False False  True  True]
-----
[ True  True  True]


In [146]:
#Checks that in every column, and printing a boolean value (True/False), if ALL of the values meet the condition
#e.g. > 50
print(np.all(filedata > 50, axis=0))   #Only column 5 (index 4) is True as all of the values is > 50 (196, 766, 999)
print(np.all(filedata > 50, axis=1))   #This would be for the row all value > 50 (False for all 3 rows)



#Putting multiple conditions for Boolean Masking
c = (filedata > 50) & (filedata < 100)
print(c)
d = (~(filedata > 50) & (filedata < 100))   #'~' means do the opposite (put True for all values that are NOT
print(d)                                    #between 50 and 100)

[False False False False  True False False False False False False False
 False False False False False False]
[False False False]
[[False False False False False  True False False False False False False
  False False False False False False]
 [False False False False False  True False  True False False False False
  False False False False False False]
 [False False False False False False False False  True False False False
  False False False False  True  True]]
[[ True  True  True  True False False  True  True  True  True  True  True
   True  True  True  True  True  True]
 [ True  True  True  True False False  True False  True  True  True  True
   True  True  True  True  True  True]
 [ True  True  True  True False  True  True  True False  True  True  True
   True  True  True  True False False]]
