# `Numpy Basics for Python: Newbies`

# <font color=red>Mr Fugu Data Science</font>

# (◕‿◕✿)

# `Purpose & Outcome:`

+ `get a crash course to get you started!`

`Don't always believe things blindly, try to visualize and get context. Also, get second opinions and please verify what you see. Just a good life lesson, ok`

In [208]:
import numpy as np
import time
import pandas as pd

+ Lists operations are extremeley common but using them can be slow, `numpy` can be one tool used to speed things up.
    + written in C/C++, lends to speed and the fact that we are `storing arrays in 1 location in memory` unlike python lists.
    + You are using vectorized code, allowing you to avoid (explicit) looping for instance. But, understand operations like these are just behind the scenes working in the C compiler. 
        + With vectorization we can compact our code into memory; as well as the fact that we are able to use contiguous memory management. 
            + Type Checking in regular old Python, is a part that makes it slower because it will infer during runtime. That is not a problem with Numpy.
            + Also, each time you manipulate an array, you will recreate a new one unlike python where you may just add, delete or change something
+ **Pay Attention Here: unlike Python Lists, Numpy arrays NEED to be same DATA TYPE**

`You want to do mathematical or logical operations on arrays then Numpy is your buddy` 

`----------------`

If you don't have Numpy install now:

`pip install numpy` | `conda install numpy`

# `Compare python to numpy examples`

In [528]:
size = 100000000 # billion 
  
list_1 = [val for val in range(size)]
array_1 = np.arange(size)

initialTime = time.time()
resultantList = [(a * b) for a, b in zip(list_1, list_1)]
 
# calculating execution time
print("Time taken by Lists :", 
      (time.time() - initialTime),
      "seconds")
 
# NumPy array
initialTime = time.time()
resultantArray = array_1 * array_1
 
# calculating execution time 
print("Time taken by NumPy Arrays :",
      (time.time() - initialTime),
      "seconds")

Time taken by Lists : 20.206037998199463 seconds
Time taken by NumPy Arrays : 2.0066680908203125 seconds


# `Comparing various task side by side:`

In [530]:
# Concatenation
print("\nConcatenation:")
 
# list
initialTime = time.time()
list1 = list_1 + list_1
 
# calculating execution time:
print("Time taken by Lists :",
      (time.time() - initialTime),
      "seconds")
  
# Numpy array:
initialTime = time.time()
array = np.concatenate((array_1, array_1),axis = 0)
 
# calculating execution time 
print("Time taken by NumPy Arrays :", 
      (time.time() - initialTime),
      "seconds")
 
# Dot Product:
dot = 0
print("\nDot Product:")
 
# list
initialTime = time.time()
for a, b in zip(list_1, list_1):
        dot = dot + (a * b)
         
# calculating execution time
print("Time taken by Lists :", 
      (time.time() - initialTime),
      "seconds")
  
# NumPy array
initialTime = time.time()
array = np.dot(array_1, array_1)
 
# calculating execution time 
print("Time taken by NumPy Arrays :",
      (time.time() - initialTime),
      "seconds")
 
# Scalar Addtion 
print("\nScalar Addition:")
 
# list
initialTime = time.time()
list1 =[i + 2 for i in range(size)]
 
# calculating execution time
print("Time taken by Lists :",
      (time.time() - initialTime),
      "seconds")
  
# NumPy array
initialTime = time.time()
array1 = array_1 + 2
 
# calculating execution time 
print("Time taken by NumPy Arrays :", 
      (time.time() - initialTime), 
      "seconds")
 
# Deletion
print("\nDeletion: ")
 
# list
initialTime = time.time()
del(list1)
 
# calculating execution time
print("Time taken by Lists :",
      (time.time() - initialTime),
      "seconds")
  
# NumPy array
initialTime = time.time()
del(array1)
 
# calculating execution time 
print("Time taken by NumPy Arrays :", 
      (time.time() - initialTime),
      "seconds")


Concatenation:
Time taken by Lists : 10.227458953857422 seconds
Time taken by NumPy Arrays : 3.2861599922180176 seconds

Dot Product:
Time taken by Lists : 28.33166527748108 seconds
Time taken by NumPy Arrays : 0.24849390983581543 seconds

Scalar Addition:
Time taken by Lists : 15.311026811599731 seconds
Time taken by NumPy Arrays : 1.2025680541992188 seconds

Deletion: 
Time taken by Lists : 7.363348960876465 seconds
Time taken by NumPy Arrays : 0.07433199882507324 seconds


# `Conditional Operators:`

In [526]:
new_arr_ = np.arange(0,12)

print('Boolean where True is >9:',new_arr_ > 9)
print('--------------')
print('Find: 5-7:',new_arr_[(new_arr_> 4) & (new_arr_< 8)])

Boolean where True is >9: [False False False False False False False False False False  True  True]
--------------
Find: 5-7: [5 6 7]


# `Indexing:`

In [520]:
x_ =np.arange(17)

x_desc=np.arange(17,1,-1)

print('elements 2-5:',x_[2:6])

print('elements 1-8 in steps of 2:',x_[1:8:2])

print('repeated elements specified:',x_desc[np.array([4, 4, 2, 9])])

# x_
# x_desc

elements 2-5: [2 3 4 5]
elements 1-8 in steps of 2: [1 3 5 7]
repeated elements specified: [13 13 15  8]


In [499]:
# 2D:
arr = np.array([[0,1,2,3,4,5], [6,7,8,9,10,11]])

print('2nd element of 1st dimension:', arr[0, 1])

2nd element of 1st dimension: 1


In [498]:
#2D:
arr = np.array([[0,1,2,3,4,5], [6,7,8,9,10,11]])

print('6th element of 2nd dimension=', arr[1, 5])

6th element of 2nd dimension= 11


In [495]:
# 3D:
arr_ = np.array([[[0,1,2,3], [4,5,6,7]], [[8,9,10,11], [11,12,13,14]]])

print('3rd element of 2nd array inside first array =',arr_[0, 1, 2])
# arr.shape


3rd element of 2nd array inside first array= 6


# `Import/Export:`

`np.loadtxt('some_file.txt'): From a .txt file
np.genfromtxt('some_file.csv', delimiter=','): From a .csv file
np.savetxt('some_file.txt',arr, delimiter=' '): Writes to .txt file
np.savetxt('some_file.csv', arr, delimiter=','):Writes to .csv file
`


# `Inspect properties:`

`your_array.size(): Returns # of elements in array
your_array.shape(): Returns dimensions of array (rows,columns)
your_array.ndim(): Return dimensions of array
your_array.dtype(): Returns type of elements in array
your_array.astype(dtype): Convert array elements to type dtype
your_array.tolist(): Converts array to Python list
np.info(something of interest: i.e. np.linspace) 
`

# `linspace():`

+ returns evenly spaced numbers over a given/specified interval

In [461]:
np.linspace(0,3,6)

array([0. , 0.6, 1.2, 1.8, 2.4, 3. ])

# `Copy vs view:`

+ `Copy():` will provide a new array, any new changes to the array occur with the copy not the original

+ `View`: just that, a view of the array.

In [463]:
# Check if x vs y, will own its array or not!
arr = np.array([11, 12, 31, 14, 9])

x_copy = arr.copy()
y_view = arr.view()

print(x_copy.base)
print(y_view.base)

None
[11 12 31 14  9]


# `Reshape:`

In [278]:
# reshape
one_d_array = np.array([3,6,9,12])
reshape_one_d_to_two_d=np.reshape(one_d_array,(2,2))


In [543]:
print('(rows X cols)={} ,Number of elements:{}, Dim={}'.format(
one_d_array.shape,one_d_array.size,one_d_array.ndim,one_d_array))
print('----------')
print('reshape (rows X cols)={}, new size:{}, new dim={}'.format(
reshape_one_d_to_two_d.shape,reshape_one_d_to_two_d.size,
reshape_one_d_to_two_d.ndim))

(rows X cols)=(4,) ,Number of elements:4, Dim=1
----------
reshape (rows X cols)=(2, 2), new size:4, new dim=2


# `Sorting:`

+ `kind:` what sorting algorithm you want: 'quicksort', 'mergesort', 'heapsort', 'stable'

+ `axis:` 0= column, 1=row reverse of pandas!

In [221]:
a_ = np.array([[5,6,7,4],
              [9,2,3,7]])

pd.DataFrame(a_)

Unnamed: 0,0,1,2,3
0,5,6,7,4
1,9,2,3,7


In [222]:
pd.DataFrame(np.sort(a_,axis=1,kind='mergresort')) # sort row

Unnamed: 0,0,1,2,3
0,4,5,6,7
1,2,3,7,9


In [214]:
pd.DataFrame(np.sort(a_,axis=0,kind='mergresort')) # sort column

Unnamed: 0,0,1,2,3
0,5,2,3,4
1,9,6,7,7


# `Add/remove elements: from existing array`

In [443]:
# Append: You will need both arrays to have same number of dimensions 

a_p = np.array([[2,9],
             [5,77]])

appd_arry=np.append(a_p,[[3,12]], axis=0)
appd_arry

array([[ 2,  9],
       [ 5, 77],
       [ 3, 12]])

In [444]:
# Delete: By Column and Index=1 for column to drop

np.delete(appd_arry, 1, axis=1)

array([[2],
       [5],
       [3]])

In [445]:
# Drop row 2 based on index=1
np.delete(appd_arry, 1, axis=0)

array([[ 2,  9],
       [ 3, 12]])

In [448]:
# drop multiple rows:
np.delete(appd_arry, [0,2], axis=0)

array([[ 5, 77]])

# `Changing Dimensions using (Squeeze or Expand)`

+ `Expand:` add new axis (rows or columns)

+ `Squeeze:` opposite, but very distinct: will only shrink if axis has single entry
    * `example`: shape=(1,3,1), it will work on axis=0, axis=2 where they are equal to 1

In [343]:
# Squeeze:
four_d_array = np.array([[[[7], [10], [11],[12]]]])

squeeze_axis_0=np.squeeze(four_d_array,axis=0)
squeeze_axis_1=np.squeeze(four_d_array,axis=1)
squeeze_axis_3=np.squeeze(four_d_array,axis=3)

print('four_d_array: \n',four_d_array)
print('\n PAY ATTENTION TO NUMBER OF BRACKETS! \n')
print('four_d_array,squeeze_axis_0: \n',squeeze_axis_0)
print('\n These two look the same but, pay attention to next code block for distinction')
print('\n four_d_array,squeeze_axis_1: \n',squeeze_axis_1)

four_d_array: 
 [[[[ 7]
   [10]
   [11]
   [12]]]]

 PAY ATTENTION TO NUMBER OF BRACKETS! 

four_d_array,squeeze_axis_0: 
 [[[ 7]
  [10]
  [11]
  [12]]]

 These two look the same but, pay attention to next code block for distinction

 four_d_array,squeeze_axis_1: 
 [[[ 7]
  [10]
  [11]
  [12]]]


In [347]:
print('Look for the distinctions here: \n')
print('4D Array: shape{},size={},Dim={}'.format(four_d_array.shape,
                      four_d_array.size,four_d_array.ndim))
print('-----------')
print('Array SQUEEZE axis=0: shape{},size={},Dim={}'.format(squeeze_axis_0.shape,
    squeeze_axis_0.size,squeeze_axis_0.ndim))
print('-----------')
print('Array SQUEEZE axis=1: shape{},size={},Dim={}'.format(squeeze_axis_1.shape,
    squeeze_axis_1.size,squeeze_axis_1.ndim))
print('-----------')
print('Array SQUEEZE axis=3: shape{},size={},Dim={}'.format(squeeze_axis_3.shape,
    squeeze_axis_3.size,squeeze_axis_3.ndim))

Look for the distinctions here: 

4D Array: shape(1, 1, 4, 1),size=4,Dim=4
-----------
Array SQUEEZE axis=0: shape(1, 4, 1),size=4,Dim=3
-----------
Array SQUEEZE axis=1: shape(1, 4, 1),size=4,Dim=3
-----------
Array SQUEEZE axis=3: shape(1, 1, 4),size=4,Dim=3


In [298]:
# Expand:

reg_one_d = np.array([10,20,30,40,50])
reg_one_d_exp_cols = np.expand_dims(reg_one_d,axis=0)
reg_one_d_exp_rows = np.expand_dims(reg_one_d,axis=1)


In [440]:
print('Original 1D array:','\n','Shape',reg_one_d.shape,'\n',
      reg_one_d_exp_cols)
print('Expand along columns:','\n','Shape',reg_one_d_exp_cols.shape,'\n',
      reg_one_d_exp_cols)
print('Expand along rows:','\n','Shape',reg_one_d_exp_rows.shape,'\n',
      reg_one_d_exp_rows)

Original 1D array: 
 Shape (5,) 
 [[10 20 30 40 50]]
Expand along columns: 
 Shape (1, 5) 
 [[10 20 30 40 50]]
Expand along rows: 
 Shape (5, 1) 
 [[10]
 [20]
 [30]
 [40]
 [50]]


# `Vector Math/Concepts:`

In [548]:
# "I"dentity matrix: returns a square matrix of 3X3
i_matrix = np.eye(3,3)

# Zeroes
zroes_=np.zeros((6,2))

# Ones
ones_=np.ones(3)

# Random

# rnd_ = np.random.rand()


In [460]:
# SUM Operations:

mat_ = np.arange(0,25).reshape(5,5)
print('Sum of all values:',mat_.sum())
print('Column Sums:',mat_.sum(axis=0))
print('Row Sums:',mat_.sum(axis=1)) 

Sum of all values: 300
Column Sums: [50 55 60 65 70]
Row Sums: [ 10  35  60  85 110]


# `Flatten and Ravel:`

+ `Flatten:` creates a deep copy
    + `deep copy:` you are creating a new ndarray in memory, any changes made will not effect the original
    
+ `Ravel:` shallow copy
    + `shallow copy:` using a pointer for the memory location, and changes will effect original


In [318]:
ones_two_by_two = np.ones((2,2))

# Flatten:
flat_two_by_two = ones_two_by_two.flatten()

In [317]:
print('ones_two_by_two: shape={}, size= {}, dim= {}'.format(ones_two_by_two.shape,
ones_two_by_two.size,ones_two_by_two.ndim))
print('-------------------')
print('FLAT_two_by_two: shape={}, size= {}, dim= {}'.format(flat_two_by_two.shape,
            flat_two_by_two.size, flat_two_by_two.ndim))
print('---------------')
print('______Making DEEP COPY!!!!______')

ones_two_by_two: shape=(2, 2), size= 4, dim= 2
-------------------
FLAT_two_by_two: shape=(4,), size= 4, dim= 1
---------------
______Making DEEP COPY!!!!______


In [283]:
ones_two_by_two = np.ones((2,2))

# Ravel:
ravel_two_by_two = ones_two_by_two.ravel()

In [296]:

print('ones_two_by_two: shape={}, size= {}, dim= {}'.format(ones_two_by_two.shape,
ones_two_by_two.size,ones_two_by_two.ndim))
print('-------------------')
print('RAVEL_two_by_two: shape={}, size= {}, dim= {}'.format(ravel_two_by_two.shape,
            ravel_two_by_two.size, ravel_two_by_two.ndim))
print('---------------')
print('________Making SHALLOW COPY!!!!_______')

ones_two_by_two: shape=(2, 2), size= 4, dim= 2
-------------------
RAVEL_two_by_two: shape=(4,), size= 4, dim= 1
---------------
________Making SHALLOW COPY!!!!_______


# `Stats:`

`
np.mean(array,axis=?)
np.var(): variance
np.min(): min value of array
np.max(): max
np.std(): standard deviation
np.corrcoeff(): correlation coefficient
np.argmax(): returns index of largest item in array
np.argmin(): reverse
`

# `Stacking and Concatinating:`

`Vstack():` increase rows

`hstack():` increase number of columns

`Dstack():` this will concatonate along a third axis, this uses an index based approach

`Concatenate:` joining arrays along an existing axis

In [None]:
arry_one = np.arange(2,7)
arry_two =np.arange(5,10)

In [356]:
# vstack:
np.vstack((arry_one,arry_two))

array([[2, 3, 4, 5, 6],
       [5, 6, 7, 8, 9]])

In [358]:
# hstack:
np.hstack((arry_one,arry_two))

array([2, 3, 4, 5, 6, 5, 6, 7, 8, 9])

In [375]:
a_2d = np.array([[111,226],[105,256]])
b_2d = np.array([[500,600],[700,800]])
c_3d = np.dstack((a_2d,b_2d))

3


In [396]:
print('Pay attention to brackets for each vector!\n')
print('a_2d=b_2d in quantities: shape{} ,size={}, Dim={},a_2d\n \n{}:'.format(a_2d.shape,
    a_2d.size,a_2d.ndim,a_2d))
print('\n -------------------\n')
print('Dstack with c_3d: shape{} ,size={}, Dim={},c_3d:\n \n{}, look at brackets'.format(c_3d.shape,
    c_3d.size,c_3d.ndim,c_3d))

Pay attention to brackets for each vector!

a_2d=b_2d in quantities: shape(2, 2) ,size=4, Dim=2,a_2d
 
[[111 226]
 [105 256]]:

 -------------------

Dstack with c_3d: shape(2, 2, 2) ,size=8, Dim=3,c_3d:
 
[[[111 500]
  [226 600]]

 [[105 700]
  [256 800]]], look at brackets


In [416]:
# Concate:
a_2d = np.array([[111,226],[105,256]])
b_2d = np.array([[500,600],[700,800]])

# like traditional append to the end of data frame
concate_by_rows=np.concatenate((a_2d, b_2d), axis=0) # Row wise
concate_by_rows

array([[111, 226],
       [105, 256],
       [500, 600],
       [700, 800]])

In [417]:
print('a_2d: shape{}, Dim={}, size={},\nconcate_by_rows: shape{}, Dim={}, size={}'.format(
a_2d.shape,
a_2d.ndim,a_2d.size,
concate_by_rows.shape,concate_by_rows.ndim,concate_by_rows.size))

concate_by_rows

a_2d: shape(2, 2), Dim=2, size=4,
concate_by_rows: shape(4, 2), Dim=2, size=8


array([[111, 226],
       [105, 256],
       [500, 600],
       [700, 800]])

In [418]:
# think adding columns like in a data frame:
concate_by_cols=np.concatenate((a_2d, b_2d), axis=1) #column wise
concate_by_cols

array([[111, 226, 500, 600],
       [105, 256, 700, 800]])

In [425]:
print('a_2d: shape{}, Dim={},\nconcate_by_cols: shape{}, Dim={}'.format(a_2d.shape,
a_2d.ndim,concate_by_cols.shape,concate_by_cols.ndim))

a_2d: shape(2, 2), Dim=2,
concate_by_cols: shape(2, 4), Dim=2


In [426]:
# Think of extending a list instead of append
concate_by_none=np.concatenate((a_2d, b_2d), axis=None)# neither,flatten to single level
concate_by_none

array([111, 226, 105, 256, 500, 600, 700, 800])

In [427]:
print('a_2d: shape{}, Dim={},\nconcate_by_none: shape{}, Dim={}'.format(a_2d.shape,
a_2d.ndim,concate_by_none.shape,concate_by_none.ndim))

a_2d: shape(2, 2), Dim=2,
concate_by_none: shape(8,), Dim=1


# `Broadcasting:` `OUTER` operations

+ Allows you to do arithmetic operations on arrays of mismatched sizes
    + This will be done by stretching the smaller array to fill the void. It will basically wrap around values in the smaller array to make it match the size of larger array.
+ `To make this work: either they need to be the same size or one of them needs to be 1`
    + If this isn't met you will receive: `ValueError()` meaning that there are incompatible shapes

In [550]:
Aa=np.array([1.,2.,3.,4.])
print('shape, Aa: ',Aa.shape,', Bb shape: ',Bb.shape)

Bb=np.array([[.1,.2,.3,.4],[.5,.6,.7,.8]])
print('(Aa + Bb) shape: ',(Aa+Bb).shape)

Aa+Bb


shape, Aa:  (4,) , Bb shape:  (2, 4)
(Aa + Bb) shape:  (2, 4)


array([[1.1, 2.2, 3.3, 4.4],
       [1.5, 2.6, 3.7, 4.8]])

In [555]:
#2D
a = np.array([[ 0.0,  0.0,  0.0],
            [10.0, 10.0, 10.0],
           [20.0, 20.0, 20.0],
           [30.0, 30.0, 30.0]])

# 1D
b = np.array([1.0, 2.0, 3.0])

print('a shape:',a.shape,', b shape:',b.shape)
print('a+b shape:',(a+b).shape)
a+b


a shape: (4, 3) , b shape: (3,)
a+b shape: (4, 3)


array([[ 1.,  2.,  3.],
       [11., 12., 13.],
       [21., 22., 23.],
       [31., 32., 33.]])

# `Now a thoughtful example:`

+ Mental gymnastics are fun, sometimes...

In [202]:
# 1D
A=np.array([1.,2.,3.,4.,5.,6.])

# 2D
B=np.array([[.1,.2,.3,.4],[.5,.6,.7,.8]])

print('B shape:',B.shape,', A shape:',A.shape)
n=(A+np.zeros((2,6)))

print('n transpose shape:',n.T.shape)
print('change axis, n.T shape:',n[np.newaxis,:].T.shape)


B shape: (2, 4) , A shape: (6,)
n transpose shape: (6, 2)
change axis, n.T shape: (6, 2, 1)


In [264]:
print('n[np.newaxis,:].T.shape {} + B.shape{} = result shape{}'.format(n[np.newaxis,:].T.shape,B.shape,
                    (n[np.newaxis,:].T+B).shape))
print('------------------')
print('# of dimensions: n[np.newaxis,:].T.ndim =[{}] +\n B.ndim=[{}] \n= result dimensions=[{}]'.format(n[np.newaxis,:].T.ndim,B.ndim,
(n[np.newaxis,:].T+B).ndim))

n[np.newaxis,:].T+B

n[np.newaxis,:].T.shape (6, 2, 1) + B.shape(2, 4) = result shape(6, 2, 4)
------------------
# of dimensions: n[np.newaxis,:].T.ndim =[3] +
 B.ndim=[2] 
= result dimensions=[3]


array([[[1.1, 1.2, 1.3, 1.4],
        [1.5, 1.6, 1.7, 1.8]],

       [[2.1, 2.2, 2.3, 2.4],
        [2.5, 2.6, 2.7, 2.8]],

       [[3.1, 3.2, 3.3, 3.4],
        [3.5, 3.6, 3.7, 3.8]],

       [[4.1, 4.2, 4.3, 4.4],
        [4.5, 4.6, 4.7, 4.8]],

       [[5.1, 5.2, 5.3, 5.4],
        [5.5, 5.6, 5.7, 5.8]],

       [[6.1, 6.2, 6.3, 6.4],
        [6.5, 6.6, 6.7, 6.8]]])

In [234]:
# Element wise: multiplication (OUTER)
print('Number of New Dimensions:',(B*n[np.newaxis,:].T).ndim)
B*n[np.newaxis,:].T

Number of New Dimensions: 3


array([[[0.1, 0.2, 0.3, 0.4],
        [0.5, 0.6, 0.7, 0.8]],

       [[0.2, 0.4, 0.6, 0.8],
        [1. , 1.2, 1.4, 1.6]],

       [[0.3, 0.6, 0.9, 1.2],
        [1.5, 1.8, 2.1, 2.4]],

       [[0.4, 0.8, 1.2, 1.6],
        [2. , 2.4, 2.8, 3.2]],

       [[0.5, 1. , 1.5, 2. ],
        [2.5, 3. , 3.5, 4. ]],

       [[0.6, 1.2, 1.8, 2.4],
        [3. , 3.6, 4.2, 4.8]]])

# `Dot Product: 'Inner'`

In [226]:
np.dot(B.T,n)

array([[0.6, 1.2, 1.8, 2.4, 3. , 3.6],
       [0.8, 1.6, 2.4, 3.2, 4. , 4.8],
       [1. , 2. , 3. , 4. , 5. , 6. ],
       [1.2, 2.4, 3.6, 4.8, 6. , 7.2]])

# <font color=red>Like</font>, share &

# <font color=red>SUB</font>scribe

# Citations & Help:

# ◔̯◔

https://towardsdatascience.com/lets-talk-about-numpy-for-datascience-beginners-b8088722309f

https://numpy.org/doc/stable/user/whatisnumpy.html

https://towardsdatascience.com/first-step-in-data-science-with-python-numpy-5e99d6821953

https://www.dataquest.io/blog/numpy-cheat-sheet/

https://www.analyticsvidhya.com/blog/2020/04/the-ultimate-numpy-tutorial-for-data-science-beginners/

https://numpy.org/devdocs/user/theory.broadcasting.html

https://thispointer.com/delete-elements-rows-or-columns-from-a-numpy-array-by-index-positions-using-numpy-delete-in-python/