# Numpy

(1) Faster than list 

(2) Less Memory

(3) Easy Maths operations

(4) Used in AI, DS, DA

In [19]:
import numpy as np

# A plain Python list: its printed form separates the items with commas.
python_list = [1, 2, 3, 4, 5]
print(python_list)

# The same values as a NumPy array: its printed form separates items with spaces only.
numpy_array = np.array(python_list)
print(numpy_array)

[1, 2, 3, 4, 5]
[1 2 3 4 5]


In [22]:
# One-dimensional array: a single sequence of values.
import numpy as np

arr_1d = np.array([10, 20, 30, 40, 50])
print(arr_1d)

[10 20 30 40 50]


In [23]:
# Two-dimensional array (a matrix): a list of rows, each row a list of values.
import numpy as np

arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
print(arr_2d)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [24]:
# Multi-dimensional array: three levels of nesting give a 3-D array,
# here one block holding a 3x3 matrix, i.e. shape (1, 3, 3).
import numpy as np

arr_3d = np.array([
    [
        [10, 20, 30],
        [40, 50, 60],
        [70, 80, 90],
    ]
])
print(arr_3d)

[[[10 20 30]
  [40 50 60]
  [70 80 90]]]


### How do we create an array in Numpy?

##### Creating numpy Arrays

In [25]:
# Convert a Python list into a NumPy array with np.array().
import numpy as np

arr = np.array([1, 2, 3, 4])
print(arr)

[1 2 3 4]


In [27]:
# Creating arrays pre-filled with a default value.
# np.zeros(shape): every element starts as 0.0 (a placeholder to fill in later).
import numpy as np

zeros_array = np.zeros(shape=3)
print(zeros_array)

[0. 0. 0.]


In [28]:
# np.ones(shape): every element starts as 1.0; the tuple (2, 3) means 2 rows x 3 columns.
import numpy as np

ones_array = np.ones(shape=(2, 3))
print(ones_array)

[[1. 1. 1.]
 [1. 1. 1.]]


In [29]:
# np.full(shape, value): every element starts as the value you pass in (here 7).
import numpy as np

filled_array = np.full(shape=(2, 2), fill_value=7)
print(filled_array)

[[7 7]
 [7 7]]


In [30]:
# Creating a sequence of numbers as an array.
# np.arange(start, stop, step) returns a NumPy array; `stop` itself is excluded.
import numpy as np

arr = np.arange(1, 10, 2)
print(arr)

[1 3 5 7 9]


In [31]:
# Creating an identity matrix.
# np.eye(n) gives an n x n array with 1.0 on the main diagonal and 0.0 elsewhere.
import numpy as np

identity_matrix = np.eye(3)
print(identity_matrix)


[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


### Which array properties can we check, and how do we perform basic operations?

##### Array Properties & Operations

In [32]:
# Checking array properties

In [2]:
# shape: a tuple with the length of each dimension -> (rows, columns) for a 2-D array.
import numpy as np

arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
print(arr_2d.shape)

(2, 3)


In [33]:
# size: the total number of elements in the array (rows x columns for a 2-D array).
import numpy as np

arr = np.array([
    [10, 20, 30],
    [40, 50, 60],
])
print(arr.size)

6


In [34]:
# ndim: the number of dimensions, which matches the nesting depth of the list literal.
import numpy as np

arr_1d = np.array([1, 2, 3])
arr_2d = np.array([[1, 2, 3], [4, 5, 6]])
arr_3d = np.array([[[1, 2], [3, 4], [5, 6], [7, 8]]])

# Print the dimension count of each array in turn: 1, 2, 3.
for sample in (arr_1d, arr_2d, arr_3d):
    print(sample.ndim)

1
2
3


In [35]:
# dtype: the data type shared by all elements (e.g. int, float, str).
# One float in the list (30.5) makes the whole array float64.
import numpy as np

arr = np.array([10, 20, 30.5, 40])
print(arr.dtype)

float64


In [36]:
# Changing in Array

In [37]:
# astype(new_type): returns a new array whose elements are converted to another dtype
# (comparable to turning age='100' [str] into age=100 [int]).
# Converting float -> int drops the fractional part: 1.2 -> 1, 2.5 -> 2, 3.8 -> 3.
import numpy as np

arr = np.array([1.2, 2.5, 3.8])
print(f"Before Changing : {arr.dtype}")

int_arr = arr.astype(int)
print(f"int_array : {int_arr}")
print(f"After chaning : {int_arr.dtype}")

Before Changing : float64
int_array : [1 2 3]
After chaning : int32


##### Mathematical Operations on NumPy arrays : fast element-wise execution without loops

In [38]:
import numpy as np

arr = np.array([10, 20, 30])

# Each arithmetic operator is applied to every element; no explicit loop over
# the elements is needed: add 5, double, then square.
for outcome in (arr + 5, arr * 2, arr ** 2):
    print(outcome)

[15 25 35]
[20 40 60]
[100 400 900]


In [39]:
# Aggregation functions: reduce a whole array to a single summary value.

import numpy as np
arr = np.array([10, 20, 30, 40, 50])

print(f"Sum : {np.sum(arr)}")
print(f"Mean : {np.mean(arr)}")
print(f"Min : {np.min(arr)}")
# Use np.max rather than Python's built-in max(): the built-in iterates element by
# element in Python and raises ValueError on arrays with more than one dimension.
print(f"Max : {np.max(arr)}")
print(f"Standard Deviation : {np.std(arr)}")
print(f"Variance : {np.var(arr)}")

Sum : 150
Mean : 30.0
Min : 10
Max : 50
Standard Deviation : 14.142135623730951
Variance : 200.0


### How do we index and slice arrays to access specific elements or ranges?

##### Indexing & Slicing Arrays

In [42]:
# Indexing

In [40]:
# Indexing syntax:
#   array[index]        -> element of a 1-D array
#   array[row, column]  -> element of a 2-D array

import numpy as np
arr = np.array([10, 20, 30, 40, 50])

print(arr[0])                    # First element
print(arr[2])                    # Third element
print(arr[-1])                   # Last element (negative indices count from the end)

# Index 10 does not exist in a 5-element array, so NumPy raises IndexError.
# The error is caught and printed so this demonstration does not abort
# a "Restart & Run All" of the notebook.
try:
    print(arr[10])
except IndexError as exc:
    print(f"IndexError: {exc}")

10
30
50


IndexError: index 10 is out of bounds for axis 0 with size 5

In [41]:
# Slicing

In [44]:
"""
array[start : stop : step]    -----> (start) to (stop - 1) with (step)
array[start : stop]           -----> (start) to (stop - 1) with (default step 1)
array[ : stop]                -----> (default start 0) to (stop - 1) with (default step 1)
negative step (-1) : print reverse
"""

'\narray[start : stop : step]    -----> (start) to (stop - 1) with (step)\narray[start : stop]           -----> (start) to (stop - 1) with (default step 1)\narray[ : stop]                -----> (default start 0) to (stop - 1) with (default step 1)\nnegative step (-1) : print reverse\n'

In [45]:
import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])

print(arr[1:5])    # indices 1 through 4 (stop index 5 is excluded)
print(arr[:4])     # indices 0 through 3 (start defaults to 0)
print(arr[::2])    # every second element, starting at index 0
print(arr[::-1])   # step -1 walks backwards: the array reversed

[20 30 40 50]
[10 20 30 40]
[10 30 50]
[60 50 40 30 20 10]


In [46]:
# Fancy indexing : selecting multiple elements at once; it returns a copy, so the original array is not changed

In [47]:
import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])

# Passing a list of positions picks exactly those elements: 10 30 50.
positions = [0, 2, 4]
print(arr[positions])

[10 30 50]


In [49]:
import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])

# `arr > 25` yields one True/False per element; indexing with that mask
# keeps only the elements for which the condition holds.
mask = arr > 25
print(arr[mask])

[30 40 50 60]


### How can we reshape and manipulate arrays for different data needs?

##### Reshaping & Manipulation

In [1]:
# Reshaping

In [2]:
"""
Reshaping : change the dimensions without changing the data 
reshape(rows, columns) specify new shape
it reshapes if dimensions match 
"""

'\nReshaping : change the dimensions without changing the data \nreshape(rows, columns) specify new shape\nit reshapes if dimensions match \n'

In [6]:
# reshape converts a 1-D array into a multi-dimensional one.
# Here it does not copy the data: it returns a view onto the same memory,
# so changing an element of reshaped_arr also changes arr.
import numpy as np

arr = np.array([1, 2, 3, 4, 5, 6])
reshaped_arr = arr.reshape((2, 3))   # 6 elements -> 2 rows x 3 columns

print(reshaped_arr)

[[1 2 3]
 [4 5 6]]


In [7]:
# flattening

In [8]:
"""
# Use to convert multi-dimensional array to 1d Array
.ravel() --> views (affect the original array)
.flatten() --> copy (does not affect the original)
""" 

'\n# Use to convert multi-dimensional array to 1d Array\n.ravel() --> views (affect the original array)\n.flatten() --> copy (does not affect the original)\n'

In [9]:
import numpy as np

arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Both calls print the same 1-D result; they differ in whether the data is shared
# with arr_2d (ravel: a view here) or duplicated (flatten: always a copy).
flat_view = arr_2d.ravel()
flat_copy = arr_2d.flatten()
print(flat_view)
print(flat_copy)

[1 2 3 4 5 6]
[1 2 3 4 5 6]


### What methods are available for modifying arrays (inserting, appending, deleting)?

##### Array Modification

In [21]:
# Inserting Elements

In [10]:
"""
np.insert(array, index, value, axis=None)

array = original array
index = position where you want to insert value 
value = value that you want to insert 
axis = None (Flatten array) 1d Array
axis = 0 (row-wise)
axis = 1 (column-wise)
"""

'\nnp.insert(array, index, value, axis=None)\narray = original array\nindex = position where you want to insert value \nvalue = value that you want to insert \naxis = None (Flatten array) 1d Array\naxis = 0 (row-wise)\naxis = 1 (column-wise)\n'

In [12]:
# Inserting into a 1-D array.
# np.insert returns a NEW array; `arr` itself is left unchanged.
import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])
new_arr = np.insert(arr, 2, 100, axis=0)   # put 100 in front of index 2

print(f"Befor inserting : {arr}")
print(f"After inserting : {new_arr}")

Befor inserting : [10 20 30 40 50 60]
After inserting : [ 10  20 100  30  40  50  60]


In [20]:
# Inserting into a 2-D array: the `axis` argument decides how [5, 6] is placed.
import numpy as np

arr_2d = np.array([
    [1, 2],
    [3, 4],
])

# axis=0 -> [5, 6] becomes a new row in front of row index 1.
new_row_arr_2d = np.insert(arr_2d, 1, [5, 6], axis=0)

# axis=1 -> [5, 6] becomes a new column in front of column index 1.
new_column_arr_2d = np.insert(arr_2d, 1, [5, 6], axis=1)

# axis=None -> the array is flattened first, then 5 and 6 go in front of index 1.
new_flatten_arr_2d = np.insert(arr_2d, 1, [5, 6], axis=None)

print(f"Befor inserting :\n{arr_2d}")
print(f"After inserting row-wise :\n{new_row_arr_2d}")
print(f"After inserting column-wise :\n{new_column_arr_2d}")
print(f"After inserting flatten array :\n{new_flatten_arr_2d}")

Befor inserting :
[[1 2]
 [3 4]]
After inserting row-wise :
[[1 2]
 [5 6]
 [3 4]]
After inserting column-wise :
[[1 5 2]
 [3 6 4]]
After inserting flatten array :
[1 5 6 2 3 4]


In [22]:
# Appending Elements

In [23]:
"""
np.append(array, value)

array = original array 
value = value that you want to insert
"""

'\nnp.append(array, value)\narray = original array \nvalue = value that you want to insert\n'

In [24]:
import numpy as np

arr = np.array([10, 20, 30])

# np.append returns a new array with the extra values added at the end;
# `arr` itself keeps its three elements.
new_arr = np.append(arr, [40, 50, 60])
print(new_arr)

[10 20 30 40 50 60]


In [25]:
# Concatenate 2 arrays

In [26]:
"""
np.concatenate((array1,array2), axis = 0)

vstack() : axis 0 ---> vertical stacking 
hstack() : axis 1 ---> horizontal stacking
"""

'\nnp.concatenate((array1,array2), axis = 0)\n\naxis 0 ---> vertical stacking\naxis 1 ---> horizontal stacking\n'

In [27]:
import numpy as np

arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

# The arrays to join are passed together as one tuple; for 1-D inputs the
# second array is simply placed after the first.
new_arr1 = np.concatenate((arr1, arr2))
print(new_arr1)

[1 2 3 4 5 6]


In [30]:
# Horizontal stacking (axis=1): hstack() places the arrays side by side, adding columns.
# The number of ROWS must be the same in both arrays.
import numpy as np

arr1 = np.array([[1, 1], 
                 [2, 2]])

arr2 = np.array([[3, 3], 
                 [4, 4]])

# Both calls below produce the same 2x4 result for these 2-D inputs.
new_arr_1 = np.concatenate((arr1, arr2), axis=1)
new_arr_2 = np.hstack((arr1,arr2))

print(new_arr_1)
print(new_arr_2)

[[1 1 3 3]
 [2 2 4 4]]
[[1 1 3 3]
 [2 2 4 4]]


In [31]:
# Vertical stacking (axis=0): vstack() places one array below the other, adding rows.
# The number of COLUMNS must be the same in both arrays.
import numpy as np

arr1 = np.array([[1, 1],
                [2, 2]])

arr2 = np.array([[3, 3],
                [4, 4]])

# Both calls below produce the same 4x2 result for these 2-D inputs.
new_arr_1 = np.concatenate((arr1, arr2), axis=0)
new_arr_2 = np.vstack((arr1, arr2))

print(new_arr_1)
print(new_arr_2)

[[1 1]
 [2 2]
 [3 3]
 [4 4]]
[[1 1]
 [2 2]
 [3 3]
 [4 4]]


In [32]:
# Removing Elements of array

In [33]:
"""
np.delete(array, index, axis = None)

array = original array
index = position of the element you want to remove
axis = None (Flatten array) 1d Array
"""

'\nnp.delete(array, index, axis = None)\n\narray = original array\nindex = position of the element you want to remove\naxis = None (Flatten array) 1d Array\n'

In [34]:
# Removing an element from a 1-D array.
# np.delete returns a NEW array without the element at the given index;
# `arr` itself is left unchanged.
import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])
new_arr = np.delete(arr, 0)   # drop the element at index 0

print(f"Before removing element : {arr}")
print(f"After removing element : {new_arr}")

Before removing element : [10 20 30 40 50 60]
After removing element : [20 30 40 50 60]


In [35]:
# Removing from a 2-D array: with axis=0 the index refers to a whole row.
import numpy as np

arr_2d = np.array([
    [1, 2, 3],
    [4, 5, 6],
])

# Drop row 0; the result is still 2-D, with the single remaining row.
new_arr_2d = np.delete(arr_2d, 0, axis=0)
print(new_arr_2d)

[[4 5 6]]


In [None]:
# Splitting

In [36]:
"""
np.split() ---> equal
np.hsplit() ---> horizontally
np.vsplit() ---> vertical 
"""

'\nnp.split() ---> equal\nnp.hsplit() ---> horizontally\nnp.vsplit() ---> vertical \n'

In [37]:
import numpy as np

arr = np.array([10, 20, 30, 40, 50, 60])

# The second argument is the number of equal parts. The array length must be
# divisible by it; e.g. asking for 20 parts of this 6-element array raises ValueError.
halves = np.split(arr, 2)
print(halves)

[array([10, 20, 30]), array([40, 50, 60])]


### How do broadcasting and vectorization make array operations more efficient?

##### Broadcasting & Vectorisation

In [38]:
# Without broadcasting (problem) : loops are slow

In [39]:
# Plain-Python version: every price has to be handled one at a time in a loop.
prices = [100, 200, 300]
discount = 10  # discount in percent

final_prices = []
for price in prices:
    # Subtract the discount amount (price * discount / 100) from the price.
    final_prices.append(price - (price * discount / 100))

print(final_prices)

[90.0, 180.0, 270.0]


In [41]:
# Broadcasting (solution) : without loops : fast

In [42]:
import numpy as np

prices = np.array([100, 200, 300])
discount = 10  # a single scalar value, in percent

# The scalar is applied to every element of `prices` at once: no loop needed.
discount_amount = prices * discount / 100
final_prices = prices - discount_amount

print(final_prices)

[ 90. 180. 270.]


In [43]:
"""
Broadcasting 3 rules
1) matching dimensions           ( [1,2,3] + [4,5,6] = [5,7,9])
2) expanding single element      ( [1,2,3] + 10 = [11,12,13])
3) incompatible shapes           ( [1,2,3] + [1,2] = error)
"""

'\nBroadcasting 3 rules\n1) matching dimensions           ( [1,2,3] + [4,5,6] = [5,7,9])\n2) expanding single element      ( [1,2,3] + 10 = [11,12,13])\n3) incompatible shapes           ( [1,2,3] + [1,2] = error)\n'

In [46]:
# 1) Matching dimensions: broadcasting a 1-D array across a 2-D array.
# The vector has 3 elements and each matrix row has 3 elements, so the vector
# is added to every row of the matrix.
import numpy as np

matrix = np.array([
    [1, 2, 3],
    [4, 5, 6],
])                                   # shape (2, 3)
vector = np.array([10, 20, 30])      # shape (3,)

result = matrix + vector
print(result)

[[11 22 33]
 [14 25 36]]


In [47]:
# 2) Expanding a single element: the scalar 2 is applied to every element of the array.
import numpy as np

arr = np.array([100, 200, 300])
result = arr * 2

print(result)

[200 400 600]


In [48]:
# 3) Incompatible shapes

import numpy as np
arr1 = np.array([[1, 2, 3],
                 [4, 5, 6]])   # shape (2, 3)

arr2 = np.array([1, 2])        # shape (2,)

# Each row of arr1 has 3 elements but arr2 has 2, so the shapes cannot be
# broadcast together and NumPy raises ValueError. The error is caught and printed
# so this demonstration does not abort a "Restart & Run All" of the notebook.
try:
    result = arr1 + arr2
    print(result)
except ValueError as exc:
    print(f"ValueError: {exc}")

ValueError: operands could not be broadcast together with shapes (2,3) (2,) 

In [49]:
# Vectorisation

In [50]:
# Plain-Python element-wise addition: pair the items up with zip() and add each pair.
# This runs one Python-level step per element, which gets slow for large data.
list1 = [1, 2, 3]
list2 = [4, 5, 6]

result = [left + right for left, right in zip(list1, list2)]

print(result)

[5, 7, 9]


In [52]:
# Vectorised version: `+` on two NumPy arrays adds matching elements in one operation.
import numpy as np

arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
result = arr1 + arr2

print(result)

[5 7 9]


In [53]:
# One scalar applied to the entire array: every element is multiplied by 3.
import numpy as np

arr = np.array([10, 20, 30])
multiplied = arr * 3

print(multiplied)

[30 60 90]


### What are some ways to handle missing values in Numpy arrays?

##### Handling Missing Values

In [1]:
# NAN : Not a number
"""
3 builtin function
1) np.isnan    ---> detect missing values (returns boolean values)
2) np.nan_to_num()   ---> replace nan values
3) np.isinf()  ---> detect infinite values
"""

'\n3 builtin function\n1) np.isnan    ---> detect missing values (returns boolean values)\n2) np.nan_to_num()   ---> replace nan values\n3) np.isinf()  ---> detect infinite values\n'

In [2]:
import numpy as np

arr = np.array([1, 2, np.nan, 4, np.nan, 6])

# np.isnan returns one boolean per element: True where the value is NaN.
nan_mask = np.isnan(arr)
print(nan_mask)

# NaN cannot be detected with `==`: np.nan == np.nan evaluates to False,
# which is why np.isnan is needed.

[False False  True False  True False]


In [5]:
# np.nan_to_num(array, nan=value) returns a new array with every NaN replaced.
# When `nan` is not given, NaN is replaced with 0.

import numpy as np

arr = np.array([1, 2, np.nan, 4, np.nan, 6])

# Print the array produced in THIS cell (cleaned_arr_0). The earlier code printed
# `cleaned_arr`, a name defined only in a later cell, which raises NameError on a
# fresh kernel.
cleaned_arr_0 = np.nan_to_num(arr)
print(f"Default valuec : {cleaned_arr_0}")

cleaned_arr_100 = np.nan_to_num(arr, nan=100)
print(f"User entered value : {cleaned_arr_100}")

Default valuec : [1. 2. 0. 4. 0. 6.]
User entered value : [  1.   2. 100.   4. 100.   6.]


In [6]:
# np.isinf() flags infinite values, both +inf and -inf.
# Infinity stands for results too large to represent, e.g. 10^1000 or 1/0.
import numpy as np

arr = np.array([1, 2, np.inf, 4, -np.inf, 6])

inf_mask = np.isinf(arr)
print(inf_mask)

[False False  True False  True False]


In [7]:
# Replace infinite values with finite ones.
# np.nan_to_num takes separate replacements for +inf (posinf) and -inf (neginf).
import numpy as np

arr = np.array([1, 2, np.inf, 4, -np.inf, 6])
cleaned_arr = np.nan_to_num(arr, posinf=1000, neginf=-1000)

print(np.isinf(arr))   # where the infinities are in the original array
print(cleaned_arr)     # same data with +inf -> 1000 and -inf -> -1000

[False False  True False  True False]
[    1.     2.  1000.     4. -1000.     6.]
