<H1> Numpy Basics
<H4> -- By Sajal Kapoor

<H3><u>Importing Numpy

In [2]:
import numpy as np

<H3><u> Basic Numpy Functionalities

In [3]:
# Creation of a numpy array
arr=np.array([1,2,3,4,5,6,7,8,9,10])
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [4]:
#Creation of array using arange() function
arr=np.arange(1,20,2)       # Start, Stop, Step
arr

array([ 1,  3,  5,  7,  9, 11, 13, 15, 17, 19])

In [5]:
# Create an array with evenly spaced values
arr=np.linspace(0,1,5)      # Start, Stop, No. of values
arr

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [6]:
# Create a logarithmic scaled value array with equally spaced values
arr=np.logspace(1,3,4)      # Start, Stop, No. of values. Base is 10 by default (10^1 to 10^3 in this example).
arr

array([  10.        ,   46.41588834,  215.443469  , 1000.        ])

In [7]:
# Check for the dimensions of an array
arr=np.array([[12, 34, 1, 2, 3], [1, 2, 3, 4, 5]])      # Blue bracket-> Rows, Elements separated by commas-> Columns
arr.ndim

2

In [8]:
# Check for the shape of an array
arr=np.array([[12, 34, 1, 2, 3], [1, 2, 3, 4, 5]])
arr.shape

(2, 5)

In [9]:
# Check for the size (no. of elements) of an array
arr=np.array([[12, 34, 1, 2, 3], [1, 2, 3, 4, 5]])
arr.size

10

In [10]:
# Check for the item size (in bytes) of an array
arr=np.array([[12, 34, 1, 2, 3], [1, 2, 3, 4, 5]])
arr.itemsize

8

In [11]:
# Check datatype of an array
arr=np.array([1,2,3,4,5,6,7,8,9,10])
arr.dtype

dtype('int64')

In [12]:
# Create a Zero Array
arr=np.zeros(5)
arr

array([0., 0., 0., 0., 0.])

In [None]:
# Check for a Non-Zero Value in an Array
arr=np.array([0,1,5,7,10,0,0,3,0,8])
print(arr.nonzero())    # Prints the Indexes at which the nonzero elements are present

(array([1, 2, 3, 4, 7, 9]),)


In [13]:
# Create a Multidimensional Zero Array
arr=np.zeros([2,3])     # rows, columns
arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [14]:
# Create a Ones Array
arr=np.ones(5)
arr

array([1., 1., 1., 1., 1.])

In [15]:
# Create a Multidimensional Ones Array
arr=np.ones([2,3])     # rows, columns
arr

array([[1., 1., 1.],
       [1., 1., 1.]])

In [16]:
# By default, the datatype in zeros and ones array is float. To change it, use dtype parameter.
arr=np.zeros([2,3], dtype=int)     # rows, columns
print(arr, "\n")
arr=np.ones([2,3], dtype=int)     # rows, columns
print(arr)

[[0 0 0]
 [0 0 0]] 

[[1 1 1]
 [1 1 1]]


In [17]:
# Create an array full of the value of our choice.
arr=np.full([3,4],7)    # shape, value
arr

array([[7, 7, 7, 7],
       [7, 7, 7, 7],
       [7, 7, 7, 7]])

In [18]:
# Create an Empty/Uninitialized array
arr=np.empty([2,3])
arr

array([[5.e-324, 5.e-324, 5.e-324],
       [5.e-324, 5.e-324, 5.e-324]])

In [19]:
# Create a random float array
arr=np.random.rand(2,3)     # float values ranges from 0 to 1
arr

array([[0.39012419, 0.64126782, 0.37502651],
       [0.48856828, 0.92829251, 0.43366394]])

In [20]:
# Create an array with random values from a standard normal distribution curve (A curve whose mean is always 0 and standard deviation is always 1)
arr=np.random.randn(2,3)
arr

array([[ 0.6388636 ,  0.73098423, -0.63250605],
       [-1.19773724,  2.07394919,  0.24424123]])

In [21]:
# Create an array with random integer values
arr=np.random.randint(10, 300, size=(2,4))   # Start, Stop, Size. By default, the size is 0 (means it gives ony a single value as the output)
arr

array([[233, 127, 265, 205],
       [ 14, 231,  28, 185]], dtype=int32)

<H3><u>Typecasting

In [22]:
arr=np.array([1,2,3,4,5,6,7,8,9,10])
arr.dtype

dtype('int64')

In [23]:
newarr=arr.astype(float)    # Change datatype to float
print(newarr)
newarr.dtype

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


dtype('float64')

<H3><u>Array Reshaping

In [24]:
# Reshape
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
print(arr)
print(arr.shape, "\n")
newarr=arr.reshape(5,2)    # rows, columns
print(newarr)
print(newarr.shape)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
(2, 5) 

[[ 1  2]
 [ 3  4]
 [ 5  6]
 [ 7  8]
 [ 9 10]]
(5, 2)


In [25]:
# Ravel (Convert to 1D array, A.K.A Flattening the array)
# It returns a view of the original array whenever possible. So, modifying one will modify the other also.
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
print(arr)
print(arr.shape, "\n")
newarr=arr.ravel()
print(newarr)
print(newarr.shape)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
(2, 5) 

[ 1  2  3  4  5  6  7  8  9 10]
(10,)


In [26]:
# Flatten (Convert to 1D array, A.K.A Flattening the array)
# It always returns a copy of the original array. So, modifying one will NOT modify the other.
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
print(arr)
print(arr.shape, "\n")
newarr=arr.flatten()
print(newarr)
print(newarr.shape)

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]
(2, 5) 

[ 1  2  3  4  5  6  7  8  9 10]
(10,)


<H3><u>Arithmetic Operations on Arrays

In [27]:
arr1=np.array([[1,2,3],[4,5,6]])
arr2=np.array([[7,8,9],[10,11,12]])

In [28]:
# Addition
print(arr1+arr2)

[[ 8 10 12]
 [14 16 18]]


In [29]:
# Subtraction
print(arr1-arr2, "\n")
print(arr2-arr1)

[[-6 -6 -6]
 [-6 -6 -6]] 

[[6 6 6]
 [6 6 6]]


In [30]:
# Division
print(arr1/arr2, "\n")
# If there is a ZeroDivisionError, the element in the resultant array is converted to "inf" or "-inf", depending upon the nature of infinity.
arr2[1][2]=0
print(arr1/arr2)

[[0.14285714 0.25       0.33333333]
 [0.4        0.45454545 0.5       ]] 

[[0.14285714 0.25       0.33333333]
 [0.4        0.45454545        inf]]


  print(arr1/arr2)


In [31]:
# Floor Division (or Integer Division)
print(arr2//arr1)
# If there is a ZeroDivisionError, the element in the resultant array is converted to 0.

[[7 4 3]
 [2 2 0]]


In [32]:
# Multiplication
print(arr1*arr2)

[[ 7 16 27]
 [40 55  0]]


In [33]:
# Modulus
print(arr1%arr2)
# If there is a ZeroDivisionError, the element in the resultant array is converted to 0.

[[1 2 3]
 [4 5 0]]


  print(arr1%arr2)


In [34]:
# Exponentiation
print(arr1**2, "\n")
print(arr2**2, "\n")
print(arr1**arr2)

[[ 1  4  9]
 [16 25 36]] 

[[ 49  64  81]
 [100 121   0]] 

[[       1      256    19683]
 [ 1048576 48828125        1]]


<H3><u>Universal Functions (Ufuncs)

In [35]:
arr=np.array([1,4,9,16,25])
arr

array([ 1,  4,  9, 16, 25])

In [36]:
# Square Root
print(np.sqrt(arr))

[1. 2. 3. 4. 5.]


In [37]:
# Exponential (e^x)
print(np.exp([2,3]), "\n")
print(np.exp(arr))

[ 7.3890561  20.08553692] 

[2.71828183e+00 5.45981500e+01 8.10308393e+03 8.88611052e+06
 7.20048993e+10]


In [38]:
# Sine Function
angles=np.array([0, np.pi, np.pi/2])        # Pi is represented as np.pi in numpy
print(np.sin(angles))

[0.0000000e+00 1.2246468e-16 1.0000000e+00]


<H3><u>Multidimensional Slicing / Matrix Slicing

In [39]:
arr=np.array([[1,2,3],[4,5,6],[7,8,9]])
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [40]:
print(arr[0:2,0:2], "\n") # StartRow:EndRow, StartColumn:EndColumn (EndRow and EndColumn not included)
print(arr[1:,1:])

[[1 2]
 [4 5]] 

[[5 6]
 [8 9]]


In [41]:
# Index Arrays / Advanced Arrays slicing
# np.take -> A built in function to take elements from an array along an axis.
indices = [0, 2]
print(np.take(arr, indices, axis=0), "\n")   # axis=0 -> rows
print(np.take(arr, indices, axis=1))   # axis=1 -> columns

[[1 2 3]
 [7 8 9]] 

[[1 3]
 [4 6]
 [7 9]]


<H3><u>Iterating through numpy arrays

In [42]:
arr=np.array([[[1,2,3,4,5],[6,7,8,9,10]],[[11,12,13,14,15],[16,17,18,19,20]],[[21,22,23,24,25],[26,27,28,29,30]]])
arr

array([[[ 1,  2,  3,  4,  5],
        [ 6,  7,  8,  9, 10]],

       [[11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20]],

       [[21, 22, 23, 24, 25],
        [26, 27, 28, 29, 30]]])

In [43]:
for i in np.nditer(arr):
    print(i, end=' ')

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 

In [44]:
for index, i in np.ndenumerate(arr):
    print(index, i)

(0, 0, 0) 1
(0, 0, 1) 2
(0, 0, 2) 3
(0, 0, 3) 4
(0, 0, 4) 5
(0, 1, 0) 6
(0, 1, 1) 7
(0, 1, 2) 8
(0, 1, 3) 9
(0, 1, 4) 10
(1, 0, 0) 11
(1, 0, 1) 12
(1, 0, 2) 13
(1, 0, 3) 14
(1, 0, 4) 15
(1, 1, 0) 16
(1, 1, 1) 17
(1, 1, 2) 18
(1, 1, 3) 19
(1, 1, 4) 20
(2, 0, 0) 21
(2, 0, 1) 22
(2, 0, 2) 23
(2, 0, 3) 24
(2, 0, 4) 25
(2, 1, 0) 26
(2, 1, 1) 27
(2, 1, 2) 28
(2, 1, 3) 29
(2, 1, 4) 30


<H3><u> View v/s Copy

<p> 
In a <b>View</b>, if a value is changed, then the value in the original array also gets changed.
Whereas, in a <b>Copy</b>, if a value is changed, then the original array is not affected.
</p>

In [47]:
arr=np.array([1,2,3,4,5])
arr

array([1, 2, 3, 4, 5])

In [50]:
view=arr[1:4]
view

array([2, 3, 4])

In [52]:
view[0]=20
print(view, "\n")
print(arr)

[20  3  4] 

[ 1 20  3  4  5]


In [53]:
copy=arr[1:4].copy()
copy

array([20,  3,  4])

In [55]:
copy[0]=35
print(copy, "\n")
print(arr)

[35  3  4] 

[ 1 20  3  4  5]


<H3><u>Transpose of a Matrix

In [56]:
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [57]:
print(arr.transpose())

[[ 1  6]
 [ 2  7]
 [ 3  8]
 [ 4  9]
 [ 5 10]]


<H3><u>Swap Axes in a matrix

In [60]:
arr=np.array([[[1,2,3,4,5],[6,7,8,9,10]]])
arr.shape

(1, 2, 5)

In [None]:
swap=np.swapaxes(arr, 0, 1)     # Array, Index of the 1st axis, Index of the 2nd axis.  Index -> index of the axes in the tuple of "array.shape"
swap.shape

(2, 1, 5)

<H3><u>Concatenation of Arrays

In [70]:
arr1=np.array([[1,2,3,4,5],[6,7,8,9,10]])
arr2=np.array([[11,12,13,14,15],[16,17,18,19,20]])
print(arr1,arr2, sep="\n\n")

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]]

[[11 12 13 14 15]
 [16 17 18 19 20]]


In [72]:
combi=np.concatenate((arr1,arr2))
combi

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20]])

In [None]:
# Vstack Function -> Concatenates and converts the arrays into a single Vertical Stack
print(np.vstack((arr1,arr2)))

[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]
 [16 17 18 19 20]]


In [76]:
# Hstack Function -> Concatenates and converts the arrays into a single Horizontal Stack
print(np.hstack((arr1,arr2)))

[[ 1  2  3  4  5 11 12 13 14 15]
 [ 6  7  8  9 10 16 17 18 19 20]]


In [79]:
# Stack Function -> Concatenates and converts the arrays into a stack. 
# The Axes have to be mentioned in the function.
# Axis=0 -> Row wise.
# Axis=1 -> Column Wise.
print("Row Wise:\n", np.stack((arr1,arr2), axis=0), "\n")
print("Column Wise:\n", np.stack((arr1,arr2), axis=1))

Row Wise:
 [[[ 1  2  3  4  5]
  [ 6  7  8  9 10]]

 [[11 12 13 14 15]
  [16 17 18 19 20]]] 

Column Wise:
 [[[ 1  2  3  4  5]
  [11 12 13 14 15]]

 [[ 6  7  8  9 10]
  [16 17 18 19 20]]]


<H3><u>Splitting of Arrays

In [80]:
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [None]:
print(np.split(arr, 2))     # Array, No. of parts

[array([[1, 2, 3, 4, 5]]), array([[ 6,  7,  8,  9, 10]])]


In [None]:
# Hsplit function -> Splits the array into Horizontal Arrays
print(np.hsplit(arr, 5))    # Array, No. of parts

[array([[1],
       [6]]), array([[2],
       [7]]), array([[3],
       [8]]), array([[4],
       [9]]), array([[ 5],
       [10]])]


In [104]:
# Vsplit function -> Splits the array into Vertical Arrays
print(np.vsplit(arr, 2))    # Array, No. of parts

[array([[1, 2, 3, 4, 5]]), array([[ 6,  7,  8,  9, 10]])]


<H3><u>Repeating

In [105]:
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [None]:
# Repeat() Function -> Repeats each element of an array, a specified no. of times
print(np.repeat(arr, 2))       # Array, No. of times to repeat each element

[ 1  1  2  2  3  3  4  4  5  5  6  6  7  7  8  8  9  9 10 10]


In [109]:
# Tile() Function -> Repeats the whole array, a specified no. of times
print(np.tile(arr, 3))

[[ 1  2  3  4  5  1  2  3  4  5  1  2  3  4  5]
 [ 6  7  8  9 10  6  7  8  9 10  6  7  8  9 10]]


<H3><u>Aggregate Functions

In [110]:
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [None]:
# Sum of Elements in the Array
print(np.sum(arr))
# OR
print(arr.sum())

55
55


In [130]:
# Sum of Elements along any one of the Axes.
print("Column wise Sum: ", arr.sum(axis=0), "\n")
print("Row wise Sum: ", arr.sum(axis=1))

Column wise Sum:  [ 7  9 11 13 15] 

Row wise Sum:  [15 40]


In [None]:
# Mean of the Elements in the Array
print(np.mean(arr))
# OR
print(arr.mean())

5.5
5.5


In [None]:
# Median of the Elements in the Array
print(np.median(arr))

5.5


In [None]:
# Standard Deviation of the Elements in the Array
print(np.std(arr))
# OR
print(arr.std())

2.8722813232690143
2.8722813232690143


In [125]:
# Minimum value Element in an Array
print(np.min(arr))
# OR
print(arr.min())

1
1


In [126]:
# Maximum value Element in an Array
print(np.max(arr))
# OR
print(arr.max())

10
10


<H3><u>Cumulative Functions

<p>Used for calculating Running Totals.</p>

In [132]:
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])
arr

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [None]:
# Cumulative Summation
print(arr.cumsum())

[ 1  3  6 10 15 21 28 36 45 55]


In [134]:
# Cumulative Product
print(arr.cumprod())

[      1       2       6      24     120     720    5040   40320  362880
 3628800]


<H3><u>Condition Based Choices

In [136]:
arr=np.array([[10,20,30,40,50],[60,70,80,90,100]])
arr

array([[ 10,  20,  30,  40,  50],
       [ 60,  70,  80,  90, 100]])

In [137]:
print(np.where(arr <= 60, "Low", "High"))

[['Low' 'Low' 'Low' 'Low' 'Low']
 ['Low' 'High' 'High' 'High' 'High']]


In [None]:
print(np.argwhere(arr>=40))

[[0 3]
 [0 4]
 [1 0]
 [1 1]
 [1 2]
 [1 3]
 [1 4]]


In [149]:
# Logical AND
print("Logical AND:\n", np.logical_and(arr>30, arr<70), "\n")
print("Logical OR:\n", np.logical_or(arr<30, arr>70), "\n")
print("Logical NOT:\n", np.logical_not(arr>30), "\n")
print("Logical XOR:\n", np.logical_and(arr>30, arr<70))

Logical AND:
 [[False False False  True  True]
 [ True False False False False]] 

Logical OR:
 [[ True  True False False False]
 [False False  True  True  True]] 

Logical NOT:
 [[ True  True  True False False]
 [False False False False False]] 

Logical XOR:
 [[False False False  True  True]
 [ True False False False False]]


<H3> <u>Broadcasting</u>

<p>Stretching the shape of smaller arrays to match the shape of larger arrays.<br>
Numpy compares the shape of arrays from the end (Right to Left).<br><br>
Rules:<br>
1. If shapes are Equal, they are Compatible.<br>
2. If shape of any one array is 1 (single element), then it can be stretched to match the other.<br>
3. If the above conditions do NOT satisfy (any one condition needs to satisfy), an error is generated.<br>
</p>

<p>For eg:<br>
If one array has shape (3,1) and the other has shape (1,3), then the resultant array will have the shape (3,3).<br>
But, If one array has shape (3,2) and the other one has shape (2,3), then there are <b>NO OPERATIONS POSSIBLE</b>, i.e., the compiler will throw an error.

In [None]:
arr1=np.array([1,2,3,4,5])
arr2=np.array(5)
print(arr1, "\n")
print(arr2, "\n")
print(arr1+arr2)    # Here, "arr2 = 5" will get converted to "arr2 = [5,5,5,5,5]" to perform the operation as it satisfies Condition-2.

[1 2 3 4 5] 

5 

[ 6  7  8  9 10]


In [5]:
img=np.array([[100,250],[200, 250]])
img.shape

(2, 2)

In [6]:
bright=img + 50
bright

array([[150, 300],
       [250, 300]])

<H3><u>Vectorization

<p><u>Use Cases / Advantages:</u><br>
1. Converting a function into a vector function (A function which can be applied to arrays).<br>
2. When you want to apply a custom function to the array.<br>
3. For better Readability.<br>
<br>
<u>Disadvantages:</u><br>
1. Its not faster than a loop.<br>
2. It only makes the syntax cleaner. It does not improve the speed like real vectorized numpy functions.

In [7]:
def square(x):
    return x*x

vfunc=np.vectorize(square)

In [11]:
arr=np.array([[1,2,3,4,5],[6,7,8,9,10]])

print("Original Array:\n", arr, "\n")
print("Squared Array:\n", vfunc(arr))

Original Array:
 [[ 1  2  3  4  5]
 [ 6  7  8  9 10]] 

Squared Array:
 [[  1   4   9  16  25]
 [ 36  49  64  81 100]]


<H3><u> Some Extra Concepts

In [12]:
# Representing Null values
arr=np.array([1, 2, np.nan, 4, np.nan])     # nan -> Not A Number
arr

array([ 1.,  2., nan,  4., nan])

In [18]:
# np.inf -> Positive Infinity
# -np.inf -> Negative Infinity

In [17]:
# np.isnan -> Used to check for nan values
# np.isinf -> Used to check for inf and -inf values
# np.isfinite -> Used to check for finite values

In [19]:
arr=np.array([1, 2, np.nan, np.inf, -np.inf, 3, 4, 5])
arr

array([  1.,   2.,  nan,  inf, -inf,   3.,   4.,   5.])

In [21]:
print(np.isnan(arr), "\n")
print(np.isinf(arr), "\n")
print(np.isfinite(arr))

[False False  True False False False False False] 

[False False False  True  True False False False] 

[ True  True False False False  True  True  True]


In [22]:
# Removing inf, -inf and nan values
newarr=np.nan_to_num(arr)
newarr

array([ 1.00000000e+000,  2.00000000e+000,  0.00000000e+000,
        1.79769313e+308, -1.79769313e+308,  3.00000000e+000,
        4.00000000e+000,  5.00000000e+000])