## Numpy 


In [2]:
import numpy as np
print(np.__version__)

1.18.5


In [3]:
# creating numpy arrays

a = np.array([1,2,3])

# if data types are different numpy will perform upscaling

a = np.array([1,2.3,4])
print(a) 

# We can explicitly specify data type 

a = np.array([1,2.3,4], dtype = "int32")  # downscaling is performed in this case
print(a)

# other ways of creating array 

a = np.array(range(2,5)) 
print(a)

a = np.array([range(i,i+3) for i in [2,4,6]])
print(a)

# Numpy also supports complex numbers
a = np.array([1 + 2j, 2 + 4j, 3,7j])
print(a)

[1.  2.3 4. ]
[1 2 4]
[2 3 4]
[[2 3 4]
 [4 5 6]
 [6 7 8]]
[1.+2.j 2.+4.j 3.+0.j 0.+7.j]


In [4]:
# creating arrays from scratch 

print("array of length-10 integer array filled with zeros ")
print(np.zeros(10,dtype="int"))

print("\n 3x5 floating-point array filled with 1s")
print(np.ones((3, 5), dtype=float))

print("\n 3x5 array filled with 3.14")
print(np.full(shape=(3,5),fill_value=3.14))

print("\n An array from 2 to 20 with stepping = 2")
print(np.arange(start = 2,stop = 20,step = 2))

print("\n array of five values evenly spaced between 0 and 1")
print(np.linspace(start = 0,stop = 1, num = 5))      # end is included by default (num = number of divisions)

print("\n 3x3 array of uniformly distributed rdom values between 0 and 1")
print(np.random.random((3,3)))
print("\n",np.random.rand(3,3))  # notice the difference in passing arguments 

print("\n 3x3 array of normally distributed random values with mean 0 and standard deviation 1")
print(np.random.normal(loc = 1,scale = 3,size = (3,3)))  # loc = mean, scale = variance 
print("\n", np.random.randn(3,3))

print("\n 3x3 array of random integers in the interval [0, 10)")
print(np.random.randint(low = 0,high = 10, size = (3, 3)))

print("\n Create a 3x3 identity matrix")
print(np.identity(3))   # or np.eye() can also be used 

print("\n Create an uninitialized array of three integers The values will be whatever happens to  \
 already exist at that memory location")
print(np.empty(3))

array of length-10 integer array filled with zeros 
[0 0 0 0 0 0 0 0 0 0]

 3x5 floating-point array filled with 1s
[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]

 3x5 array filled with 3.14
[[3.14 3.14 3.14 3.14 3.14]
 [3.14 3.14 3.14 3.14 3.14]
 [3.14 3.14 3.14 3.14 3.14]]

 An array from 2 to 20 with stepping = 2
[ 2  4  6  8 10 12 14 16 18]

 array of five values evenly spaced between 0 and 1
[0.   0.25 0.5  0.75 1.  ]

 3x3 array of uniformly distributed rdom values between 0 and 1
[[0.63634618 0.10357488 0.8473842 ]
 [0.41976854 0.58036773 0.68073404]
 [0.47055204 0.84465195 0.17015542]]

 [[0.8996336  0.45524149 0.6264442 ]
 [0.00800361 0.52893178 0.21553307]
 [0.43379656 0.14904895 0.37964969]]

 3x3 array of normally distributed random values with mean 0 and standard deviation 1
[[ 2.19525747  4.80175839  0.158135  ]
 [-2.01221957 -1.3728177  -0.07890583]
 [-2.1006989   1.37366207  4.84339632]]

 [[ 1.05527267  0.54141007  0.72154966]
 [-0.52784196 -0.66331061 -0.7906

In [5]:
# Attributes of arrays
# Determining the size, shape, memory consumption, and data types of arrays


a = np.ones((3,3))
print(a)
print("\n ndim: number of dimensions")
print(a.ndim)

print("\n shape: the size of each dimension")
print(a.shape)

print("\n size: total size of the array")
print(a.size)

print("\n dtype: datatype of array")
print(a.dtype)

print("\n itemsize: size of each item in bytes")
print(a.itemsize)

print("\n nbytes: total size of array in bytes") 
print(a.nbytes)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]

 ndim: number of dimensions
2

 shape: the size of each dimension
(3, 3)

 size: total size of the array
9

 dtype: datatype of array
float64

 itemsize: size of each item in bytes
8

 nbytes: total size of array in bytes
72


In [6]:
# Indexing of arrays
# Getting and setting the value of individual array elements

# indexing in n-d array is done as a[a,b,c,..,n] 
a = np.array([[1,2,3],[4,5,6]])
print("\n Element in 2nd row and 2nd column = ",a[1,1])

# negative indexes work as weel 
print("\n last element is ",a[-1,-1])

# we can write values as well
a[1,2] = 3
a[1,1] += 2
print("\n",a)

# Array Slicing: Accessing Subarrays

# x[start:stop:step]

print("\n First 2 rows and first 2 columns ")
print(a[:2, :2])

print("\n first row")
print(a[0,:])      # simply a[0] also works
 
print("\n second column")
print(a[:,1])


# One important—and extremely useful—thing to know about array slices is that they
# return views rather than copies of the array data
# It means if we grab a slice of array, modify some elements in slice, the original array is also changed 
# this defualt behavour is quite useful

print("\n a is \n", a)
a_sub = a[:2,:3]
a_sub[1,1] = 45
print("changed a = \n",a)

# copy() method to return copy of slices and arrays

a_sub2 = a[:2,:3].copy()
a_sub2[1,1] = 34
print("\n a is not changed \n",a)

# Reshaping of arrays
# Fohpea_subr example, if you want to put the numbers 1 through 9 in a 3×3 grid, you can do the following:

grid = np.arange(1,10).reshape((3,3))
print("\n 3x3 reshaped grid \n",grid)

# Where possible, the reshape method will use a no-copy view of the
# initial array, but with noncontiguous memory buffers this is not always the case.

# Another common reshaping pattern is the conversion of a one-dimensional array
# into a two-dimensional row or column matrix. You can do this with the reshape
# method, or more easily by making use of the newaxis keyword within a slice operation:

x = np.array([1, 2, 3])   # x is a rank-1 array not a row vector 
print("\n x is a rank-1 array not a row vector ",x.shape)

# To convert it into a row or column vector 
x = x.reshape((1, 3))    # it can aslo be done with x[np.newaxis, :]
print("\n size of row vector x is = ",x.shape)

# To convert x into a column vector
x = x.reshape((3,1))    # it can aslo be done with x[:, np.newaxis] 
print("\n size of column vector x is = ",x.shape)




 Element in 2nd row and 2nd column =  5

 last element is  6

 [[1 2 3]
 [4 7 3]]

 First 2 rows and first 2 columns 
[[1 2]
 [4 7]]

 first row
[1 2 3]

 second column
[2 7]

 a is 
 [[1 2 3]
 [4 7 3]]
changed a = 
 [[ 1  2  3]
 [ 4 45  3]]

 a is not changed 
 [[ 1  2  3]
 [ 4 45  3]]

 3x3 reshaped grid 
 [[1 2 3]
 [4 5 6]
 [7 8 9]]

 x is a rank-1 array not a row vector  (3,)

 size of row vector x is =  (1, 3)

 size of column vector x is =  (3, 1)


In [7]:
# Concatenation of arrays 
# Concatenation of 2 or more arrays in NumPy is accomlished by np.concatenate(), np.vstack(), np.hstack() 

# np.concatenate() takes a tuple or list of arrays as its first argument
# This is similar as in Octave 

x = np.array([1,2,3])
y = np.array([3,2,1])
print("Concatenation of two arrays")
print(np.concatenate([x,y],axis = 0))

# For vectors and matrices, we can concatenate along both axes
grid = np.array([[1,2,3],[4,5,6]])

print("\n Conatenation along vertical axis")
print(np.concatenate([grid,grid],axis = 0)) # axis = 0 is default 

print("\n Conatenation along horizontall axis")
print(np.concatenate([grid,grid],axis = 1)) 

# vstack and hstack could be more useful 
print("\n Vertical stacking")
print(np.vstack([grid, grid]))

print("\n Horizontal stacking")
print(np.hstack([grid, grid]))

Concatenation of two arrays
[1 2 3 3 2 1]

 Conatenation along vertical axis
[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]

 Conatenation along horizontall axis
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]

 Vertical stacking
[[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]

 Horizontal stacking
[[1 2 3 1 2 3]
 [4 5 6 4 5 6]]


In [8]:
# Splitting of arrays
# The opposite of concatenation is splitting, which is implemented by the functions
# np.split , np.hsplit , and np.vsplit . For each of these, we can pass a list of indices
# giving the split points:

# Can also be applied on normal lists

x = [1,2,3,99,99,3,2,1]
x1, x2, x3 = np.split(x,[3,5])
print("Original array = ",x)
print("\n Spliited parts = ",x1,x2,x3)

# vsplit and hsplit are similar
grid = np.arange(16).reshape((4,4))

print("\n Original grid = ")
print(grid)
print("\n Vertically splitted using vsplit = ")
upper, lower = np.vsplit(grid,[2])
print(upper,"\n\n",lower)

print("\n Horizontally splitted using hsplit = ")
left, right = np.hsplit(grid,[2])
print(left, "\n\n" ,right)

Original array =  [1, 2, 3, 99, 99, 3, 2, 1]

 Spliited parts =  [1 2 3] [99 99] [3 2 1]

 Original grid = 
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]

 Vertically splitted using vsplit = 
[[0 1 2 3]
 [4 5 6 7]] 

 [[ 8  9 10 11]
 [12 13 14 15]]

 Horizontally splitted using hsplit = 
[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]] 

 [[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


In [9]:
# Computation on NumPy Arrays: Universal Functions

# For many types of operations, NumPy provides a convenient interface into just this
# kind of statically typed, compiled routine. This is known as a vectorized operation.
# You can accomplish this by simply performing an operation on the array, which will
# then be applied to each element. This vectorized approach is designed to push the
# loop into the compiled layer that underlies NumPy, leading to much faster execution.


# These functions don't change the original array, they return a temporary array

v = np.arange(1,5);
print("\n Reciprocal array 1/v")
print(1/v)

print("\n 2 power 2**v  or np.exp2(v)")
print(2**v)

print("\n Exponent np.exp(v)")
np.exp(v)

a = np.arange(4)

print("\n Array division")
print(a/v)

print("\n Addition of a number")
print(a + 2)

print("\n log ")
print(np.log(v))

print("\n Trigonometric functions ")
print(np.sin(a))

# For large calculations, it could be useful to specify the array in which output is to be stored 
# rather than creating a temporary array 
# We can use out argument to specify the output array 
# there is numpy functions for performing operations
# eg: for addition we can just do a + 2 but to specify the output array we have to use np.add()

x = np.arange(5)
y = np.empty(5)
np.multiply(x,10,out = y)
print("\n Output array y = ")
print(y)

# We can also write to specified indices of output array
y = np.zeros(10)
np.power(2,x,out=y[::2])
print("\n Output of power y = ")
print(y)


# For binary ufuncs there are some interesting aggregates that can be computed directly from object 
x = np.arange(1,5)
print("\n sum of all elements in array = ",np.add.reduce(x))
print("\n product of all elements in array = ",np.multiply.reduce(x))
print("\n Inorder to store the intermediate reults")
print(np.add.accumulate(a))


# Outer products
print("\n Outer product x.xt = ")
print(np.outer(x,x))

# Matrix multiplication and transposses

print("\n transpose")
print(x.T)
print("\n matrix multiplication")
print(np.matmul(x.T,x))

# "@" can be used as shortcut for matrix multiplication 
print(x.T @ x)


 Reciprocal array 1/v
[1.         0.5        0.33333333 0.25      ]

 2 power 2**v  or np.exp2(v)
[ 2  4  8 16]

 Exponent np.exp(v)

 Array division
[0.         0.5        0.66666667 0.75      ]

 Addition of a number
[2 3 4 5]

 log 
[0.         0.69314718 1.09861229 1.38629436]

 Trigonometric functions 
[0.         0.84147098 0.90929743 0.14112001]

 Output array y = 
[ 0. 10. 20. 30. 40.]

 Output of power y = 
[ 1.  0.  2.  0.  4.  0.  8.  0. 16.  0.]

 sum of all elements in array =  10

 product of all elements in array =  24

 Inorder to store the intermediate reults
[0 1 3 6]

 Outer product x.xt = 
[[ 1  2  3  4]
 [ 2  4  6  8]
 [ 3  6  9 12]
 [ 4  8 12 16]]

 transpose
[1 2 3 4]

 matrix multiplication
30
30


In [10]:
# Aggregates - sum , min, max, median, etc

# axis = 0, operates columnwise 
# axis = 1 operates rowwise

a = np.array([1,2,3,4])
b = np.arange(1,17).reshape((4,4))

print("\n Sum of a")
print(np.sum(a))

# for ndarray's we can perform operations on axes
print("\n Sum of b")
print(np.sum(b))

print("\n sum of columns of b")
print(np.sum(b , axis = 0))

print("\n sum of rows of b")
print(np.sum(b, axis = 1))

print("\n It can also be written as")
print(b.sum(axis = 0))


 Sum of a
10

 Sum of b
136

 sum of columns of b
[28 32 36 40]

 sum of rows of b
[10 26 42 58]

 It can also be written as
[28 32 36 40]


### Other aggregate functions

![image.png](attachment:image.png)

In [11]:
# Broadcasting
# Broadcasting allows binary operations to be applied on arrays of different sizes

a = np.array([0,1,2])
M = np.ones((3,3))

print("\n Sum of arrays of different dimensions")
print(M + a)

# Here the 1 dimensional array is stretched or broadcasted to match the dimensions of M and then added

a = np.arange(3)
b = np.arange(3)[:, np.newaxis]

print("\n a and b are")
print(a,"\n\n", b)

print("\n a + b = ")
print(a + b)

# Rules of broadcasting

# • Rule 1: If the two arrays differ in their number of dimensions, the shape of the
# one with fewer dimensions is padded with ones on its leading (left) side.
# • Rule 2: If the shape of the two arrays does not match in any dimension, the array
# with shape equal to 1 in that dimension is stretched to match the other shape.
# • Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is
# raised.




 Sum of arrays of different dimensions
[[1. 2. 3.]
 [1. 2. 3.]
 [1. 2. 3.]]

 a and b are
[0 1 2] 

 [[0]
 [1]
 [2]]

 a + b = 
[[0 1 2]
 [1 2 3]
 [2 3 4]]


In [19]:
# Comparisons, Masks, and Boolean Logic

# Masking comes up when you want to extract, modify, count, or
# otherwise manipulate values in an array based on some criterion

x = np.arange(10) 
print("x is ")
print(x)

print("\n Boolean mask for elements greater than 3 in x")
print(x > 3)

print("\n Boolean mask for elements greater than or equal to 5 in x")
print(x >+ 5)

# It is also possible to do element-by-element wise comparision along with using compound expressions 

print("\n element-by-element wise comparision along with using compound expressions")
print((2 * x) == (x ** 2))

# Working with Boolean arrays

# COunting entries

# We may want to count number of True entries in Boolean array, we can use np.sum() or np.count_nonzero()

print("\n To count the number of True entries in boolean array")
print(np.count_nonzero(x < 4))
print(np.sum(x < 4))

# The advantage of using np.sum() is we can perform summation along rows or columns by using axis 

grid = np.arange(16).reshape((4,4))
print("\n Number of True values in each column")
print(np.sum(grid < 6,axis = 0))

# If we are interested in checking whether any or all values are True we can use np.any() , np.all() 
print("\n Are all values in x less than 25")
print(np.all(x < 25))

print("\n Is there any negative value in x")
print(np.any(x < 0))


# Boolean operators 
# Python’s bitwise logic operators, & , | , ^ , and ~ . Like with the standard arith‐
# metic operators, NumPy overloads these as ufuncs that work element-wise on (usu‐
# ally Boolean) arrays.

print("\n How many values in x are less than 5 and greater than or equal to 7")
print(np.sum((x < 5) | (x >= 7)))

# Boolean arrays as masks
# We can get subsets of data from the array by using boolean array that we get after applying a condition

print("\n Elements in x that are less than 5")
print(x[x < 5])

x is 
[0 1 2 3 4 5 6 7 8 9]

 Boolean mask for elements greater than 3 in x
[False False False False  True  True  True  True  True  True]

 Boolean mask for elements greater than or equal to 5 in x
[False False False False False False  True  True  True  True]

 element-by-element wise comparision along with using compound expressions
[ True False  True False False False False False False False]

 To count the number of True entries in boolean array
4
4

 Number of True values in each column
[2 2 1 1]

 Are all values in x less than 25
True

 Is there any negative value in x
False

 How many values in x are less than 5 and greater than or equal to 7
8

 Elements in x that are less than 5
[0 1 2 3 4]


In [36]:
# Fancy Indexing
# Fancy indexing is conceptually simple: it means passing an array of indices to access
# multiple array elements at once.

x = np.random.randint(100,size = 10)
print("x is ")
print(x)

ind = [3,5,7]
print("\n Accessing multiple elements of x with fancy indexing")
print(x[ind])

# With fancy indexing , the shape of the result reflects the shape of index arrays rather than 
# the shape of the array being indexed 

ind = np.array([[3,7],[4,5]])
print(F"\n indices = \n{ind},\n the returned array is ")
print(x[ind])

X = np.arange(12).reshape((3,4))
row = np.array([0,1,2])
col = np.array([2,1,3])

print("\n X is")
print(X)
print("\n Applying fancy indexing on multiple dimension arrays row = ",row," col = ",col)
print(X[row, col])

# The pairing of indices in fancy indexing follows all the broadcasting rules
print("\n Passing column vector for first dimension to demonstrate broadcasting")
print(X[row[:, np.newaxis], col])

# We can combine fancy and simple indexes
print("\n Combining fancy and simple indexes")
print(X[2, [2,0,1]])

print("\n Combining fancy indexing and slicing")
print(X[1:,[2,0,1]])

print("\n Combining fancy indexing and masking")
mask = np.array([1, 0, 1, 0], dtype=bool)
print(X[row[:, np.newaxis], mask])

# Modifying values with fancy indexing
print("\n Modifying values with fancy indexing")
x = np.arange(10)
i = np.array([2, 1, 8, 4])
x[i] = 99
print(x)

# Binning data

np.random.seed(42)
x = np.random.randn(100)

# compute a histogram by hand

bins = np.linspace(-5, 5, 20)
counts = np.zeros_like(bins)

# find the appropriate bin for each x

i = np.searchsorted(bins, x)     # it returns the indices for elements in xshould be inserted into bins so that the resulting array is sorted

# add 1 to each of these bins

np.add.at(counts, i, 1)          # populating the frequency for each bin 



x is 
[94 81 65 68  9 69  6 21 76 47]

 Accessing multiple elements of x with fancy indexing
[68 69 21]

 indices = 
[[3 7]
 [4 5]],
 the returned array is 
[[68 21]
 [ 9 69]]

 X is
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

 Applying fancy indexing on multiple dimension arrays row =  [0 1 2]  col =  [2 1 3]
[ 2  5 11]

 Passing column vector for first dimension to demonstrate broadcasting
[[ 2  1  3]
 [ 6  5  7]
 [10  9 11]]

 Combining fancy and simple indexes
[10  8  9]

 Combining fancy indexing and slicing
[[ 6  4  5]
 [10  8  9]]

 Combining fancy indexing and masking
[[ 0  2]
 [ 4  6]
 [ 8 10]]

 Modifying values with fancy indexing
[ 0 99 99  3 99  5  6  7 99  9]


In [44]:
# Sorting
# Numpy has np.sort() and np.argsort() to handle sorting 

x = np.array([2,1,3,5,4])
print("Sorted x = ")
print(np.sort(x))

# np.argsort() returns the indices of sorted elements, first element indicates the index of smallest number
print("\n argosort of x")
print(np.argsort(x))

print("\n We can use this array to sort")
print(x[np.argsort(x)])

print("\n To perform inplace sorting")
x.sort()
print(x)

# sorting rows and columns 
X = np.random.randint(100,size=(4,5))
print("\n X is ")
print(X)

print("\n Sorting X by rows")
print(np.sort(X,axis=1))

print("\n Sorting X by columns")
print(np.sort(X,axis=0))

# Partial sorts: Partitioning 
# Sometimes instead of sorting entire array we want to get k smallest values. 
# This is accomplished by np.partition()

x = np.array([7, 2, 3, 1, 6, 5, 4])
print("\n x = ")
print(x)
print("\n we got first 3 smallest values")
print(np.partition(x, 3))

Sorted x = 
[1 2 3 4 5]

 argosort of x
[1 0 2 4 3]

 We can use this array to sort
[1 2 3 4 5]

 To perform inplace sorting
[1 2 3 4 5]

 X is 
[[22 30 93 41 98]
 [ 6 15 89 59  1]
 [ 0 47 11 68 36]
 [31  8 98 18 47]]

 Sorting X by rows
[[22 30 41 93 98]
 [ 1  6 15 59 89]
 [ 0 11 36 47 68]
 [ 8 18 31 47 98]]

 Sorting X by columns
[[ 0  8 11 18  1]
 [ 6 15 89 41 36]
 [22 30 93 59 47]
 [31 47 98 68 98]]

 x = 
[7 2 3 1 6 5 4]

 we got first 3 smallest values
[2 1 3 4 6 5 7]
