# 3 MARCH

# Numerical Python, or "Numpy" for short, is a foundational package on which many of the most common data science packages are built. Numpy provides us with high performance multi-dimensional arrays which we can use as vectors or matrices.

# The key features of numpy are:

# ndarrays: n-dimensional arrays of the same data type which are fast and space-efficient. There are a number of built-in methods for ndarrays which allow for rapid processing of data without using loops (e.g., compute the mean).
# Broadcasting: a useful tool which defines implicit behavior between multi-dimensional arrays of different sizes.
# Vectorization: enables numeric operations on ndarrays.
# Input/Output: simplifies reading and writing of data from/to file.

# How to create Rank 1 numpy arrays:

In [1]:
import numpy as np

an_array=np.array([3,33,333]) # Create a rank 1 array

print(type(an_array))        # The typemof an ndarray is : "<class 'numpy.ndarray'>"

<class 'numpy.ndarray'>


In [2]:
an_array2=np.array([[3,33,333],[4,44,444]])
an_array2.shape

(2, 3)

# test the shape of the array we just created, it should have just one dimension (Rank 1)

In [3]:
print(an_array.shape)

(3,)


In [4]:
# Because this is a 1-rank array, we need only one index to access each element

print(an_array[0],an_array[1],an_array[2])

3 33 333


In [5]:
an_array[0]=888 # ndarrays are mutable, here we change an element of the array
print(an_array)

[888  33 333]


# How to create a Rank 2 numpy array:

# A rank 2 ndarray is one with two dimensions. Notice the fformat below of[[row],[row]]. 
# 2 Dimensional arrays are great for representing matrices which are often used in data science

In [6]:
another=np.array([[11,12,13],[21,22,23]]) # Creates a rank 2 array

print(another)   # print the array

print('The shape is 2 rows, 3 columns:',another.shape)  # rows x columns

print('Accessing elements [0,0],[0,1], and [1,0] of the ndarray:',another[0,0],',',another[0,1],',',another[1,0])

[[11 12 13]
 [21 22 23]]
The shape is 2 rows, 3 columns: (2, 3)
Accessing elements [0,0],[0,1], and [1,0] of the ndarray: 11 , 12 , 21


# There are many way to create numpy arrays:


# Here we create a number of different size arrays with different shapes and different pre-filled values. numpy has a number of built in methods which help us quickly and easily create multidimensional arrays.

In [7]:
import numpy as np

# create a 2x2 array of zeroes

ex1=np.zeros((2,2))
print(ex1)

[[0. 0.]
 [0. 0.]]


In [8]:
# create a 2x2 array filled with 9.0

ex2=np.full((2,2),9.0)
print(ex2)

[[9. 9.]
 [9. 9.]]


In [9]:
# create a 2x2 matrix with diagonal as 1 and others as 0

ex3=np.eye(5,5)
print(ex3)

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [10]:
# create an array of ones

ex4=np.ones((2,2))
print(ex4)

[[1. 1.]
 [1. 1.]]


In [11]:
# Notice that the above ndarray ex4 is actually rank 2, it is a 2X1 array
# This means we need to use two indexes to access an element

print(ex4.shape)
print()
print(ex4[0,0]) # two indexes to access an element

(2, 2)

1.0


In [12]:
# create an array of random floats between 0 and 1
ex5=np.random.random((2,2))
print(ex5)

[[0.7652102  0.30552996]
 [0.14131296 0.20541592]]


# Array Indexing



# Slice indexing:

# Similar to the use of slice indexing with lists and strings, we can use slice indexing to pull out sub-regions of ndarrays.

In [13]:
import numpy as np

# Rank 2 array of shape (3,4)
an_array=np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [14]:
# Use array slicing to get a subarray of the first 2 rows x 2 clomuns
a_slice=an_array[0:2,1:3]
print(a_slice)

[[12 13]
 [22 23]]


In [15]:
# the above can also be done as follows
print(an_array[0:2,1:3])    # the first set of numbers is for rows & the second set is for columns

[[12 13]
 [22 23]]


# When you modify a slice, you actually modify the underlying array

In [16]:
print('Before',an_array[0,1])  # inspect the element at 0,1
a_slice[0,0]=1000    #a_slice[0,0] is the same piece of data as an_array[0,1]
print('After',an_array[0,1])
print(an_array)

Before 12
After 1000
[[  11 1000   13   14]
 [  21   22   23   24]
 [  31   32   33   34]]


# Use both integer indexing & slice indexing

We can use combinations of integer indexing and slice indexing to create different shaped matrices.

In [17]:
# Create a rank 2 array of shape (3,4)
an_array=np.array([[11,12,13,14],[21,22,23,24],[31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [18]:
# Using both integer indexing & slicing generates an array of lower rank
row_rank1=an_array[1,:]   # Rank 1 view
print(row_rank1,row_rank1.shape)   # notice only a single[]

[21 22 23 24] (4,)


In [19]:
# Slicing alone: generates an array of the same rank as the an_array
row_rank2=an_array[1:2,:] # Rank 2 view
print(row_rank2,row_rank2.shape)   # Notice the [[]]

[[21 22 23 24]] (1, 4)


In [20]:
# We can do the same thing for columns of an array:

print()
col_rank1=an_array[:,1]
col_rank2=an_array[:,1:2]

print(col_rank1,col_rank1.shape)  # Rank 1
print()
print(col_rank2,col_rank2.shape)   # Rank 2


[12 22 32] (3,)

[[12]
 [22]
 [32]] (3, 1)


# Array Indexing for changing elements:

Sometimes it's useful to use an array of indexes to access or change elements.

In [21]:
# Create a new array
an_array=np.array([[11,12,13,],[21,22,23],[31,32,33],[41,42,43]])
print('Original Array:','\n',an_array)

Original Array: 
 [[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [22]:
# Create an array of indices
col_indices=np.array([0,1,2,0])
print('\nCol indices picked:',col_indices)

row_indices=np.arange(4)  # arange will select 0,1,2,3
print('\nRow indices picked:',row_indices)


Col indices picked: [0 1 2 0]

Row indices picked: [0 1 2 3]


In [23]:
# Examine the pairings of row_indices and col_indices. These are the elements we'll change next
for i,j in zip(row_indices,col_indices):
    print(i,',',j)
'''for i in rwo_indices:
       for j in col_indices'''

0 , 0
1 , 1
2 , 2
3 , 0


'for i in rwo_indices:\n       for j in col_indices'

In [24]:
# Select on element from each row
print('Values in the array at those indices:',an_array[row_indices,col_indices])
#an_array[0, 0], an_array[1,1],an_array[2, 2], an_array[3,0] = an_array[row_indices, col_indices]

Values in the array at those indices: [11 22 33 41]


In [25]:
# # Change one element from each row using the indices selected
an_array[row_indices,col_indices]+=1000 # a=a+10 is same as a+=10
print('\nChanged Array is','\n',an_array)    ###(VALUES HAVE CHANGED DUE TO MULTIPLE RUN)


Changed Array is 
 [[1011   12   13]
 [  21 1022   23]
 [  31   32 1033]
 [1041   42   43]]


# Boolean Indexing

# Array Indexing for changing elements:

In [26]:
# Create a 3x2 array
an_array=np.array([[11,12],[21,22],[31,32]])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [27]:
# Create a filter which will be boolean values for whether each element meets this condition

filter=(an_array>15)
filter

array([[False, False],
       [ True,  True],
       [ True,  True]])

Notice that the filter is a same size ndarray as an_array which is filled with True for each element whose corresponding element in an_array which is greater than 15 and False for those elements whose value is less than 15.

In [28]:
# we can now select just those elements which meet that criteria
print(an_array[filter])

[21 22 31 32]


In [29]:
# For short we could have just used the approach below without the need for the separate filter array
an_array[an_array>15]

array([21, 22, 31, 32])

What is particularly useful is that we can actually change elements in the array applying a similar logical filter. Let's add 100 to all the even values.

In [30]:
an_array[an_array%2==0]+=100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


# Datatypes and Array Operations



Datatypes:

In [31]:
ex1=np.array([11,12]) # Python assigns the datatype
print(ex1.dtype)

int32


In [32]:
ex2=np.array([11.0,12.0])
print(ex2.dtype)

float64


In [33]:
ex3=np.array([11,12],dtype=np.int64) # You can also tell python the data type
print(ex3.dtype)

int64


In [34]:
# you can use this to force floats into integers (using floor function)
ex4=np.array([11.1,12.7],dtype=np.int64)
print(ex4.dtype)
print()
print(ex4)

int64

[11 12]


In [35]:
# You can use this to force integers into floats if you anticipate
# the values may change to floats later
ex5=np.array([11,12],dtype=np.float64)
print(ex5.dtype)
print()
print(ex5)

float64

[11. 12.]


# Arithmetic Array Operations:

In [36]:
import numpy as np
x=np.array([[111,112],[121,122]],dtype=np.int)
y=np.array([[211.1,212.1],[221.1,222.2]],dtype=np.float64)

print(x,'\n','\n',y)

[[111 112]
 [121 122]] 
 
 [[211.1 212.1]
 [221.1 222.2]]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x=np.array([[111,112],[121,122]],dtype=np.int)


In [37]:
# add
print(x+y)
print()
print(np.add(x,y))

[[322.1 324.1]
 [342.1 344.2]]

[[322.1 324.1]
 [342.1 344.2]]


In [38]:
# multiply
print(x*y)
print()
print(np.multiply(x,y))

[[23432.1 23755.2]
 [26753.1 27108.4]]

[[23432.1 23755.2]
 [26753.1 27108.4]]


In [39]:
# subtract
print(x-y)
print()
print(np.subtract(x,y))

[[-100.1 -100.1]
 [-100.1 -100.2]]

[[-100.1 -100.1]
 [-100.1 -100.2]]


In [40]:
# divide
print(x/y)
print()
print(np.divide(x,y))

[[0.52581715 0.52805281]
 [0.54726368 0.54905491]]

[[0.52581715 0.52805281]
 [0.54726368 0.54905491]]


In [41]:
# square root
print(np.sqrt(x))

[[10.53565375 10.58300524]
 [11.         11.04536102]]


In [42]:
print(np.sqrt(y))

[[14.52928078 14.56365339]
 [14.86943173 14.90637448]]


In [43]:
# exponential
print(np.exp(x))

[[1.60948707e+48 4.37503945e+48]
 [3.54513118e+52 9.63666567e+52]]


# Statistical Methods, Sorting, 

# and

# Set Operations:


Basic Statistical Operations:

In [44]:
# setup a random 2x5 matrix
arr=10*np.random.randn(2,5) #random.random or random.randn or random.randint
print(arr)

[[  3.1808216   14.87708475  17.23026371 -17.13366069   7.01674525]
 [  9.68100045   3.01663232   8.74600057  -4.87516735   4.90363381]]


In [45]:
# Compute the mean for all elements
print(arr.mean())

4.6643354416202705


In [46]:
# compute the mean by column
print(arr.mean(axis=0))

[  6.43091102   8.94685853  12.98813214 -11.00441402   5.96018953]


In [47]:
# compute the mean by row
print(arr.mean(axis=1))

[5.03425092 4.29441996]


In [48]:
# sum all the elements
print(arr.sum())

46.643354416202705


In [49]:
# compute the medians
print(np.median(arr,axis=1)) # by rows 1st and 2nd also

[7.01674525 4.90363381]


# Sorting

In [50]:
# create a 10 element array of randoms
unsorted=np.random.randn(10)

In [51]:
print(unsorted)

[ 0.63367939  1.10386723 -0.90144632  1.43697222 -0.41164361 -1.05391359
  0.20325139  1.14880915  0.23674708  1.30883805]


In [52]:
# create copy & sort
sorted=np.array(unsorted)
sorted.sort() # it will arrange the values in ascending order i.e sorting

print(sorted)
print()
print(unsorted)

[-1.05391359 -0.90144632 -0.41164361  0.20325139  0.23674708  0.63367939
  1.10386723  1.14880915  1.30883805  1.43697222]

[ 0.63367939  1.10386723 -0.90144632  1.43697222 -0.41164361 -1.05391359
  0.20325139  1.14880915  0.23674708  1.30883805]


In [53]:
# inplace sorting
unsorted.sort()
print(unsorted)

[-1.05391359 -0.90144632 -0.41164361  0.20325139  0.23674708  0.63367939
  1.10386723  1.14880915  1.30883805  1.43697222]


# Finding Unique elements:

In [54]:
import numpy as np
array=np.array([1,2,1,4,2,1,4,2])
print()
print(np.unique(array)) # This will return unique values


[1 2 4]


# Set Operations with np.array data type:

In [55]:
s1=np.array(['desk','chair','bulb'])
s2=np.array(['lamp','bulb','chair'])
print(s1,s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [56]:
print(np.intersect1d(s1,s2)) # common values between s1 & s2

['bulb' 'chair']


In [57]:
print(np.union1d(s1,s2)) # common & uncommon vales between s1 & s2

['bulb' 'chair' 'desk' 'lamp']


In [58]:
print(np.setdiff1d(s1,s2)) # elements in s1 that are not in s2

['desk']


In [59]:
print(np.in1d(s1,s2)) # boolean o/p for which element of s1 is also in s2

[False  True  True]


# Broadcasting:

Introduction to broadcasting.
For more details, please see:
https://docs.scipy.org/doc/numpy-1.10.1/user/basics.broadcasting.html

In [60]:
import numpy as np

start = np.zeros((4,3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [61]:
# create a rank 1 ndarray with 3 values
add_rows=np.array([1,0,2])
print(add_rows)

[1 0 2]


In [62]:
y=start+add_rows # add to each row of start using broadcasting
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [63]:
# create an ndarray which is 4x1 to broadcast across columns
add_cols=np.array([[0,1,2,3]]) # print(add_cols)
print(add_cols)
add_cols=add_cols.T # transpose (T) converts rows into columns & vice versa

print(add_cols)

[[0 1 2 3]]
[[0]
 [1]
 [2]
 [3]]


In [64]:
# add to each column of start using broadcasting
y=start+add_cols
print(y)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [65]:
# this will just broadcast in both dimensions
add_scalar=np.array([1])
print(start+add_scalar)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


Examples from slides

In [66]:
# create a 3x4 matrix
arrA=np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(arrA)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [67]:
# create a 4x1 array
arrB=np.array([0,1,0,2])
print(arrB)

[0 1 0 2]


In [68]:
# add the two together using broadcasting
arrC=arrA+arrB
print(arrC)

[[ 1  3  3  6]
 [ 5  7  7 10]
 [ 9 11 11 14]]


# Binary Format

In [69]:
import numpy as np
x=np.array([23.23,24.24])

In [70]:
np.save('an_array',x) # saves the values in x to an_array

In [71]:
np.load('an_array.npy')

array([23.23, 24.24])

# Text Format

In [72]:
np.savetxt('array.txt',X=x,delimiter=',')

In [73]:
## For Windows replace 'cat' with 'type'
!cat array.txt

'cat' is not recognized as an internal or external command,
operable program or batch file.


In [74]:
np.savetxt('array.txt',X=x,delimiter=',') # savetxt function will store your file in text format
np.loadtxt('array.txt',delimiter=',') # delimiter and sep are same whic is separated by comma

array([23.23, 24.24])

# Additional Common ndarray Operations



Dot Product on Matrices and Inner Product on Vectors:

In [75]:
# determine the dot product of two matrices
x2d=np.array([[1,1],[1,1]])
y2d=np.array([[2,2],[2,2]])
print(x2d,'\n','\n',y2d,'\n','\n',x2d.dot(y2d),'\n','\n',np.dot(x2d,y2d))

[[1 1]
 [1 1]] 
 
 [[2 2]
 [2 2]] 
 
 [[4 4]
 [4 4]] 
 
 [[4 4]
 [4 4]]


In [76]:
# determine the inner product of two vectors
a1d=np.array([9,9])
b1d=np.array([10,10])
print(a1d.dot(b1d),'\n\n',np.dot(a1d,b1d))

180 

 180


In [77]:
# dot produce on an array & vector
print(x2d.dot(a1d),'\n\n',np.dot(x2d,a1d)) # 

[18 18] 

 [18 18]


# Sum

In [78]:
# Sum elements in the array
ex1=np.array([[11,12],[21,22]])
print(np.sum(ex1))

66


In [79]:
print(np.sum(ex1,axis=0)) # column wise addition

[32 34]


In [80]:
print(np.sum(ex1,axis=1)) # row wise addition

[23 43]


# Element-wise Functions:

For example, let's compare two arrays values to get the maximum of each.

In [81]:
# random array
x=np.random.random(8)
x



array([0.03844885, 0.51701358, 0.60276155, 0.3160591 , 0.91653087,
       0.52071688, 0.04483914, 0.93660726])

In [82]:
y=np.random.randn(8)
y

array([ 1.09076384,  0.32715444,  0.95648578,  0.40227383, -0.37730507,
        0.91124009, -1.11643181,  0.29083909])

In [83]:
# returns element wise maximum between two arrays
import numpy as np
x=np.random.randn(2,3)
y=np.random.randn(2,3)
np.maximum(x,y)

array([[-0.12257556,  0.64068261,  2.12226884],
       [ 0.21559239,  1.24028801,  1.25004225]])

# Reshaping array:

In [84]:
# grab values from 0 to 19 in an array
arr=np.arange(20)
print(arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [85]:
# reshape to be a 4x5 matrix
arr.reshape(4,5)   #5,4

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

# Transpose

In [86]:
# Transpose
ex1=np.array([[11,12],[21,22]])

print(ex1)

ex1.T

[[11 12]
 [21 22]]


array([[11, 21],
       [12, 22]])

# Indexing using where():

In [87]:
x_1=np.array([1,2,3,4,5])

y_1=np.array([11,22,33,44,55])
# where we have true in filter it will print x_1 value & for false y_1
# this is the working of where function
filter=np.array([True,False,True,False,True])

out=np.where(filter,x_1,y_1)
print(out)

[ 1 22  3 44  5]


In [88]:
mat=np.random.rand(5,5)
mat

array([[0.63866389, 0.05123469, 0.57956474, 0.08899281, 0.57959856],
       [0.60740482, 0.20372464, 0.91753593, 0.45078948, 0.22504227],
       [0.29516367, 0.90076304, 0.01418945, 0.48450323, 0.12737466],
       [0.08489877, 0.39924409, 0.36097953, 0.99823562, 0.93350493],
       [0.32192965, 0.66169055, 0.09343011, 0.74272434, 0.45817854]])

In [89]:
np.where(mat>0.5,100,-1)

'''if mat>0.5:
      replace(100)
   else:replace(-1)'''

'if mat>0.5:\n      replace(100)\n   else:replace(-1)'

# "any" or "all" conditionals:

In [90]:
arr_bools=np.array([True,False,True,True,False])

In [91]:
arr_bools.any() # is any value true

True

In [92]:
arr_bools.all() # are all values True

False

# Random Number Generation:

In [93]:
import numpy as np
Y=np.random.normal(size=(1,5))
print(Y)

[[ 1.06359735  0.95880298 -0.32713935 -0.95894936 -1.31602844]]


In [94]:
Z=np.random.randint(low=2,high=50,size=4) # int integer values
print(Z) # low for lower limits & high for higher limits

[ 5  8  9 46]


In [95]:
np.random.permutation(Z) # return a new ordering of elements in Z

array([ 8,  9, 46,  5])

In [96]:
np.random.uniform(size=4) #uiform distribution

array([0.47217324, 0.41767479, 0.28109406, 0.31754582])

In [97]:
np.random.normal(size=4) # normal distrubution size=4

array([-0.2774316 , -0.16492061,  0.44386325, -0.91805543])

# Merging data sets:

In [98]:
K=np.random.randint(low=2,high=50,size=(2,2))
print(K)

print()
M=np.random.randint(low=2,high=50,size=(2,2))
print(M)

[[ 5 49]
 [22 25]]

[[ 2 23]
 [44 17]]


In [99]:
np.vstack((K,M)) #join vertically

array([[ 5, 49],
       [22, 25],
       [ 2, 23],
       [44, 17]])

In [100]:
np.hstack((K,M)) # join horizontally

array([[ 5, 49,  2, 23],
       [22, 25, 44, 17]])

In [101]:
np.concatenate([K,M],axis=0)

array([[ 5, 49],
       [22, 25],
       [ 2, 23],
       [44, 17]])

In [102]:
np.concatenate([K,M.T],axis=1)

array([[ 5, 49,  2, 44],
       [22, 25, 23, 17]])