 Numerical Python, or "Numpy" for short, is a foundational package on which many of the most common data science packages 
are built. 
Numpy provides us with high performance multi-dimensional arrays which we can use as vectors or matrices.

The key features of numpy are:

ndarrays: n-dimensional arrays of the same data type which are fast and space-efficient. 
There are a number of built-in methods for ndarrays which allow for rapid processing of data without using loops 
(e.g., compute the mean).

Broadcasting: a useful tool which defines implicit behavior between multi-dimensional arrays of different sizes.
Vectorization: enables numeric operations on ndarrays.
Input/Output: simplifies reading and writing of data from/to file.
    

#how to create rank 1 numpy arrays

In [8]:
import numpy as np

an_array = np.array([3, 33, 333])  # Create a rank 1 array

print(type(an_array))              # The type of an ndarray is: "<class 'numpy.ndarray'>"

<class 'numpy.ndarray'>


In [9]:
an_array2 = np.array([[3, 33, 333],[4,44,444]])
an_array2.shape

(2, 3)

In [10]:
# test the shape of the array we just created, it should have just one dimension (Rank 1)
print(an_array.shape)

(3,)


In [11]:
# because this is a 1-rank array, we need only one index to accesss each element
print(an_array[0], an_array[1], an_array[2])

3 33 333


In [12]:
an_array[0] =888            # ndarrays are mutable, here we change an element of the array

print(an_array)

[888  33 333]


# How to create a Rank 2 numpy array:

In [15]:
another = np.array([[11,12,13],[21,22,23]])   # Create a rank 2 array

print(another)  # print the array

print("The shape is 2 rows, 3 columns: ", another.shape)  # rows x columns                   

print("Accessing elements [0,0], [0,1], and [1,0] of the ndarray: ", another[0, 0], ", ",another[0, 1],", ", another[1, 0])

[[11 12 13]
 [21 22 23]]
The shape is 2 rows, 3 columns:  (2, 3)
Accessing elements [0,0], [0,1], and [1,0] of the ndarray:  11 ,  12 ,  21


In [17]:
#There are many way to create numpy arrays:

In [18]:
import numpy as np

# create a 2x2 array of zeros
ex1 = np.zeros((2,2))      
print(ex1) 

[[0. 0.]
 [0. 0.]]


In [19]:
# create a 2x2 array filled with 9.0
ex2 = np.full((3,3), 9.0)  
print(ex2) 

[[9. 9. 9.]
 [9. 9. 9.]
 [9. 9. 9.]]


In [20]:
# create a 2x2 matrix with the diagonal 1s and the others 0
ex3 = np.eye(5,5)
print(ex3)

[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [21]:
# create an array of ones
ex4 = np.ones((2,2))
print(ex4)

[[1. 1.]
 [1. 1.]]


In [22]:
# notice that the above ndarray (ex4) is actually rank 2, it is a 2x1 array
print(ex4.shape)

# which means we need to use two indexes to access an element
print()
print(ex4[0,0])

(2, 2)

1.0


In [23]:
# create an array of random floats between 0 and 1
ex5 = np.random.random((2,2))
print(ex5)

[[0.93727232 0.46584503]
 [0.84969676 0.35897667]]


# Array Indexing

In [24]:
import numpy as np

# Rank 2 array of shape (3, 4)
an_array = np.array([[11,12,13,14], [21,22,23,24], [31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [25]:
a_slice = an_array[0:2, 1:3]
#select 0 and 1 rows, 1 and 2 column
print(a_slice)

[[12 13]
 [22 23]]


In [26]:
print("Before:", an_array[0, 1])   #inspect the element at 0, 1  
a_slice[0, 0] = 1000    # a_slice[0, 0] is the same piece of data as an_array[0, 1]
print("After:", an_array[0, 1])
print(an_array)

Before: 12
After: 1000
[[  11 1000   13   14]
 [  21   22   23   24]
 [  31   32   33   34]]


In [27]:
# Create a Rank 2 array of shape (3, 4)
an_array = np.array([[11,12,13,14], [21,22,23,24], [31,32,33,34]])
print(an_array)

[[11 12 13 14]
 [21 22 23 24]
 [31 32 33 34]]


In [28]:
# Using both integer indexing & slicing generates an array of lower rank
row_rank1 = an_array[1, :]    # Rank 1 view 
print(row_rank1, row_rank1.shape)  # notice only a single []

[21 22 23 24] (4,)


In [29]:
# Slicing alone: generates an array of the same rank as the an_array
row_rank2 = an_array[1:2, :]  # Rank 2 view 
print(row_rank2, row_rank2.shape)   # Notice the [[ ]]

[[21 22 23 24]] (1, 4)


In [30]:
#We can do the same thing for columns of an array:

print()
col_rank1 = an_array[:, 1] 
col_rank2 = an_array[:, 1:2]

print(col_rank1, col_rank1.shape)  # Rank 1
print()
print(col_rank2, col_rank2.shape)  # Rank 2


[12 22 32] (3,)

[[12]
 [22]
 [32]] (3, 1)


In [32]:
   #Array Indexing for changing elements:

In [33]:
# Create a new array
an_array = np.array([[11,12,13], [21,22,23], [31,32,33], [41,42,43]])

print('Original Array:')
print(an_array)

Original Array:
[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]


In [34]:
# Create an array of indices
col_indices = np.array([0, 1, 2, 0])
print('\nCol indices picked : ', col_indices)

row_indices = np.arange(4) #arange will select 0, 1, 2, 3
print('\nRows indices picked : ', row_indices)


Col indices picked :  [0 1 2 0]

Rows indices picked :  [0 1 2 3]


In [None]:
# Examine the pairings of row_indices and col_indices.  These are the elements we'll change next.
for i,j in zip(row_indices,col_indices):
    print(i, ", ",j)
'''for i in row_indices:
    for j in col_indices:'''

In [36]:
# Select one element from each row
print('Values in the array at those indices: ',an_array[row_indices, col_indices])
#an_array[0, 0], an_array[1,1],an_array[2, 2], an_array[3,0] = an_array[row_indices, col_indices]

Values in the array at those indices:  [11 22 33 41]


In [38]:
# Change one element from each row using the indices selected
an_array[row_indices, col_indices] += 100000 #a = a + 10 is same  as a+=10

print('\nChanged Array:')
print(an_array)


Changed Array:
[[200011     12     13]
 [    21 200022     23]
 [    31     32 200033]
 [200041     42     43]]


# Boolean Indexing

In [39]:
# create a 3x2 array
an_array = np.array([[11,12], [21, 22], [31, 32]])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [40]:
# create a filter which will be boolean values for whether each element meets this condition
filter = (an_array > 15)
filter

array([[False, False],
       [ True,  True],
       [ True,  True]])

In [41]:
# we can now select just those elements which meet that criteria
print(an_array[filter]) #values that are greater than 15.

[21 22 31 32]


In [42]:
# For short, we could have just used the approach below without the need for the separate filter array.

an_array[an_array > 15]

array([21, 22, 31, 32])

In [43]:
an_array[an_array % 2 == 0] +=100 # a+=100 is same as a = a + 100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


# Data type And Array Operations

In [44]:
ex1 = np.array([11, 12]) # Python assigns the  data type
print(ex1.dtype)

int32


In [45]:
ex2 = np.array([11.0, 12.0]) # Python assigns the  data type
print(ex2.dtype)

float64


In [46]:
ex3 = np.array([11, 21], dtype=np.int64) #You can also tell Python the  data type
print(ex3.dtype)

int64


In [47]:
# you can use this to force floats into integers (using floor function)
ex4 = np.array([11.1,12.7], dtype=np.int64)
print(ex4.dtype)
print()
print(ex4)

int64

[11 12]


# Arithmatic Array Operations

In [49]:
import numpy as np
x = np.array([[111,112],[121,122]], dtype=np.int)
y = np.array([[211.1,212.1],[221.1,222.1]], dtype=np.float64)

print(x)
print()
print(y)

[[111 112]
 [121 122]]

[[211.1 212.1]
 [221.1 222.1]]


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  x = np.array([[111,112],[121,122]], dtype=np.int)


In [50]:
# add
print(x + y)         # The plus sign works
print()
print(np.add(x, y))  # so does the numpy function "add"

[[322.1 324.1]
 [342.1 344.1]]

[[322.1 324.1]
 [342.1 344.1]]


In [51]:
# multiply
print(x * y)
print()
print(np.multiply(x, y))

[[23432.1 23755.2]
 [26753.1 27096.2]]

[[23432.1 23755.2]
 [26753.1 27096.2]]


In [52]:
# subtract
print(x - y)
print()
print(np.subtract(x, y))

[[-100.1 -100.1]
 [-100.1 -100.1]]

[[-100.1 -100.1]
 [-100.1 -100.1]]


In [53]:
# divide
print(x / y)
print()
print(np.divide(x, y))

[[0.52581715 0.52805281]
 [0.54726368 0.54930212]]

[[0.52581715 0.52805281]
 [0.54726368 0.54930212]]


In [54]:
# square root
print(np.sqrt(x))

[[10.53565375 10.58300524]
 [11.         11.04536102]]


In [55]:
# exponent (e ** x)
print(np.exp(x))

[[1.60948707e+48 4.37503945e+48]
 [3.54513118e+52 9.63666567e+52]]


# Statistical Methods, Sorting, and

Set Operations:

In [56]:
# setup a random 2 x 5 matrix
arr = 10 * np.random.randn(2,5) #random.random or random.randn or random.randint
print(arr)

[[ 6.51717623 -5.3617925  -6.32449959 -8.30375688 -0.23986206]
 [-3.03942999  6.87621649  8.60059715  4.36503496 -7.39041806]]


In [57]:
# compute the mean for all elements
print(arr.mean())

-0.4300734232479148


In [58]:
# compute the means by row
print(arr.mean(axis = 1))

[-2.74254696  1.88240011]


In [59]:
# compute the means by column
print(arr.mean(axis = 0))

[ 1.73887312  0.757212    1.13804878 -1.96936096 -3.81514006]


In [60]:
# sum all the elements
print(arr.sum())

-4.300734232479148


In [61]:
# compute the medians
print(np.median(arr, axis = 1)) # by rows 1st and 2nd also

[-5.3617925   4.36503496]


In [62]:
# create a 10 element array of randoms
unsorted = np.random.randn(10)

print(unsorted)

[-0.61448005 -0.68196135 -0.67341912 -0.32587259  0.78468443  0.75619032
  1.2153145   0.17583234  0.26893246 -0.48355477]


In [63]:
# create copy and sort
sorted = np.array(unsorted)
sorted.sort() # it will arrange values in ascending order. i.e sorting

print(sorted)
print()
print(unsorted)

[-0.68196135 -0.67341912 -0.61448005 -0.48355477 -0.32587259  0.17583234
  0.26893246  0.75619032  0.78468443  1.2153145 ]

[-0.61448005 -0.68196135 -0.67341912 -0.32587259  0.78468443  0.75619032
  1.2153145   0.17583234  0.26893246 -0.48355477]


In [64]:
# inplace sorting
unsorted.sort() 
print(unsorted)

[-0.68196135 -0.67341912 -0.61448005 -0.48355477 -0.32587259  0.17583234
  0.26893246  0.75619032  0.78468443  1.2153145 ]


In [65]:
import numpy as np
array = np.array([1,2,1,4,2,1,4,2])
#print(array.unique())
print(np.unique(array)) # it will return unquie values

[1 2 4]


In [66]:
s1 = np.array(['desk','chair','bulb'])
s2 = np.array(['lamp','bulb','chair'])
print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [67]:
print( np.intersect1d(s1, s2) )  #common values between s1 and s2

['bulb' 'chair']


In [68]:
print( np.union1d(s1, s2) ) #common and uncommon values between s1 and s2

['bulb' 'chair' 'desk' 'lamp']


In [69]:
print( np.setdiff1d(s1, s2) )# elements in s1 that are not in s2

['desk']


In [70]:
print( np.in1d(s1, s2) )#which element of s1 is also in s2

[False  True  True]


# Broadcasting

In [71]:
import numpy as np

start = np.zeros((4,3))
print(start)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [72]:
# create a rank 1 ndarray with 3 values
add_rows = np.array([1, 0, 2])
print(add_rows)

[1 0 2]


In [73]:
y = start + add_rows  # add to each row of 'start' using broadcasting
print(y)

[[1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]
 [1. 0. 2.]]


In [74]:
# create an ndarray which is 4 x 1 to broadcast across columns
add_cols = np.array([[0,1,2,3]]) # print(add_cols) 
print(add_cols)
add_cols = add_cols.T #transpose: coverts rows into columns and vice versa

print(add_cols)

[[0 1 2 3]]
[[0]
 [1]
 [2]
 [3]]


In [75]:
# add to each column of 'start' using broadcasting
y = start + add_cols 
print(y)

[[0. 0. 0.]
 [1. 1. 1.]
 [2. 2. 2.]
 [3. 3. 3.]]


In [76]:
# this will just broadcast in both dimensions
add_scalar = np.array([1])  
print(start+add_scalar)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [77]:
# create our 3x4 matrix
arrA = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(arrA)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [78]:
# create our 4x1 array
arrB = [0,1,0,2]
print(arrB)

[0, 1, 0, 2]


In [79]:
# add the two together using broadcasting
print(arrA + arrB)

[[ 1  3  3  6]
 [ 5  7  7 10]
 [ 9 11 11 14]]


In [80]:
import numpy as np
x = np.array([ 23.23, 24.24] )

In [81]:
np.save('an_array', x) #saves the values in x to an_array

In [82]:
np.load('an_array.npy')

array([23.23, 24.24])

In [83]:
np.savetxt('array.txt', X=x, delimiter=',')

In [84]:
## For Windows replace "cat" with "type"
!cat array.txt

'cat' is not recognized as an internal or external command,
operable program or batch file.


# Additional Common ndarray Operations



Dot Product on Matrices and Inner Product on Vectors:

In [85]:
# determine the dot product of two matrices
x2d = np.array([[1,1],[1,1]])
y2d = np.array([[2,2],[2,2]]) 
print(x2d)
print()
print(y2d)
print()
print(x2d.dot(y2d))
print()
print(np.dot(x2d, y2d))

[[1 1]
 [1 1]]

[[2 2]
 [2 2]]

[[4 4]
 [4 4]]

[[4 4]
 [4 4]]


In [86]:
# determine the inner product of two vectors
a1d = np.array([9 , 9 ])
b1d = np.array([10, 10])

print(a1d.dot(b1d))
print()
print(np.dot(a1d, b1d))

180

180


In [87]:
# dot produce on an array and vector
print(x2d.dot(a1d)) #([9 , 9 ]), ([[1,1],[1,1]]) [1*9 + 1*9, 1*9 + 1*9] = [18, 18]
print()
print(np.dot(x2d, a1d))

[18 18]

[18 18]


In [88]:
# sum elements in the array
ex1 = np.array([[11,12],[21,22]])

print(np.sum(ex1))          # add all members

66


In [89]:
print(np.sum(ex1, axis=0))  # columnwise sum

[32 34]


In [90]:
print(np.sum(ex1, axis=1))  # rowwise sum

[23 43]


In [91]:
# random array
x = np.random.randn(8)
x

array([ 0.59326882, -0.77020295, -0.66825157,  0.74176536,  0.4857854 ,
       -0.33014235,  0.67145809, -1.06213269])

In [92]:
# another random array
y = np.random.randn(8)
y

array([ 1.14279805,  0.73247597,  1.08735203, -0.6857993 , -1.77846424,
       -0.15721179, -0.32869387,  1.27566219])

In [93]:
# returns element wise maximum between two arrays
import numpy as np
x = np.random.randn(2,3)
y = np.random.randn(2,3)
np.maximum(x, y)

array([[ 0.65435319, -0.48683013,  1.82758794],
       [ 1.33262676,  1.20464429,  0.62497302]])

In [94]:
# grab values from 0 through 19 in an array
arr = np.arange(20)
print(arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [95]:
# reshape to be a 4 x 5 matrix
arr.reshape(4,5) #5, 4

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [96]:
# transpose
ex1 = np.array([[11,12],[21,22]])

ex1.T

array([[11, 21],
       [12, 22]])

In [97]:
x_1 = np.array([1,2,3,4,5])

y_1 = np.array([11,22,33,44,55])
# where we have true in filter it will print x_1 value and for false y_1 value
# this is the working of .where function
filter = np.array([True, False, True, False, True])

In [98]:
out = np.where(filter, x_1, y_1)
print(out)

[ 1 22  3 44  5]


In [99]:
mat = np.random.rand(5,5)
mat

array([[0.43390991, 0.12971378, 0.28424716, 0.23362052, 0.10567232],
       [0.56292072, 0.35822832, 0.29025652, 0.57828805, 0.98600732],
       [0.11197462, 0.19111098, 0.50595346, 0.51847324, 0.09915884],
       [0.37086853, 0.86265984, 0.3243313 , 0.15619121, 0.66882823],
       [0.93577166, 0.51778992, 0.08733355, 0.75767561, 0.48240554]])

In [100]:
np.where( mat > 0.5, 1000, -1)

array([[  -1,   -1,   -1,   -1,   -1],
       [1000,   -1,   -1, 1000, 1000],
       [  -1,   -1, 1000, 1000,   -1],
       [  -1, 1000,   -1,   -1, 1000],
       [1000, 1000,   -1, 1000,   -1]])

In [101]:
'''
if mat > 0.5:
    replace(1000)
else:
    replace(-1)'''

'\nif mat > 0.5:\n    replace(1000)\nelse:\n    replace(-1)'

In [102]:
arr_bools = np.array([ True, False, True, True, False ])

In [103]:
arr_bools.any() #is any value True

True

In [104]:
arr_bools.all() # are all values True

False

In [105]:
import numpy as np
Y = np.random.normal(size = (1,5))
print(Y)

[[-0.58307916 -0.58238703 -0.70274503 -0.13714589  1.0070359 ]]


In [106]:
Z = np.random.randint(low=2,high=50,size=4) #int integer values
print(Z) # low for lower limits and high for higher limits

[32  5 48  9]


In [107]:
np.random.permutation(Z) #return a new ordering of elements in Z

array([ 5, 32, 48,  9])

In [108]:
np.random.uniform(size=4) #uniform distribution

array([0.1423954 , 0.98569591, 0.31770907, 0.72944228])

In [109]:
np.random.normal(size=4) #normal distributionsize=4

array([-1.7173111 , -0.25451445,  1.26931482,  1.50132514])

In [110]:
K = np.random.randint(low=2,high=50,size=(2,2))
print(K)

print()
M = np.random.randint(low=2,high=50,size=(2,2))
print(M)

[[ 6 26]
 [25  6]]

[[26 31]
 [15 38]]


In [111]:
np.vstack((K,M)) # join vertically

array([[ 6, 26],
       [25,  6],
       [26, 31],
       [15, 38]])

In [112]:
np.hstack((K,M)) # join horizontally

array([[ 6, 26, 26, 31],
       [25,  6, 15, 38]])

In [113]:
np.concatenate([K, M], axis = 0)

array([[ 6, 26],
       [25,  6],
       [26, 31],
       [15, 38]])

In [114]:
np.concatenate([K, M.T], axis = 1)

array([[ 6, 26, 26, 15],
       [25,  6, 31, 38]])

In [115]:
np.concatenate([K, M], axis = 1)

array([[ 6, 26, 26, 31],
       [25,  6, 15, 38]])