NumPy (or Numpy) is a Linear Algebra Library for Python, the reason it is so important for Data Science 
with Python is that almost all of the libraries in the PyData Ecosystem rely on NumPy as one of their main 
building blocks.

In [3]:
import numpy as np

Numpy has many built-in functions and capabilities. We won't cover them all but instead we will focus on some of 
the most important aspects of Numpy: vectors, arrays, matrices, and number generation. 
Let's start by discussing arrays.

NumPy arrays are the main way we will use Numpy. Numpy arrays essentially come in two flavors: 
vectors and matrices. Vectors are strictly 1-d arrays and matrices are 2-d (but you should note a matrix can still have only one row or one column).
We can create an array by directly converting a list or list of lists

In [7]:
my_list = [1, 2, 3]
print(my_list)

# converting list to array (1-d array)
np.array(my_list)

my_matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
print(my_matrix)

# now we have 3x3 matrix array (2d array)
np.array(my_matrix)

[1, 2, 3]
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [8]:
'''
Built-in Methods
There are lots of built-in ways to generate Arrays.
arange : Return evenly spaced values within a given interval.
'''
# similar to range() method of python
print(np.arange(0, 10))
# with two steps
print(np.arange(0, 11, 2))

[0 1 2 3 4 5 6 7 8 9]
[ 0  2  4  6  8 10]


In [14]:
# Generate arrays of zeros 
print(np.zeros(3))
# 5x5 matrix array
print("\n" ,np.zeros((5, 5)))

[0. 0. 0.]

 [[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [15]:
print(np.ones(3))
print("\n" ,np.ones((3, 3)))

[1. 1. 1.]

 [[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [16]:
# linspace : Return evenly spaced numbers over a specified interval.
# here we want 3 evenly spaced numbers between 0 and 10
print(np.linspace(0, 10, 3))
print(np.linspace(0, 10, 50))

[ 0.  5. 10.]
[ 0.          0.20408163  0.40816327  0.6122449   0.81632653  1.02040816
  1.2244898   1.42857143  1.63265306  1.83673469  2.04081633  2.24489796
  2.44897959  2.65306122  2.85714286  3.06122449  3.26530612  3.46938776
  3.67346939  3.87755102  4.08163265  4.28571429  4.48979592  4.69387755
  4.89795918  5.10204082  5.30612245  5.51020408  5.71428571  5.91836735
  6.12244898  6.32653061  6.53061224  6.73469388  6.93877551  7.14285714
  7.34693878  7.55102041  7.75510204  7.95918367  8.16326531  8.36734694
  8.57142857  8.7755102   8.97959184  9.18367347  9.3877551   9.59183673
  9.79591837 10.        ]


In [17]:
# eye : Creates an identity matrix
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [19]:
# Random : Numpy also has lots of ways to create random number arrays
# rand : Create an array of the given shape and populate it with random samples from a uniform distribution over [0, 1)
print(np.random.rand(2))
print(np.random.rand(5,5))

[0.62210313 0.78363364]
[[0.1175341  0.29519089 0.36774883 0.11544592 0.94785798]
 [0.70868902 0.74466778 0.38549443 0.17103113 0.69463217]
 [0.76818586 0.53730692 0.85944874 0.2345959  0.7129761 ]
 [0.46052787 0.89516573 0.62474162 0.38002253 0.35480015]
 [0.49173879 0.52105468 0.06692618 0.86422183 0.47563516]]


In [37]:
# randn : Return a sample (or samples) from the "standard normal distribution". Unlike rand which is uniform
# randn() can be both positive and negative
print(np.random.randn(2))
print(np.random.randn(5, 5))

[-0.00612352 -1.31462001]
[[ 1.26738818 -1.13132519 -0.63867349 -1.30524209 -0.73852453]
 [-0.78224532  0.30444662  0.26584211 -1.49547954 -1.03836932]
 [-0.08742542 -1.06850143 -2.03951743 -0.49023537  0.30926613]
 [ 1.85707113  0.11752059  0.96562222  0.58070417 -1.19692949]
 [ 0.47782467  0.73928491  1.22183759 -1.69620299  1.09290026]]


In [None]:
# randint : Return random integers from low (inclusive) to high (exclusive).
print(np.random.randint(1, 100))
print(np.random.randint(1, 100))
print(np.random.randint(1, 100, 10))  # return 10 numbers between 1 and 100, including 1

43
21
[17 75 54  2 58 11 34 62 28 18]


In [67]:
# an array from 0 to 25
arr = np.arange(25)
print(arr)

ranarr = np.random.randint(1, 100, 10)
print(ranarr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24]
[24 36 13 30  1 27 17 20 50 38]


In [54]:
# Reshape : Returns an array containing the same data with a new shape.
# turn arr into a 5x5 matrix array
arr.reshape(5, 5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [68]:
# max, min, argmax, argmin
# These are useful methods for finding max or min values. Or to find their index locations using argmin or argmax
print(ranarr)
print(ranarr.max())
# location of max element (start from zero)
print(ranarr.argmax())

print(ranarr.min())
print(ranarr.argmin())

[24 36 13 30  1 27 17 20 50 38]
50
8
1
4


In [74]:
# shape is an attribute that arrays have
print(arr.shape)
# notice the two sets of brackets
print(arr.reshape(1, 25))
print(arr.reshape(25,1))
print(arr.reshape(25, 1).shape)

(25,)
[[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
  24]]
[[ 0]
 [ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]]
(25, 1)


In [75]:
# dtype : grab the datatype of the array
arr.dtype

dtype('int32')

In [79]:
# numpy Indexing and Selection
arr = np.arange(0, 11)
print(arr)
# bracket indexing and selection
# get a value at index 8
print(arr[8])
# get a value in a range (5 is not included)
print(arr[1:5])
print(arr[0:5])

[ 0  1  2  3  4  5  6  7  8  9 10]
8
[1 2 3 4]
[0 1 2 3 4]


In [81]:
# Broadcasting : numpy arrays differ from normal python lists because of their ability to broadcast
# Setting a value with index range (Broadcasting) sets every element from 0 to 5 to be 100
arr[0:5]=100
print(arr)

[100 100 100 100 100   5   6   7   8   9  10]


In [86]:
arr = np.arange(0, 11)
slice_of_arr = arr[0:6]
print(slice_of_arr)

slice_of_arr[:] = 99
print(slice_of_arr)
# note that the changes also occur in the original array
print(arr)

[0 1 2 3 4 5]
[99 99 99 99 99 99]
[99 99 99 99 99 99  6  7  8  9 10]


In [87]:
# normally data is not copied, it just points to original array, to avoid this, use copy
arr_copy = arr.copy()
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [95]:
# Indexing a 2D array (matrix)
# The general format is arr_2d[row][col] or arr_2d[row, col]
arr_2d = np.array([[5,10,15],[20,25,30],[35,40,45]])
print(arr_2d)
# indexing row
print(arr_2d[1])
# grtting individual element value
print(arr_2d[1][2])
print(arr_2d[1,2])

# 2d slicing, shape (2,2) from top right corner
print(arr_2d[:2, 1:])
# shape bottom row
arr_2d[2]

[[ 5 10 15]
 [20 25 30]
 [35 40 45]]
[20 25 30]
30
30
[[10 15]
 [25 30]]


array([35, 40, 45])

In [110]:
# Fancy indexing : allows you to select entire rows or columns out of order

# setup a matrix
arr = np.zeros((10,10))
print(arr)
# length of array
print(arr.shape)
# coloumn length
print(arr.shape[1])
# setup array
for i in range(arr.shape[1]):
    # every line in this array will have same number as its row number => we are using broadcasting here
    arr[i] = i

print(arr)

# fancy indexing : give user the rows that all this elements are 2,4,...
print(arr[[2,4,6,8]])
# allows in any order
print(arr[[5, 0, 3]])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
(10, 10)
10
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]
 [4. 4. 4. 4. 4. 4. 4. 4. 4. 4.]
 [5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
 [6. 6. 6. 6. 6. 6. 6. 6. 6. 6.]
 [7. 7. 7. 7. 7. 7. 7. 7. 7. 7.]
 [8. 8. 8. 8. 8. 8. 8. 8. 8. 8.]
 [9. 9. 9. 9. 9. 9. 9. 9. 9. 9.]]
[[2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
 [4. 4. 4. 4. 4. 4. 4. 4. 4. 4.]
 [6. 6. 6. 6. 6. 6. 6. 6. 6. 6.]
 [8. 8. 8. 8. 8. 8. 8. 8. 8. 8.]]
[[5. 5. 5. 5. 5. 5. 5. 5. 5. 5.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]


In [111]:
# selection
# lets go over how to use brackets for selection based off of comparison operators
arr = np.arange(1, 11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [114]:
print(arr > 4)

bool_arr = arr > 4
print(bool_arr)
# return all values that have True value (are bigger than 4)
print(arr[bool_arr])

[False False False False  True  True  True  True  True  True]
[False False False False  True  True  True  True  True  True]
[ 5  6  7  8  9 10]


In [117]:
print(arr[arr>2])

x = 3
print(arr[arr > x])

[ 3  4  5  6  7  8  9 10]
[ 4  5  6  7  8  9 10]


In [125]:
# Numpy Operations
# Arithmetic : You can easily perform array with array arithmetic, or scalar with array arithmetic.
arr = np.arange(0, 10)
print(arr)
print(arr + arr)
print(arr * arr)
print(arr - arr)
# warning on division by zero, it will replace 0/0 by nan value
print(arr / arr)
# 1/0 is infinite
print(1 / arr)
# to the power of 3
print(arr**3)

[0 1 2 3 4 5 6 7 8 9]
[ 0  2  4  6  8 10 12 14 16 18]
[ 0  1  4  9 16 25 36 49 64 81]
[0 0 0 0 0 0 0 0 0 0]
[nan  1.  1.  1.  1.  1.  1.  1.  1.  1.]
[       inf 1.         0.5        0.33333333 0.25       0.2
 0.16666667 0.14285714 0.125      0.11111111]
[  0   1   8  27  64 125 216 343 512 729]


  print(arr / arr)
  print(1 / arr)


In [129]:
# Universal Array Functions
# Numpy comes with many universal array functions, which are essentially just mathematical operations you can use 
# to perform the operation across the array.
print(np.sqrt(arr)) # square root
print(np.exp(arr)) # exponential (e^)
print(np.max(arr)) # same as arr.max()
print(np.sin(arr))
print(np.log(arr)) # logarithm to the power of 2

[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]
[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]
9
[ 0.          0.84147098  0.90929743  0.14112001 -0.7568025  -0.95892427
 -0.2794155   0.6569866   0.98935825  0.41211849]
[      -inf 0.         0.69314718 1.09861229 1.38629436 1.60943791
 1.79175947 1.94591015 2.07944154 2.19722458]


  print(np.log(arr)) # logarithm to the power of 2


In [130]:
# create an array of 10 fives
x = np.ones(10)
x[:] = 5
x

array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])

In [131]:
# create an array of the integers from 10 to 50
np.arange(10, 51)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
       27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
       44, 45, 46, 47, 48, 49, 50])

In [133]:
# create an array of all even integers from 10 to 50
print(np.arange(10, 51, 2))
print(np.arange(0.01, 1.01, 0.01).reshape(10, 10))

[10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50]
[[0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1 ]
 [0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19 0.2 ]
 [0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.3 ]
 [0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.4 ]
 [0.41 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 0.5 ]
 [0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59 0.6 ]
 [0.61 0.62 0.63 0.64 0.65 0.66 0.67 0.68 0.69 0.7 ]
 [0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.8 ]
 [0.81 0.82 0.83 0.84 0.85 0.86 0.87 0.88 0.89 0.9 ]
 [0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.  ]]


In [137]:
# create an array of 10 linearly spaced points between 0 and 1
np.linspace(0, 1, 10)

array([0.        , 0.11111111, 0.22222222, 0.33333333, 0.44444444,
       0.55555556, 0.66666667, 0.77777778, 0.88888889, 1.        ])

In [149]:
# .ravel() in NumPy is a method that returns a flattened (1D) view of an array.
a = np.array([[1, 2], [3, 4]])
print(a)
flat = a.ravel()
print(flat)

[[1 2]
 [3 4]]
[1 2 3 4]


In [None]:
# given a 9x9 matrix of sudoku numbers, check if it has the correct formatting?
import numpy as np

np_sudoku = np.random.randint(1, 9, (9,9))
print(np_sudoku)
print(np_sudoku.shape[0])
print(np_sudoku[:, 2])  # third column selected
print(np_sudoku[:, 5])  # sixth column selected

print(np_sudoku[3, :]) # fourth row selected

def sudoku_checker(numpy_arr):
    s = {1,2,3,4,5,6,7,8,9}

    for i in range(numpy_arr.shape[0]):  
        # It loops through each row of the Sudoku board. it should have only 1 to 9
        if set(numpy_arr[i, :]) != s:  
            return False
        
    for j in range(numpy_arr.shape[1]):
        # It loops through each column of the Sudoku board. it should have only 1 to 9
        if set(numpy_arr[:, i]) != s:
            return False
        
    # each 9 values in a 3x3 section of the sudoku matrix should also contain all values 1-9
    x = 0
    while x < 9:
        y = 0
        while y < 9:
            if set(numpy_arr[x:x+3, y:y+3].ravel()) != s:
                return False
            y += 3
        x += 3

sudoku_checker(np_sudoku)

count= 0
while True:
    count += 1
    print(count)
    np_sudoku = np.random.randint(1, 9, (9,9))
    res = sudoku_checker(np_sudoku)
    if res:
        print(np_sudoku)
        break

[[5 1 4 8 3 5 7 8 1]
 [3 4 1 1 6 1 1 6 3]
 [6 8 7 2 3 1 1 1 1]
 [1 8 2 5 2 8 7 1 3]
 [3 2 3 2 1 4 6 5 8]
 [5 8 3 5 6 7 1 4 4]
 [8 1 4 1 2 7 8 7 1]
 [1 3 5 5 7 5 5 1 5]
 [4 4 4 1 3 7 7 7 4]]
9
[4 1 7 2 3 3 4 5 4]
[5 1 1 8 4 7 7 5 7]
[1 8 2 5 2 8 7 1 3]


False

In [166]:
# How do I create an empty array/matrix in NumPy?
'''
You have the wrong mental model for using NumPy efficiently. NumPy arrays are stored in contiguous blocks of memory. If you want to add rows or columns to an existing array, 
the entire array needs to be copied to a new block of memory, creating gaps for the new elements to be stored. This is very inefficient if done repeatedly to build an array.
In the case of adding rows, your best bet is to create an array that is as big as your data set will eventually be, and then assign data to it row-by-row:
'''

a = np.zeros(shape=(5,2))
print(a)
# add data to array
a[0] = [1, 2]
a[1] = [2, 3]

print(a)

[[0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]
[[1. 2.]
 [2. 3.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


In [169]:
# 'numpy.ndarray' object has no attribute 'index'
v = np.random.randn(10)
print(v)
max = np.max(v)
min = np.min(v)
print(max, min)

# this is the part that throws the error
# v.index(maximum, minimum) 

# solution
max_index = np.where(v == max)
min_index = np.where(v == min)
print(max_index, min_index)

[-1.49609598  0.94183558 -1.1795027  -1.52290805 -0.44427213  0.30490193
  1.3279674   0.80825202  0.73217291 -0.66263037]
1.3279674034674398 -1.5229080515304272
(array([6], dtype=int64),) (array([3], dtype=int64),)
