### NumPy

* Numeric Array/Linear Algebra Library for Python. Almost all of the libraries in the NumFOCUS and PyData ecosystems rely on NumPy
* Will be directly useful in specific situations where dealing with number arrays require more efficiency, but other libraries try to use it underneath anyway
* Crucial building block in supporting **vectorized** (or numerical array-based) operations
* Sentiments about **loops being evil** have merit but may be overboard
* Libraries are loaded using the import command


In [1]:
# import numpy as np

import numpy as np


### Create NumPy Arrays

#### List Conversion

In [5]:
# Define the list
my_list = [10, 20, 30, 40]
print(type(my_list))

# Call np.array()
my_arr = np.array(my_list)
print(my_arr)
print(type(my_arr))

# Dump in interactive mode
my_arr


<class 'list'>
[10 20 30 40]
<class 'numpy.ndarray'>


array([10, 20, 30, 40])

#### 2D List Conversion

In [9]:
# define 2D List
my_list = [[1,2,3],[4,5,6],[7,8,9]]
print(my_list)


# define "matrix"
my_mat = np.array(my_list)
print(my_mat)
print(type(my_mat))




[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
<class 'numpy.ndarray'>


#### Built-In methods

In [14]:
# arange
# Return evenly spaced values within a given interval
# np.arange(start*, end, interval*)
# like slicing, end is exclusive

print(np.arange(20))
print(type(np.arange(20)))
print(np.arange(1,10))
print(np.arange(0,11,2))




[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
<class 'numpy.ndarray'>
[1 2 3 4 5 6 7 8 9]
[ 0  2  4  6  8 10]


In [19]:
# zeroes
# generates arrays of zeros

print(np.zeros(3))
print("------")
print(np.zeros((3,3)))
print("------")
print(np.zeros((3,1)))
print("------")
print(np.zeros((1,3)))



[0. 0. 0.]
------
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
------
[[0.]
 [0.]
 [0.]]
------
[[0. 0. 0.]]


In [20]:
# ones
# generates arrays of ones

print(np.ones(3))
print("------")
print(np.ones((3,3)))
print("------")
print(np.ones((3,1)))
print("------")
print(np.ones((1,3)))





[1. 1. 1.]
------
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]
------
[[1.]
 [1.]
 [1.]]
------
[[1. 1. 1.]]


In [22]:
# eyes
# generates the nxn identity matrix

print(np.eye(3))
print(np.eye(4))
print(np.eye(5))





[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [23]:
# linspace
# Return evenly spaced numbers over a specified interval
# np.linspace(start, end, interval)
# UNLIKE slicing, end is INCLUSIVE

#[0 5 10]

print(np.linspace(0,10,3))

[ 0.  5. 10.]


#### Random

Numpy also has lots of ways to create random number arrays:

* `random.rand`
* `random.randn`
* `random.randint`
* `random.normal`

Difference between **rand** and **randn**


![image.png](attachment:image.png)

![image.png](attachment:image.png)

In [26]:
# rand
# Creates an array of the given shape and populate it with random samples from a uniform distribution over [0, 1]

# np.random.rand(shape*)
print(np.random.rand(3))
print(np.random.rand(3).dtype)
print(np.random.rand(5,5))

 

[0.86789489 0.54455636 0.92727726]
float64
[[0.38378656 0.6682259  0.75821928 0.95721734 0.15333678]
 [0.33959076 0.87645783 0.62139393 0.72798137 0.34943297]
 [0.50234037 0.75553065 0.39267838 0.50742306 0.87954221]
 [0.53886156 0.57999522 0.97412751 0.64321603 0.95888794]
 [0.2790987  0.29966339 0.14237336 0.49631157 0.10202688]]


In [27]:
# randn
# Return samples from the "standard normal" distribution, aka a Gaussian distribution
# syntax: np.random.randn(shape*)
print(np.random.randn(3))
print(np.random.randn(3).dtype)
print(np.random.randn(5,5))




[-1.60072266  1.38059442  0.09832857]
float64
[[-1.65051022  1.57884778  0.89649864 -1.83029839 -1.34298422]
 [-0.08484189  1.04589264 -0.59597191  1.23407458 -0.44224625]
 [-0.8460611  -1.58364314 -0.06456501  2.20751566  0.15551448]
 [ 0.55775199  0.24500007  0.12397148 -0.91456691  0.67481038]
 [-0.84975208 -0.44807009 -0.29928673 -0.13615446 -1.17116931]]


In [30]:
# normal
# syntax: np.random.normal(loc=mean,scale=stddev,size=shape)

print(np.random.normal(165,5))
print("------")
print(np.random.normal(165,5,10))
print("------")
print(np.random.normal(165,5,(10,2)))
print("------")



175.50859954108725
------
[171.08950069 158.55615494 164.92581958 171.78579065 165.17927732
 166.73587703 157.49090041 169.52086866 161.28734857 159.49874841]
------
[[175.06515137 159.78094372]
 [167.05042534 168.9318581 ]
 [163.42566882 166.49590493]
 [174.24471753 171.52257726]
 [166.16924579 167.04809853]
 [173.43758896 158.63733383]
 [170.64178343 166.32442755]
 [169.94578011 164.31348363]
 [173.65951623 158.802661  ]
 [164.0940315  162.1265979 ]]
------


### Useful NumPy Commands

**attributes**  
shape  
dtype  

**Methods**  
reshape  
argmax  
argmin  


In [40]:
## 2D Indexing and Slicing
#Accessing or indexing elements in the 2d array may be done in various ways:

# use my_mat
print(my_mat)

#index a row:
#arr[<row>]
print("------")
print(my_mat[0])
print(my_mat[2])


#index an individual element:
#arr[<row>, <col>]  # arr[2, 3]
print("------")
print(my_mat[1,1])

#2D array slicing
#arr[<rstart:rend>, <cstart:cend>]
print("------")
print(my_mat[0:1,1:2])
print("------")
print(my_mat[0:1,1:])
print("------")
print(my_mat[0:,1:])








[[1 2 3]
 [4 5 6]
 [7 8 9]]
------
[1 2 3]
[7 8 9]
------
5
------
[[2]]
------
[[2 3]]
------
[[2 3]
 [5 6]
 [8 9]]


### Combining comparison operators on square brackets can select/filter arrays



In [44]:
# define new array

arr = np.arange(1,6)

print(arr)
print(arr > 2)
print( (arr > 2).dtype )
print("------")
print(arr[arr > 2])




[1 2 3 4 5]
[False False  True  True  True]
bool
------
[3 4 5]


### Simple Use Case: Batch Grading

Let's simulate a scoring distribution of 200 students with mean of 85 and a standard deviation of 3.

In [51]:
# import numpy as np
import numpy as np

# generate random scores following a normal distribution
raw_scores = np.random.normal(85,3,200)
print(raw_scores)
# round scores
raw_scores = raw_scores.round()
print(raw_scores)

# check datatype
print(raw_scores.dtype)

# convert to int

raw_scores = raw_scores.astype(int)
print(raw_scores)
print(raw_scores.dtype)
# inspect new array


[82.68510974 80.63029406 84.64958986 85.39728292 90.65959639 84.64062098
 88.2604224  84.21430688 88.99333789 86.03664348 82.86760199 83.76576065
 89.17557207 81.0505369  89.35130346 82.69143974 82.01020001 84.31409935
 84.98938598 88.2435959  86.73756626 86.75745984 81.34922362 89.36810131
 75.52005984 85.52495449 87.57272052 85.53256275 84.19877864 91.02317007
 90.8469755  85.63624989 88.0280989  81.22793549 87.4187403  83.45128021
 86.21243635 78.7725303  83.85724118 83.01243008 85.4649477  80.47502666
 84.96289929 83.3400078  87.16938087 86.27863557 88.51622093 88.477571
 83.69912148 79.89310946 86.74964422 89.40374601 85.7120405  84.32940673
 84.90455851 88.65695288 86.1344579  81.16458251 79.69239417 82.71138795
 83.1651849  81.56492308 80.69532949 87.50592366 83.09900778 91.50023567
 87.62556084 80.55832703 87.03537531 82.48849537 92.01878165 88.30008748
 81.55100482 89.05823783 82.68695389 80.70770602 85.65036588 83.60461352
 89.32035888 81.01800095 82.66126878 88.58691327 87.4

### Next, determine who passed or failed

In [54]:
# Pass or fail?

passing_score = 60
print(raw_scores >= passing_score)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  T

#### Translate to letter grade equivalent

In [58]:
# feel free to copy function definition from Canvas page and paste here
# or type from scratch if you prefer
def get_letter_grade(grade):
    if grade >=92 and grade <= 100:
        return("A")
    elif grade >=86 and grade <= 91:
        return("B+")
    elif grade >=80 and grade <= 85:
        return("B")
    elif grade >=74 and grade <= 79:
        return("C+")
    elif grade >=67 and grade <= 73:
        return("C")
    elif grade >=60 and grade <= 66:
        return("D")
    elif grade >=0 and grade <= 59:
        return("F")

# test
print(get_letter_grade(60))

D


#### vectorize with a function

In [67]:
# vectorize a function call

print(raw_scores)
vfunc = np.vectorize(get_letter_grade)
print(vfunc(raw_scores))
print(raw_scores[50:60])
print(vfunc(raw_scores[50:60]))

a_scores = raw_scores[raw_scores >= 92]
print(a_scores)
print(vfunc(a_scores))

[83 81 85 85 91 85 88 84 89 86 83 84 89 81 89 83 82 84 85 88 87 87 81 89
 76 86 88 86 84 91 91 86 88 81 87 83 86 79 84 83 85 80 85 83 87 86 89 88
 84 80 87 89 86 84 85 89 86 81 80 83 83 82 81 88 83 92 88 81 87 82 92 88
 82 89 83 81 86 84 89 81 83 89 87 76 85 85 81 88 87 82 84 85 82 84 85 85
 81 85 91 83 90 81 85 88 82 87 85 86 86 80 85 84 82 86 86 87 84 79 89 88
 83 85 91 80 82 85 88 86 88 89 86 84 90 84 93 84 85 90 84 86 86 84 81 83
 89 80 86 90 82 86 89 88 85 82 81 89 85 86 82 87 75 80 83 85 90 81 90 87
 82 84 86 88 89 85 85 85 86 84 79 87 85 86 87 87 89 90 81 86 87 88 84 86
 78 82 85 87 84 84 86 84]
['B' 'B' 'B' 'B' 'B+' 'B' 'B+' 'B' 'B+' 'B+' 'B' 'B' 'B+' 'B' 'B+' 'B' 'B'
 'B' 'B' 'B+' 'B+' 'B+' 'B' 'B+' 'C+' 'B+' 'B+' 'B+' 'B' 'B+' 'B+' 'B+'
 'B+' 'B' 'B+' 'B' 'B+' 'C+' 'B' 'B' 'B' 'B' 'B' 'B' 'B+' 'B+' 'B+' 'B+'
 'B' 'B' 'B+' 'B+' 'B+' 'B' 'B' 'B+' 'B+' 'B' 'B' 'B' 'B' 'B' 'B' 'B+' 'B'
 'A' 'B+' 'B' 'B+' 'B' 'A' 'B+' 'B' 'B+' 'B' 'B' 'B+' 'B' 'B+' 'B' 'B'
 'B+' 'B+' 'C+' 'B' 'B' 

#### Additional Array Math

In [70]:
# Get the average grade
print("Average Score: ", raw_scores.mean())


# Get highest score
print("Highest Score: ", raw_scores.max())


# Get lowest score
print("Highest Score: ", raw_scores.min())



Average Score:  85.08
Highest Score:  93
Highest Score:  75


### Miscellaneous Queries

In [76]:
# Find all scores greater than or equal to 92
print(raw_scores[raw_scores >= 92])

# Count scores greater than or equal to 92
print(len(raw_scores[raw_scores >= 92]))

# Find all scores less than or equal to 85
print("------")
print(raw_scores[raw_scores <= 85])


# Count scores less than or equal to 85
print("------")
print(len(raw_scores[raw_scores <= 85]))


# What percentage of students scored less than or equal to 85?
scorecount = len(raw_scores[raw_scores <= 85])
allcount = len(raw_scores)
print("------")
print("Percentage of students who scored less than or equal to 85: ",(scorecount/allcount)*100,"%")


[92 92 93]
3
------
[83 81 85 85 85 84 83 84 81 83 82 84 85 81 76 84 81 83 79 84 83 85 80 85
 83 84 80 84 85 81 80 83 83 82 81 83 81 82 82 83 81 84 81 83 76 85 85 81
 82 84 85 82 84 85 85 81 85 83 81 85 82 85 80 85 84 82 84 79 83 85 80 82
 85 84 84 84 85 84 84 81 83 80 82 85 82 81 85 82 75 80 83 85 81 82 84 85
 85 85 84 79 85 81 84 78 82 85 84 84 84]
------
109
------
Percentage of students who scored less than or equal to 85:  54.50000000000001 %


In [77]:
109/200

0.545