In [2]:
# References:
# Quickstart tutorial: https://docs.scipy.org/doc/numpy/user/quickstart.html
# Docs - Numpy Reference: https://docs.scipy.org/doc/numpy/reference/index.html
# Stanford CS Class (Justin Johnson): http://cs231n.github.io/python-numpy-tutorial/
# Datacamp cheat sheet: https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Numpy_Python_Cheat_Sheet.pdf

In [3]:
# Imports
import random as rand
import numpy as np
np.__version__

'1.17.3'

In [8]:
# NumPy
# Array object - homogenous (same type of element) multidimensional array (table)
# Sophisticated functions
# Linear algebra

In [9]:
# Life without arrays
rand.seed(22)
names = ['Krista', 'Jose', 'Arun', 'Marek', 'Joe']
heights = [rand.randint(150,200) for i in names]
ages = [rand.randint(18,80) for i in names]
print(names)
print(heights)
print(ages)

['Krista', 'Jose', 'Arun', 'Marek', 'Joe']
[158, 165, 151, 189, 178]
[29, 62, 25, 65, 59]


In [10]:
# Multiply age by 2
def elementwise_multiplication(my_list, var):
    new = []
    for element in my_list:
        new.append(element*var)
    return new

age_by_two = elementwise_multiplication(ages, 2)
print(age_by_two)   

# With numpy
print(np.array(ages)*2)

[58, 124, 50, 130, 118]
[ 58 124  50 130 118]


In [11]:
# NumPy also facilitates faster operations when large datasets are involved because NumPy uses vectorized operations
# No loops necessary

basic_python = range(10000)
numpy_fun = np.arange(10000)

% timeit elementwise_multiplication(basic_python,500)
% timeit numpy_fun*500

# milliseconds (ms), microseconds (us)

UsageError: Line magic function `%` not found.


In [12]:
# Create an array
a = np.array([1, 2, 3])   # Create a rank 1 array
print(a)
print(type(a)) 

[1 2 3]
<class 'numpy.ndarray'>


In [13]:
# List-like functionality
print(a[0]) # Indexing
a[0] = 100 # Replacing values
print(a)

1
[100   2   3]


In [14]:
# iterating over values
for i in a:
    print(i)

100
2
3


In [15]:
print(a.shape)  # View the shape  
print(len(a))

(3,)
3


In [16]:
# Multidimensional array
m = np.array([[1,2,3], [4,5,6]])
print(m)
print()

print(m.shape) # (rows, columns) 

[[1 2 3]
 [4 5 6]]

(2, 3)


In [17]:
# Creating different types of multidimensional arrays
a = np.zeros((2,2))   # Create an array of all zeros
print(a)                   

[[0. 0.]
 [0. 0.]]


In [18]:
b = np.ones((2,2))    # Create an array of all ones
print(b)  
c = np.full((2,2), 7)  # Create a constant array
print(c)          

[[1. 1.]
 [1. 1.]]
[[7 7]
 [7 7]]


In [19]:
d = np.eye(2,2)         # Create a 2x2 identity matrix
print(d)              
e = np.random.random((2,2))  # Create an array filled with random values
print(e)      

[[1. 0.]
 [0. 1.]]
[[0.66529586 0.34973539]
 [0.96205144 0.7486022 ]]


In [20]:
# Creating 1 dimensional arrays
f = np.arange(10) # Creates 1D array - numbers 0 to 10
print(f)
g = np.arange(2,11,2) # (start, end, step)
print(g)
h = np.repeat(22,5) # Create 1D array of 22s 
print(h)

[0 1 2 3 4 5 6 7 8 9]
[ 2  4  6  8 10]
[22 22 22 22 22]


In [21]:
i = np.linspace(0, 2, 3) # 3 numbers from 0 to 2, evenly spaced
print(i)

[0. 1. 2.]


In [22]:
# Viewing an array
a = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print(a) 
print()
print(a.shape) #(rows, columns) - axes
print()
print(a.ndim) # Dimensions = length of shape 
print()
print(a.size) # Number of elements in array (product of shape)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]

(3, 4)

2

12


In [23]:
# A three-dimensional array
three_d = np.array([[[1],[2],[3],[4]],[[5],[6],[7],[8]]])
print(three_d)
print(three_d.shape)
print(three_d.ndim)

[[[1]
  [2]
  [3]
  [4]]

 [[5]
  [6]
  [7]
  [8]]]
(2, 4, 1)
3


In [24]:
# Indexing multidimensional arrays
A = np.array([[1,2,3,4], [5,6,7,8], [9,10,11,12]])
print(A)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [25]:
# Get first two columns of first row
print(A[0, :2]) # [index rows, index columns]
print()

[1 2]



In [26]:
# Get first two columns of first two rows
print(A[:2, :2])
print()

[[1 2]
 [5 6]]



In [27]:
# Get last column 
print(A[:,-1])

[ 4  8 12]


In [28]:
# Last row
print(a[-1,:])
print(a[-1]) # Assumes all columns

[ 9 10 11 12]
[ 9 10 11 12]


In [29]:
# Iterating over a multidimensional array is row-based
for row in a:
    print(row)
# (use a.flat to iterate over each element)

[1 2 3 4]
[5 6 7 8]
[ 9 10 11 12]


In [30]:
### Exercise ###
# Create a 3x3 identity matrix
# Guess the size of the 3x3 matrix and then verify your guess
i3 = np.eye(3,3) 
print(i3.size)

9


In [40]:
# Identify two different ways to get a one-dimensional array with the values [1,0,0]
# Check that both of the arrays you created are in fact one dimensional
print(i3[:,0])
print(i3[0,:])
print((np.array([1,0,0])).ndim)

[1. 0. 0.]
[1. 0. 0.]
1


In [41]:
# Create an array using the heights and ages lists defined at the beginning 
# of this notebook, call it "my_array"
# Guess the shape and dimensions of the array, check using numpy
# Select Arun's information only
my_array = np.array([heights,ages])
print(my_array)
arun_index=names.index('Arun')
print(my_array[:,arun_index])
for i in range(len(names)):
    if names[i] == 'Arun':
        print(my_array[:,i])

[[158 165 151 189 178]
 [ 29  62  25  65  59]]
[151  25]
[151  25]


In [68]:
# Boolean indexing
bool_id=a>6
print(bool_id)
print()

print(a[bool_id])
print()
print(a[a>6])

[[False False False False]
 [False False  True  True]
 [ True  True  True  True]]

[ 7  8  9 10 11 12]

[ 7  8  9 10 11 12]


In [42]:
# Datatypes 
print(A.dtype)

int32


In [108]:
float_list = [1.,2.]
print(np.array(float_list).dtype)

print(np.array(float_list, dtype='int32')) # Force a certain type
print(np.array(float_list, dtype='int32').dtype) # Force a certain type

float64
[1 2]
int32


In [109]:
print(np.array(['a','b','c']).dtype) # Check type

print(np.array([1.,'a', 2])) # Unklike lists, the elements of a NumPy array must be the same type

<U1
['1.0' 'a' '2']


In [51]:
# Exercise:
# Using "my_array" defined in the last exercise
# Select only ages where height is above 160 cm
# Print the current datatype
# Convert the datatype to float (hint: astype())
my_array[1,my_array[0]>160]

array([62, 65, 59])

In [112]:
# Arithmetic operators - apply elementwise!
A = np.array([[4,6],[8,10]], dtype=np.float64)
B = np.array([[1,1],[2,2]], dtype=np.float64)

print(A)
print(B)
print()

print(A - B)  # Elementwise subtraction
print(np.subtract(A,B)) 
print()

print(A + B) # np.add()

[[ 4.  6.]
 [ 8. 10.]]
[[1. 1.]
 [2. 2.]]

[[3. 5.]
 [6. 8.]]
[[3. 5.]
 [6. 8.]]

[[ 5.  7.]
 [10. 12.]]


In [113]:
print(A * B) # np.multiply() (broadcasting)
print()

print(A / B) #np.divide() 
print()


[[ 4.  6.]
 [16. 20.]]

[[4. 6.]
 [4. 5.]]

[[2. 3.]
 [4. 5.]]



In [114]:
print(A/2)
print()

print(A*2)

[[2. 3.]
 [4. 5.]]

[[ 8. 12.]
 [16. 20.]]


More on broadcasting...

The basics:
* Two matrices can be broadcast (multiplied, added, divided elementwise) if they have compatible dimensions
* Dimensions are compatible if they are equal or if one of them is 1
* The arrays do not have to have the same number of dimensions (example below)

Learn more here:
* https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html

* http://cs231n.github.io/python-numpy-tutorial/#numpy-broadcasting


In [117]:
print(A)
one_d = np.array([1,2])
print(one_d.shape)

print(A * one_d)

[[ 4.  6.]
 [ 8. 10.]]
(2,)
[[ 4. 12.]
 [ 8. 20.]]


In [119]:
len_4 = np.arange(4)
len_5 = np.arange(5)

print(len_4, len_5)

[0 1 2 3] [0 1 2 3 4]


In [125]:
len_4 + len_5

ValueError: operands could not be broadcast together with shapes (4,) (5,) 

In [122]:
four_one = len_4.reshape(4, 1)
print(four_one)
print(four_one.shape)

[[0]
 [1]
 [2]
 [3]]
(4, 1)


In [126]:
print(four_one + len_5)

[[0 1 2 3 4]
 [1 2 3 4 5]
 [2 3 4 5 6]
 [3 4 5 6 7]]


In [127]:
print(np.sqrt(A))
print()

print(np.sin(A))

[[2.         2.44948974]
 [2.82842712 3.16227766]]

[[-0.7568025  -0.2794155 ]
 [ 0.98935825 -0.54402111]]


In [128]:
print(np.log(A))
print()

print(np.exp(np.log(A)))
print()


[[1.38629436 1.79175947]
 [2.07944154 2.30258509]]

[[ 4.  6.]
 [ 8. 10.]]



In [74]:
# Aggregation functions
print(A)
print()

print(A.sum()) # sums all elements together
print(np.sum(A))

print(np.sum(A, axis=0)) # sums down each column
print()

print(np.mean(A))
print()

print(np.round(np.percentile(A,.25),2))

[[ 4.  6.]
 [ 8. 10.]]

28.0
28.0
[12. 16.]

7.0

4.02


In [64]:
# Exercise:
# Using "my_array"
# Tell me the mean of both heights and ages in one line of code
# Tell me the square root of Arun's age and height
# Subract 1 from each height and 2 from each age
# Find the log of all heights and ages
print(my_array)
print(np.mean(my_array, axis=1))
arun_index=names.index('Arun')
print(np.sqrt(my_array[:,arun_index]))
print(np.array([my_array[0]-1, my_array[1]-2]))
np.log(my_array)

[[158 165 151 189 178]
 [ 29  62  25  65  59]]
[168.2  48. ]
[12.28820573  5.        ]
[[157 164 150 188 177]
 [ 27  60  23  63  57]]


array([[5.06259503, 5.10594547, 5.01727984, 5.24174702, 5.18178355],
       [3.36729583, 4.12713439, 3.21887582, 4.17438727, 4.07753744]])

In [79]:
# Matrix multiplication
a = np.array([1,2])
b = np.array([2,10])
print(np.dot(a,b)) # Dot product of two vectors
print(a.dot(b))

22
22


In [80]:
print(A)
print(B)
print()
print(A.dot(B)) # Matrix product
print(A @ B)  # Note difference from A*B

[[ 4.  6.]
 [ 8. 10.]]
[[1. 1.]
 [2. 2.]]

[[16. 16.]
 [28. 28.]]
[[16. 16.]
 [28. 28.]]


In [81]:
a.dot(B)

array([5., 5.])

In [82]:
# Exercise:
# Verify the above by hand

In [83]:
A

array([[ 4.,  6.],
       [ 8., 10.]])

In [84]:
# Reshaping arrays
A.T # Transpose rows->columns, columns->rows

array([[ 4.,  8.],
       [ 6., 10.]])

In [85]:
A.ravel() # Flatten

array([ 4.,  6.,  8., 10.])

In [86]:
# Reshape the array to have one column and four rows
A.reshape(4,1) 

array([[ 4.],
       [ 6.],
       [ 8.],
       [10.]])

In [87]:
print(A) # original array is not modified

[[ 4.  6.]
 [ 8. 10.]]


In [88]:
# Convert back to list(s)
A.tolist() 

[[4.0, 6.0], [8.0, 10.0]]

In [89]:
# Sorting
a=np.array([3,1,5,2,4])
sort_i=np.argsort(a)
print(sort_i)
print(a[sort_i])

[1 3 0 4 2]
[1 2 3 4 5]


In [86]:
# Exercise
# Sort "my_array" by height
# Create a new array that has the information for each person (height, age) in rows instead of columns
# Select Arun's information from the new array

my_array[:,np.argsort(my_array[0])]
new_arr = my_array.T
arun_index=names.index('Arun')
new_arr[arun_index,:]

array([151,  25])

In [91]:
# Create a deep copy of a NumPy array
new_array = my_array.copy()
new_array.shape

(2, 5)

In [92]:
# Demo of modifying slices
TEST = np.full((3,3),7)
print(TEST)
slice_test = TEST[0]
slice_test[0]=50
print()
print(TEST)
print()
print(slice_test)

[[7 7 7]
 [7 7 7]
 [7 7 7]]

[[50  7  7]
 [ 7  7  7]
 [ 7  7  7]]

[50  7  7]


In [93]:
# Exercise:
# After creating a copy of my_array, alter the new array
# Check that my_array hasn't changed and that the new array has
