# Week 4 Demo

## Creating 1D Numpy arrays 

In [2]:
import numpy as np

In [20]:
l1 = [1, 2, 3, 4, 5] # Plain Python lists, not numpy arrays
l2 = [6, 7, 8, 9, 10]
l1 + l2 # On lists, + concatenates rather than adding element by element; lists have no built-in vectorized operations

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

Some common numpy array creation functions:

In [24]:
array1 = np.array([1, 2, 3]) # Build a numpy array from a Python list
display(array1)
print(array1.dtype) # Numpy infers the dtype from the input values; a list of ints gives an integer dtype (int64 here)

array([1, 2, 3])

int64


In [30]:
array2 = np.array([1.05, 1, -3]) # Input mixes ints with one float
display(array2)
print(array2.dtype) # An array holds a single dtype, so one float in the input makes numpy store every value as a float

array([ 1.05,  1.  , -3.  ])

float64


In [40]:
empty = np.array([]) # An array with zero elements, e.g. as a starting point to initialize before a loop
print(empty)

[]


`np.arange()` syntax:

```python
np.arange(start (inclusive), stop (exclusive), step)
```

Notes:
- Only providing one input will automatically start at 0 and go up to (but not including) that input value by ones

`np.linspace()` syntax:

```python
np.linspace(start (inclusive), stop (inclusive), num)
```

Notes:
- Linspace requires at least start and stop inputs
- The third input is the number of points to generate (`num`), not the step size; if it is omitted, linspace returns 50 equally spaced values between start and stop

In [62]:
# Values 1 through 40 at intervals of 1: the third argument (num) is how many
# points to generate, not the spacing between them
np.linspace(1, 40, num=40)

array([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.,
       14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25., 26.,
       27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39.,
       40.])

In [34]:
array3 = np.arange(0, 10, 3) # start=0, stop=10, step=3 -> 0, 3, 6, 9 (the stop value itself is never included)
display(array3)

array4 = np.arange(20) # A single argument is the stop value: counts from 0 up to 19 by ones
display(array4)

array([0, 3, 6, 9])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [42]:
# np.zeros / np.ones build an array of the requested size filled with 0s / 1s
array5 = np.zeros(shape=4)
print(array5)

array6 = np.ones(shape=4)
print(array6)

# np.zeros_like builds an array of zeros with the same size as an existing
# array (here array6), which is handy when that array is very long and you
# do not want to look up or retype its length
array7 = np.zeros_like(array6)
print(array7)

[0. 0. 0. 0.]
[1. 1. 1. 1.]
[0. 0. 0. 0.]


In [43]:
x = np.array([1, 2, 3, 4, 5]) # OR x = np.array(l1)
y = np.array([6, 7, 8, 9]) # Only 4 elements, one fewer than x; same as np.array(l2[:4])
print(x)

[1 2 3 4 5]


In [8]:
# Join the two arrays end to end: the array counterpart of adding two lists with +
concat = np.concatenate([x, y])
print(concat)

[1 2 3 4 5 6 7 8 9]


## Vectorized Operations - Strength of Numpy

# POLLEV 1:
## What will happen when I run this block?

In [9]:
# x has 5 elements and y has 4, so element-wise addition is impossible and
# numpy raises a ValueError. Catch it and print the message so the error is
# still shown, but a Restart & Run All is not halted at this cell.
try:
    print(x + y)
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (5,) (4,) 

In [10]:
print(x.size) # .size is the total number of elements, as a plain integer
print(y.shape) # .shape is a tuple with the length along each dimension; for a 1D array its single entry is the element count

5
(4,)


In [72]:
y = np.array([6, 7, 8, 9, 10]) # Redefine y with 5 elements so it is the same size as x
a = x + y # Adds the two arrays element-wise (vectorization): a[i] = x[i] + y[i]
print(a)

[ 7  9 11 13 15]


### Mapping functions

In [66]:
# Mapping (element wise operations)
print(a*4)
print(a - 3)
print(3*(a**2) + 5/(a**(1/2)))
print(np.exp(a)) # same as e^(a)
print(np.log(a)) # natural log
print(np.sin(a)) # trig function sin

[28 36 44 52 60]
[ 4  6  8 10 12]
[148.88982237 244.66666667 364.50755672 508.38675049 676.29099445]
[1.09663316e+03 8.10308393e+03 5.98741417e+04 4.42413392e+05
 3.26901737e+06]
[1.94591015 2.19722458 2.39789527 2.56494936 2.7080502 ]
[ 0.6569866   0.41211849 -0.99999021  0.42016704  0.65028784]


### *A key difference between python lists and numpy arrays: You do not always need for loops for element-wise operations or stats!*

In [70]:
# Using just lists:
my_list = [-3, 5, -2, 1, -3, -6, 5]

# Absolute value the manual way: visit each element and flip the sign of the negatives
new_list = []
for value in my_list:
    new_list.append(-value if value < 0 else value)

print(new_list)

# VERSUS using numpy: one vectorized call, no loop needed
my_array = np.array(my_list)
new_array = np.abs(my_array)
display(new_array)


[3, 5, 2, 1, 3, 6, 5]


array([3, 5, 2, 1, 3, 6, 5])

## Numpy Indexing and Slicing

In [46]:
# Indexing
array = np.arange(0, 101, 10) # 0, 10, ..., 100 (stop is 101 so that 100 itself is included)
print(array)
print(array[3]) # Indices count from 0, so this is the 4th element
print(array[-2]) # Negative indices count back from the end: the second-to-last element
print(array[2:7:2]) # Slice [start:stop:step]: indices 2, 4, 6 (the stop index 7 is excluded)

[  0  10  20  30  40  50  60  70  80  90 100]
30
90
[20 40 60]


## Boolean indexing/slicing in Numpy

# PollEV 2
## How many "False" values (if any) will be in the array called mask?

In [80]:
data = np.array([25, 2, 26, 29, 22, 30, 24, 100, 27], dtype = float) # Say there is some temperature data
# that you know can't be outside a certain range
# The dtype is set to float because suspect values are replaced with NaN further down, and an integer array cannot hold NaN
mask = (data > 20) & (data < 100) # Boolean "mask": True where a value is strictly between 20 and 100, False otherwise
print(mask)

[ True False  True  True  True  True  True False  True]


In [81]:
print(data[mask]) # Indexing with a boolean mask keeps only the elements where the mask is True

[25. 26. 29. 22. 30. 24. 27.]


In [82]:
# ~mask flips the mask, so it is True exactly where a value failed the range check.
# Assigning through it overwrites only those suspect readings with NaN; the rest are untouched.
data[~mask] = np.nan
print(data)

[25. nan 26. 29. 22. 30. 24. nan 27.]


# Think Pair Share:
## Why is using "nan" for suspect values better than deleting them outright?

- Preserves data integrity (doesn't reduce statistical power, helps with bias)
- Propagates automatically through math operations, producing a nan result so you know there is a problem in your data
- Leaves room for simple interpolation

## Numpy statistics

In [16]:
avg = np.mean(data) # Take mean of 1D Numpy array; the NaNs in data make the result NaN
print(avg)

nan


In [48]:
# np.nanmean skips over the NaN entries and averages only the remaining values
avg = np.nanmean(data)
print(np.round(avg, 3))

26.143


In [18]:
# Named data_max / data_min rather than max / min so that Python's built-in
# max() and min() functions are not overwritten for the rest of the notebook.
data_max = np.nanmax(data) # Get maximum value of array while skipping NaNs
data_min = np.nanmin(data) # Get minimum value of array while skipping NaNs
print('Maximum is ', data_max)
print('Minimum is ', data_min)

Maximum is  30.0
Minimum is  22.0


### Full 1D Numpy data process example:

In [49]:
# Example data set of height measurements (units are not stated; presumably centimetres)
heights = np.array([
    177, 169, 182, 173, 171, 179, 179, 171, 175, 163, 
    180, 179, 179, 164, 173, 175, 177, 176, 181, 167, 
    173, 171, 175, 177, 181, 172, 178, 165, 174, 180, 
    173, 176, 180, 180, 179, 174, 175, 173, 179, 186, 
    175, 172, 175, 164, 176, 170, 174, 175, 178, 176
]) # My data

print(heights.size) # Number of measurements in the data set

50


In [63]:
# Person numbers 1 through 50 (linspace returns them as floats)
people = np.linspace(start=1, stop=50, num=50)
print(people)

[ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16. 17. 18.
 19. 20. 21. 22. 23. 24. 25. 26. 27. 28. 29. 30. 31. 32. 33. 34. 35. 36.
 37. 38. 39. 40. 41. 42. 43. 44. 45. 46. 47. 48. 49. 50.]


In [65]:
max_height = np.max(heights) # Find the maximum height (the value itself, not its position in the array)
print(max_height)

186


# PollEV 3
## If I want to find the **index** of the tallest height from the array heights, what numpy function do I use?

In [54]:
max_index = np.argmax(heights) # Finding index (0-based position) of maximum height
tallest_person = people[max_index] # Use that index to look up the matching entry in people
print(tallest_person) # The tallest person
print(max_index) # The index of the tallest person - one less than the person number, because indices start at 0 while people starts at 1
print(heights[max_index]) # Height of tallest person (can also just use heights.max() here)

40.0
39
186


In [40]:
# More numpy stats:
print(np.mean(heights)) # Mean
print(np.var(heights)) # Variance (numpy's default is the population form, ddof=0)
print(np.std(heights)) # Standard Deviation (square root of the variance above)
print(np.median(heights)) # Median
print(np.quantile(heights, 0.25)) # Quantile specified by decimal (0.25 = first quartile)
print(np.sum(heights)) # Sum

# Plus more!

174.92
23.433600000000002
4.840826375733796
175.0
173.0
8746


In [42]:
# np.sort returns a new array in ascending order and leaves unorg itself untouched
unorg = np.array([10, 12, 6, 1, 4, 100, 14, 169, 18, 65])
ascending = np.sort(unorg)
print(ascending)

[  1   4   6  10  12  14  18  65 100 169]
