# Week 4 Demo

## Creating 1D Numpy arrays 

In [None]:
import numpy as np

In [None]:
l1 = [1, 2, 3, 4, 5] # Plain Python lists, not NumPy arrays
l2 = [6, 7, 8, 9, 10]
l1 + l2 # On lists, + concatenates (giving one 10-element list) instead of adding element-wise; lists have no vectorized arithmetic

Some common numpy array creation functions:

In [None]:
array1 = np.array([1, 2, 3]) # Build a 1D array from a list of integers
display(array1)
print(array1.dtype) # NumPy infers the dtype from the input values; all-integer input gives an integer dtype

In [None]:
array2 = np.array([1.05, 1, -3]) # Input list mixes a float with integers
display(array2)
print(array2.dtype) # One float in the input is enough: NumPy stores every value as a float

In [None]:
empty = np.array([]) # Zero-element array, e.g. a starting point to initialize before a loop
print(empty) # Prints []

`np.arange()` syntax:

```python
np.arange(start (inclusive), stop (exclusive), step)
```

Notes:
- Only providing one input will automatically start at 0 and go up to (but not including) that input value by ones

`np.linspace()` syntax:

```python
np.linspace(start (inclusive), stop (inclusive), num)
```

Notes:
- Linspace requires at least start and stop inputs
- The third input, num, is the number of values to generate (not the step size)
- Will return 50 equally spaced values from start to stop if num is omitted

In [None]:
np.linspace(1,40, 40) # 40 evenly spaced values from 1 to 40 inclusive, i.e. 1, 2, ..., 40 (the third argument is the number of values, not the step)

In [None]:
array3 = np.arange(0, 10, 3) # Start at 0, step by 3, stop before 10 -> 0, 3, 6, 9
display(array3)

array4 = np.arange(20) # Single argument: integers 0 through 19
display(array4)

In [None]:
# Build three length-4 arrays first, then show them one per line.
array5 = np.zeros(4)  # Four zeros
array6 = np.ones(4)  # Four ones

# zeros_like takes its shape and dtype from an existing array, which is handy
# when the new array must match another (possibly very long) one.
array7 = np.zeros_like(array6)

print(array5, array6, array7, sep="\n")

In [None]:
x = np.array([1, 2, 3, 4, 5]) # Equivalent to x = np.array(l1)
y = np.array([6, 7, 8, 9]) # Only 4 elements, one fewer than x (np.array(l2) would give 5 elements)
print(x)

In [None]:
concat = np.concatenate((x, y)) # Joins x and y end-to-end into one 1D array, like + does for two lists
print(concat)

## Vectorized Operations - Strength of Numpy

# POLLEV 1:
## What will happen when I run this block?

```python
x + y
```

In [None]:
print(x.size) # .size is the total number of elements, as an int
print(y.shape) # .shape is a tuple of lengths per dimension; for a 1D array it holds just the element count

In [None]:
y = np.array([6, 7, 8, 9, 10]) # Redefine y with 5 elements so it matches the size of x
a = x + y # Element-wise (vectorized) addition: a[i] = x[i] + y[i]
print(a)

### Mapping functions

In [None]:
# Mapping: each operation below is applied to every element of a
print(a*4) # Multiply each element by 4
print(a - 3) # Subtract 3 from each element
print(3*(a**2) + 5/(a**(1/2))) # Compound expression 3a^2 + 5/sqrt(a), evaluated element by element
print(np.exp(a)) # e raised to each element
print(np.log(a)) # Natural log (base e) of each element
print(np.sin(a)) # Sine of each element (NumPy trig functions take radians)

### *A key difference between python lists and numpy arrays: You do not always need for loops for element-wise operations or stats!*

In [None]:
# Plain-list approach: build the absolute values one element at a time.
my_list = [-3, 5, -2, 1, -3, -6, 5]

new_list = []
for value in my_list:
    # Flip the sign of negatives; everything else passes through unchanged.
    new_list.append(-value if value < 0 else value)

print(new_list)

# NumPy approach: a single vectorized call replaces the whole loop.
my_array = np.array(my_list)
new_array = np.abs(my_array)
display(new_array)


## Numpy Indexing and Slicing

In [None]:
# Indexing and slicing a 1D array
array = np.arange(0, 101, 10) # 0, 10, 20, ..., 100 (11 elements)
print(array)
print(array[3]) # Fourth element (indices start at 0) -> 30
print(array[-2]) # Negative indices count from the end: second-to-last -> 90
print(array[2:7:2]) # start:stop:step -> indices 2, 4, 6 -> 20, 40, 60

## Boolean indexing/slicing in Numpy

# PollEV 2
## How many "False" values (if any) will be in the array called mask?

In [None]:
# Example temperature readings where values outside a known valid range are suspect.
# dtype=float is set explicitly: NaN is a floating-point value, so an integer array
# could not hold the NaN placeholders assigned to suspect readings in a later cell.
data = np.array([25, 2, 26, 29, 22, 30, 24, 100, 27], dtype = float)
# Boolean "mask": True where a reading is strictly between 20 and 100, False otherwise
mask = (data > 20) & (data < 100)

In [None]:
print(data[mask]) # Boolean indexing: keeps only the elements where mask is True

In [None]:
# ~mask inverts the mask, so it selects the readings that failed the range check.
# Those entries are overwritten with NaN; np.nan is used explicitly rather than
# relying on NumPy converting None to NaN in a float array.
data[~mask] = np.nan
print(data)

# Think Pair Share:
## Why is using "nan" for suspect values better than deleting them outright?

## Numpy statistics

In [None]:
avg = np.mean(data) # Plain mean of a 1D array; any NaN in the input makes the result NaN
print(avg)

In [None]:
avg = np.nanmean(data) # Mean of the 1D array computed over the non-NaN values only
print(avg.round(3)) # Rounded to 3 decimal places for display

In [None]:
# Named data_max / data_min so the built-in max() and min() functions are not
# overwritten for the rest of the session.
data_max = np.nanmax(data) # Largest value in the array, ignoring NaNs
data_min = np.nanmin(data) # Smallest value in the array, ignoring NaNs
print('Maximum is ', data_max)
print('Minimum is ', data_min)

### Full 1D Numpy data process example:

In [None]:
# Sample data: 50 height measurements stored as a 1D array of integers
heights = np.array([
    177, 169, 182, 173, 171, 179, 179, 171, 175, 163,
    180, 179, 179, 164, 173, 175, 177, 176, 181, 167,
    173, 171, 175, 177, 181, 172, 178, 165, 174, 180,
    173, 176, 180, 180, 179, 174, 175, 173, 179, 186,
    175, 172, 175, 164, 176, 170, 174, 175, 178, 176
])

print(heights.size) # Number of measurements (50)

In [None]:
# Label each measurement with a person number from 1 to 50.
# linspace returns floats, so the labels are 1.0, 2.0, ..., 50.0.
people = np.linspace(start=1, stop=50, num=50)
print(people)

In [None]:
max_height = np.max(heights) # Largest value in heights (the value itself, not its position)
print(max_height)

# PollEV 3
## If I want to find the **index** of the tallest height from the array heights, what numpy function do I use?

In [None]:
# Finding index of maximum height
max_index = np.argmax(heights) # 0-based position of the first occurrence of the largest height
tallest_person = people[max_index] # Use that position to look up the matching person number
print(tallest_person) # The tallest person
print(max_index) # The index of the tallest person - one less than the person number, because indexing starts at 0
print(heights[max_index]) # Height of tallest person (same value as heights.max())

In [None]:
# More numpy stats:
print(np.mean(heights)) # Mean
print(np.var(heights)) # Variance (population variance by default, ddof=0)
print(np.std(heights)) # Standard deviation (population by default, ddof=0)
print(np.median(heights)) # Median
print(np.quantile(heights, 0.25)) # Quantile given as a fraction between 0 and 1 (0.25 = 25th percentile)
print(np.sum(heights)) # Sum

# Plus more!

In [None]:
# Unsorted sample values.
unorg = np.array([10, 12, 6, 1, 4, 100, 14, 169, 18, 65])
# np.sort returns a new array in ascending order; unorg itself is left as it was.
in_order = np.sort(unorg)
print(in_order)