In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Joins

**Consider the `total_infected` and `total_deaths` tables in the code cell below. The `total_infected` table
lists Wheaton's county (DuPage) and its surrounding counties, along with the total number of people in each county who have or have had COVID-19. The `total_deaths`
table provides the total number of deaths from COVID-19 in each county it lists.**

*Note: These values were gathered on Feb 16, 2021 from coronavirus.jhu.edu*

In [2]:
# Run this cell to load the two tables
# Note: the county column is labeled 'County' in total_infected but 'county'
# (lowercase) in total_deaths, and the two tables do not list the same set of
# counties (Kendall appears only in the first, Will only in the second).
total_infected = Table().with_columns(
    'County', make_array('Cook','DuPage',  'Kane', 'Kendall'),
    'total infections', make_array(466422, 74991,  49669, 10793)
)
total_deaths = Table().with_columns(
    'county', make_array('DuPage', 'Will','Cook',  'Kane'),
    'total deaths', make_array(1156, 873, 9159,  700)
)

In [3]:
# Run this cell to show the table
total_infected

County,total infections
Cook,466422
DuPage,74991
Kane,49669
Kendall,10793


In [4]:
# Run this cell to show the table
total_deaths

county,total deaths
DuPage,1156
Will,873
Cook,9159
Kane,700


In [8]:
# Use join to make a Table with columns 'County', 'total infections' and 'total deaths'
# Did we lose any information?
# The two label arguments differ because the matching column is named 'County'
# in total_infected and 'county' in total_deaths.
covid19 = total_infected.join('County', total_deaths, 'county')
covid19

County,total infections,total deaths
Cook,466422,9159
DuPage,74991,1156
Kane,49669,700


In [10]:
# Death rate per county: the 'total deaths' column divided element-wise by the
# 'total infections' column. The result is displayed but not assigned to a name.
covid19.with_column(
    'Death Rate', covid19.column("total deaths") / covid19.column("total infections"))

County,total infections,total deaths,Death Rate
Cook,466422,9159,0.0196367
DuPage,74991,1156,0.0154152
Kane,49669,700,0.0140933


## Comparison ##

In [11]:
3 > 1

True

In [12]:
type(3 > 1)

bool

In [13]:
True

True

In [14]:
true

NameError: name 'true' is not defined

In [17]:
four = 3

In [18]:
four

3

In [21]:
3 == 3.000000

True

In [22]:
10 != 2

True

In [23]:
x = 14
y = 3

In [27]:
x > 9

True

In [25]:
12 < x

True

In [28]:
x < 20

True

In [31]:
12 < x < 17

True

In [32]:
10 < x-y < 13

True

In [33]:
x-y

11

In [36]:
x > 15 and y > 3.14159

False

## Comparisons with arrays

In [37]:
pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')

In [38]:
pets == 'cat'

array([ True,  True, False,  True, False, False])

In [39]:
1 + 1 + 0 + 1 + 0 + 0

3

In [41]:
sum(make_array(True, True, False, True, False, False))

3

In [43]:
sum(pets == 'dog')

2

In [44]:
np.count_nonzero(pets == 'dog')

2

In [46]:
x = np.arange(20, 31)
x

array([20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30])

In [47]:
x > 28

array([False, False, False, False, False, False, False, False, False,
        True,  True])

## Simulation

Let's play a game: we each roll a die. 

If my number is bigger: you pay me a dollar.

If they're the same: we do nothing.

If your number is bigger: I pay you a dollar.

Steps:
1. Find a way to simulate two dice rolls.
2. Compute how much money we win/lose based on the result.
3. Do steps 1 and 2 10,000 times.

### Conditional Statements

In [None]:
# Work in progress
def one_round(my_roll, your_roll):
    """Return 1 when my_roll beats your_roll.

    Incomplete draft: ties and losses are not handled yet, so in those
    cases the function falls off the end and returns None.
    """
    i_win = my_roll > your_roll
    if i_win:
        return 1

In [None]:
one_round(4, 3)

In [None]:
one_round(2, 6)

In [None]:
# Final correct version
def one_round(my_roll, your_roll):
    """Score one round of the dice game from my point of view.

    Returns 1 if my_roll is larger (you pay me a dollar), -1 if your_roll
    is larger (I pay you a dollar), and 0 if the two rolls are equal.
    """
    # Tie: nobody pays.
    if my_roll == your_roll:
        return 0
    # Your roll is bigger: I lose a dollar.
    if my_roll < your_roll:
        return -1
    # My roll is bigger: I win a dollar.
    if my_roll > your_roll:
        return 1

In [None]:
one_round(1, 1)

In [None]:
one_round(6, 5)

In [None]:
one_round(7, -1)

### Random Selection

In [None]:
mornings = make_array('wake up', 'sleep in')

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings, 7)

In [None]:
sum(np.random.choice(mornings, 7) == 'wake up')

In [None]:
sum(np.random.choice(mornings, 7) == 'sleep in')

In [None]:
morning_week = np.random.choice(mornings, 7)
morning_week

In [None]:
sum(morning_week == 'wake up')

In [None]:
sum(morning_week == 'sleep in')

In [None]:
die_faces = np.arange(1, 7)

In [None]:
np.random.choice(die_faces)

In [None]:
def simulate_one_round():
    """Roll one die for each player and return the score of that round.

    Both rolls are drawn with np.random.choice from die_faces (my roll
    first, then yours) and the pair is scored by one_round.
    """
    return one_round(
        np.random.choice(die_faces),  # my roll
        np.random.choice(die_faces),  # your roll
    )

In [None]:
simulate_one_round()

### Appending Arrays

In [None]:
first = np.arange(4)
second = np.arange(10, 17)

In [None]:
np.append(first, 6)

In [None]:
first

In [None]:
np.append(first, second)

In [None]:
first

In [None]:
second

### Repeated Betting ###

In [None]:
results = make_array()

In [None]:
results = np.append(results, simulate_one_round())
results

## `For` Statements

In [None]:
for pet in make_array('cat', 'dog', 'rabbit'):
    print('I love my ' + pet)

In [None]:
pet = make_array('cat', 'dog', 'rabbit').item(0)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(1)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(2)
print('I love my ' + pet)

In [None]:
# Start from an empty array so that re-running this cell starts over
# instead of adding to earlier results.
game_outcomes = make_array()

# Play 5 rounds; each iteration appends one round's result and assigns the
# array returned by np.append back to game_outcomes. The loop variable i is
# only a counter and is not used in the body.
for i in np.arange(5):
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

In [None]:
# Reset to an empty array, discarding the outcomes of the 5-round run above.
game_outcomes = make_array()

# Same pattern as the 5-round loop, now repeated 10,000 times (step 3 of the
# plan in the Simulation section). The loop variable i is only a counter.
for i in np.arange(10000):
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

In [None]:
len(game_outcomes)

In [None]:
results = Table().with_column('My winnings', game_outcomes)

In [None]:
results

In [None]:
results.group('My winnings').barh('My winnings')

In [None]:
# Bonus question: This simulation is relatively simple. 
# Can you find a way to run it without using a for loop?

### Another example: simulating heads in 100 coin tosses

In [None]:
coin = make_array('heads', 'tails')

In [None]:
sum(np.random.choice(coin, 100) == 'heads')

In [None]:
# Simulate one outcome

def num_heads(num_tosses=100):
    """Simulate coin tosses and count how many come up 'heads'.

    Parameters
    ----------
    num_tosses : int, optional
        Number of tosses to draw from `coin` with np.random.choice.
        Defaults to 100, so existing calls of num_heads() are unchanged.

    Returns
    -------
    int
        How many of the simulated tosses equal 'heads'.
    """
    tosses = np.random.choice(coin, num_tosses)
    # np.count_nonzero counts the True entries in a single vectorized pass,
    # whereas the built-in sum would loop over the array element by element.
    return np.count_nonzero(tosses == 'heads')

In [None]:
# Decide how many times you want to repeat the experiment

repetitions = 10000

In [None]:
# Simulate that many outcomes

# Start from an empty array so that re-running this cell starts over.
outcomes = make_array()

# Run the experiment `repetitions` times, appending one head count per
# repetition. The loop variable i is only a counter.
for i in np.arange(repetitions):
    outcomes = np.append(outcomes, num_heads())

In [None]:
heads = Table().with_column('Heads', outcomes)
# Bin edges run 29.5, 30.5, ..., 70.5 in steps of 1, so every whole number of
# heads from 30 through 70 sits in the middle of its own unit-width bin.
heads.hist(bins = np.arange(29.5, 70.6))

### Optional: Bay Area Bike Share

In [None]:
trip = Table.read_table('trip.csv')
trip

In [None]:
# Keep only trips whose Duration is below 1800.
# NOTE(review): Duration looks to be in seconds (a later cell plots it with
# unit='second'), which would make 1800 a 30-minute cutoff -- confirm
# against the trip.csv documentation.
commute = trip.where('Duration', are.below(1800))
commute.hist('Duration')

In [None]:
commute.hist('Duration', bins=60, unit='second')

In [None]:
# Approximate percent of rides with a duration between 250 and 500 seconds,
# computed as (width of that interval in seconds) * (height).
# NOTE(review): 0.15 appears to be a bar height (percent per second) read off
# the histogram in the previous cell -- confirm against the plot.
(500-250) * 0.15 

In [None]:
starts = commute.group('Start Station').sort('count', descending=True)
starts

In [None]:
commute.pivot('Start Station', 'End Station')

In [None]:
duration = trip.select('Start Station', 'End Station', 'Duration')
duration

## Optional: Advanced `where` ##

In [None]:
ages = make_array(16, 22, 18, 15, 19, 15, 16, 21)
age = Table().with_column('Age', ages)

In [None]:
age

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
voter = ages >= 18

In [None]:
voter

In [None]:
age.where(voter)

In [None]:
is_voter = are.above_or_equal_to(18)

In [None]:
type(is_voter)

In [None]:
is_voter(22)

In [None]:
is_voter(3)

In [None]:
age.apply(is_voter, 'Age')

In [None]:
ages >= 18

In [None]:
voter

In [None]:
def my_voter_function(x):
    """Return whether x is at least 18, the voting-age cutoff used in this section."""
    return 18 <= x

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
age.where(voter)

In [None]:
age.where('Age', my_voter_function)