In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Bikes

The bike datasets in this section are big and could lead to kernel crashes.

In [None]:
trip = Table.read_table('trip.csv')
trip

In [None]:
# Keep only trips shorter than 1800 (presumably seconds, i.e. 30 minutes — the
# histogram cell below plots this column with unit='second').
commute = trip.where('Duration', are.below(1800))
commute.hist('Duration')

In [None]:
commute.hist('Duration', bins=60, unit='second')

In [None]:
# Percent of people who have a ride duration between 250 and 500 seconds:
# area of the histogram over that range = width (in seconds) * height (percent per second).
# NOTE(review): 0.15 is presumably the approximate bar height read off the histogram above — confirm.
(500-250) * 0.15 

In [None]:
starts = commute.group('Start Station').sort('count', descending=True)
starts

In [None]:
commute.pivot('Start Station', 'End Station')

In [None]:
duration = trip.select('Start Station', 'End Station', 'Duration')
duration

In [None]:
shortest = duration.group(['Start Station', 'End Station'], min).relabeled('Duration min', 'Minimum Duration')
shortest

In [None]:
from_cc = shortest.where('Start Station', are.containing('Civic Center BART')).sort('Minimum Duration')
from_cc

## Maps, again

In [None]:
stations = Table.read_table('station.csv')
stations

In [None]:
Marker.map_table(stations.select('lat', 'long', 'name'))

In [None]:
sf = stations.where('landmark', 'San Francisco')
Circle.map_table(sf.select('lat', 'long', 'name'), color='green', radius=15)

## Extra practice

### Write a Python expression below each of the following descriptions that computes its value. You may use more than one line.

In [None]:
# The name of the station where the most rentals ended (assume no ties).


In [None]:
# The number of stations for which the average duration ending at that station was more than 10 minutes.


In [None]:
# The number of stations that have more than 500 starts AND more than 500 ends

In [None]:
# The name of the station where the most rentals ended (assume no ties).
# First, find end counts. The column is labeled 'End Station' (as in the pivot/select
# cells earlier in this notebook); grouping on 'End' fails because no such label exists.
# Then, find the station with the highest end count
trip.group('End Station').sort('count', descending=True).column('End Station').item(0)

In [None]:
# The number of stations for which the average duration ending at that station was more than 10 minutes.

# First, keep only the two columns we need, so only 'Duration' gets averaged
# Then, find the average duration of trips ending at each station
# Then, keep the ones above 10 minutes (Duration is in seconds, so 10 * 60)
# Then, count them
trip.select('End Station', 'Duration').group('End Station', np.average).where(
    'Duration average', are.above(10 * 60)).num_rows

In [None]:
# The number of stations that have more than 500 starts AND more than 500 ends.

# First, group the trip on starts and ends, separately
# (the columns are labeled 'Start Station' and 'End Station', not 'Start' and 'End')
# Then, join the two grouped tables on the station name
# Then, count the number of rows where the stations' start and end counts are above 500
start_counts = trip.group('Start Station').relabeled('count', 'Start Count')
end_counts = trip.group('End Station').relabeled('count', 'End Count')
start_counts.join('Start Station', end_counts, 'End Station').where(
    'Start Count', are.above(500)).where('End Count', are.above(500)).num_rows

## Comparison ##

In [None]:
3 > 1

In [None]:
type(3 > 1)

In [None]:
True

In [None]:
# Intentional error: Python's Boolean values are spelled `True` and `False`.
# Lowercase `true` is just an undefined name here, so this cell raises NameError.
true

In [None]:
# Intentional error: a single `=` means assignment, and a literal cannot be assigned to,
# so this cell is a SyntaxError. Use `==` to test equality (see the next cell).
3 = 3

In [None]:
3 == 3.0

In [None]:
10 != 2

In [None]:
x = 14
y = 3

In [None]:
x > 15

In [None]:
12 < x

In [None]:
x < 20

In [None]:
12 < x < 20

In [None]:
10 < x-y < 13

In [None]:
x > 13 and y < 3.14159

## Comparisons with arrays

In [None]:
pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')

In [None]:
pets == 'cat'

In [None]:
1 + 1 + 0 + 1 + 0 + 0

In [None]:
def equalToRabbit(animal):
    """Compare `animal` to the string 'rabbit' and return the comparison result.

    For a single value this is a Boolean; for an array the `==` comparison is
    applied elementwise, giving one Boolean per element.
    """
    is_rabbit = (animal == 'rabbit')
    return is_rabbit
equalToRabbit(pets)

In [None]:
animals = Table().with_column('Type',pets)

In [None]:
sum(make_array(True, True, False, True, False, False))

In [None]:
sum(pets == 'dog')

In [None]:
np.count_nonzero(pets == 'dog')

In [None]:
x = np.arange(20, 31)

In [None]:
x > 28


In [None]:
animals.where(animals.apply(equalToRabbit, 'Type'))

In [None]:
np.append(pets, pets)


In [None]:
pets

## Simulation

Let's play a game: we each roll a die. 

If my number is bigger: you pay me a dollar.

If they're the same: we do nothing.

If your number is bigger: I pay you a dollar.

Steps:
1. Find a way to simulate two dice rolls.
2. Compute how much money we win/lose based on the result.
3. Do steps 1 and 2 10,000 times.

### Random Selection

In [None]:
mornings = make_array('wake up', 'sleep in')

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings)

In [None]:
np.random.choice(mornings, 7)

In [None]:
sum(np.random.choice(mornings, 7) == 'wake up')

In [None]:
sum(np.random.choice(mornings, 7) == 'sleep in')

In [None]:
morning_week = np.random.choice(mornings, 7)
morning_week

In [None]:
sum(morning_week == 'wake up')

In [None]:
sum(morning_week == 'sleep in')

In [None]:
die_faces = np.arange(1, 7)
die_faces

In [None]:
np.random.choice(die_faces)

### Conditional Statements

In [None]:
# Work in progress
def one_round(my_roll, your_roll):
    """Return 1 when my roll beats yours.

    Deliberately incomplete: ties and losses are not handled yet, so in those
    cases the function falls off the end and returns None.
    """
    i_won = my_roll > your_roll
    if i_won:
        return 1

In [None]:
one_round(4, 3)

In [None]:
one_round(2, 6)

In [None]:
# Final correct version
def one_round(my_roll, your_roll):
    """Return my winnings in dollars for one round of the dice game.

    1 if my roll is bigger, -1 if your roll is bigger, 0 if the rolls are equal.
    """
    # Each branch returns immediately, so plain `if`s behave like the if/elif chain.
    if my_roll > your_roll:
        return 1
    if your_roll > my_roll:
        return -1
    if your_roll == my_roll:
        return 0

In [None]:
one_round(1, 1)

In [None]:
one_round(6, 5)

In [None]:
one_round(7, -1)

In [None]:
def simulate_one_round():
    """Roll one die for each player and return my winnings for that round.

    Both rolls are drawn independently from `die_faces`; my roll is drawn first.
    """
    return one_round(
        np.random.choice(die_faces),  # my roll
        np.random.choice(die_faces),  # your roll
    )

In [None]:
simulate_one_round()

### Repeated Betting ###

In [None]:
results = make_array()
results

In [None]:
results = np.append(results, simulate_one_round())
results

## `For` Statements

In [None]:
for pet in make_array('cat', 'dog', 'rabbit'):
    print('I love my ' + pet)

In [None]:
pet = make_array('cat', 'dog', 'rabbit').item(0)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(1)
print('I love my ' + pet)

pet = make_array('cat', 'dog', 'rabbit').item(2)
print('I love my ' + pet)

In [None]:
# Start from an empty array and add the result of each simulated round to it.
game_outcomes = make_array()

for i in np.arange(5):
    # np.append returns a new array rather than modifying its argument,
    # so the result has to be assigned back to game_outcomes.
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

In [None]:
# Same loop as the 5-round version, now repeated 10,000 times (step 3 of the plan).
# Resetting game_outcomes first means re-running this cell starts from scratch.
game_outcomes = make_array()

for i in np.arange(10000):
    game_outcomes = np.append(game_outcomes, simulate_one_round())
    
game_outcomes

In [None]:
len(game_outcomes)

In [None]:
results = Table().with_column('My winnings', game_outcomes)

In [None]:
results

In [None]:
results.group('My winnings').barh('My winnings')

### Another example: simulating heads in 100 coin tosses

In [None]:
coin = make_array('heads', 'tails')

In [None]:
sum(np.random.choice(coin, 100) == 'heads')

In [None]:
# Simulate one outcome

def num_heads():
    """Simulate 100 tosses of `coin` and return how many came up 'heads'."""
    tosses = np.random.choice(coin, 100)
    # Comparing the array to 'heads' gives Booleans; summing counts the True values.
    return sum(tosses == 'heads')

In [None]:
# Decide how many times you want to repeat the experiment

repetitions = 10000

In [None]:
# Simulate that many outcomes

# Collect one simulated head count per repetition, starting from an empty array.
outcomes = make_array()

for i in np.arange(repetitions):
    # np.append returns a new array, so assign the result back to outcomes.
    outcomes = np.append(outcomes, num_heads())

In [None]:
outcomes

In [None]:
heads = Table().with_column('Heads', outcomes)
heads.hist(bins = np.arange(25, 75))

## Optional: Advanced `where` ##

In [None]:
ages = make_array(16, 22, 18, 15, 19, 15, 16, 21)
age = Table().with_column('Age', ages)

In [None]:
age

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
voter = ages >= 18

In [None]:
voter

In [None]:
age.where(voter)

In [None]:
is_voter = are.above_or_equal_to(18)

In [None]:
type(is_voter)

In [None]:
is_voter(22)

In [None]:
is_voter(3)

In [None]:
age.apply(is_voter, 'Age')

In [None]:
ages >= 18

In [None]:
voter

In [None]:
def my_voter_function(x):
    """Return whether `x` is at least 18 (the result of `x >= 18`)."""
    old_enough = (x >= 18)
    return old_enough

In [None]:
age.where('Age', are.above_or_equal_to(18))

In [None]:
age.where(voter)

In [None]:
age.where('Age', my_voter_function)

## Probability

In [None]:
one23 = 3/6
one23

## Monty Hall

In [None]:
doors = make_array('car', 'first goat', 'second goat')

In [None]:
goats = make_array('first goat', 'second goat')

def other_goat(a_goat):
    """Given the name of one goat, return the name of the other goat.

    Any input that is neither 'first goat' nor 'second goat' yields None.
    """
    if a_goat == 'first goat':
        return 'second goat'
    if a_goat == 'second goat':
        return 'first goat'
    return None


In [None]:
other_goat('first goat')

In [None]:
other_goat('wheeler hall')

In [None]:
def monty_hall():
    """Simulate one Monty Hall game.

    Returns a list [contestant's guess, door Monty reveals, remaining door].
    The contestant picks uniformly at random from `doors`. Monty always reveals
    a goat: the other goat if the contestant picked a goat, or a randomly chosen
    goat if the contestant picked the car.
    """
    guess = np.random.choice(doors)

    if guess == 'car':
        # Monty is free to reveal either goat; the other one stays behind the last door.
        revealed = np.random.choice(goats)
        remaining = other_goat(revealed)
    elif guess == 'first goat':
        revealed, remaining = 'second goat', 'car'
    elif guess == 'second goat':
        revealed, remaining = 'first goat', 'car'

    return [guess, revealed, remaining]

In [None]:
monty_hall()

In [None]:
games = Table(['Guess', 'Revealed', 'Remaining'])

In [None]:
games.append(monty_hall())

In [None]:
# Rebuild the empty table first so re-running this cell starts from zero rows,
# then record 3000 simulated games, one row per game.
games = Table(['Guess', 'Revealed', 'Remaining'])
for i in range(3000):
    # monty_hall() returns [guess, revealed, remaining], matching the column order.
    # The return value of append is not used, so this relies on it adding the row to `games` itself.
    games.append(monty_hall())
    
games

In [None]:
games.group('Remaining')

In [None]:
games.group('Guess')

## Probability ##

In [None]:
# Discussion Question (a):
# P(Mo and Jo both appear)

In [None]:
# Discussion Question (b)
# P(neither Mo nor Jo appears)
