# Simulation Exercises

## How likely is it that you roll doubles when rolling two dice?

In [1]:
import numpy as np
import pandas as pd

In [2]:
ntrials = ncols = 2
nsims = nrows = 100_000

rolls = pd.DataFrame(np.random.choice([1, 2, 3, 4, 5, 6], size=(nsims, ntrials)))
rolls

Unnamed: 0,0,1
0,5,5
1,5,2
2,3,3
3,3,4
4,4,6
...,...,...
99995,5,4
99996,6,3
99997,6,4
99998,4,1


In [3]:
doubles = rolls[0] == rolls[1]
doubles

0         True
1        False
2         True
3        False
4        False
         ...  
99995    False
99996    False
99997    False
99998    False
99999     True
Length: 100000, dtype: bool

In [4]:
P_of_doubles = doubles.mean()
P_of_doubles

0.16683

## If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [5]:
ntrials = 8
nsims = 100_000

# 1 heads, 0 tails
flips = np.random.choice([0, 1], size=(nsims, ntrials))
flips

array([[0, 1, 0, ..., 0, 0, 1],
       [1, 0, 0, ..., 1, 1, 0],
       [1, 0, 0, ..., 1, 0, 1],
       ...,
       [0, 0, 1, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 1, 0],
       [0, 1, 0, ..., 1, 0, 1]])

In [6]:
exactly_3_heads = (flips == 1).sum(axis=1) == 3

In [7]:
exactly_3_heads.mean()

0.21961

In [8]:
more_than_3_heads = (flips == 1).sum(axis=1) > 3
more_than_3_heads

array([False,  True,  True, ...,  True,  True,  True])

In [9]:
more_than_3_heads.mean()

0.63703

## There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [10]:
ntrials = 1
nsims = 100_000

billboards = np.random.choice(['Web Dev', 'Data Science'], p=[3/4, 1/4], size=(nsims, ntrials))
billboards

array([['Web Dev'],
       ['Web Dev'],
       ['Web Dev'],
       ...,
       ['Web Dev'],
       ['Web Dev'],
       ['Web Dev']], dtype='<U12')

In [11]:
(billboards == 'Data Science').mean()

0.25155

## Codeup students buy, on average, 3 poptart packages (+- 1.5) a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

In [12]:
ntrials = ncols = 5
nsims = nrows = 100_000

poptarts = np.random.normal(3, 1.5, size=(nrows, ncols)).round()
poptarts

array([[ 2., -0.,  4.,  3.,  3.],
       [ 2.,  2.,  4.,  2.,  4.],
       [ 0.,  3.,  2.,  4.,  3.],
       ...,
       [ 3.,  1.,  2., -0.,  2.],
       [ 3.,  4.,  1.,  4.,  2.],
       [ 2.,  1.,  5.,  3.,  4.]])

In [13]:
no_neg_poptarts = np.where(poptarts < 0, 0, poptarts)
no_neg_poptarts

array([[ 2., -0.,  4.,  3.,  3.],
       [ 2.,  2.,  4.,  2.,  4.],
       [ 0.,  3.,  2.,  4.,  3.],
       ...,
       [ 3.,  1.,  2., -0.,  2.],
       [ 3.,  4.,  1.,  4.,  2.],
       [ 2.,  1.,  5.,  3.,  4.]])

In [14]:
has_left_overs = no_neg_poptarts.sum(axis=1) < 17
has_left_overs

array([ True,  True,  True, ...,  True,  True,  True])

In [15]:
has_left_overs.mean()

0.66822

## Compare Heights

* Men have an average height of 178 cm and standard deviation of 8cm.
* Women have a mean of 170, sd = 6cm.
* If a man and woman are chosen at random, P(woman taller than man)?

In [16]:
nsims = 100_000

men = np.random.normal(178, 8, size=nsims)
men

array([162.91607985, 187.66354835, 176.07185036, ..., 186.65164642,
       168.89064504, 181.52628749])

In [17]:
women = np.random.normal(170, 6, size=nsims)
women

array([167.8469225 , 162.82257311, 174.70244701, ..., 173.45106158,
       163.68078151, 169.17918833])

In [18]:
(men < women).mean()

0.2109

## When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

In [19]:
def prob_no_anaconda_corruption(students, number_of_sims_to_run):
    ntrials = ncols = students
    nsims = nrows = number_of_sims_to_run

    installs = np.random.choice([0, 1], p=[1/250, 249/250], size=(nrows, ncols))
    
    no_corruptions = installs.sum(axis=1) == students
    
    return no_corruptions.mean()

In [20]:
prob_no_anaconda_corruption(50, 100_000)

0.8185

In [21]:
prob_no_anaconda_corruption(100, 100_000)

0.66997

### What is the probability that we observe an installation issue within the first 150 students that download anaconda?

In [22]:
def prob_anaconda_corruption(students, number_of_sims_to_run):
    ntrials = ncols = students
    nsims = nrows = number_of_sims_to_run

    installs = np.random.choice([0, 1], p=[1/250, 249/250], size=(nrows, ncols))
    
    has_corruptions = installs.sum(axis=1) < students
    
    return has_corruptions.mean()

In [23]:
prob_anaconda_corruption(150, 100_000)

0.45265

### How likely is it that 450 students all download anaconda without an issue?

In [24]:
prob_no_anaconda_corruption(450, 100_000)

0.16761

## There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

In [25]:
ntrials = 3
nsims = 100_000

# 0 is no food trucks, 1 is at least 1 food truck
food_trucks = np.random.choice([0, 1], p=[.3, .7], size=(nsims, ntrials))
food_trucks

array([[1, 0, 0],
       [1, 1, 1],
       [1, 1, 0],
       ...,
       [0, 0, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [26]:
no_food_trucks = food_trucks.sum(axis=1) == 0
no_food_trucks

array([False, False, False, ..., False, False, False])

In [27]:
no_food_trucks.mean()

0.02763

### How likely is it that a food truck will show up sometime this week?

In [28]:
ntrials = 7
nsims = 100_000

food_trucks = np.random.choice([0, 1], p=[.3, .7], size =(nsims, ntrials))
food_trucks

array([[0, 1, 1, ..., 0, 0, 1],
       [1, 0, 0, ..., 0, 1, 1],
       [0, 1, 1, ..., 0, 1, 1],
       ...,
       [1, 1, 1, ..., 1, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       [0, 1, 1, ..., 1, 1, 1]])

In [29]:
at_least_one_food_truck = food_trucks.sum(axis=1) > 0
at_least_one_food_truck

array([ True,  True,  True, ...,  True,  True,  True])

In [30]:
at_least_one_food_truck.mean()

0.99969