# Simulation Exercises

## How likely is it that you roll doubles when rolling two dice?

In [1]:
import numpy as np
import pandas as pd

In [2]:
ntrials = ncols = 2
nsims = nrows = 100_000

rolls = pd.DataFrame(np.random.choice([1, 2, 3, 4, 5, 6], size=(nsims, ntrials)))
rolls

Unnamed: 0,0,1
0,3,4
1,5,1
2,5,3
3,4,5
4,5,4
...,...,...
99995,5,1
99996,6,6
99997,3,6
99998,6,3


In [3]:
doubles = rolls[0] == rolls[1]
doubles

0        False
1        False
2        False
3        False
4        False
         ...  
99995    False
99996     True
99997    False
99998    False
99999    False
Length: 100000, dtype: bool

In [4]:
P_of_doubles = doubles.mean()
P_of_doubles

0.16701

## If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [5]:
ntrials = 8
nsims = 100_000

# 1 heads, 0 tails
flips = np.random.choice([0, 1], size=(nsims, ntrials))
flips

array([[1, 0, 1, ..., 1, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 1, 1],
       ...,
       [0, 0, 0, ..., 1, 0, 1],
       [0, 0, 1, ..., 0, 1, 1],
       [1, 1, 0, ..., 1, 1, 0]])

In [6]:
exactly_3_heads = (flips == 1).sum(axis=1) == 3

In [7]:
exactly_3_heads.mean()

0.2167

In [8]:
more_than_3_heads = (flips == 1).sum(axis=1) > 3
more_than_3_heads

array([ True, False,  True, ..., False,  True,  True])

In [9]:
more_than_3_heads.mean()

0.63735

## There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [10]:
ntrials = 1
nsims = 100_000

billboards = np.random.choice(['Web Dev', 'Data Science'], p=[3/4, 1/4], size=(nsims, ntrials))
billboards

array([['Web Dev'],
       ['Web Dev'],
       ['Web Dev'],
       ...,
       ['Web Dev'],
       ['Web Dev'],
       ['Web Dev']], dtype='<U12')

In [11]:
(billboards == 'Data Science').mean()

0.25036

## Codeup students buy, on average, 3 poptart packages (+- 1.5) a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon?

In [12]:
ntrials = ncols = 5
nsims = nrows = 100_000

poptarts = np.random.normal(3, 1.5, size=(nrows, ncols)).round()
poptarts

array([[ 7.,  5.,  4.,  3.,  2.],
       [ 4.,  4.,  2.,  2.,  4.],
       [ 5.,  2.,  4.,  4.,  1.],
       ...,
       [ 3.,  3.,  1.,  2.,  3.],
       [ 4.,  2.,  5.,  3.,  5.],
       [ 4.,  3.,  2., -0.,  1.]])

In [13]:
no_neg_poptarts = np.where(poptarts < 0, 0, poptarts)
no_neg_poptarts

array([[ 7.,  5.,  4.,  3.,  2.],
       [ 4.,  4.,  2.,  2.,  4.],
       [ 5.,  2.,  4.,  4.,  1.],
       ...,
       [ 3.,  3.,  1.,  2.,  3.],
       [ 4.,  2.,  5.,  3.,  5.],
       [ 4.,  3.,  2., -0.,  1.]])

In [14]:
has_left_overs = no_neg_poptarts.sum(axis=1) < 17
has_left_overs

array([False,  True,  True, ...,  True, False,  True])

In [15]:
has_left_overs.mean()

0.6687

## Compare Heights

* Men have an average height of 178 cm and standard deviation of 8cm.
* Women have a mean of 170, sd = 6cm.
* If a man and woman are chosen at random, P(woman taller than man)?

In [16]:
nsims = 100_000

men = np.random.normal(178, 8, size=nsims)
men

array([176.62950063, 184.47949867, 179.4778153 , ..., 181.2350099 ,
       173.66569496, 178.92639493])

In [17]:
women = np.random.normal(170, 6, size=nsims)
women

array([173.03383669, 164.69157065, 168.93520294, ..., 165.76402269,
       172.82700328, 178.59823777])

In [18]:
(men < women).mean()

0.20924

## When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

In [19]:
def prob_no_anaconda_corruption(students, number_of_sims_to_run):
    ntrials = ncols = students
    nsims = nrows = number_of_sims_to_run

    installs = np.random.choice([0, 1], p=[1/250, 249/250], size=(nrows, ncols))
    
    no_corruptions = installs.sum(axis=1) == students
    
    return no_corruptions.mean()

In [20]:
prob_no_anaconda_corruption(50, 100_000)

0.81928

In [21]:
prob_no_anaconda_corruption(100, 100_000)

0.66926

### What is the probability that we observe an installation issue within the first 150 students that download anaconda?

In [22]:
def prob_anaconda_corruption(students, number_of_sims_to_run):
    ntrials = ncols = students
    nsims = nrows = number_of_sims_to_run

    installs = np.random.choice([0, 1], p=[1/250, 249/250], size=(nrows, ncols))
    
    has_corruptions = installs.sum(axis=1) < students
    
    return has_corruptions.mean()

In [23]:
prob_anaconda_corruption(150, 100_000)

0.45365

### How likely is it that 450 students all download anaconda without an issue?

In [24]:
prob_no_anaconda_corruption(450, 100_000)

0.16432

## There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

In [25]:
ntrials = 3
nsims = 100_000

# 0 is no food trucks, 1 is at least 1 food truck
food_trucks = np.random.choice([0, 1], p=[.3, .7], size=(nsims, ntrials))
food_trucks

array([[1, 0, 0],
       [1, 1, 1],
       [0, 1, 0],
       ...,
       [0, 0, 1],
       [1, 1, 1],
       [1, 1, 1]])

In [26]:
no_food_trucks = food_trucks.sum(axis=1) == 0
no_food_trucks

array([False, False, False, ..., False, False, False])

In [27]:
no_food_trucks.mean()

0.02701

### How likely is it that a food truck will show up sometime this week?

In [28]:
ntrials = 7
nsims = 100_000

food_trucks = np.random.choice([0, 1], p=[.3, .7], size =(nsims, ntrials))
food_trucks

array([[1, 1, 1, ..., 1, 1, 1],
       [1, 0, 0, ..., 0, 1, 1],
       [1, 1, 1, ..., 1, 1, 1],
       ...,
       [1, 1, 1, ..., 1, 0, 1],
       [0, 0, 1, ..., 1, 1, 0],
       [0, 0, 1, ..., 1, 1, 1]])

In [29]:
at_least_one_food_truck = food_trucks.sum(axis=1) > 0
at_least_one_food_truck

array([ True,  True,  True, ...,  True,  True,  True])

In [30]:
at_least_one_food_truck.mean()

0.99971

## If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?

In [31]:
ntrials = 23
nsims = 100000

birthdays = pd.DataFrame(np.random.uniform(1, 366, size=(nsims, ntrials)).round())
birthdays

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,200.0,282.0,186.0,264.0,351.0,322.0,171.0,111.0,332.0,227.0,...,215.0,159.0,348.0,23.0,310.0,175.0,343.0,324.0,93.0,1.0
1,11.0,124.0,81.0,271.0,56.0,288.0,51.0,43.0,281.0,240.0,...,308.0,6.0,268.0,89.0,174.0,155.0,240.0,85.0,154.0,159.0
2,62.0,329.0,195.0,26.0,8.0,317.0,347.0,23.0,181.0,238.0,...,52.0,287.0,160.0,255.0,311.0,302.0,281.0,342.0,145.0,197.0
3,204.0,66.0,291.0,199.0,9.0,332.0,201.0,282.0,297.0,138.0,...,80.0,169.0,266.0,268.0,274.0,263.0,96.0,74.0,141.0,287.0
4,62.0,358.0,85.0,174.0,261.0,142.0,343.0,123.0,199.0,336.0,...,95.0,94.0,12.0,206.0,319.0,288.0,139.0,39.0,59.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,136.0,355.0,282.0,169.0,328.0,104.0,216.0,229.0,73.0,91.0,...,43.0,198.0,42.0,154.0,354.0,131.0,236.0,120.0,173.0,276.0
99996,209.0,222.0,140.0,96.0,182.0,120.0,263.0,12.0,161.0,111.0,...,264.0,285.0,65.0,43.0,271.0,214.0,200.0,311.0,265.0,102.0
99997,262.0,209.0,111.0,56.0,216.0,354.0,206.0,122.0,308.0,71.0,...,147.0,256.0,143.0,180.0,177.0,322.0,109.0,115.0,39.0,330.0
99998,75.0,145.0,288.0,174.0,208.0,361.0,329.0,149.0,153.0,135.0,...,85.0,243.0,237.0,166.0,165.0,24.0,244.0,359.0,330.0,94.0


In [32]:
def birthday_match(one_party):
    return one_party.duplicated().any()

In [33]:
matching_birthdays = birthdays.apply(birthday_match, axis=1)
matching_birthdays

0        False
1         True
2        False
3        False
4         True
         ...  
99995     True
99996    False
99997    False
99998    False
99999     True
Length: 100000, dtype: bool

In [34]:
matching_birthdays.mean()

0.50517

I'm going to make a function that excutes the above so that I can give each answer simply

In [35]:
def prob_of_mathching_birthdays(n_people, n_parties):
    ntrials = n_people
    nsims = n_parties

    birthdays = pd.DataFrame(np.random.uniform(1, 366, size=(nsims, ntrials)).round())
    print(f'Generated {n_parties} parties of {n_people} people')
    print('Checking for matches...')
    
    matching_birthdays = birthdays.apply(birthday_match, axis=1)
    print('Matches determined, calculating probability...')
    
    return matching_birthdays.mean()

In [36]:
prob_of_mathching_birthdays(23, 100000)

Generated 100000 parties of 23 people
Checking for matches...
Matches determined, calculating probability...


0.50823

In [37]:
prob_of_mathching_birthdays(20, 100000)

Generated 100000 parties of 20 people
Checking for matches...
Matches determined, calculating probability...


0.41159

In [38]:
prob_of_mathching_birthdays(40, 100000)

Generated 100000 parties of 40 people
Checking for matches...
Matches determined, calculating probability...


0.89126