# Simulation Exercises

1. How likely is it that you roll doubles when rolling two dice?

In [1]:
import numpy as np
import pandas as pd

In [2]:
n_trials = nrows = 10
n_dice = ncols = 2

rolls = np.random.choice([1, 2, 3, 4, 5, 6], n_trials * n_dice).reshape(nrows, ncols)
rolls

array([[2, 2],
       [1, 5],
       [5, 6],
       [1, 1],
       [6, 5],
       [3, 4],
       [2, 4],
       [6, 1],
       [5, 5],
       [3, 4]])

In [3]:
def doubles_trial(rolls):
    return np.array([roll[0] == roll[1] for roll in rolls])
doubles_bool = doubles_trial(rolls)
doubles_bool

array([ True, False, False,  True, False, False, False, False,  True,
       False])

In [4]:
doubles_rate = doubles_bool.mean()
doubles_rate

0.3

2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [5]:
n_trials = nrows = 10000
n_coins = ncols = 8

flips = np.random.choice([0, 1], (n_trials, n_coins))
flips

array([[1, 1, 0, ..., 1, 1, 1],
       [0, 1, 0, ..., 1, 1, 0],
       [0, 0, 1, ..., 0, 1, 1],
       ...,
       [1, 1, 0, ..., 0, 1, 1],
       [0, 0, 0, ..., 1, 0, 1],
       [0, 1, 1, ..., 0, 0, 0]])

In [7]:
heads_bool = flips == 1
heads_bool
df_heads = pd.DataFrame(heads_bool)
df_heads

Unnamed: 0,0,1,2,3,4,5,6,7
0,True,True,False,True,False,True,True,True
1,False,True,False,False,True,True,True,False
2,False,False,True,False,True,False,True,True
3,False,False,False,False,True,False,False,True
4,True,True,True,False,False,False,True,True
...,...,...,...,...,...,...,...,...
9995,True,True,True,False,True,False,True,True
9996,True,True,False,False,False,False,False,False
9997,True,True,False,True,True,False,True,True
9998,False,False,False,True,True,True,False,True


In [8]:
# probability of rolling heads exactly 3 times tossing 8 coins

exactly_three = ((df_heads.sum(axis=1)) == 3).mean()
exactly_three

0.22

In [9]:
# probability of rolling heads 3+ times tossing 8 coins

more_than_three = ((df_heads.sum(axis=1)) > 3).mean()
more_than_three

0.6347

3. There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

In [10]:
n_sims = 100000
n_billboards = 2

drive_by = np.random.choice([1, 0], (n_sims, n_billboards))
drive_by

array([[1, 1],
       [0, 1],
       [0, 1],
       ...,
       [1, 1],
       [0, 1],
       [0, 1]])

In [11]:
ds_df = pd.DataFrame(drive_by)
ds_df

Unnamed: 0,0,1
0,1,1
1,0,1
2,0,1
3,0,0
4,1,1
...,...,...
99995,0,0
99996,0,0
99997,1,1
99998,0,1


In [12]:
ds_df.sum(axis=1) == 2

0         True
1        False
2        False
3        False
4         True
         ...  
99995    False
99996    False
99997     True
99998    False
99999    False
Length: 100000, dtype: bool

4. Codeup students buy, on average, 3 poptart packages with a standard deviation of 1.5 a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon? (Remember, if you have mean and standard deviation, use the np.random.normal) You'll need to make a judgement call on how to handle some of your values

In [13]:
sims = rows = 10000
days = cols = 5

poptart_sales = np.round(np.random.normal(3, 1.5, (sims, days)))

poptart_prob = (
 (pd.DataFrame(poptart_sales)
  .sum(axis=1) >= 17)
  .mean()
)
poptart_prob

0.3334

5. Compare Heights

- Men have an average height of 178 cm and standard deviation of 8cm.

- Women have a mean of 170, sd = 6cm.

- Since you have means and standard deviations, you can use np.random.normal to generate observations.

- If a man and woman are chosen at random, what is the likelihood the woman is taller than the man?

In [14]:
male_height = np.random.normal(178, 8, 100_000)
female_height = np.random.normal(170, 6, 100_000)

# single dataframe where each row reprsents a female and male height chosen at random
df = pd.DataFrame({"male_height" : male_height,
                   "female_height" : female_height})

# add boolean column showing true if the female height is greater in each row (trial)
df['female_is_taller'] = df.female_height > df.male_height
df

Unnamed: 0,male_height,female_height,female_is_taller
0,170.793371,168.397242,False
1,168.866175,170.468017,True
2,173.297740,173.407018,True
3,185.072551,172.239909,False
4,167.576749,167.530607,False
...,...,...,...
99995,171.381033,169.375220,False
99996,171.133706,163.913797,False
99997,179.499418,179.616171,True
99998,182.577523,169.672224,False


In [15]:
taller_female = df['female_is_taller'].mean()

print(f"The probability of a woman being taller than a man, chosen at random is {round(taller_female*100)}%")

The probability of a woman being taller than a man, chosen at random is 21%


6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

What is the probability that we observe an installation issue within the first 150 students that download anaconda?

In [16]:
nrows = 100_000
ncols = 50

issue_range = [False for r in range(1,250)]
issue_range.append(True)

installs = np.random.choice(issue_range, nrows * ncols).reshape(nrows, ncols)

df = pd.DataFrame(installs)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [17]:
has_problem = df.apply(lambda row: row.values.sum() == 0, axis=1).mean()
print(f'Odds of 50 students dowloading Anaconda without a problem: {round(has_problem*100)}%')

Odds of 50 students dowloading Anaconda without a problem: 82%


7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. How unlikely is this?

How likely is it that a food truck will show up sometime this week?

In [18]:
def food_truck(days):
    nrows = 100_000
    ncols = days
    sightings = np.random.choice([True, False], nrows * ncols, p=[.7,.3]).reshape(nrows, ncols)
    df = pd.DataFrame(sightings)

In [19]:
no_truck = df.apply(lambda row: row.values.sum() == 0, axis=1).mean()
truck = df.apply(lambda row: row.values.sum() > 0, axis=1).mean()

print(f'The odds of not seeing a food truck in Travis Park for {days} days is {round(no_truck*100)}%')
print(f'The odds of seeing at least one food truck in Travis Park in {days} days is {round(truck*100)}%')

The odds of not seeing a food truck in Travis Park for 5 days is 82%
The odds of seeing at least one food truck in Travis Park in 5 days is 18%


8. If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?

In [20]:
def shared_birthday(people):

    nrows = 100_000
    ncols = people
    sightings = np.random.choice([r for r in range(366)], nrows * ncols).reshape(nrows, ncols)
    df = pd.DataFrame(sightings)

    df['matching_birthdays'] = df.nunique(axis=1) < ncols


    matching_birthdays = df['matching_birthdays'].mean()
    print(f'The odds of a room of {ncols} people having at least two people that share a birthday are {round(matching_birthdays*100)}%')

In [21]:
num_list = [23, 20, 40]

for num in num_list:
    shared_birthday(num)

The odds of a room of 23 people having at least two people that share a birthday are 51%
The odds of a room of 20 people having at least two people that share a birthday are 41%
The odds of a room of 40 people having at least two people that share a birthday are 89%
