## Simulation Exercises

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import viz # curriculum example visualizations

np.random.seed(29)

### 1. How likely is it that you roll doubles when rolling two dice?

In [None]:
n_trials = nrows = 10000
n_dice = ncols = 2

In [12]:
rolls = np.random.choice((
    [1,2,3,4,5,6]), size = (n_trials, n_dice))
rolls

array([[2, 3],
       [3, 6],
       [5, 4],
       ...,
       [6, 5],
       [3, 2],
       [6, 5]])

In [23]:
rolls[:,0]

array([False, False, False, ..., False, False, False])

In [24]:
rolls[:,1]

array([3, 6, 4, ..., 5, 2, 5])

In [25]:
rolls[:,0] == rolls[:,1]

array([False, False, False, ..., False, False, False])

In [27]:
(rolls[:,0] == rolls[:,1]).mean()

0.1672

#### There is a 16.7% chance we roll doubles

In [17]:
#another method: lambda
pd.DataFrame(rolls).head(25)

Unnamed: 0,0,1
0,2,3
1,3,6
2,5,4
3,2,5
4,5,6
5,1,2
6,1,3
7,3,2
8,5,4
9,5,2


In [30]:
df_rolls = pd.DataFrame(rolls).apply(lambda row: row[0] == row[1], axis=1).head(25)

In [31]:
df_rolls.mean()

0.16

### 2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [53]:
# heads = 1
# tails = 0

coin_toss = [1,0]
flip_times = 8
sim_size = 1000000

In [54]:
coin_8 = np.random.choice(coin_toss, size = (sim_size,flip_times))
coin_8

array([[0, 1, 1, ..., 0, 1, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 1, 1, 0],
       ...,
       [1, 1, 0, ..., 1, 1, 0],
       [0, 1, 1, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 0]])

In [56]:
heads = coin_8.sum(axis=1)
heads

array([4, 2, 6, ..., 6, 4, 1])

In [59]:
three_heads = (heads == 3)
more_than_three_heads = (heads > 3)

In [61]:
three_heads.mean()

0.219407

#### Probability of exactly three heads = 0.219

In [62]:
more_than_three_heads.mean()

0.636582

#### Probability of more than three heads = 0.637

### 3. There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

1 of every 4 cohorts is data science; thus, the probability of choosing a student from a data science cohort is: 1/4 or 0.25

In [64]:
p_data_stu = 0.25
sim_size = 1000000

In [66]:
# 2 billboards, 2 opportunities
drive_past_board = n_cols = 2

drive_by = np.random.random((sim_size,drive_past_board))
drive_by

array([[0.06163575, 0.71201443],
       [0.46544835, 0.62268262],
       [0.70894091, 0.08664236],
       ...,
       [0.76505303, 0.72209678],
       [0.60164862, 0.50423587],
       [0.59772599, 0.09654694]])

In [68]:
is_data_stu = drive_by < p_data_stu
is_data_stu

array([[ True, False],
       [False, False],
       [False,  True],
       ...,
       [False, False],
       [False, False],
       [False,  True]])

In [69]:
(is_data_stu.sum(axis=1) == 2).mean()

0.062057

### 4. Codeup students buy, on average, 3 poptart packages with a standard deviation of 1.5 a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon? (Remember, if you have mean and standard deviation, use the np.random.normal) You'll need to make a judgement call on how to handle some of your values

In [98]:
mean = 3
std = 1.5
days = 4

In [174]:
poptart = np.random.normal(mean,std,size = (10,4))
poptart

array([[1.82764718, 0.73071624, 0.49969661, 4.14028829],
       [1.68002001, 2.0293688 , 4.85766044, 6.95084311],
       [4.68488967, 3.46679117, 4.25556713, 3.70486259],
       [1.09387601, 5.55793513, 3.09815467, 1.00370535],
       [4.85290494, 3.90992509, 1.91385495, 2.7101591 ],
       [5.12990372, 4.50399241, 3.48772726, 0.55119654],
       [2.95125727, 2.48677164, 4.02217038, 2.48753593],
       [6.69495661, 2.47754689, 3.12269861, 2.56494433],
       [2.56165106, 2.1727189 , 1.05038355, 5.18087073],
       [1.79432636, 4.9055071 , 1.54505794, 0.5919901 ]])

In [176]:
p_poptart = poptart.sum() < 16
p_poptart

False

In [117]:
#p_poptart.mean()

1.0

### 5. Compare Heights
- Men have an average height of 178 cm and standard deviation of 8cm.
- Women have a mean of 170, sd = 6cm.
- Since you have means and standard deviations, you can use np.random.normal to generate observations.
- If a man and woman are chosen at random, what is the likelihood the woman is taller than the man?

In [89]:
m_mean = 178
m_std = 8

w_mean = 170
w_std = 6

sim_size = 1000000

In [103]:
men = np.random.normal(m_mean,m_std,sim_size)
men

array([178.54345216, 171.52793938, 190.49102255, ..., 168.58297498,
       173.36702554, 185.66452862])

In [108]:
men.mean()

177.9949718201851

In [104]:
women = np.random.normal(w_mean,w_std,sim_size)
women

array([177.54671752, 177.78805091, 172.7866646 , ..., 181.01987307,
       185.32967088, 168.09443656])

In [106]:
women.mean()

170.00465226453005

In [110]:
p_w_t_m = women > men
p_w_t_m

array([False,  True, False, ...,  True,  True, False])

In [111]:
p_w_t_m.mean()

0.212355

#### 0.21 probability the woman is taller than the man

### 6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. 
- What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

- What is the probability that we observe an installation issue within the first 150 students that download anaconda?

- How likely is it that 450 students all download anaconda without an issue?

In [154]:
# fail = 0
# succeed = 1

sof = [1,0]
stu_1 = 50
stu_2 = 100
p_corr = 1/250
sim_size = 1000000

In [163]:
inst50 = np.random.random(size = (sim_size,stu_1))
inst50

array([[0.0237615 , 0.4415125 , 0.13674159, ..., 0.58983684, 0.51258223,
        0.78664985],
       [0.24917423, 0.14948341, 0.54453614, ..., 0.53710996, 0.43927086,
        0.81021402],
       [0.517036  , 0.20995394, 0.0418408 , ..., 0.92140466, 0.72389631,
        0.56217646],
       ...,
       [0.39623767, 0.59637009, 0.78001951, ..., 0.97410949, 0.66018724,
        0.70478004],
       [0.12631373, 0.84680954, 0.15527551, ..., 0.98218419, 0.59275287,
        0.18079772],
       [0.3579006 , 0.6664397 , 0.69335004, ..., 0.60463288, 0.94135258,
        0.03488349]])

In [164]:
p50 = inst50 < p_corr
p50

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [166]:
sum_p50 = p50.sum(axis=1)

In [168]:
(sum_p50 == 0).mean()

0.818104

#### If 50 students, probability is 0.818

In [169]:
inst100 = np.random.random(size = (sim_size,stu_2))
inst100

array([[0.52084839, 0.97385308, 0.04909355, ..., 0.23924993, 0.73535178,
        0.47851349],
       [0.8709363 , 0.61087479, 0.57870395, ..., 0.69030897, 0.39880567,
        0.10426797],
       [0.46756336, 0.80298349, 0.9964031 , ..., 0.94169969, 0.42439175,
        0.0325202 ],
       ...,
       [0.58881683, 0.94354985, 0.19264057, ..., 0.14301358, 0.55764332,
        0.12138923],
       [0.27216366, 0.66067099, 0.6988434 , ..., 0.8274693 , 0.12203945,
        0.54902727],
       [0.21743419, 0.81769245, 0.22262857, ..., 0.20149499, 0.83568857,
        0.05132862]])

In [170]:
p100 = inst100 < p_corr
p100

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [172]:
sum_p100 = p100.sum(axis=1)
sum_p100

array([1, 0, 0, ..., 0, 0, 1])

In [173]:
(sum_p50 == 0).mean()

0.818104

In [None]:
#### If 100 students, probability is 0.818

### 7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. 
- How unlikely is this?
- How likely is it that a food truck will show up sometime this week?

### 8. If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?