## Simulation Exercises

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import viz # curriculum example visualizations

np.random.seed(29)

### 1. How likely is it that you roll doubles when rolling two dice?

In [2]:
n_trials = nrows = 10000
n_dice = ncols = 2

In [3]:
rolls = np.random.choice((
    [1,2,3,4,5,6]), size = (n_trials, n_dice))
rolls

array([[6, 4],
       [5, 6],
       [3, 1],
       ...,
       [2, 5],
       [1, 4],
       [4, 6]])

In [4]:
rolls[:,0]

array([6, 5, 3, ..., 2, 1, 4])

In [5]:
rolls[:,1]

array([4, 6, 1, ..., 5, 4, 6])

In [6]:
rolls[:,0] == rolls[:,1]

array([False, False, False, ..., False, False, False])

In [7]:
(rolls[:,0] == rolls[:,1]).mean()

0.1709

#### There is a 16.7% chance we roll doubles

In [8]:
#another method: lambda
pd.DataFrame(rolls).head(25)

Unnamed: 0,0,1
0,6,4
1,5,6
2,3,1
3,1,2
4,2,1
5,6,4
6,2,1
7,2,4
8,4,6
9,5,1


In [9]:
df_rolls = pd.DataFrame(rolls).apply(lambda row: row[0] == row[1], axis=1).head(25)

In [10]:
df_rolls.mean()

0.12

### 2. If you flip 8 coins, what is the probability of getting exactly 3 heads? What is the probability of getting more than 3 heads?

In [11]:
# heads = 1
# tails = 0

coin_toss = [1,0]
flip_times = 8
sim_size = 1000000

In [12]:
coin_8 = np.random.choice(coin_toss, size = (sim_size,flip_times))
coin_8

array([[1, 1, 1, ..., 1, 0, 0],
       [1, 1, 0, ..., 0, 0, 1],
       [0, 1, 0, ..., 0, 0, 0],
       ...,
       [1, 1, 0, ..., 1, 1, 1],
       [1, 1, 0, ..., 1, 1, 0],
       [0, 0, 1, ..., 0, 1, 0]])

In [13]:
heads = coin_8.sum(axis=1)
heads

array([5, 3, 2, ..., 5, 5, 2])

In [14]:
three_heads = (heads == 3)
more_than_three_heads = (heads > 3)

In [15]:
three_heads.mean()

0.2189

#### Probability of exactly three heads = 0.219

In [16]:
more_than_three_heads.mean()

0.636661

#### Probability of more than three heads = 0.637

### 3. There are approximitely 3 web development cohorts for every 1 data science cohort at Codeup. Assuming that Codeup randomly selects an alumni to put on a billboard, what are the odds that the two billboards I drive past both have data science students on them?

1 of every 4 cohorts is data science; thus, the probability of choosing a student from a data science cohort is: 1/4 or 0.25

In [17]:
p_data_stu = 0.25
sim_size = 1000000

In [18]:
# 2 billboards, 2 opportunities
drive_past_board = n_cols = 2

drive_by = np.random.random((sim_size,drive_past_board))
drive_by

array([[0.80808238, 0.30730607],
       [0.62843761, 0.57475089],
       [0.87158667, 0.4590678 ],
       ...,
       [0.09351186, 0.10381401],
       [0.05081495, 0.04506325],
       [0.24660764, 0.84122846]])

In [19]:
is_data_stu = drive_by < p_data_stu
is_data_stu

array([[False, False],
       [False, False],
       [False, False],
       ...,
       [ True,  True],
       [ True,  True],
       [ True, False]])

In [20]:
(is_data_stu.sum(axis=1) == 2).mean()

0.062335

In [118]:
# theoretical prob
0.25 * 0.25

0.0625

#### Probability that both billboards have ds students on them: 0.062

### 4. Codeup students buy, on average, 3 poptart packages with a standard deviation of 1.5 a day from the snack vending machine. If on monday the machine is restocked with 17 poptart packages, how likely is it that I will be able to buy some poptarts on Friday afternoon? (Remember, if you have mean and standard deviation, use the np.random.normal) You'll need to make a judgement call on how to handle some of your values

In [21]:
mean = 3
std = 1.5
days = 4
sim_size = 1000000

In [119]:
poptart = np.random.normal(mean,std,size = (sim_size,5))
poptart

array([[ 0.83106838, -0.36462749,  0.21165029,  6.68395751,  5.1681002 ],
       [ 2.36054514,  2.23890054,  3.70139416,  1.4737922 ,  4.52703556],
       [ 5.61556893,  3.67569018, -1.37939889,  2.86906227,  6.02606414],
       ...,
       [ 1.58370278,  1.61952558,  3.85459416,  5.87421609,  2.94274881],
       [ 3.86539997,  3.19849574,  0.70281375,  2.52204717,  5.98385745],
       [ 3.1984542 ,  2.06470384,  5.31781487,  2.34680425,  2.51030563]])

In [120]:
poptart_sum = poptart.sum(axis=1) 
poptart_sum

array([12.53014889, 14.3016676 , 16.80698664, ..., 15.87478741,
       16.27261408, 15.43808279])

In [121]:
p_poptart = poptart_sum < 17
p_poptart

array([ True,  True,  True, ...,  True,  True,  True])

In [122]:
p_poptart.mean()

0.724695

#### Probability that I'll be able to by a poptart on Friday afternoon: 0.725

### 5. Compare Heights
- Men have an average height of 178 cm and standard deviation of 8cm.
- Women have a mean of 170, sd = 6cm.
- Since you have means and standard deviations, you can use np.random.normal to generate observations.
- If a man and woman are chosen at random, what is the likelihood the woman is taller than the man?

In [25]:
m_mean = 178
m_std = 8

w_mean = 170
w_std = 6

sim_size = 1000000

In [26]:
men = np.random.normal(m_mean,m_std,sim_size)
men

array([164.63769545, 190.74711258, 176.94142316, ..., 174.47613533,
       167.28918983, 176.50138938])

In [27]:
men.mean()

178.00672513085112

In [28]:
women = np.random.normal(w_mean,w_std,sim_size)
women

array([165.51492677, 170.06131423, 165.82991616, ..., 172.5340187 ,
       155.35719257, 173.00874032])

In [29]:
women.mean()

170.00406325532452

In [30]:
p_w_t_m = women > men
p_w_t_m

array([ True, False, False, ..., False, False, False])

In [31]:
p_w_t_m.mean()

0.211762

#### 0.21 probability the woman is taller than the man

### 6. When installing anaconda on a student's computer, there's a 1 in 250 chance that the download is corrupted and the installation fails. 
- What are the odds that after having 50 students download anaconda, no one has an installation issue? 100 students?

- What is the probability that we observe an installation issue within the first 150 students that download anaconda?

- How likely is it that 450 students all download anaconda without an issue?

In [34]:
# fail = 0
# succeed = 1

stu_1 = 50
stu_2 = 100
p_corr = 1/250
sim_size = 1000000

In [67]:
inst50 = np.random.choice([0,1], p=[0.996, 0.004], size = (sim_size,stu_1))
inst50

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [124]:
success = (inst50.sum(axis=1) ==0).mean()
success

0.818269

#### If 50 students, probability is 0.818

In [69]:
inst100 = np.random.choice([0,1], p=[0.996, 0.004], size = (sim_size,stu_2))
inst100

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [123]:
success100 = (inst100.sum(axis=1) == 0).mean()
success100

0.66933

#### If 100 students, probability is 0.669

In [50]:
stu_3 = 150
stu_4 = 450

In [51]:
inst150 = np.random.choice([0,1], p=[0.996, 0.004], size = (sim_size,stu_3))
inst150

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [125]:
success150 = (inst150.sum(axis=1) ==0).mean()
success150

0.548114

#### If 150 students, probability is 0.548

In [59]:
inst450 = np.random.choice([0,1], p=[0.996, 0.004], size = (sim_size,stu_4))
inst450

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [126]:
success_450 = (inst450.sum(axis=1) == 0).mean()
success_450

0.165107

#### If 450 students, probability is 0.165

### 7. There's a 70% chance on any given day that there will be at least one food truck at Travis Park. However, you haven't seen a food truck there in 3 days. 
- How unlikely is this?
- How likely is it that a food truck will show up sometime this week?

In [61]:
p_ft = .7
sim_size = 1000000
days = 3

In [72]:
days_3 = np.random.random((sim_size, days))
days_3

array([[0.19916982, 0.03316647, 0.81756583],
       [0.28272749, 0.84301437, 0.73888357],
       [0.62899206, 0.73115379, 0.41204914],
       ...,
       [0.15010693, 0.74441698, 0.73886964],
       [0.8835463 , 0.52537809, 0.77104546],
       [0.63100106, 0.83947662, 0.14702443]])

In [76]:
ftp = days_3 < p_ft
ftp

array([[ True,  True, False],
       [ True, False, False],
       [ True, False,  True],
       ...,
       [ True, False, False],
       [False,  True, False],
       [ True, False,  True]])

In [78]:
(ftp.sum(axis=1) == 1).mean()

0.189077

#### The likelihood of not seeing a food truck in 3 days is: 0.189

In [132]:
days_5 = np.random.random((sim_size, 3))
days_5


array([[0.02988161, 0.4881583 , 0.02358047],
       [0.90367545, 0.82425685, 0.26821217],
       [0.5154022 , 0.02632868, 0.43241795],
       ...,
       [0.32041781, 0.78002845, 0.67842199],
       [0.3003452 , 0.70348279, 0.93682083],
       [0.97627151, 0.32393739, 0.64490904]])

In [133]:
ftp_w = days_5 < p_ft
ftp_w

array([[ True,  True,  True],
       [False, False,  True],
       [ True,  True,  True],
       ...,
       [ True, False,  True],
       [ True, False, False],
       [False,  True,  True]])

In [134]:
(ftp_w.sum(axis=1) >= 1).mean()

0.973006

#### Probability of not seeing a food truck all week: 0.97

### 8. If 23 people are in the same room, what are the odds that two of them share a birthday? What if it's 20 people? 40?

In [94]:
p_bday = 1/365
sim_size = 1_000_000
ppl = 23

In [136]:
### Rav's solution:

bd = np.random.choice(range(1,366), size = (10000, 23))
bd

array([[ 59,  53, 340, ...,  19,  80,  52],
       [123, 129, 195, ..., 352, 134, 280],
       [106, 105, 337, ...,  75,  27, 164],
       ...,
       [ 77,  15,  32, ..., 320, 353,  51],
       [215, 266, 123, ...,  71, 305, 159],
       [188, 225,  51, ..., 259,  35,  79]])

In [137]:
(pd.DataFrame(bd).nunique(axis = 1) < 23).mean()

0.5032

#### Out of 23 ppl, probability 2 share a bday: 0.5032

In [145]:
bd_20 = np.random.choice(range(1,366), size = (10000, 20))
bd

array([[ 59,  53, 340, ...,  19,  80,  52],
       [123, 129, 195, ..., 352, 134, 280],
       [106, 105, 337, ...,  75,  27, 164],
       ...,
       [ 77,  15,  32, ..., 320, 353,  51],
       [215, 266, 123, ...,  71, 305, 159],
       [188, 225,  51, ..., 259,  35,  79]])

In [146]:
(pd.DataFrame(bd_20).nunique(axis = 1) < 20).mean()

0.4125

#### Out of 20 ppl, prob is 0.4125

In [143]:
bd_40 = np.random.choice(range(1,366), size = (10000, 40))
bd

array([[ 59,  53, 340, ...,  19,  80,  52],
       [123, 129, 195, ..., 352, 134, 280],
       [106, 105, 337, ...,  75,  27, 164],
       ...,
       [ 77,  15,  32, ..., 320, 353,  51],
       [215, 266, 123, ...,  71, 305, 159],
       [188, 225,  51, ..., 259,  35,  79]])

In [144]:
(pd.DataFrame(bd_40).nunique(axis = 1) < 40).mean()

0.8881

#### Out of 40 ppl, prob is 0.888