In [25]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
from math import sqrt

In [3]:
# Simulation dimensions: one row per trial, one column per die.
n_trials = nrows = 10_000
n_dice = ncols = 3

# Draw every roll in a single call, asking numpy for the final
# (trials x dice) shape directly; each entry is one of the six faces.
rolls = np.random.choice([1, 2, 3, 4, 5, 6], size=(nrows, ncols))
rolls

array([[3, 6, 3],
       [1, 1, 2],
       [3, 1, 2],
       ...,
       [3, 6, 3],
       [6, 2, 5],
       [2, 6, 2]])

In [4]:
# Total shown by the dice in each trial (sum across the columns of each row).
sums_by_trial = np.sum(rolls, axis=1)
sums_by_trial

array([12,  4,  6, ..., 12, 13, 10])

In [12]:
# A trial counts as a win when its dice total is strictly greater than 12.
wins = np.greater(sums_by_trial, 12)
wins

array([False, False, False, ..., False,  True, False])

In [13]:
# Fraction of trials that were wins (booleans are cast to 0/1 before averaging).
win_rate = np.mean(wins.astype(int))
win_rate

0.2637

In [14]:
# Average profit per play: winnings of 15 per win, weighted by the simulated
# win rate, minus the cost of 5.
cost = 5
expected_winnings = 15 * win_rate
expected_profit = expected_winnings - cost
expected_profit

-1.0445000000000002

In [15]:
# Same calculation as before, except that a total of exactly 12 now also
# counts as a win (>= instead of >).
wins = np.greater_equal(sums_by_trial, 12)
win_rate = np.mean(wins.astype(int))

# Average profit per play: winnings of 15 per win, minus the cost of 5.
cost = 5
expected_winnings = 15 * win_rate
expected_profit = expected_winnings - cost
expected_profit

0.6070000000000002

No Rest or Relaxation

There's a 30% chance my son takes a nap on any given weekend day. What is the chance that he takes a nap at least one day this weekend? What is the probability that he doesn't nap at all?

In [16]:
# Probability of a nap on any single weekend day.
p_nap = 0.3

# Simulation dimensions: one row per simulated weekend, one column per day.
n_simulated_weekends = nrows = 100_000
ndays = ncols = 2

To simulate the results from many weekends, we'll create a 100,000 x 2 matrix, with 2 being the number of days in a weekend and 100,000 being the number of simulations we want to run.

To determine whether or not a nap is taken on a given day, we'll generate a random number between 0 and 1, and say that it is a nap if it is less than our probability of taking a nap.

In [17]:
# One uniform random draw from [0, 1) per cell of an nrows x ncols grid.
data = np.random.random(size=(nrows, ncols))
data

array([[0.8989816 , 0.25634187],
       [0.76632041, 0.61797236],
       [0.93152143, 0.01180821],
       ...,
       [0.58016506, 0.83048012],
       [0.23111159, 0.86727359],
       [0.20595987, 0.13770003]])

In [18]:
# Mark a cell as a nap when its random draw falls below the nap probability.
naps = np.less(data, p_nap)
naps

array([[False,  True],
       [False, False],
       [False,  True],
       ...,
       [False, False],
       [ True, False],
       [ True,  True]])

Now that we have each day as either true or false, we can take the sum of each row to find the total number of naps for the weekend. When we sum an array of boolean values, numpy will treat True as 1 and False as 0.

In [19]:
# Count the naps in each row (True is summed as 1, False as 0).
np.sum(naps, axis=1)

array([1, 0, 1, ..., 0, 1, 2])

Now we have the results of our simulation: an array where each number represents how many naps were taken in a two-day weekend.

In [20]:
# Share of simulated weekends with one or more naps, i.e. the simulated
# probability of AT LEAST one nap (not exactly one).
naps_per_weekend = naps.sum(axis=1)
(naps_per_weekend >= 1).mean()

0.5106

In [23]:
# Share of simulated weekends without a single nap, i.e. the simulated
# probability of no naps at all.
naps_per_weekend = naps.sum(axis=1)
(naps_per_weekend == 0).mean()

0.4894

### One With Dataframes

Let's take a look at one more problem:

What is the probability of getting at least one 3 in 3 dice rolls?

To simulate this, we'll use a similar strategy to how we modeled the dice rolls in the previous example, but this time, we'll store the results in a pandas dataframe so that we can apply a lambda function that will check to see if one of the rolls was a 3.

In [None]:
# Simulation dimensions: one row per simulation, one column per die rolled.
n_simulations = nrows = 10**5
n_dice_rolled = ncols = 3

# rolls holds a 100,000 x 3 (nrows x ncols) matrix where each element is a
# randomly chosen number from 1 to 6. The target shape is passed straight to
# numpy instead of drawing a flat array and reshaping it afterwards.
rolls = np.random.choice([1, 2, 3, 4, 5, 6], size=(nrows, ncols))

### Lastly we create a dataframe from the rolls

pd.DataFrame(rolls) converts our 2d numpy matrix to a pandas DataFrame

.apply(...) applies a function to each row in our dataframe; because we specified axis=1, the function will be called with each row as its argument. The body of the function checks whether the value 3 is among the values of the row, and returns either True or False.

.mean() takes our resulting series of boolean values, and treats True as 1 and False as 0, to give us the average rate of Trues; in this case, the simulated probability of getting at least one 3 in 3 dice rolls.


In [24]:
# Wrap the rolls in a DataFrame, flag each row that contains a 3,
# then average the flags to get the simulated probability.
rolls_df = pd.DataFrame(rolls)
has_a_three = rolls_df.apply(lambda row: 3 in row.values, axis=1)
has_a_three.mean()

0.41971