In [1]:
import numpy as np
import pandas as pd

# Simulations == Monte Carlo Method

## How to run a simulation with Python/Numpy/Pandas
1. Figure out a way to represent our data
2. Create a matrix of random data, rows = simulations, columns = trials
    - For example, rolling 2 dice 10,000 times means rows=10,000 and columns = 2 because we roll 2 dice each time.
3. Apply an aggregate function, row-wise to get the results of the simulation
4. Apply a final aggregate to get our probability

In [2]:
# Answer the question experimentally rather than theoretically:
# what is the probability of flipping "Heads" on a coin?

# Represent the possible outcomes of a single flip
outcomes = ["Heads", "Tails"]

# Number of flips to simulate (one million)
n_simulations = 1_000_000

# Draw one outcome per simulated flip; with no weights given, each outcome is equally likely
flips = np.random.choice(outcomes, size=n_simulations)

# Experimental probability of "Heads": the fraction of simulated flips that came up "Heads"
np.mean(flips == "Heads")

0.500249

In [3]:
# Another example: what is the probability of rolling a 5 on a 6-sided die?

# Step 1: represent the possible outcomes of one roll
outcomes = list(range(1, 7))

# Step 2: simulate the rolls, one random outcome per simulation
n_simulations = 10_000
rolls = np.random.choice(outcomes, size=n_simulations)

# Fraction of simulated rolls that landed on 5
np.mean(rolls == 5)

0.172

In [4]:
# Probability of rolling a 5 or a 6 on a 6-sided die:
# the fraction of simulated rolls that are at least 5
np.mean(rolls >= 5)

0.3408

In [5]:
# Probability of rolling less than a 3 (3 itself is excluded):
# the fraction of simulated rolls strictly below 3
np.mean(rolls < 3)

0.3273

In [6]:
# Probability of rolling anything other than a 3:
# the fraction of simulated rolls that are not equal to 3
np.mean(rolls != 3)

0.8312

## Let's Roll 2 Dice at Once!

1. Figure out a way to represent the data
2. Create a matrix of random data, rows=simulations, columns=trials
3. Apply an aggregate row-wise to get the result of each simulation
4. Apply a final aggregate (probably the .mean) to get our probability

In [7]:
# What are the odds of rolling Snake Eyes on two dice?

# Step 1: represent the outcomes of a single die
outcomes = list(range(1, 7))

# Step 2: build a matrix of random rolls where rows = simulations, columns = trials
#   simulations: the number of times the experiment is run
#   trials: the number of things (dice) in each experiment
n_trials = 2  # two dice are rolled in each experiment
n_simulations = 1_000_000

# Passing a tuple as `size` sets both dimensions: (rows, columns)
rolls = np.random.choice(outcomes, size=(n_simulations, n_trials))
rolls

array([[2, 6],
       [3, 2],
       [6, 6],
       ...,
       [1, 6],
       [6, 3],
       [1, 5]])

In [8]:
# Step 3: aggregate each simulation (row) into a single value
# axis=1 adds up the values within each row, giving one total per simulation
sum_of_rolls = np.sum(rolls, axis=1)
sum_of_rolls

array([ 8,  5, 12, ...,  7,  9,  6])

In [9]:
# axis=0 means sum down each column (one total per column).
# If you don't pass an axis, the default is axis=None, which sums every element into a single number.
# rolls.sum(axis=0)

In [10]:
# Step 4: final aggregate.
# The mean of the boolean mask is the fraction of experiments whose sum is 2
np.mean(sum_of_rolls == 2)

0.027772

In [11]:
# Theoretical value to compare the simulation against.
# The expression is evaluated left to right: ((1/6) * 1) / 6, i.e. 1/36.
theoretical = 1/6 * 1/6

message = f"Our theoretical probability of rolling snake eyes is 1/6 * 1/6, which is {theoretical}"
print(message)

Our theoretical probability of rolling snake eyes is 1/6 * 1/6, which is 0.027777777777777776


In [12]:
# What is the probability of rolling a 7 on two dice?
# Combinations that add up to 7: 1+6, 2+5, 3+4, 4+3, 5+2, 6+1

# Step 1: represent the outcomes of a single die
outcomes = list(range(1, 7))

# Step 2: generate a matrix of random outcomes
# size=(simulations, trials) -> (number of experiments, number of dice per experiment)
rolls = np.random.choice(outcomes, size=(10_000, 2))

# Step 3: row-wise aggregate; axis=1 adds the two dice within each experiment
sum_of_rolls = np.sum(rolls, axis=1)

# Step 4: fraction of experiments whose two dice add up to 7
np.mean(sum_of_rolls == 7)

0.1651

In [14]:
# What are the experimental probabilities of rolling each sum?
#
# Relies on `sum_of_rolls` (one two-dice total per simulation) from an earlier cell.

# Possible sum outcomes from 2 dice: 2 through 12.
# Defined here so the cell runs on a fresh kernel; no visible cell defines `x`.
x = list(range(2, 13))

# Probability of seeing each sum: the fraction of simulations that produced it
y = [(sum_of_rolls == n).mean() for n in x]

# Build the table in one step: one row per possible sum
df = pd.DataFrame({"outcome": x, "probability": y})

print("Sum outcome of rolling 2 dice and the probability of seeing that outcome")
df

Sum outcome of rolling 2 dice and the probability of seeing that outcome


Unnamed: 0,outcome,probability
0,2,0.0314
1,3,0.0547
2,4,0.0818
3,5,0.113
4,6,0.142
5,7,0.1651
6,8,0.1346
7,9,0.114
8,10,0.0798
9,11,0.0539
