# Probability Simulations with Python
Dice and coins are great examples of independent events

In [2]:
import numpy as np
import pandas as pd

## Example 0

In [72]:
# What are the odds of flipping heads?
# Seed NumPy's global random generator before the first simulation so that
# restarting the kernel and running every cell in order reproduces the same
# simulated flips (and therefore the same estimates).
SEED = 42
np.random.seed(SEED)

# Simulate one million fair coin flips. The mean of the boolean mask is the
# proportion of flips that landed on "Heads".
flips = np.random.choice(["Heads", "Tails"], size=1_000_000)
(flips == "Heads").mean()

0.49909

In [78]:
# Same experiment as Example 0, encoded as 0s and 1s instead of strings.
# "Heads" is the outcome we want to measure, so it is the positive case (1);
# 0 means "not the thing", 1 means "the thing you're looking to measure or detect".
# With this encoding the mean of the array is directly the proportion of heads.
flips = np.random.choice(a=[0, 1], size=1_000_000)
flips.mean()

0.50016

## Example 1

What's the probability of flipping exactly two heads?

We know a fair coin is 50/50

And we know each flip is independent of the next flip (independence or dependence is a big deal w/ probability)


In [83]:
# Simulate 100_000 trials of flipping 2 coins at a time.
# Each row of the resulting array is one trial; each column is one coin.
n_trials, n_coins = 100_000, 2

flips = np.random.choice(["Heads", "Tails"], size=(n_trials, n_coins))
flips

array([['Heads', 'Heads'],
       ['Tails', 'Tails'],
       ['Tails', 'Heads'],
       ...,
       ['Heads', 'Heads'],
       ['Heads', 'Tails'],
       ['Heads', 'Tails']], dtype='<U5')

In [84]:
# Sanity check the simulation's dimensions:
# one row per trial (100k trials) and one column per coin flipped in that trial (2)
flips.shape

(100000, 2)

In [85]:
# Wrap the simulated flips in a DataFrame with one named column per coin.
# Note that this rebinds `flips` from the raw array to the DataFrame.
flips = pd.DataFrame(data=flips, columns=["first_flip", "second_flip"])
flips.head(5)

Unnamed: 0,first_flip,second_flip
0,Heads,Heads
1,Tails,Tails
2,Tails,Heads
3,Heads,Heads
4,Heads,Tails


In [86]:
# Let's set up a function that counts up the number of heads per trial, (per row)
# If we .apply this function with axis=1, it runs per row (or without, defaults to axis=0 which is by column)
def count_heads(row):
    """Count how many flips in a single trial came up "Heads".

    Parameters
    ----------
    row : iterable of flip results
        One trial's flips, e.g. the row (a pandas Series) that
        ``DataFrame.apply`` passes in when called with ``axis=1``.

    Returns
    -------
    int
        The number of entries in ``row`` that are exactly equal to "Heads".
    """
    # Iterate over every entry instead of naming first_flip/second_flip, so the
    # same function works for any number of coins per trial. Entries that are
    # not the string "Heads" (including non-string values) compare as False
    # and are not counted.
    return sum(flip == "Heads" for flip in row)

In [87]:
# Add a 3rd column holding the number of heads in each trial.
# Applying along the columns axis hands count_heads one row (one trial) at a time.
flips["head_count"] = flips.apply(func=count_heads, axis="columns")
flips

Unnamed: 0,first_flip,second_flip,head_count
0,Heads,Heads,2
1,Tails,Tails,0
2,Tails,Heads,1
3,Heads,Heads,2
4,Heads,Tails,1
...,...,...,...
99995,Heads,Tails,1
99996,Tails,Heads,1
99997,Heads,Heads,2
99998,Heads,Tails,1


In [88]:
# Experimental chance of flipping exactly two heads:
# the proportion of trials whose head_count equals 2
flips["head_count"].eq(2).mean()

0.25022

In [89]:
# What is the theoretical chance of flipping exactly two heads on a fair coin?
# The two flips are independent, so P(two heads) = P(heads) * P(heads)
0.5 * 0.5

0.25

## Example 2
What are the chances of flipping 1 or more "Heads" when we flip 2 coins at a time?

In [94]:
# One approach: the proportion of trials whose head_count is at least 1
# (greater than or equal to 1) out of all trials
flips["head_count"].ge(1).mean()

0.74947

In [95]:
# Another approach: "exactly one head" and "exactly two heads" cannot both
# happen in the same trial, so the chance of one or more heads is the sum of
# the chance of each case.

exactly_one_head = flips["head_count"].eq(1).mean()
two_heads = flips["head_count"].eq(2).mean()
one_or_more = exactly_one_head + two_heads
one_or_more

0.7494700000000001

## Example 3
What are the chances of flipping zero heads on two coins at a time?

In [96]:
# Experimentally: the proportion of trials in which neither coin landed on heads
flips["head_count"].eq(0).mean()

0.25053

## Example 4
When flipping 5 coins, what are the chances of flipping exactly 5 heads?

In [101]:
# Encode heads as 1 and tails as 0.
# Each row is one trial of 5 coin flips; each column is one coin.
n_trials, n_coins = 100_000, 5

flips = pd.DataFrame(np.random.choice([0, 1], size=(n_trials, n_coins)))
flips

Unnamed: 0,0,1,2,3,4
0,0,1,0,1,1
1,0,0,1,0,0
2,0,0,1,1,1
3,1,1,0,1,1
4,1,0,1,1,0
...,...,...,...,...,...
99995,0,0,1,0,1
99996,1,0,0,0,0
99997,1,0,1,1,0
99998,1,1,1,0,1


In [103]:
# Count up the number of heads (or 1s in this case) in each trial.
# Any existing n_heads column is excluded from the row sum: otherwise re-running
# this cell would add the previous count to the coin columns and inflate every value.
flips["n_heads"] = flips.drop(columns="n_heads", errors="ignore").sum(axis=1)
flips.head()

Unnamed: 0,0,1,2,3,4,n_heads
0,0,1,0,1,1,3
1,0,0,1,0,0,1
2,0,0,1,1,1,3
3,1,1,0,1,1,4
4,1,0,1,1,0,3


In [105]:
# Proportion of simulated trials in which all 5 coins landed on heads
flips["n_heads"].eq(5).mean()

0.03093

## Example 5
When flipping 5 coins, what are the chances of flipping 2 or more heads?

In [106]:
# Proportion of trials with at least 2 heads
flips["n_heads"].ge(2).mean()

0.81095

## Example 6
When flipping 5 coins, what are the chances of flipping exactly 3 or exactly 4 heads?

In [109]:
# Boolean masks marking the trials that produced exactly 3 and exactly 4 heads
exactly_three = flips["n_heads"].eq(3)
exactly_four = flips["n_heads"].eq(4)

# A trial counts if either mask is True; the mean of the combined mask is the
# proportion of trials with 3 or 4 heads
(exactly_four | exactly_three).mean()

0.46774