In [1]:
# 1.1: Sample Space Analysis
# Sample space of a coin flip: the set of every outcome a single flip can produce
sample_space = {'Heads', 'Tails'}

# Outcomes are treated as equally likely here, so a single outcome has
# probability 1 / (number of outcomes in the sample space)
probability_heads = 1 / len(sample_space)
print(f'Probability of choosing heads is {probability_heads}')

Probability of choosing heads is 0.5


In [2]:
# 4 Event Conditions
def is_heads_or_tails(outcome):
    """Event condition: True when `outcome` is either 'Heads' or 'Tails'."""
    coin_faces = {'Heads', 'Tails'}
    return outcome in coin_faces
def is_neither(outcome):
    """Event condition: True when `outcome` is neither 'Heads' nor 'Tails'."""
    if is_heads_or_tails(outcome):
        return False
    return True

def is_heads(outcome):
    """Event condition: True when `outcome` equals "Heads"."""
    return outcome == "Heads"
def is_tails(outcome):
    """Event condition: True when `outcome` equals "Tails"."""
    return outcome == "Tails"

In [4]:
def get_matching_event(event_condition, sample_space):
    """Return the event selected by `event_condition`.

    Args:
        event_condition: Callable applied to each outcome; outcomes for which
            it returns a truthy value are kept.
        sample_space: Iterable of outcomes. Iterating a dict yields its keys,
            so a dict-based sample space contributes its keys as outcomes.

    Returns:
        A set containing every outcome that satisfies the condition.
    """
    return {outcome for outcome in sample_space if event_condition(outcome)}

In [5]:
# The four event conditions to evaluate against the coin sample space
event_conditions = [is_heads_or_tails, is_heads, is_tails, is_neither]

# For each condition, print its function name and the set of outcomes
# in sample_space that satisfy it
for event_condition in event_conditions:
    print(f"Event Condition: {event_condition.__name__}")
    event = get_matching_event(event_condition, sample_space)
    print(f"Event: {event}\n")

Event Condition: is_heads_or_tails
Event: {'Heads', 'Tails'}

Event Condition: is_heads
Event: {'Heads'}

Event Condition: is_tails
Event: {'Tails'}

Event Condition: is_neither
Event: set()



In [6]:
def compute_probability(event_condition, generic_sample_space):
    """Return the fraction of outcomes in the sample space that satisfy the condition.

    Args:
        event_condition: Callable taking an outcome and returning a truthy
            value when the outcome belongs to the event.
        generic_sample_space: Collection of outcomes; each one counts equally.

    Returns:
        Number of matching outcomes divided by the size of the sample space.
    """
    matching_outcomes = get_matching_event(event_condition, generic_sample_space)
    event_size = len(matching_outcomes)
    total_size = len(generic_sample_space)
    return event_size / total_size

In [7]:
# Probability of each event condition over the (unweighted) coin sample space
for event_condition in event_conditions:
    prob = compute_probability(event_condition, sample_space)
    # __name__ gives the function's name for the printed label
    name = event_condition.__name__
    print(f"Probability of event arising from '{name}' is {prob}")

Probability of event arising from 'is_heads_or_tails' is 1.0
Probability of event arising from 'is_heads' is 0.5
Probability of event arising from 'is_tails' is 0.5
Probability of event arising from 'is_neither' is 0.0


In [10]:
# 1.1.1 Analyzing a biased coin
# Weighted sample space: each outcome maps to its weight (Heads weighs 4, Tails weighs 1)
weighted_sample_space = {'Heads' : 4, 'Tails': 1}

# Getting the new size of the sample space: the sum of all weights
sample_space_size = sum(weighted_sample_space.values())
assert sample_space_size == 5

# Iterating the dict yields its keys, so the event is a set of outcome names;
# its size is the summed weight of those outcomes rather than their count
event = get_matching_event(is_heads_or_tails, weighted_sample_space)
event_size = sum(weighted_sample_space[outcome] for outcome in event)
assert event_size == 5

In [11]:
# Generalized Event Probability Function
def compute_event_probability(event_condition, generic_sample_space):
    """Return the probability of the event selected by `event_condition`.

    Args:
        event_condition: Callable taking one outcome and returning a truthy
            value when that outcome belongs to the event.
        generic_sample_space: Either an unweighted sample space (a `set` or
            `frozenset` of equally likely outcomes) or a weighted sample space
            (a dict-like object mapping each outcome to its weight).

    Returns:
        For an unweighted sample space, the number of matching outcomes divided
        by the number of outcomes. For a weighted sample space, the summed
        weight of the matching outcomes divided by the total weight.

    Raises:
        ZeroDivisionError: If the sample space is empty or its total weight is 0.
    """
    # Outcomes satisfying the condition (iterating a dict yields its keys).
    event = {outcome for outcome in generic_sample_space if event_condition(outcome)}

    # isinstance, rather than an exact type comparison, so that frozenset and
    # set subclasses are also treated as unweighted sample spaces instead of
    # falling through to the weighted branch and failing there.
    if isinstance(generic_sample_space, (set, frozenset)):
        return len(event) / len(generic_sample_space)

    # Otherwise the sample space maps outcomes to weights: compare the weight
    # of the event against the total weight.
    event_size = sum(generic_sample_space[outcome] for outcome in event)
    return event_size / sum(generic_sample_space.values())

In [12]:
# Probability of each event condition over the weighted (biased coin) sample space
for event_condition in event_conditions:
    prob = compute_event_probability(event_condition, weighted_sample_space)
    # __name__ gives the function's name for the printed label
    name = event_condition.__name__
    print(f"Probability of event arising from '{name}' is {prob}")

Probability of event arising from 'is_heads_or_tails' is 1.0
Probability of event arising from 'is_heads' is 0.8
Probability of event arising from 'is_tails' is 0.2
Probability of event arising from 'is_neither' is 0.0


In [13]:
# 1.2 Computing nontrivial Probabilities
# 1.2.1 Analyzing a family with 4 children
possible_children = ['Boy', 'Girl']

# Computing sample space with 4 for clauses, one per child: every ordered
# 4-tuple of 'Boy'/'Girl' is an outcome
sample_space = {
    (child1, child2, child3, child4)
    for child1 in possible_children
    for child2 in possible_children
    for child3 in possible_children
    for child4 in possible_children
}

In [15]:
# Computing sample space with itertools.product
from itertools import product
# 4 * [possible_children] repeats the list four times; unpacking it passes
# four iterables to product, which yields every ordered 4-tuple
all_combinations = product(*(4 * [possible_children]))

# Check: same outcomes as the nested-loop sample space.
# Note: set() consumes the product iterator, so all_combinations is exhausted afterwards.
assert set(all_combinations) == sample_space

In [16]:
# Same sample space written more compactly: product's `repeat` argument
# replaces the manual repetition of possible_children.
# NOTE(review): 'effiecient' is a misspelling of 'efficient'; the name is kept as-is here.
sample_space_effiecient = set(product(possible_children, repeat=4))
assert sample_space == sample_space_effiecient

In [17]:
# Find families with 2 boys (combinations with 2 boys)
def has_two_boys(outcome):
    """Event condition: True when exactly two entries of `outcome` equal 'Boy'."""
    boy_count = sum(1 for child in outcome if child == 'Boy')
    return boy_count == 2

In [18]:
# sample_space is a set here, so this is (outcomes with exactly two boys) / (all outcomes)
prob = compute_event_probability(has_two_boys, sample_space)
print(f'Probability of 2 boys is {prob}')

Probability of 2 boys is 0.375


In [19]:
# 1.2.2 Analyzing multiple die rolls
# Values a single roll can take: 1 through 6 (range's upper bound is exclusive)
possible_rolls = list(range(1,7))
print(possible_rolls)

[1, 2, 3, 4, 5, 6]


In [20]:
# What is the probability that 6 die rolls add up to 21?
# Get the sample space for 6 dice: every ordered 6-tuple of roll values.
# Note: this rebinds sample_space, replacing the earlier family sample space.
sample_space = set(product(possible_rolls, repeat=6))

In [21]:
# Define a function that returns True if the total is 21 and False otherwise
def has_sum_of_21(outcome):
    """Event condition: True when the values in `outcome` add up to exactly 21."""
    target_total = 21
    return sum(outcome) == target_total

In [31]:
prob = compute_event_probability(has_sum_of_21, sample_space)
print(f"6 rolls sum to 21 with a probability of {prob}")

# Recover the number of matching outcomes from the probability.
# round() is used rather than int(): int() truncates toward zero, so a product
# that lands slightly below the true whole number because of floating-point
# error would be reported as one too few.
num_21 = round(len(sample_space) * prob)
print(f"There are {num_21} different ways of getting 21 with 6 dice")

6 rolls sum to 21 with a probability of 0.09284979423868313
There are 4332 different ways of getting 21 with 6 dice


In [23]:
# With lambda functions instead of has_sum_of_21.
# compute_probability is the unweighted version; sample_space is a set here,
# so it must agree with compute_event_probability.
prob = compute_probability(lambda x: sum(x) == 21, sample_space)
assert prob == compute_event_probability(has_sum_of_21, sample_space)

In [24]:
# 1.2.3 Computing die-roll probabilities using weighted sample spaces
# Mapping die-roll sums to occurrence counts
from collections import defaultdict

# Weighted sample space: records, for each possible total, the number of
# outcomes in sample_space whose rolls sum to that total
weighted_sample_space = defaultdict(int)
for outcome in sample_space:
    total = sum(outcome)
    
    # keys are the totals; defaultdict(int) starts an unseen total at 0
    weighted_sample_space[total] += 1

In [25]:
# The lowest and highest sums are 6 and 36 respectively, each occurring once
# (all six dice showing 1, or all six showing 6)
assert weighted_sample_space[6] == 1
assert weighted_sample_space[36] == 1

In [26]:
# The weight stored under key 21 is the number of outcomes whose rolls sum to 21
num_combinations = weighted_sample_space[21]
print(f"There are {num_combinations} ways for 6 die rolls to sum to 21")

There are 4332 ways for 6 die rolls to sum to 21


In [27]:
# Two example combinations of 6 dice that total 21
assert sum([4,4,4,4,3,2]) == 21
assert sum([4,4,4,5,3,1]) == 21

In [34]:
# Consistency checks between the two representations:
# the weight for total 21 equals the number of unweighted outcomes summing to 21,
# and the weights add up to the size of the unweighted sample space
event = get_matching_event(lambda x: sum(x) == 21, sample_space)
assert weighted_sample_space[21] == len(event)
assert sum(weighted_sample_space.values()) == len(sample_space)

In [36]:
# In the weighted sample space the outcomes (keys) are the totals themselves,
# so the condition compares the key to 21 instead of summing a tuple
prob = compute_event_probability(lambda x: x == 21, weighted_sample_space)
# Must match the probability computed from the unweighted sample space
assert prob == compute_event_probability(has_sum_of_21, sample_space)
print(f"6 rolls sum to 21 with a probability of {prob}")

6 rolls sum to 21 with a probability of 0.09284979423868313


In [37]:
# Comparing sizes of weighted sample space and unweighted sample space:
# the unweighted set holds one entry per outcome, the weighted dict one entry per distinct total
print('Number of Elements in Unweighted Sample Space:')
print(len(sample_space))
print('Number of Elements in Weighted Sample Space:')
print(len(weighted_sample_space))

Number of Elements in Unweighted Sample Space:
46656
Number of Elements in Weighted Sample Space:
31


In [38]:
# 1.3 Computing Probabilities Over Interval Ranges (similar to Algorithms Interval ranges)
def is_in_interval(number, minimum, maximum):
    """Return True when `number` lies in the closed interval [minimum, maximum].

    Both boundaries are inclusive.
    """
    return minimum <= number and number <= maximum

In [39]:
# Compute the probability that an event's value falls within a range.
# Example with the range between 10 and 21 inclusive; the keys of
# weighted_sample_space are dice totals, so x is a total here.
prob = compute_event_probability(lambda x: is_in_interval(x, 10, 21), weighted_sample_space)
print(f"Probability of interval is {prob}")
# the total falls in this range a little over half the time (~54%)

Probability of interval is 0.5446244855967078


In [43]:
# 1.3.1 Evaluating Extremes Using Interval Analysis
# Ex: What is the probability that 10 fair coin flips produce from 8 to 10 heads?
# Weighted Sample Space Dictionary
def generate_coin_sample_space(num_flips=10):
    """Build a weighted sample space for a sequence of coin flips.

    Every sequence of 'Heads'/'Tails' of length `num_flips` is enumerated and
    tallied by the number of heads it contains.

    Args:
        num_flips: Number of flips in each sequence (default 10).

    Returns:
        A defaultdict(int) mapping a head count to the number of flip
        sequences that produce that count.
    """
    heads_count_tally = defaultdict(int)
    for flip_sequence in product(['Heads', 'Tails'], repeat=num_flips):
        # product yields tuples, so count() gives the number of heads directly
        heads_count_tally[flip_sequence.count('Heads')] += 1
    return heads_count_tally

# Rebinds weighted_sample_space to the 10-flip coin space (default num_flips=10),
# replacing the dice totals
weighted_sample_space = generate_coin_sample_space()
# Exactly one sequence has 10 heads; ten sequences have 9 heads
assert weighted_sample_space[10] == 1
assert weighted_sample_space[9] == 10

In [45]:
# Head counts of 8, 9 or 10 (the interval is inclusive on both ends)
prob = compute_event_probability(lambda x: is_in_interval(x, 8, 10), weighted_sample_space)
print(f"Probability of observing more than 7 heads is {prob}")
# roughly 5.5% of the time there will be 8 or more heads in 10 flips

Probability of observing more than 7 heads is 0.0546875


In [47]:
# Probability that the coin does not produce from 3 to 7 heads,
# i.e. the head count is 0-2 or 8-10 (the negation of the inclusive interval 3..7)
prob = compute_event_probability(lambda x: not is_in_interval(x, 3, 7), weighted_sample_space)
print(f"Probability of observing more than 7 heads or 7 tails is {prob}")

Probability of observing more than 7 heads or 7 tails is 0.109375


In [48]:
# Key Words / Notes:
# Sample Space: the set of all the possible outcomes an action can produce
# Event: A subset of the sample space containing just those outcomes that satisfy some event condition
# Event Condition: A Boolean function that takes an outcome and returns either True or False
# Probability: The fraction of event outcomes over all the possible outcomes in the entire sample space
# Probabilities can be compared over numeric intervals
# Numeric Intervals: The set of all the numbers sandwiched between two boundary values
# Interval probabilities are useful for determining whether an observation appears extreme