In [23]:
import numpy as np # for array manipulation and random number generation
import random # for sampling
import math # to calculate the permutations and combinations
from scipy import stats # for statistical operations 
import pandas as pd # for I/O and manipulation of tabular datasets

# Chapter 1

## Factorials and binomial coefficients; sampling and simulation

In [12]:
# Python port of the Chapter 1 R code

# Counting: 10! and "30 choose 2"
factorial_of_10 = math.factorial(10)
combination_of_2_from_30_items = math.comb(30, 2)
print(factorial_of_10, combination_of_2_from_30_items, sep="\n")

# Sampling from the integers 1..10 (range() excludes its stop value, hence 11)
one_to_ten = range(1, 11)
sample_without_replacement = random.sample(one_to_ten, k=5)  # five distinct values
sample_with_replacement = random.choices(one_to_ten, k=5)  # values may repeat

# Weighted sampling with replacement from 1..4: the weights are matched to the
# values by position, so 1 gets weight 0.1, 2 gets 0.2, and so on.
unequal_sample_with_replacement = random.choices(
    range(1, 5),
    weights=[0.1, 0.2, 0.3, 0.4],
    k=3,
)

for drawn in (
    sample_without_replacement,
    sample_with_replacement,
    unequal_sample_with_replacement,
):
    print(drawn)



3628800
435
[6, 10, 3, 8, 9]
[10, 5, 7, 2, 5]
[3, 3, 4]


In [13]:
n = 100
simulations = 10**4

# Matching problem: shuffle n items and count how many end up in their own
# position, repeated once per simulation.
positions = np.arange(n)  # reference ordering, built once outside the loop
r = np.zeros(simulations)
for trial in range(simulations):
    # Comparing a 0-based shuffle with 0..n-1 is the same test as comparing
    # the 1-based shuffle with 1..n.
    r[trial] = (np.random.permutation(n) == positions).sum()

# Proportion of simulations in which at least one item stayed in place
result = np.mean(r >= 1)
print(result)

0.6341


## Birthday problem calculation and simulation

In [14]:
# How many groups of people to simulate
simulations = 10**4

# Each trial draws 23 birthdays from 365 equally likely days (with
# replacement) and stores the size of the largest group sharing one day.
r = np.zeros(simulations)
for trial in range(simulations):
    birthdays = np.random.choice(365, 23, replace=True)
    r[trial] = np.bincount(birthdays).max()

# Proportion of trials in which some day is shared by two or more people
result = np.mean(r >= 2)
print(result)

0.5042


# Chapter 2

## Simulating the frequentist interpretation

In [15]:
# Number of simulated two-child families
n = 10**5

# Draw each child independently and uniformly from the codes {1, 2}; the
# conditional-probability cells of this chapter count the value 1 as "girl".
child1 = np.random.choice([1, 2], size=n, replace=True)
child2 = np.random.choice([1, 2], size=n, replace=True)

print("Sample child1:", child1[:10])  # Print the first 10 elements for verification
print("Sample child2:", child2[:10])  # Print the first 10 elements for verification

# The same kind of draw with string labels instead of integer codes.
# (numpy is already imported and n is already set above, so neither is repeated here.)
children = np.random.choice(["girl", "boy"], size=n, replace=True)

print("Sample children:", children[:10])  # Print the first 10 elements for verification

Sample child1: [1 2 2 1 2 2 2 2 2 1]
Sample child2: [1 1 1 1 1 2 2 1 1 2]
Sample children: ['girl' 'boy' 'boy' 'boy' 'girl' 'boy' 'boy' 'girl' 'boy' 'boy']


In [18]:
# Estimate P(both girls | first child is a girl) by counting:
# (# families with two girls) / (# families whose first child is a girl).
first_is_girl = child1 == 1
second_is_girl = child2 == 1

number_of_times_first_child_is_girl = first_is_girl.sum()
number_of_times_both_children_are_girls = (first_is_girl & second_is_girl).sum()

# NOTE: "fist" in the name below is a typo for "first"; the name is left
# unchanged in case other cells refer to it.
prob_both_children_are_girls_if_fist_child_is_girl = (
    number_of_times_both_children_are_girls / number_of_times_first_child_is_girl
)

for value in (
    number_of_times_first_child_is_girl,
    number_of_times_both_children_are_girls,
    prob_both_children_are_girls_if_fist_child_is_girl,
):
    print(value)

50000
24988
0.49976


In [21]:
# A family has at least one girl when either child is coded 1 (element-wise OR of the two masks).
at_least_one_girl = (child1 == 1) | (child2 == 1)
number_of_times_at_least_one_child_is_girl = np.sum(at_least_one_girl)

# Condition on "at least one girl": divide the both-girls count by the at-least-one-girl count.
prob_both_girls_given_at_least_one_girl = (
    number_of_times_both_children_are_girls / number_of_times_at_least_one_child_is_girl
)
print('Probability that both children are girls if there is at least one girl ', prob_both_girls_given_at_least_one_girl)

Probability that both children are girls if there is at least one girl  0.33249946774536937


## Monty Hall problem

In [22]:
def monty_hall():
    """Play one interactive round of the Monty Hall game on the console.

    The car is placed behind one of three doors at random. The player picks a
    door, Monty opens a different door that does not hide the car, and the
    player may then switch to the remaining closed door. The outcome is
    printed; nothing is returned.

    The door prompt is repeated until the player enters 1, 2 or 3, so a typo
    neither raises ``ValueError`` nor selects a door that does not exist.
    """
    doors = [1, 2, 3]

    # Randomly pick where the car is
    cardoor = np.random.choice(doors)

    # Prompt player, asking again until the answer names one of the doors
    print("Monty Hall says ‘Pick a door, any door!’")
    while True:
        answer = input("Enter your choice (1, 2, or 3): ")
        try:
            chosen = int(answer)
        except ValueError:
            chosen = None  # not a number at all
        if chosen in doors:
            break
        print("Please enter 1, 2, or 3.")

    # Pick Monty's door (can't be the player's door or the car door)
    if chosen != cardoor:
        # Exactly one door is neither the player's pick nor the car's door
        montydoor = next(door for door in doors if door not in (chosen, cardoor))
    else:
        # The player picked the car, so Monty chooses between the other two at random
        montydoor = np.random.choice([door for door in doors if door != chosen])

    # Find out whether the player wants to switch doors
    print(f"Monty opens door {montydoor}!")
    reply = input("Would you like to switch (y/n)? ")

    # Interpret what player wrote as "yes" if it starts with "y"
    # (surrounding whitespace and letter case are ignored)
    if reply.strip().lower().startswith('y'):
        # Switch to the only door that is neither the original pick nor Monty's
        chosen = next(door for door in doors if door not in (chosen, montydoor))

    # Announce the result of the game!
    if chosen == cardoor:
        print("You won!")
    else:
        print("You lost!")

# Run the game. The call is interactive: it waits on input() for the player's
# door choice and for the switch decision, so execution pauses at this cell.
monty_hall()

Monty Hall says ‘Pick a door, any door!’
Monty opens door 1!
You won!


# Chapter 3

## Binomial distribution

In [24]:
# Binomial parameters
n = 10  # number of trials
p = 0.5  # probability of success

# Freeze the parameters into a distribution object that can be queried repeatedly
binomial_distribution = stats.binom(n=n, p=p)

# Evaluate at k successes: P(X = k) and P(X <= k)
k = 5
pmf, cdf = binomial_distribution.pmf(k), binomial_distribution.cdf(k)

print(
    "Binomial Distribution:",
    f"PMF at k={k}: {pmf}",
    f"CDF at k={k}: {cdf}",
    "",  # blank separator line after the results
    sep="\n",
)

Binomial Distribution:
PMF at k=5: 0.24609375000000003
CDF at k=5: 0.623046875


## Hypergeometric distribution

In [25]:
# Hypergeometric parameters
N = 20  # population size
K = 7   # number of successes in the population
n = 10  # number of draws

# scipy names the shape parameters (M, n, N) = (population size, successes in
# the population, number of draws); keywords make the mapping explicit.
hypergeometric_distribution = stats.hypergeom(M=N, n=K, N=n)

# Evaluate at k successes among the draws: P(X = k) and P(X <= k)
k = 5
pmf, cdf = hypergeometric_distribution.pmf(k), hypergeometric_distribution.cdf(k)

print(
    "Hypergeometric Distribution:",
    f"PMF at k={k}: {pmf}",
    f"CDF at k={k}: {cdf}",
    "",  # blank separator line after the results
    sep="\n",
)

Hypergeometric Distribution:
PMF at k=5: 0.14628482972136223
CDF at k=5: 0.9713622291021672


## Poisson Distribution

In [26]:
# Poisson parameter
mu = 3  # mean number of events

# Freeze the rate into a distribution object
poisson_distribution = stats.poisson(mu=mu)

# Evaluate at k events: P(X = k) and P(X <= k)
k = 5
pmf, cdf = poisson_distribution.pmf(k), poisson_distribution.cdf(k)

print(
    "Poisson Distribution:",
    f"PMF at k={k}: {pmf}",
    f"CDF at k={k}: {cdf}",
    "",  # blank separator line after the results
    sep="\n",
)

Poisson Distribution:
PMF at k=5: 0.10081881344492458
CDF at k=5: 0.9160820579686966


# Chapter 4

## Geometric Distribution

In [27]:
# Geometric parameter
p = 0.5  # probability of success on each trial

# scipy's geom counts the trial on which the first success occurs (k = 1, 2, ...),
# not the number of failures before it.
geometric_distribution = stats.geom(p=p)

# Evaluate at k trials: P(first success on trial k) and P(first success by trial k)
k = 5
pmf, cdf = geometric_distribution.pmf(k), geometric_distribution.cdf(k)

print(
    "Geometric Distribution:",
    f"PMF at k={k}: {pmf}",
    f"CDF at k={k}: {cdf}",
    "",  # blank separator line after the results
    sep="\n",
)

Geometric Distribution:
PMF at k=5: 0.03125
CDF at k=5: 0.96875


## Matching Simulation

In [28]:
n = 100

# Number of simulations
simulations = 10**4

# For each simulated shuffle of n items, count the items left in their own
# position. Comparing a 0-based shuffle with 0..n-1 is the same test as
# comparing the 1-based shuffle with 1..n.
identity = np.arange(n)
results = np.fromiter(
    ((np.random.permutation(n) == identity).sum() for _ in range(simulations)),
    dtype=float,
    count=simulations,
)

# Average number of matches per shuffle
mean_result = results.mean()
print(mean_result)

1.003


In [29]:
k = 20  # number of birthdays drawn per simulation

# Number of simulations
simulations = 10**4

# Each simulation draws k birthdays from 365 equally likely days (with
# replacement) and records how many distinct days appear.
results = np.zeros(simulations)
for trial in range(simulations):
    bdays = np.random.choice(365, k, replace=True)
    results[trial] = np.unique(bdays).size

# Simulated average number of distinct birthdays
mean_result = results.mean()
print(f"Mean number of unique birthdays: {mean_result}")

# Theory: a given day is missed by all k people with probability (364/365)**k,
# so the expected number of days that do appear is 365 times the complement.
prob_day_is_missed = (364/365)**k
theoretical_value = 365 * (1 - prob_day_is_missed)
print(f"Theoretical expected number of unique birthdays: {theoretical_value}")

Mean number of unique birthdays: 19.4724
Theoretical expected number of unique birthdays: 19.487910239138333
