# Appendix A: Supplemental Topics

In [30]:
import math
import sympy
import numpy as np
from IPython.display import display, Latex

## Using LaTeX Rendering with Sympy

In [17]:
# Using sympy to convert an expression into LaTeX

# Declare the symbolic variables, then build z = x^2 / sqrt(2*y^3 - 1) from them
x, y = sympy.symbols("x y")
z = x**2 / sympy.sqrt(2 * y**3 - 1)

# sympy.latex() produces the LaTeX source for the expression
latex_str = sympy.latex(z)

In [18]:
# Display the LaTeX expression in the notebook

# The surrounding $...$ delimiters mark the string as inline math for rendering
display(Latex(f'${latex_str}$'))

<IPython.core.display.Latex object>

## Binomial Distribution from Scratch

In [19]:
# Building a binomial distribution from scratch

def factorial(n: int):
    """
    Return n! as the running product 1 * 2 * ... * n.

    The loop body never runs for n <= 0, so the result is 1 in that case.
    """
    product = 1
    for factor in range(1, n + 1):
        product *= factor
    return product

def binomial_coefficient(n: int, k: int):
    """
    Return "n choose k", computed as n! / (k! * (n - k)!).

    True division is used, so the result is a float.
    """
    orderings = factorial(n)
    repeats = factorial(k) * factorial(n - k)
    return orderings / repeats

def binomial_distribution(k: int, n: int, p:float):
    """
    Return the binomial probability of exactly k successes in n trials,
    where each trial succeeds with probability p.
    """
    ways = binomial_coefficient(n, k)
    success_term = p ** k
    failure_term = (1 - p) ** (n - k)
    return ways * success_term * failure_term

# Number of trials and per-trial success probability passed to binomial_distribution()
n = 10
p = 0.9

# Print the probability of every possible success count, 0 through n inclusive
for k in range(n + 1):
    probability = binomial_distribution(k, n, p)
    print("{0} - {1}".format(k, probability))

0 - 9.999999999999978e-11
1 - 8.999999999999981e-09
2 - 3.644999999999994e-07
3 - 8.747999999999988e-06
4 - 0.00013778099999999982
5 - 0.0014880347999999984
6 - 0.01116026099999999
7 - 0.057395627999999976
8 - 0.19371024449999993
9 - 0.38742048900000003
10 - 0.3486784401000001


## Beta Distribution from Scratch

In [21]:
# Beta distribution from scratch

def factorial(n: int):
    """
    Return n! as the running product 1 * 2 * ... * n.

    The loop body never runs for n <= 0, so the result is 1 in that case.
    """
    product = 1
    for factor in range(1, n + 1):
        product *= factor
    return product

def approximate_integral(a, b, n, f):
    """
    Approximate the integral of f over [a, b] with the midpoint rule,
    using n rectangles of equal width.
    """
    width = (b - a) / n
    height_sum = 0

    # The i-th rectangle's midpoint sits (2i - 1) half-widths past a, so walk
    # the odd multipliers 1, 3, ..., 2n - 1 directly.
    for odd in range(1, 2 * n, 2):
        height_sum += f(0.5 * (2 * a + width * odd))

    return height_sum * width

def beta_distribution(x: float, alpha: float, beta: float) -> float:
    """
    Evaluate the Beta(alpha, beta) probability density at x.

    The density is x^(alpha - 1) * (1 - x)^(beta - 1) / B(alpha, beta), with
    B(alpha, beta) = Gamma(alpha) * Gamma(beta) / Gamma(alpha + beta).

    Args:
        x: Point at which to evaluate the density; must lie in [0, 1].
        alpha: First shape parameter; must be positive. May be non-integer.
        beta: Second shape parameter; must be positive. May be non-integer.

    Returns:
        The density value at x.

    Raises:
        ValueError: If x is outside [0, 1], or alpha or beta is not positive.
        ZeroDivisionError: If x is 0 with alpha < 1, or x is 1 with beta < 1,
            because zero is raised to a negative power there.
    """
    if x < 0 or x > 1:
        raise ValueError("x must be between 0 and 1.")
    if alpha <= 0 or beta <= 0:
        raise ValueError("alpha and beta must be positive.")

    numerator = x ** (alpha - 1) * (1 - x) ** (beta - 1)

    if isinstance(alpha, int) and isinstance(beta, int):
        # Gamma(k) == (k - 1)! for positive integers, so stay in exact integer
        # arithmetic until the final division.
        denominator = (math.factorial(alpha - 1) * math.factorial(beta - 1)) / \
            math.factorial(alpha + beta - 1)
    else:
        # Non-integer shapes have no factorial; use the Gamma function itself.
        denominator = math.gamma(alpha) * math.gamma(beta) / math.gamma(alpha + beta)

    return numerator / denominator
                
# Area under the beta_distribution(x, 8, 2) curve between 0.90 and 1.0,
# approximated with 100 midpoint rectangles
greater_than_90 = approximate_integral(a=0.90, b=1.0, n=100, f=lambda x: beta_distribution(x, 8, 2))
# Everything else, taking the total area under the curve to be 1
less_than_90 = 1 - greater_than_90

print("GREATER THAN 90%: {}, LESS THAN 90%: {}".format(greater_than_90, less_than_90))

GREATER THAN 90%: 0.22516170312967945, LESS THAN 90%: 0.7748382968703206


## Deriving Bayes' Theorem

In [29]:
def bayes_theorem_with_population(n, p_coffee_drinker, p_cancer, p_coffee_drinker_given_cancer):
    """
    Work out P(cancer | coffee drinker) by counting people in a population of size n.
    """
    # Head counts implied by the two marginal probabilities
    coffee_headcount = p_coffee_drinker * n
    cancer_headcount = p_cancer * n

    # Cancer patients who are also coffee drinkers
    overlap_headcount = p_coffee_drinker_given_cancer * cancer_headcount

    # Share of the coffee drinkers who have cancer
    return overlap_headcount / coffee_headcount

def bayes_theorem_direct(p_coffee_drinker, p_cancer, p_coffee_drinker_given_cancer):
    """
    Work out P(cancer | coffee drinker) straight from the probabilities,
    with no population size involved.
    """
    # P(coffee drinker and cancer) = P(coffee drinker | cancer) * P(cancer)
    joint_probability = p_coffee_drinker_given_cancer * p_cancer

    # Dividing the joint probability by P(coffee drinker) gives the conditional
    return joint_probability / p_coffee_drinker

# Given data: population size, P(coffee drinker), P(cancer), P(coffee drinker | cancer)
n = 100000
p_coffee_drinker = 0.65
p_cancer = 0.005
p_coffee_drinker_given_cancer = 0.85

# Calculate using both methods
result_with_population = bayes_theorem_with_population(n, p_coffee_drinker, p_cancer, p_coffee_drinker_given_cancer)
result_direct = bayes_theorem_direct(p_coffee_drinker, p_cancer, p_coffee_drinker_given_cancer)

# Compare the results using np.isclose() rather than ==, since the two
# functions perform different floating-point operations.
# Note: np.isclose() also applies its default relative tolerance on top of atol.
if np.isclose(result_with_population, result_direct, atol=1e-9):
    print(f"The results are equal within the given tolerance.\nValue: {result_with_population}")
else:
    print(f"The results are NOT equal.\nPopulation-based result: {result_with_population}\nDirect result: {result_direct}")


The results are equal within the given tolerance.
Value: 0.006538461538461538


## CDF and Inverse CDF from Scratch

In [None]:
# The normal distribution function in python

def normal_pdf(x: float, mean: float, std_dev: float) -> float:
    """
    Evaluate the normal (Gaussian) probability density at x.

    Computes 1 / sqrt(2 * pi * std_dev^2) * exp(-(x - mean)^2 / (2 * std_dev^2)).

    Args:
        x: Point at which to evaluate the density.
        mean: Mean of the distribution.
        std_dev: Standard deviation of the distribution; must be non-zero.

    Returns:
        The density value at x.

    Raises:
        ZeroDivisionError: If std_dev is 0.
    """
    variance = std_dev ** 2

    # The normalizing constant is a square root (power 0.5), which makes the
    # total area under the curve equal to 1.
    normalizer = 1 / math.sqrt(2 * math.pi * variance)

    return normalizer * math.exp(-((x - mean) ** 2) / (2 * variance))

In [None]:
# Using the inverse CDF (called ppf()) in python

In [None]:
# Generating random golden retriever weights

## Use e to Predict Event Probability over Time

In [None]:
# Predicting the probability of a leak over time

## Hill Climbing and Linear Regression

In [None]:
# Using hill climbing for linear regression

## Hill Climbing and Logistic Regression

In [None]:
# Using hill climbing for a simple logistic regression

## A Brief Intro to Linear Programming

In [None]:
# Using python PuLP to solve a linear programming system

## MNIST Classifier Using scikit-learn

In [None]:
# A handwritten digit classifier neural network in scikit-learn