In [1]:
import sys
import math
import string
import operator
import collections
import datetime
import itertools
import functools

import numpy as np

import matplotlib
import matplotlib.dates as mdates
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import seaborn as sns

import IPython
import sympy as sp
import spb
import sympy.core.random as spr
import sympy.stats as ss
import scipy.stats as sps
import sympy.plotting as splt
import sympy.physics.vector as spv
import sympy.physics.mechanics as spm

import IPython.display as ipd
spv.init_vprinting()
%matplotlib inline


In [2]:
# Short aliases for SymPy objects that are used throughout the notebook.
HALF = sp.Rational(1, 2)  # exact one half (the S.Half singleton)
PI = sp.S.Pi
E = sp.exp  # NOTE: the exponential *function*, not Euler's number (sp.E)
POSITIVEINFINITY = sp.oo
NEGATIVEINFINITY = -sp.oo

In [3]:
def print_aligned_latex_equations(*args):
    r"""Display LaTeX rows as a single aligned block in the cell output.

    Each positional argument is the LaTeX source of one row; an ``&`` inside
    a row marks its alignment point. The rows are separated by ``\\`` and
    wrapped in an un-numbered ``equation``/``split`` environment, which is
    then rendered with ``ipd.Math``.

    Parameters
    ----------
    *args : str
        LaTeX source of each row, in display order.

    Returns
    -------
    None
        The rendered block is shown as a side effect of ``ipd.display``.
    """
    # Join only the rows with the line-break command. Putting the separator
    # between the environment delimiters as well would insert stray breaks
    # outside ``split`` and empty rows at its top and bottom.
    rows = r"\\".join(args)
    result = (
        r"\begin{equation}"
        r"\begin{split}"
        f"{rows}"
        r"\nonumber\end{split}"
        r"\end{equation}"
    )

    ipd.display(ipd.Math(result))

In [4]:
import doctest

# Distributions

## Binomial, Bernoulli

In statistics, the **binomial distribution** is a discrete probability 
distribution that describes the probability of getting a certain number 
of successes in a fixed number of independent trials, each with the same 
probability of success. The **Bernoulli distribution** is the special 
case of the binomial distribution with a single trial ($n = 1$): it 
describes the probability of getting a success in a single trial.

The binomial distribution has two parameters:

* **n:** The number of trials.
* **p:** The probability of success on a single trial.

The probability of getting a certain number of successes in a 
binomial distribution is given by the following formula:

$$P(x) = {}^nC_x  p^x  (1 - p)^{(n - x)}$$


where:

* **x:** The number of successes.
* **${}^nC_x$:** The binomial coefficient, which is the number of ways 
to choose x successes from n trials.

The binomial distribution is a very useful distribution in statistics, 
as it can be used to model a wide variety of phenomena. For example, the
binomial distribution can be used to model the probability of getting a
certain number of

* heads in a series of coin tosses, 
* successes in a survey, or 
* defective items in a production run.

Here is an example of how the binomial distribution can be used:

* A coin is tossed 10 times. What is the probability of getting 5 heads?


The binomial distribution is a powerful tool that can be used to model 
a wide variety of phenomena.

### Example Coin Toss

A coin is tossed 10 times. What is the probability of getting 5 heads?

In [5]:
n = 10  # number of coin tosses
p = 0.5  # probability of getting heads on a single toss

# B is the number of heads obtained in the n tosses.
B = ss.Binomial("B", n, p)

# P(B = 5) looked up in the density's value -> probability mapping ...
probability_of_5_heads = ss.density(B).dict[5]

# ... and the same probability computed with the generic P() function.
probability_of_5_heads_using_P = ss.P(sp.Eq(B, 5))

# Row labels: both rows report P(B = 5), differing only in how it was computed.
text_density = r"P(B = 5) \textrm{ with density function}"
text_P = r"P(B = 5) \textrm{ with P function}"

# One LaTeX row per method; "&" marks the alignment point.
probabilities = [
    f"{text_density}  & :{probability_of_5_heads}",
    f"{text_P}  & :{probability_of_5_heads_using_P}",
]

# Render both rows as one aligned block.
print_aligned_latex_equations(*probabilities)

<IPython.core.display.Math object>

### Example Dancers

In a university $8\%$ of students are members of the university 
dance club. A random sample of 36 students is taken from the university.

The random variable $X$ represents the number of these students who 
are members of the dance club.

* Using a suitable model for $X$, find
    * $P(X = 4)$  
    * $P(X \ge 7)$


In [6]:
# Constants
PROBABILITY_DANCER = 0.08  # 8% of students are in the dance club
PROBABILITY_TANGOER = 0.12  # NOTE(review): not used in this cell

# Define symbols
x = sp.symbols('x')  # NOTE(review): not used in this cell

sample_size = 36
# X is the number of dance-club members in the sample.
X = ss.Binomial("X", n=sample_size, p=PROBABILITY_DANCER)

# P(X = 4), evaluated numerically and rounded to three decimal places
PX_eq_4 = round(ss.P(sp.Eq(X, 4)).evalf(), 3)

# P(X >= 7), evaluated numerically and rounded to three decimal places
PX_ge_7 = round(ss.P(X >= 7).evalf(), 3)

# One LaTeX row per probability; "&" marks the alignment point.
# "\ge" is used so the relation is typeset as a single symbol rather than
# as the two literal characters ">=".
probabilities = [
    f"P(X = 4) & :{PX_eq_4}",
    rf"P(X \ge 7) & :{PX_ge_7}",
]

# Render both rows as one aligned block.
print_aligned_latex_equations(*probabilities)

<IPython.core.display.Math object>

In [7]:
# Run the doctests of the current module (doctest.testmod defaults to
# __main__, i.e. this notebook's namespace) without per-example output.
# The recorded result reports attempted=0: no doctest examples were found.
doctest.testmod(verbose=False)

TestResults(failed=0, attempted=0)

# Product Moment Correlation Coefficient



## Pearson's Correlation Coefficient

Find the product moment correlation coefficient between x and y for 
this data.

In [8]:
# Paired observations for the two variables.
x = [3, 6, 8, 7, 5, 3, 11, 3, 4, 5, 4, 9, 7, 10, 6, 6]
y = [7, 7, 4, 4, 6, 8, 5, 5, 8, 4, 7, 4, 5, 5, 6, 3]

# pearsonr returns the correlation coefficient together with its p-value.
pearson_correlation, p_value = sps.pearsonr(x, y)

# One LaTeX row per statistic, each rounded to three decimal places.
pearson_correlation_results = [
    rf"\textrm{{Correlation }}&:{sp.latex(round(pearson_correlation, 3))}",
    rf"\textrm{{Significance}}&:{sp.latex(round(p_value, 3))}",
]

# Render both rows as one aligned block.
print_aligned_latex_equations(*pearson_correlation_results)


<IPython.core.display.Math object>