In [3]:
# Fitting a Binomial Distribution #

import numpy as np
import pandas as pd
from scipy.stats import binom
# Data
x = np.array([0, 1, 2, 3, 4, 5])
f = np.array([2, 14, 20, 34, 22, 8])
N = np.sum(f)  # Total Frequency
# Calculating mean and estimate p
mean = np.sum(x * f) / N
n = 5
p_hat = mean / n

# Compute P(X = x) using binomial PMF
probs = binom.pmf(x, n=n, p=p_hat)

# Compute Expected frequencies
expected = N * probs

# We want to Tabulate everything
df = pd.DataFrame({
    'x': x,
    'Observed f': f,
    'P(X=x)': np.round(probs, 4),
    'Expected E_x': np.round(expected, 2)
})
# Print
print(f"Estimated p-hat = {p_hat:.4f}")
print(df)


Estimated p-hat = 0.5680
   x  Observed f  P(X=x)  Expected E_x
0  0           2  0.0150          1.50
1  1          14  0.0989          9.89
2  2          20  0.2601         26.01
3  3          34  0.3420         34.20
4  4          22  0.2248         22.48
5  5           8  0.0591          5.91


In [6]:
#  Fitting a Poisson Distribution #

import numpy as np
import pandas as pd
from scipy.stats import poisson

# Data
x = np.array([0, 1, 2, 3, 4, 5])
f = np.array([35, 40, 20, 3, 2, 0])
N = np.sum(f)

# Estimate lambda (mean)
mean = np.sum(x * f) / N
lam = mean

# Computing Poisson probabilities and expected frequencies
probs = poisson.pmf(x, mu=lam)
expected = N * probs

# We want to tabulate everything
df = pd.DataFrame({
    'x': x,
    'Observed f': f,
    'P(X=x)': np.round(probs, 4),
    'Expected E_x': np.round(expected, 2)
})

# Print
print(f"Estimated mean = {lam:.2f}")
print(df)


Estimated mean = 0.97
   x  Observed f  P(X=x)  Expected E_x
0  0          35  0.3791         37.91
1  1          40  0.3677         36.77
2  2          20  0.1783         17.83
3  3           3  0.0577          5.77
4  4           2  0.0140          1.40
5  5           0  0.0027          0.27


In [7]:
import numpy as np
import pandas as pd
from scipy.stats import norm

# Class intervals and frequencies
classes = [(60, 69), (70, 79), (80, 89), (90, 99), (100, 109)]
frequencies = np.array([5, 18, 42, 27, 8])
N = np.sum(frequencies)

# Compute midpoints of classes
midpoints = np.array([(a + b) / 2 for a, b in classes])

# Compute sample mean and standard deviation
mean = np.sum(midpoints * frequencies) / N
variance = np.sum(frequencies * (midpoints - mean)**2) / N
std_dev = np.sqrt(variance)

# Use CDF to compute probabilities P(a < X < b)
probs = []
for a, b in classes:
    z1 = (a - mean) / std_dev
    z2 = (b - mean) / std_dev
    prob = norm.cdf(z2) - norm.cdf(z1)
    probs.append(prob)

# Get expected frequencies
expected = N * np.array(probs)

df = pd.DataFrame({
    'Class Interval': [f"{a}-{b}" for a, b in classes],
    'Observed f': frequencies,
    'Expected Frequency': np.round(expected, 2),
    'P(a<X<b)': np.round(probs, 4)
})


print(f"Mean = {mean:.2f}, SD (s) = {std_dev:.2f}")
print(df)


Mean = 86.00, SD (s) = 9.73
  Class Interval  Observed f  Expected Frequency  P(a<X<b)
0          60-69           5                3.66    0.0366
1          70-79          18               18.59    0.1859
2          80-89          42               35.22    0.3522
3          90-99          27               24.97    0.2497
4        100-109           8                6.61    0.0661
