# In [None]:
# Statistics Part 2 + Practical (Java + DSAPwskills)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import bernoulli, binom, poisson, norm, uniform, ttest_ind, ttest_rel, chisquare, chi2_contingency

# Draw one value uniformly from [0, 1) using NumPy's global generator and show it.
random_var = np.random.random()
print("Random Variable:", random_var)

# Discrete uniform distribution over the integers 1..6: every value gets the
# same probability, 1 / (number of values).
values = np.arange(1, 7)
probs = np.full(values.shape, 1 / values.size)
plt.stem(values, probs)
plt.title("PMF of Discrete Uniform Distribution")
plt.show()

# Bernoulli distribution: probability of a single outcome given success rate p.
p = 0.6


def bernoulli_pdf(x, p):
    """Return the Bernoulli probability of outcome ``x`` for success rate ``p``.

    The name says "pdf", but the value is the probability mass taken from
    ``scipy.stats.bernoulli.pmf``.

    Args:
        x: Outcome (or array of outcomes) to evaluate.
        p: Success probability passed to ``bernoulli.pmf``.

    Returns:
        Whatever ``bernoulli.pmf(x, p)`` returns for the given arguments.
    """
    probability = bernoulli.pmf(x, p)
    return probability


print("Bernoulli PDF (x=1, p=0.6):", bernoulli_pdf(1, p))

# Binomial distribution: 1000 draws of the success count in 10 trials with p=0.5.
binom_trials = 10
binom_data = binom.rvs(n=binom_trials, p=0.5, size=1000)
# The data are integers in 0..binom_trials. Ten equal-width bins over the
# observed range either merge two counts into one bar or leave an empty bar,
# so use one unit-wide bin centred on every possible count instead.
binom_edges = np.arange(binom_trials + 2) - 0.5
plt.hist(binom_data, bins=binom_edges)
plt.title("Binomial Distribution (n=10, p=0.5)")
plt.show()

# Poisson distribution: 1000 draws with mean rate mu=3.
poisson_data = poisson.rvs(mu=3, size=1000)
# The draws are non-negative integers, so give each count 0..max its own
# unit-wide bin; ten equal-width bins over the observed range would merge
# neighbouring counts or leave gaps depending on the largest draw.
poisson_edges = np.arange(poisson_data.max() + 2) - 0.5
plt.hist(poisson_data, bins=poisson_edges)
plt.title("Poisson Distribution")
plt.show()

# CDF of discrete uniform: running total of the PMF computed earlier in the file.
cdf_vals = np.cumsum(probs)
# A discrete CDF jumps AT each value and stays flat until the next one.
# plt.step defaults to where='pre', which would draw each level over the
# interval before its value; where="post" draws it after.
plt.step(values, cdf_vals, where="post")
plt.title("CDF of Discrete Uniform")
plt.show()

# Continuous uniform distribution: 1000 draws with scipy's default loc=0, scale=1.
cont_data = uniform.rvs(loc=0, scale=1, size=1000)
plt.hist(cont_data, bins=20)
plt.title("Continuous Uniform Distribution")
plt.show()

# Normal distribution: 1000 draws with mean 50 and standard deviation 10.
# normal_data is reused by later sections of this script.
normal_data = np.random.normal(50, 10, 1000)
plt.hist(normal_data, bins=20)
plt.title("Normal Distribution")
plt.show()

# Z-scores: standardize the normal sample and plot the first 100 values.
z_scores = stats.zscore(normal_data)
first_hundred = z_scores[:100]
plt.plot(first_hundred)
plt.title("Z-scores")
plt.show()

# Central Limit Theorem: collect the mean of 1000 independent exponential
# samples of size 30 and look at how those means are distributed.
samples = []
for _ in range(1000):
    draw = np.random.exponential(size=30)
    samples.append(np.mean(draw))
plt.hist(samples, bins=30)
plt.title("CLT with Exponential Samples")
plt.show()

# Standard normal distribution: density evaluated on 1000 points in [-4, 4].
# x is reused by a later plot in this script.
x = np.linspace(start=-4, stop=4, num=1000)
std_normal_density = norm.pdf(x)
plt.plot(x, std_normal_density)
plt.title("Standard Normal Distribution")
plt.show()

# Binomial probability: chance of exactly 5 successes in 10 trials with p=0.5.
print("Binomial P(X=5):", binom.pmf(k=5, n=10, p=0.5))

# Z-score comparison: how many standard deviations data_point lies from mean.
data_point, mean, std = 75, 70, 5
deviation = data_point - mean
z = deviation / std
print("Z-score:", z)

# Hypothesis testing using Z-statistics: does the sample mean differ from 70?
# sample is reused by later sections of this script.
sample = np.random.normal(loc=72, scale=5, size=30)
hypothesized_mean = 70
# Standard error of the mean, estimated from the sample itself (ddof=1).
z_std_error = np.std(sample, ddof=1) / np.sqrt(len(sample))
z_stat = (np.mean(sample) - hypothesized_mean) / z_std_error
# Two-sided p-value from the upper-tail survival function. Computing
# 1 - norm.cdf(|z|) cancels to exactly 0.0 once |z| is large, whereas
# norm.sf keeps the tiny tail probability.
p_val = 2 * norm.sf(abs(z_stat))
print("Z-stat:", z_stat, "P-value:", p_val)

# Confidence interval: 95% t-interval around the mean of `sample`.
# se (standard error of the mean) is reused by later sections of this script.
mean_sample = np.mean(sample)
se = stats.sem(sample)
ci_dof = len(sample) - 1
ci = stats.t.interval(0.95, ci_dof, loc=mean_sample, scale=se)
print("95% Confidence Interval:", ci)

# PDF of normal distribution: standard normal density over the grid x
# defined earlier in the script.
normal_density = norm.pdf(x)
plt.plot(x, normal_density)
plt.title("PDF of Normal Distribution")
plt.show()

# CDF of Poisson: cumulative probability at the counts 0..9 for mu=3.
x_vals = np.arange(0, 10)
cdf_poisson = poisson.cdf(x_vals, mu=3)
# A discrete CDF jumps AT each count and stays flat until the next one.
# plt.step defaults to where='pre', which would draw each level over the
# interval before its count; where="post" draws it after.
plt.step(x_vals, cdf_poisson, where="post")
plt.title("CDF of Poisson Distribution")
plt.show()

# Continuous uniform expected value: sample mean of 1000 draws with
# loc=0 and scale=10.
cont_uni = uniform.rvs(0, 10, size=1000)
print("Expected Value (mean):", cont_uni.mean())

# Compare standard deviations: two normal samples with the same mean (50)
# but different spread (5 versus 10).
data1 = np.random.normal(loc=50, scale=5, size=1000)
data2 = np.random.normal(loc=50, scale=10, size=1000)
print("STD1:", data1.std(), "STD2:", data2.std())

# Range and IQR of the normal sample: full spread (max minus min) and the
# interquartile range.
rng = np.max(normal_data) - np.min(normal_data)
iqr = stats.iqr(normal_data)
print("Range:", rng, "IQR:", iqr)

# Z-score normalization: standardize the normal sample (ddof=0, scipy's
# default) and plot the distribution of the standardized values.
normalized = stats.zscore(normal_data, ddof=0)
plt.hist(normalized, bins=20)
plt.title("Z-score Normalized Data")
plt.show()

# Skewness and Kurtosis of the normal sample, printed one after the other.
for shape_label, shape_statistic in (
    ("Skewness:", stats.skew),
    ("Kurtosis:", stats.kurtosis),
):
    print(shape_label, shape_statistic(normal_data))

# Z-test for sample vs population: compare the mean of `sample` with pop_mean.
pop_mean = 70
sample_mean = np.mean(sample)
# Standard error of the mean, estimated from the sample itself (ddof=1).
z_test_se = np.std(sample, ddof=1) / np.sqrt(len(sample))
z_test = (sample_mean - pop_mean) / z_test_se
# Two-sided p-value from the upper-tail survival function. Computing
# 1 - norm.cdf(|z|) cancels to exactly 0.0 once |z| is large, whereas
# norm.sf keeps the tiny tail probability.
p_value = 2 * norm.sf(abs(z_test))
print("Z-test Result:", z_test, "P-value:", p_value)

# One-sample Z-test: the same statistic and p-value, reported under this label.
print("One-sample Z-test:", z_test, "P-value:", p_value)

# Two-tailed Z-test plot: standard normal curve with both rejection regions.
# The critical lines alone on empty axes do not show what is being rejected,
# so the density is drawn and the two tails are shaded.
significance = 0.05
z_crit = norm.ppf(1 - significance / 2)  # two-sided critical value, about 1.96
z_grid = np.linspace(-4, 4, 1000)
z_density = norm.pdf(z_grid)
plt.plot(z_grid, z_density)
# Shade the tails beyond the critical values (the rejection regions).
plt.fill_between(z_grid, z_density, where=np.abs(z_grid) >= z_crit,
                 color='r', alpha=0.3)
plt.axvline(x=-z_crit, color='r', linestyle='--')
plt.axvline(x=z_crit, color='r', linestyle='--')
plt.title("Two-tailed Z-test")
plt.show()

# Type I and Type II errors
# Type I: rejecting H0 when it is actually true (false positive).
# Type II: failing to reject H0 when it is actually false (false negative).

# Independent T-test: two unrelated groups of 30 drawn around means 70 and 75.
group1 = np.random.normal(loc=70, scale=10, size=30)
group2 = np.random.normal(loc=75, scale=10, size=30)
independent_result = ttest_ind(group1, group2)
# The result unpacks into (statistic, p-value).
t_stat, p = independent_result
print("Independent T-test:", t_stat, p)

# Paired T-test: each "after" value is its own "before" value plus a random
# shift drawn around 5, so the two arrays are matched element by element.
before = np.random.normal(loc=100, scale=10, size=30)
paired_shift = np.random.normal(loc=5, scale=5, size=30)
after = before + paired_shift
paired_result = ttest_rel(before, after)
# The result unpacks into (statistic, p-value).
t_stat, p = paired_result
print("Paired T-test:", t_stat, p)

# Compare Z and T test on the SAME one-sample problem (`sample` vs pop_mean).
# Previously the Z statistic was printed next to the paired-test statistic,
# which comes from unrelated before/after data and is not comparable.
t_one_sample, t_one_sample_p = stats.ttest_1samp(sample, pop_mean)
print("Z-test stat:", z_test, "T-test stat:", t_one_sample)
# The p-values differ because the Z-test refers the statistic to the normal
# distribution and the T-test to Student's t with len(sample) - 1 degrees
# of freedom.
print("Z-test P-value:", p_value, "T-test P-value:", t_one_sample_p)

# CI for sample mean: 95% t-interval centred on sample_mean, using the
# standard error `se` computed earlier in the script.
conf_int = stats.t.interval(
    0.95,
    df=len(sample) - 1,
    loc=sample_mean,
    scale=se,
)
print("Confidence Interval:", conf_int)

# Margin of Error: half-width of a 95% interval around the sample mean.
# The confidence intervals in this script use Student's t with
# len(sample) - 1 degrees of freedom, so take the critical value from the
# same distribution. The hard-coded normal value 1.96 gave a margin that did
# not match those intervals.
t_crit = stats.t.ppf(0.975, len(sample) - 1)
moe = t_crit * se
print("Margin of Error:", moe)

# Bayes' Theorem
# P(A|B) = [P(B|A) * P(A)] / P(B)
P_A, P_B_given_A, P_B = 0.3, 0.8, 0.5
# Numerator of the formula: probability of A and B occurring together.
joint_A_and_B = P_B_given_A * P_A
P_A_given_B = joint_A_and_B / P_B
print("Bayes Result:", P_A_given_B)

# Chi-square test for independence on a 2x2 table of observed counts.
obs = np.array([
    [20, 15],
    [30, 35],
])
# correction=True is the function's default, written out here.
independence_result = chi2_contingency(obs, correction=True)
chi2, p, dof, expected = independence_result
print("Chi-square Test:", chi2, p)

# Expected frequencies returned by the test for the same table.
print("Expected Frequencies:\n", expected)

# Goodness-of-fit test: compare six observed counts with six expected counts
# (both sets sum to 88).
obs = np.array([16, 18, 16, 14, 12, 12])
exp = np.array([16, 16, 16, 16, 16, 8])
gof_result = chisquare(f_obs=obs, f_exp=exp)
# The result unpacks into (statistic, p-value).
chi2_stat, p_val = gof_result
print("Goodness-of-Fit Test:", chi2_stat, p_val)
