# Chi-2 Test

In [40]:
import numpy as np
from scipy.stats import chi2_contingency, t , chi2 as scst
import pandas as pd

In [41]:
def chi2(data, alpha=0.05):
    """Chi-square test of independence on a contingency table.

    The statistic is computed by hand from the observed table and then
    cross-checked against ``scipy.stats.chi2_contingency``. Results are
    printed; nothing is returned.

    Parameters
    ----------
    data : array-like
        Two-dimensional table of observed values (anything accepted by
        ``pd.DataFrame``), rows by columns.
    alpha : float, default 0.05
        Significance level the p-value is compared against.
    """
    table = pd.DataFrame(data)
    observed = table.values
    total = observed.sum()
    col_sums = table.sum(axis=0).values
    row_sums = table.sum(axis=1).values

    # Expected cell value under independence: row total * column total / grand total.
    expected = np.outer(row_sums, col_sums) / total

    obs_chi2 = np.sum((observed - expected) ** 2 / expected)

    dof = (table.shape[0] - 1) * (table.shape[1] - 1)

    # Upper-tail probability of the chi-square distribution with `dof` degrees of freedom.
    pval = scst.sf(obs_chi2, dof)

    # correction=False: SciPy otherwise applies Yates' continuity correction on
    # 2x2 tables, which would make this reference value disagree with the
    # uncorrected statistic computed above.
    chi2_stat, _, _, _ = chi2_contingency(observed, correction=False)

    print(f"Experimental/data based : Chi-Square value = {obs_chi2:.4f}")
    print(f"Theoretical: Chi-Square value = {chi2_stat:.4f}")
    print(f"p-value = {pval:.4f} (alpha={alpha})")

    if pval < alpha:
        print("Reject the null hypothesis.")
    else:
        # A non-significant result never proves the null; it only fails to reject it.
        print("Fail to reject the null hypothesis.")

In [42]:
# 3x3 table of observed values, given as a list of rows and passed to chi2()
# with its default alpha.
# NOTE(review): the entries are non-integer; confirm they are meant to be
# frequency counts before interpreting the chi-square result.
data = [[15.5, 25.2, 69.79], [80.2, 41, 41.225], [72, 11.1, 49.7]]
chi2(data)

Experimental/data based : Chi-Square value = 63.7961
Theoretical: Chi-Square value = 63.7961
p-value = 0.0000 (alpha=0.05)
Reject the null hypothesis.


# t-Test

## One Sample

In [55]:
from scipy.stats import norm, t
import scipy.stats as stats

In [47]:
def one_sample_t_test(S, mu, alpha=0.05, direction='+'):
    """One-sample Student t-test of the sample mean against ``mu``.

    Prints the observed t statistic and the p-value.

    Parameters
    ----------
    S : sequence of numbers
        The sample; must contain at least two observations.
    mu : float
        Hypothesised population mean.
    alpha : float, default 0.05
        Significance level. It is compared with the p-value as-is for every
        direction; the tail handling lives in the p-value itself.
    direction : str, default '+'
        '+'  right-tailed test (alternative: mean > mu),
        '-'  left-tailed test  (alternative: mean < mu),
        '+-' two-tailed test   (alternative: mean != mu).
        Any other value is treated as '+', as before.

    Returns
    -------
    bool
        True when ``p_val >= alpha`` (the null hypothesis is NOT rejected),
        False when it is rejected.

    Raises
    ------
    ValueError
        If ``S`` has fewer than two observations (the sample standard
        deviation is undefined).
    """
    n = len(S)
    if n < 2:
        raise ValueError('one_sample_t_test requires at least two observations')

    x_bar = sum(S) / n
    # Sample standard deviation with Bessel's correction (n - 1).
    sd = np.sqrt(sum((x - x_bar) ** 2 for x in S) / (n - 1))
    se = sd / np.sqrt(n)
    df = n - 1

    obs_t = (x_bar - mu) / se

    if direction == '+-':
        # Two-tailed: double the tail beyond |t| so the p-value stays within
        # [0, 1] regardless of the sign of t.
        p_val = 2 * t.sf(abs(obs_t), df)
    elif direction == '-':
        # Left-tailed: probability of a statistic at least this small.
        p_val = t.cdf(obs_t, df)
    else:
        # Right-tailed: sf is 1 - cdf, but more accurate far in the tail.
        p_val = t.sf(obs_t, df)

    print(f'Experimental t-value: {obs_t:.4f}, p-value: {p_val:.4f}')
    return bool(p_val >= alpha)

In [48]:
# 100 random integers from np.random.randint(0, 100, ...). No seed is set in
# the visible cells, so the printed statistics change from run to run.
# Note: this rebinds the notebook-level name `data` used by the chi-square cell.
data = np.random.randint(0, 100, 100)
# Uses the defaults alpha=0.05 and direction='+'.
one_sample_t_test(data, mu=50)

Experimental t-value: -0.4773, p-value: 0.6829


True

## Two Sample

In [51]:
def two_sample_unpaired_test(S1, S2, equal_var=False, alpha=0.05, direction='+'):
    """Two-sample t-test for independent samples, on mean(S1) - mean(S2).

    Prints the observed t statistic and the p-value.

    Parameters
    ----------
    S1, S2 : sequences of numbers
        The two independent samples; each needs at least two observations.
    equal_var : bool, default False
        True  -> pooled-variance (Student) test with n1 + n2 - 2 degrees of freedom.
        False -> Welch's test with Welch-Satterthwaite degrees of freedom.
    alpha : float, default 0.05
        Significance level. It is compared with the p-value as-is for every
        direction; the tail handling lives in the p-value itself.
    direction : str, default '+'
        '+'  right-tailed test (alternative: mean(S1) > mean(S2)),
        '-'  left-tailed test  (alternative: mean(S1) < mean(S2)),
        '+-' two-tailed test   (alternative: means differ).
        Any other value is treated as '+', as before.

    Returns
    -------
    bool
        True when ``p_val >= alpha`` (the null hypothesis is NOT rejected),
        False when it is rejected.

    Raises
    ------
    ValueError
        If either sample has fewer than two observations.
    """
    n1, n2 = len(S1), len(S2)
    if n1 < 2 or n2 < 2:
        raise ValueError('two_sample_unpaired_test requires at least two observations per sample')

    x1_bar, x2_bar = np.mean(S1), np.mean(S2)
    var1, var2 = np.var(S1, ddof=1), np.var(S2, ddof=1)

    if equal_var:
        # Pooled standard deviation across both samples.
        sp = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))
        se = sp * np.sqrt(1 / n1 + 1 / n2)
        df = n1 + n2 - 2
    else:
        v1, v2 = var1 / n1, var2 / n2
        se = np.sqrt(v1 + v2)
        # Welch-Satterthwaite approximation of the degrees of freedom.
        df = (v1 + v2) ** 2 / (v1 ** 2 / (n1 - 1) + v2 ** 2 / (n2 - 1))

    obs_t = (x1_bar - x2_bar) / se

    if direction == '+-':
        # Two-tailed: double the tail beyond |t| so the p-value stays within
        # [0, 1] regardless of the sign of t.
        p_val = 2 * t.sf(abs(obs_t), df)
    elif direction == '-':
        # Left-tailed: probability of a statistic at least this small.
        p_val = t.cdf(obs_t, df)
    else:
        # Right-tailed: sf is 1 - cdf, but more accurate far in the tail.
        p_val = t.sf(obs_t, df)

    print(f'Experimental t-value: {obs_t:.4f}, p-value: {p_val:.4f}')
    return bool(p_val >= alpha)

In [57]:
def two_sample_paired_test(S1, S2, alpha=0.05, direction='+'):
    """Paired two-sample t-test, run as a one-sample test on the differences.

    The element-wise differences ``S1[i] - S2[i]`` are tested against a mean
    of 0 by ``one_sample_t_test``; ``alpha`` and ``direction`` are forwarded
    to it unchanged and its return value is returned as-is.

    Raises
    ------
    ValueError
        If the two samples differ in length. Previously ``zip`` silently
        dropped the unmatched tail, so the test ran on a subset of the data.
    """
    first, second = list(S1), list(S2)
    if len(first) != len(second):
        raise ValueError(
            f'paired samples must have the same length, got {len(first)} and {len(second)}'
        )

    differences = [a - b for a, b in zip(first, second)]
    return one_sample_t_test(differences, 0, alpha, direction)

In [58]:
# Two independent samples of 100 draws each from np.random.normal
# (mean 500, standard deviations 100 and 95), cast to int.
# No seed is set in the visible cells, so the printed statistics change from run to run.
data1 = np.random.normal(500, 100, 100).astype(int)
data2 = np.random.normal(500, 95, 100).astype(int)
# Uses the defaults equal_var=False, alpha=0.05 and direction='+'.
two_sample_unpaired_test(data1, data2)

Experimental t-value: 0.6888, p-value: 0.2459


True

In [56]:
# Print the standard-normal critical value(s) for a one- or two-tailed test
# at significance level `alpha`, based on an interactive answer.
alpha = 0.05
try:
    n = int(input('Type of tailed test: 2 or 1? '))
except ValueError:
    # A non-numeric answer previously crashed the cell inside int(); send it
    # to the invalid-input branch below instead.
    n = None
if n == 2:
    # Two-tailed: alpha is split evenly between both tails.
    p = norm.ppf(alpha / 2)
    print(f'Restriction: between {-abs(p):.4f} and {abs(p):.4f}')
elif n == 1:
    # One-tailed: ppf(alpha) is the lower-tail quantile; abs() mirrors it to
    # the upper tail.
    p = norm.ppf(alpha)
    print(f'Restriction: above {abs(p):.4f}')
else:
    print('Error: Invalid input')

Type of tailed test: 2 or 1? 2
Restriction: between -1.9600 and 1.9600
