Benchmark Code Performance


Chi-square statistic formula:
$$
\chi^2 = \sum_{i} \frac{(O_i - E_i)^2}{E_i}
$$
where $O_i$ is the observed count and $E_i$ is the expected count for category $i$.

In [10]:
# Explanation of the Code
# Input: The function chi_square_test takes observed frequencies and an optional significance level alpha.
# Observed Frequencies: The observed frequencies are converted to a NumPy array.
# Expected Frequencies: The expected frequencies are calculated assuming a uniform distribution across categories.
# Chi-Square Statistic Calculation: The Chi-Square statistic is computed using the formula provided.
# Degrees of Freedom: The degrees of freedom are calculated as the number of categories minus one.
# P-Value Calculation: The p-value is calculated using the cumulative distribution function (CDF) of the Chi-Square distribution.
# Decision Making: The function prints the Chi-Square statistic, p-value, and whether to reject or fail to reject the null hypothesis based on the p-value.

In [9]:
import numpy as np
from scipy.stats import chi2

def chi_square_test(observed, alpha=0.05):
    """Run a chi-square goodness-of-fit test against a uniform distribution.

    The null hypothesis is that every category is equally likely, so each
    expected count is ``total / number_of_categories``. The statistic, alpha,
    p-value and the resulting decision are printed; nothing is returned.

    Note: this notebook later defines a second ``chi_square_test`` (for
    contingency tables). Once that cell has run, this version is shadowed.

    Parameters
    ----------
    observed : array-like
        1-D sequence of observed counts, one per category. Must contain at
        least two categories, only finite non-negative values, and a
        positive total.
    alpha : float, optional
        Significance level the p-value is compared against (default 0.05).

    Raises
    ------
    ValueError
        If ``observed`` is not 1-D, has fewer than two categories, contains
        negative or non-finite values, or sums to zero.
    """
    # Work in float64 so the arithmetic below never depends on the input dtype.
    observed = np.asarray(observed, dtype=np.float64)

    # Reject inputs for which the statistic or the degrees of freedom are
    # undefined; these would otherwise silently produce NaN results.
    if observed.ndim != 1 or observed.size < 2:
        raise ValueError("observed must be a 1-D sequence with at least two categories.")
    if not np.all(np.isfinite(observed)) or np.any(observed < 0):
        raise ValueError("observed must contain only finite, non-negative counts.")

    total_observed = observed.sum()
    if total_observed <= 0:
        raise ValueError("observed must have a positive total count.")

    # Uniform null hypothesis: the total is spread evenly over all categories.
    expected = np.full_like(observed, total_observed / observed.size)

    # Chi-square statistic: sum over categories of (O - E)^2 / E.
    chi_square_statistic = np.sum((observed - expected) ** 2 / expected)

    # One degree of freedom is lost because the counts must sum to the total.
    degrees_of_freedom = observed.size - 1

    # Survival function = 1 - CDF, but computed directly so that very small
    # p-values are not rounded to exactly 0.0 by floating-point cancellation.
    p_value = chi2.sf(chi_square_statistic, degrees_of_freedom)

    print(f"Chi-Square Statistic: {chi_square_statistic}")
    print(f"Alpha: {alpha}")
    print(f"P-Value: {p_value}")

    # This is a one-way goodness-of-fit test, so the conclusion is about the
    # fit to the uniform distribution, not about an association of variables.
    if p_value < alpha:
        print("Reject the null hypothesis (observed frequencies differ significantly from a uniform distribution).")
    else:
        print("Fail to reject the null hypothesis (no significant difference from a uniform distribution).")

# Example usage: test three category counts against a uniform expectation.
# chi_square_test prints its results rather than returning them.
observed_frequencies = [50, 30, 20]  # observed count per category (total 100)
chi_square_test(observed_frequencies)


Chi-Square Statistic: 14.0
Alpha: 0.05
P-Value: 0.000911881965554473
Reject the null hypothesis (significant association).


Again, this time for a contingency table of two categorical variables (chi-square test of independence):


In [None]:
# Null hypothesis (H0): there is NO association between the two categorical variables.
# Alternative hypothesis (H1): there IS an association between the two categorical variables.

In [13]:
import numpy as np
from scipy.stats import chi2

def chi_square_test(contingency_table, alpha=0.05):
    """Run a chi-square test of independence on a two-way contingency table.

    Expected counts are derived from the row and column totals
    (``row_total * column_total / grand_total``). The statistic, p-value and
    the resulting decision are printed; nothing is returned.

    Note: this definition reuses the name of the goodness-of-fit function
    defined earlier in the notebook and replaces it once this cell has run.

    Parameters
    ----------
    contingency_table : array-like
        2-D table of observed counts with at least two rows and two columns.
        Values must be finite and non-negative, and no row or column may sum
        to zero.
    alpha : float, optional
        Significance level the p-value is compared against (default 0.05).

    Raises
    ------
    ValueError
        If the table is not 2-D, is smaller than 2x2, contains negative or
        non-finite values, or has a row or column whose total is zero.
    """
    # Work in float64 so the arithmetic below never depends on the input dtype.
    observed = np.asarray(contingency_table, dtype=np.float64)

    # A table smaller than 2x2 has zero degrees of freedom, which leaves the
    # test undefined; report that explicitly instead of printing NaN.
    if observed.ndim != 2 or min(observed.shape) < 2:
        raise ValueError("contingency_table must be 2-D with at least 2 rows and 2 columns.")
    if not np.all(np.isfinite(observed)) or np.any(observed < 0):
        raise ValueError("contingency_table must contain only finite, non-negative counts.")

    row_sums = observed.sum(axis=1)
    col_sums = observed.sum(axis=0)

    # A zero marginal makes an expected count zero, so (O - E)^2 / E would be
    # 0/0 and the whole statistic would become NaN.
    if np.any(row_sums == 0) or np.any(col_sums == 0):
        raise ValueError("every row and column of contingency_table must have a positive total.")

    total_observed = observed.sum()

    # Expected count under independence: row total * column total / grand total.
    expected = np.outer(row_sums, col_sums) / total_observed

    # Chi-square statistic: sum over all cells of (O - E)^2 / E.
    chi_square_statistic = np.sum((observed - expected) ** 2 / expected)

    # (rows - 1) * (columns - 1) degrees of freedom for a two-way table.
    degrees_of_freedom = (observed.shape[0] - 1) * (observed.shape[1] - 1)

    # Survival function = 1 - CDF, but computed directly so that very small
    # p-values are not rounded to exactly 0.0 by floating-point cancellation.
    p_value = chi2.sf(chi_square_statistic, degrees_of_freedom)

    print(f"Chi-Square Statistic: {chi_square_statistic}")
    print(f"P-Value: {p_value}")

    if p_value < alpha:
        print("We Can Reject the null hypothesis: -> ALTERNATE is TRUE == (there IS a significant association).")
    else:
        # Failing to reject is absence of evidence, not proof of the null.
        print("We Failed to reject the null hypothesis: -> insufficient evidence of an association (this does NOT prove the NULL is true).")

# Example 2x2 contingency table: rows are the groups (Male, Female),
# columns are the responses (Like, Dislike).
contingency_table = [
    [30, 10],  # Male: Like, Dislike
    [20, 40]   # Female: Like, Dislike
]

# Run the Chi-Square test on the table; the function prints its results.
chi_square_test(contingency_table)


Chi-Square Statistic: 16.666666666666668
P-Value: 4.455709060402491e-05
We Can Reject the null hypothesis: -> ALTERNATE is TRUE == (there IS a significant association).


My Custom Code Performance