In [1]:
import numpy as np
from scipy import stats
# Demonstrate the relationship t^2 = F between a pooled-variance two-sample t-test and a one-way ANOVA F-test

def t_test_as_f_test(data1, data2):
    """
    Compare the squared two-sample t-statistic with the one-way ANOVA F-statistic.

    For two groups, the square of the t-statistic from ``stats.ttest_ind``
    (called with its defaults) and the F-statistic from ``stats.f_oneway``
    are expected to agree up to floating-point rounding.

    Parameters:
    - data1: array-like, the first data set
    - data2: array-like, the second data set

    Returns:
    - t_stat: float, the t-statistic
    - F_stat_from_t: float, the F-statistic derived from t (t squared)
    - F_stat: float, the F-statistic from one-way ANOVA
    - are_statistics_close: boolean, whether t^2 is close to F
    """
    # Two-sample t-test; only the statistic is needed, the p-value is ignored.
    t_stat = stats.ttest_ind(data1, data2).statistic

    # One-way ANOVA on the same two groups; again only the statistic is used.
    F_stat = stats.f_oneway(data1, data2).statistic

    # Squaring t gives the value that should match the ANOVA F.
    F_stat_from_t = t_stat ** 2

    # np.isclose tolerates the tiny rounding difference between the two routes.
    are_statistics_close = np.isclose(F_stat_from_t, F_stat)

    return t_stat, F_stat_from_t, F_stat, are_statistics_close

# Test data sets.
# A seeded generator makes the draw (and therefore the printed statistics)
# identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
data1 = rng.normal(loc=20, scale=5, size=50)
data2 = rng.normal(loc=22, scale=5, size=50)

# Perform the tests and check the relationship
t_stat, F_stat_from_t, F_stat, are_statistics_close = t_test_as_f_test(data1, data2)

print(t_stat, F_stat_from_t, F_stat, are_statistics_close)

-3.1727429615431215 10.066297900021418 10.066297900021413 True


In [2]:
import numpy as np
from scipy import stats

def t_test_two_sample(data1, data2):
    """
    Run an independent two-sample t-test via ``stats.ttest_ind`` (default settings).

    Parameters:
    - data1: array-like, the first data set
    - data2: array-like, the second data set

    Returns:
    - t_stat: float, the t-statistic
    - p_value: float, the p-value
    """
    result = stats.ttest_ind(data1, data2)
    # Return a plain (statistic, p-value) pair rather than the result object.
    return result.statistic, result.pvalue

def f_test_two_sample(data1, data2):
    """
    Perform a variance-ratio F-test comparing the variances of two data sets.

    The statistic is the ratio of the unbiased sample variances,
    ``var(data1) / var(data2)``, referred to an F distribution with
    ``(len(data1) - 1, len(data2) - 1)`` degrees of freedom.

    Note: this is NOT the one-way ANOVA F-statistic, so the identity
    t^2 = F for a two-sample t-test does not apply to the value returned here.

    Parameters:
    - data1: array-like, the first data set (numerator variance)
    - data2: array-like, the second data set (denominator variance)

    Returns:
    - F_stat: float, the F-statistic (ratio of sample variances)
    - p_value: float, the one-sided (upper-tail) p-value P(F >= F_stat)
    """
    # ddof=1 gives the unbiased sample variance used by the F-test.
    var1, var2 = np.var(data1, ddof=1), np.var(data2, ddof=1)
    F_stat = var1 / var2
    df1, df2 = len(data1) - 1, len(data2) - 1
    # Use the survival function instead of 1 - cdf: it is the same upper-tail
    # probability, but does not cancel to exactly 0.0 when F_stat is large.
    p_value = stats.f.sf(F_stat, df1, df2)
    return F_stat, p_value

# Test data sets
data1 = np.array([25.5, 27.1, 28.8, 26.5, 27.2])
data2 = np.array([32.9, 33.8, 33.4, 33.1, 31.2])

# Perform t-test and F-test
t_stat, t_p_value = t_test_two_sample(data1, data2)
F_stat, F_p_value = f_test_two_sample(data1, data2)

# F_stat above is the variance-ratio statistic (it tests equality of variances),
# not the ANOVA F. The identity t^2 = F does not apply to it, so this
# comparison is expected to be False.
is_relationship_valid = np.isclose(t_stat**2, F_stat)

print((t_stat, t_p_value), (F_stat, F_p_value), is_relationship_valid)

# The identity t^2 = F holds against the one-way ANOVA F-statistic, which tests
# equality of the two group means (the same hypothesis as the t-test).
anova_F_stat, anova_p_value = stats.f_oneway(data1, data2)
is_anova_relationship_valid = np.isclose(t_stat**2, anova_F_stat)

print((anova_F_stat, anova_p_value), is_anova_relationship_valid)

(-8.381698166667624, 3.117440373838272e-05) (1.4513540621865615, 0.3634672468622858) False
