This code is for EN5423 class at GIST, Republic of Korea, and created by Dr. Hyunglok Kim.  
**Contact information**: hyunglokkim@gist.ac.kr  
**License**: This work is licensed for non-commercial use only.  
**Restrictions**: Do not use this material without permission for teaching or developing other classes.

In [17]:
import pandas as pd
import numpy as np

from scipy.stats import friedmanchisquare
from itertools import combinations

In [12]:
# Example 1: Mercury in periphyton—Median polish.
def median_polish(data, n_iter=100, tol=1e-10):
    """Performs median polish
    Args:
        data: pd data frame
        n_iter: maximum number of iterations
        tol: tolerance for convergence
    Returns:
        a dict with:
            'overall': overall median
            'row': row effects
            'column': column effects
            'residuals': residuals
    """
    data = data.copy()
    overall_effect = np.median(data)
    data -= overall_effect
    row_effects = np.zeros(data.shape[0])
    col_effects = np.zeros(data.shape[1])

    for _ in range(n_iter):
        row_medians = np.median(data, axis=1)
        row_effects += row_medians
        data -= row_medians[:, np.newaxis]

        col_medians = np.median(data, axis=0)
        col_effects += col_medians
        data -= col_medians

        new_overall_effect = overall_effect + np.median(row_medians) + np.median(col_medians)
        if np.abs(new_overall_effect - overall_effect) < tol:
            break
        overall_effect = new_overall_effect

    residuals = data
    return {'overall': overall_effect, 'row': row_effects, 'column': col_effects, 'residuals': residuals}

# Read the data from the CSV file
data = pd.read_csv('Merc.csv')

# Convert DataFrame to NumPy array
data_array = data.values

# Perform median polish
result = median_polish(data_array)

# Extract the results
overall = result['overall']
row_effects = result['row']
col_effects = result['column']
residuals = result['residuals']

# Display the results
print(f"Overall: {overall}")
print("\nRow Effects:")
print(pd.Series(row_effects, index=data.index))
print("\nColumn Effects:")
print(pd.Series(col_effects, index=data.columns))
print("\nResiduals:")
print(pd.DataFrame(residuals, index=data.index, columns=data.columns))

Overall: 2.527760416582459

Row Effects:
0   -0.399271
1   -0.044271
2   -0.505937
3   -0.369271
4   -0.365937
5   -0.653438
dtype: float64

Column Effects:
Site1   -2.384062
Site2   -2.379062
Site3   -1.077396
Site4    0.730937
Site5    5.052604
Site6    3.022604
dtype: float64

Residuals:
      Site1     Site2     Site3     Site4     Site5     Site6
0  0.213333  2.998333 -0.213333 -1.311667 -3.743333  0.286667
1 -0.491667 -0.496667 -0.908333  0.603333  1.891667  0.491667
2  0.120000  0.115000  0.213333 -0.115000 -0.176667 -1.106667
3 -0.176667 -0.211667 -0.653333  0.278333  0.176667  0.566667
4 -0.120000 -0.115000  0.593333  0.115000  1.113333 -0.286667
5  0.187500  0.402500  3.010833 -0.187500 -1.919167 -1.099167


In [15]:
# Example 2: Ordinial Data (Teaching methods) with Friedman Test
# Rankings provided by each student
rankings = np.array([
    [2, 3, 1],
    [1, 2, 3],
    [3, 1, 2],
    [2, 1, 3],
    [3, 2, 1]
])

# Perform the Friedman test
stat, p = friedmanchisquare(rankings[:, 0], rankings[:, 1], rankings[:, 2])

print(f"Friedman test statistic: {stat}")
print(f"p-value: {p}")

if p < 0.05:
    print("There is a significant difference between the teaching methods.")
else:
    print("There is no significant difference between the teaching methods.")

Friedman test statistic: 0.4000000000000057
p-value: 0.8187307530779795
There is no significant difference between the teaching methods.


In [16]:
# Example 3. Mercury in periphyton—Friedman test
# Read the data from the CSV file
data = pd.read_csv('Merc.csv')

# Extract the columns for the test
Hg = data.values

# Perform the Friedman test
stat, p_value = friedmanchisquare(Hg[:, 0], Hg[:, 1], Hg[:, 2], Hg[:, 3], Hg[:, 4], Hg[:, 5])

print(f"Friedman chi-squared = {stat:.3f}, df = {Hg.shape[1] - 1}, p-value = {p_value:.7f}")

Friedman chi-squared = 25.577, df = 5, p-value = 0.0001078


In [19]:
# Example 4. Mercury in periphyton—Pairwise Friedman comparison test.
def pairwise_friedman(data, alpha=0.05):
    k = data.shape[1]
    n = data.shape[0]
    comparisons = list(combinations(range(k), 2))
    ranks = data.rank(axis=1)
    mean_ranks = ranks.mean(axis=0)
    
    p_values = {}
    for i, j in comparisons:
        diff = np.abs(mean_ranks[i] - mean_ranks[j])
        se = np.sqrt(k * (k + 1) / (6 * n))
        z = diff / se
        p = 2 * (1 - norm.cdf(np.abs(z)))  # Two-tailed test
        p_values[(i, j)] = p
    
    # Bonferroni correction
    bonferroni_alpha = alpha / len(comparisons)
    reject = {comp: p < bonferroni_alpha for comp, p in p_values.items()}
    
    return p_values, reject

from scipy.stats import norm

# Read the data from the CSV file
data = pd.read_csv('Merc.csv')

# Perform the Friedman test
stat, p_value = friedmanchisquare(*[data[col] for col in data.columns])

print(f"Friedman chi-squared = {stat:.3f}, df = {data.shape[1] - 1}, p-value = {p_value:.7f}")



# Perform pairwise comparisons with Bonferroni correction
p_values, reject = pairwise_friedman(data)

# Display results
print("\nPairwise comparisons:")
for (i, j), p in p_values.items():
    print(f"Comparison {data.columns[i]} vs {data.columns[j]}: p-value = {p:.7f}, reject H0 = {reject[(i, j)]}")

Friedman chi-squared = 25.577, df = 5, p-value = 0.0001078

Pairwise comparisons:
Comparison Site1 vs Site2: p-value = 0.5370940, reject H0 = False
Comparison Site1 vs Site3: p-value = 0.0896330, reject H0 = False
Comparison Site1 vs Site4: p-value = 0.0307536, reject H0 = False
Comparison Site1 vs Site5: p-value = 0.0000310, reject H0 = True
Comparison Site1 vs Site6: p-value = 0.0006871, reject H0 = True
Comparison Site2 vs Site3: p-value = 0.2800872, reject H0 = False
Comparison Site2 vs Site4: p-value = 0.1228226, reject H0 = False
Comparison Site2 vs Site5: p-value = 0.0003867, reject H0 = True
Comparison Site2 vs Site6: p-value = 0.0054786, reject H0 = False
Comparison Site3 vs Site4: p-value = 0.6434288, reject H0 = False
Comparison Site3 vs Site5: p-value = 0.0135547, reject H0 = False
Comparison Site3 vs Site6: p-value = 0.0896330, reject H0 = False
Comparison Site4 vs Site5: p-value = 0.0448623, reject H0 = False
Comparison Site4 vs Site6: p-value = 0.2170439, reject H0 = Fal

  diff = np.abs(mean_ranks[i] - mean_ranks[j])
