## Non-parametric test

In [None]:
import pandas as pd
from scipy.stats import kruskal

# Read the formant measurements from the Colab filesystem.
df_formants = pd.read_csv('/content/formant_analysis_final.csv')

# The three postures are stacked one after another in the file. Row positions
# 12 and 26 fall outside every slice below -- presumably separator/header
# rows; TODO confirm against the CSV.
df_sitting = df_formants.iloc[0:12].assign(position='sitting')
df_standing = df_formants.iloc[13:26].assign(position='standing')
df_supine = df_formants.iloc[27:].assign(position='supine')

# Stack the labelled chunks back into one long-format frame.
df_combined = pd.concat([df_sitting, df_standing, df_supine], ignore_index=True)

# Force the formant columns to numbers; anything unparseable becomes NaN.
formant_columns = ['F1_Hz', 'F2_Hz', 'F3_Hz']
for column in formant_columns:
    df_combined[column] = pd.to_numeric(df_combined[column], errors='coerce')

# Discard incomplete rows. Note this looks at every column, not only the
# three formant columns.
df_combined = df_combined.dropna()

# Kruskal-Wallis H-test per formant, comparing the three postures as
# independent samples. Results are kept as {formant: (H, p)}.
kruskal_results = {}
for formant in formant_columns:
    samples = [
        df_combined.loc[df_combined['position'] == posture, formant]
        for posture in ('sitting', 'standing', 'supine')
    ]
    h_statistic, p_value = kruskal(*samples)
    kruskal_results[formant] = (h_statistic, p_value)

# Report the statistic and p-value for every formant.
for formant, (h_statistic, p_value) in kruskal_results.items():
    print(f"Kruskal-Wallis test for {formant}:")
    print(f"H-statistic: {h_statistic}")
    print(f"P-value: {p_value}\n")

Kruskal-Wallis test for F1_Hz:
H-statistic: 6.031531531531527
P-value: 0.04900829224174808

Kruskal-Wallis test for F2_Hz:
H-statistic: 0.2942942942942892
P-value: 0.8631669555160798

Kruskal-Wallis test for F3_Hz:
H-statistic: 1.4099099099099135
P-value: 0.49413083190041984



In [None]:

import pandas as pd
from scipy.stats import friedmanchisquare

# Read the formant measurements from the Colab filesystem.
df_formants = pd.read_csv('/content/formant_analysis_final.csv')

# Carve the stacked file into one chunk per posture. Row positions 12 and 26
# are not part of any chunk -- presumably separator/header rows; TODO confirm.
df_sitting = df_formants.iloc[0:12].copy()
df_standing = df_formants.iloc[13:26].copy()
df_supine = df_formants.iloc[27:].copy()

# Tag every chunk with its posture before stacking them again.
for chunk, posture in (
    (df_sitting, 'sitting'),
    (df_standing, 'standing'),
    (df_supine, 'supine'),
):
    chunk['position'] = posture

df_combined = pd.concat([df_sitting, df_standing, df_supine], ignore_index=True)

# Coerce the formant columns to numeric values (non-numeric text -> NaN),
# then drop every row that has a NaN in any column.
formant_columns = ['F1_Hz', 'F2_Hz', 'F3_Hz']
for column in formant_columns:
    df_combined[column] = pd.to_numeric(df_combined[column], errors='coerce')
df_combined = df_combined.dropna()

# One sub-frame per posture, in the order the samples are passed to the test.
by_position = {
    posture: df_combined[df_combined['position'] == posture]
    for posture in ('sitting', 'standing', 'supine')
}

# Friedman test per formant; results are kept as {formant: (statistic, p)}.
# NOTE(review): the Friedman test matches the i-th value of each sample as one
# repeated-measures block, so this relies on the surviving rows lining up
# across postures. Nothing here verifies that pairing -- confirm.
friedman_results = {}
for formant in formant_columns:
    stat, p_value = friedmanchisquare(
        *(frame[formant] for frame in by_position.values())
    )
    friedman_results[formant] = (stat, p_value)

# Report the statistic and p-value for every formant.
for formant, (stat, p_value) in friedman_results.items():
    print(f"Friedman test for {formant}:")
    print(f"Test statistic: {stat}")
    print(f"P-value: {p_value}\n")

Friedman test for F1_Hz:
Test statistic: 12.166666666666657
P-value: 0.0022805620953921715

Friedman test for F2_Hz:
Test statistic: 0.5
P-value: 0.7788007830714049

Friedman test for F3_Hz:
Test statistic: 4.666666666666657
P-value: 0.09697196786440551



## Parametric test

In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Read the formant measurements from the Colab filesystem.
df_formants = pd.read_csv('/content/formant_analysis_final.csv')

# Carve the stacked file into one labelled chunk per posture. Row positions
# 12 and 26 are not part of any chunk -- presumably separator/header rows;
# TODO confirm against the CSV.
df_sitting = df_formants.iloc[0:12].assign(position='sitting')
df_standing = df_formants.iloc[13:26].assign(position='standing')
df_supine = df_formants.iloc[27:].assign(position='supine')

# Long-format frame with a `position` factor column.
df_combined = pd.concat([df_sitting, df_standing, df_supine], ignore_index=True)

# Coerce the formant columns to numeric values (non-numeric text -> NaN),
# then drop every row that has a NaN in any column.
formant_columns = ['F1_Hz', 'F2_Hz', 'F3_Hz']
for column in formant_columns:
    df_combined[column] = pd.to_numeric(df_combined[column], errors='coerce')
df_combined = df_combined.dropna()

# One-way ANOVA per formant with posture as the single categorical factor:
# fit an OLS model `<formant> ~ C(position)` and take its Type II table.
formula = '{formant} ~ C(position)'
results = {}
for formant in formant_columns:
    fitted = ols(formula.format(formant=formant), data=df_combined).fit()
    results[formant] = sm.stats.anova_lm(fitted, typ=2)

# Show the ANOVA table of every formant.
for formant, anova_result in results.items():
    print(f"ANOVA Results for {formant}:\n{anova_result}\n")

ANOVA Results for F1_Hz:
                    sum_sq    df         F    PR(>F)
C(position)  159827.041667   2.0  2.639561  0.086419
Residual     999084.977107  33.0       NaN       NaN

ANOVA Results for F2_Hz:
                   sum_sq    df         F    PR(>F)
C(position)  1.853244e+05   2.0  0.316756  0.730699
Residual     9.653640e+06  33.0       NaN       NaN

ANOVA Results for F3_Hz:
                   sum_sq    df         F    PR(>F)
C(position)  2.192340e+05   2.0  0.833868  0.443312
Residual     4.338048e+06  33.0       NaN       NaN



In [None]:
"""
Welch's ANOVA is a modification of the traditional ANOVA test that does not assume equal variances across groups.
It's particularly useful when the assumption of homogeneity of variances is violated.
"""
from scipy.stats import f_oneway
import pandas as pd


# Imported here because this cell computes Welch's statistic itself:
# scipy's `f_oneway`, called as the original cell did, is the classic
# equal-variance one-way ANOVA, not Welch's test.
import numpy as np
from scipy.stats import f as f_dist


def welch_anova(*samples):
    """Welch's one-way ANOVA, which does not assume equal group variances.

    Parameters
    ----------
    *samples : array-like
        One 1-D collection of observations per group (at least two groups,
        each with at least two observations and non-zero variance).

    Returns
    -------
    tuple of float
        ``(statistic, p_value)`` where ``statistic`` is Welch's F and
        ``p_value`` is the upper tail of an F distribution with
        ``k - 1`` and ``(k**2 - 1) / (3 * lam)`` degrees of freedom.

    Raises
    ------
    ValueError
        If fewer than two groups are given, a group has fewer than two
        observations, or a group has zero variance (weights are undefined).
    """
    groups = [np.asarray(sample, dtype=float) for sample in samples]
    k = len(groups)
    if k < 2:
        raise ValueError("Welch's ANOVA needs at least two groups.")

    sizes = np.array([group.size for group in groups], dtype=float)
    if np.any(sizes < 2):
        raise ValueError("Every group needs at least two observations.")

    means = np.array([group.mean() for group in groups])
    variances = np.array([group.var(ddof=1) for group in groups])
    if np.any(variances == 0):
        raise ValueError("Every group needs a non-zero variance.")

    # Each group is weighted by the precision of its mean (n_i / s_i^2).
    weights = sizes / variances
    weight_sum = weights.sum()
    weighted_grand_mean = (weights * means).sum() / weight_sum

    between = (weights * (means - weighted_grand_mean) ** 2).sum() / (k - 1)
    # `lam` drives both the correction of the denominator and the
    # approximate denominator degrees of freedom.
    lam = (((1 - weights / weight_sum) ** 2) / (sizes - 1)).sum()

    statistic = between / (1 + 2 * (k - 2) / (k ** 2 - 1) * lam)
    df_between = k - 1
    df_within = (k ** 2 - 1) / (3 * lam)
    p_value = f_dist.sf(statistic, df_between, df_within)
    return float(statistic), float(p_value)


# Load the CSV file
df_formants = pd.read_csv('/content/formant_analysis_final.csv')

# Split the stacked file into one chunk per posture. Row positions 12 and 26
# are not part of any chunk -- presumably separator/header rows; TODO confirm.
df_sitting = df_formants.iloc[0:12].copy()
df_standing = df_formants.iloc[13:26].copy()
df_supine = df_formants.iloc[27:].copy()

# Label each subset with the corresponding position
df_sitting['position'] = 'sitting'
df_standing['position'] = 'standing'
df_supine['position'] = 'supine'

# Combine the subsets into a single DataFrame
df_combined = pd.concat([df_sitting, df_standing, df_supine], ignore_index=True)

# Convert formant columns to numeric; non-numeric entries become NaN
for column in ['F1_Hz', 'F2_Hz', 'F3_Hz']:
    df_combined[column] = pd.to_numeric(df_combined[column], errors='coerce')

# Drop rows with a NaN in any column
df_combined = df_combined.dropna()

# Welch's ANOVA per formant; results are kept as {formant: (statistic, p)}.
welch_anova_results = {}
for formant in ['F1_Hz', 'F2_Hz', 'F3_Hz']:
    # Extract data for each position
    sitting_data = df_combined.loc[df_combined['position'] == 'sitting', formant]
    standing_data = df_combined.loc[df_combined['position'] == 'standing', formant]
    supine_data = df_combined.loc[df_combined['position'] == 'supine', formant]
    # Unequal-variance (Welch) test instead of the pooled-variance F-test
    statistic, p_value = welch_anova(sitting_data, standing_data, supine_data)
    welch_anova_results[formant] = (statistic, p_value)

# Print Welch's ANOVA test results
for formant, (statistic, p_value) in welch_anova_results.items():
    print(f"Welch's ANOVA test for {formant}:")
    print(f"Test statistic: {statistic}")
    print(f"P-value: {p_value}\n")


Welch's ANOVA test for F1_Hz:
Test statistic: 2.6395614466650645
P-value: 0.08641862866184676

Welch's ANOVA test for F2_Hz:
Test statistic: 0.31675637254478417
P-value: 0.7306985455711174

Welch's ANOVA test for F3_Hz:
Test statistic: 0.8338683472469535
P-value: 0.44331194460818946

