In [6]:
from scipy.stats import f_oneway, kruskal


In [5]:
import numpy as np  
import pandas as pd

In [7]:
symmetry_metrics=['ForceSymmetry', 'ImpulseSymmetry', 'MaxForceSymmetry', 'TorqueSymmetry']

In [11]:
vald_data = pd.read_csv('data/vald_data_afterrisk.csv')

In [12]:

# Prepare data for hypothesis testing: group symmetry metrics by risk category
hypothesis_results = {}
for metric in symmetry_metrics:
    # Group data by risk category
    low_risk = vald_data[vald_data[f'{metric}Risk'] == 'Low Risk'][metric]
    medium_risk = vald_data[vald_data[f'{metric}Risk'] == 'Medium Risk'][metric]
    high_risk = vald_data[vald_data[f'{metric}Risk'] == 'High Risk'][metric]

    # Perform ANOVA if data is sufficient, otherwise fallback to Kruskal-Wallis
    if len(low_risk) > 2 and len(medium_risk) > 2 and len(high_risk) > 2:
        f_stat, p_value = f_oneway(low_risk, medium_risk, high_risk)
        test_type = "ANOVA"
    else:
        h_stat, p_value = kruskal(low_risk, medium_risk, high_risk)
        test_type = "Kruskal-Wallis"

    # Store results
    hypothesis_results[metric] = {
        'Test Type': test_type,
        'P-Value': p_value,
        'Significant': p_value < 0.05
    }

# Convert results into a DataFrame for better readability
hypothesis_results_df = pd.DataFrame(hypothesis_results).transpose()


hypothesis_results_df


  h_stat, p_value = kruskal(low_risk, medium_risk, high_risk)


Unnamed: 0,Test Type,P-Value,Significant
ForceSymmetry,ANOVA,0.0,True
ImpulseSymmetry,Kruskal-Wallis,,False
MaxForceSymmetry,ANOVA,0.0,True
TorqueSymmetry,ANOVA,0.0,True


Hypothesis Testing Results
ForceSymmetry:

Test Type: ANOVA
P-Value: 0.0 (highly significant)
Significance: Yes
Conclusion:
There is a significant difference in ForceSymmetry across Low Risk, Medium Risk, and High Risk categories.
This indicates that the categorization logic for ForceSymmetry effectively separates the groups.
ImpulseSymmetry:

Test Type: Kruskal-Wallis
P-Value: Not computable (all athletes fall into Low Risk)
Significance: No
Conclusion:
The thresholds or buffer logic for ImpulseSymmetry are likely too lenient, categorizing all athletes as Low Risk.
This metric does not currently contribute meaningfully to risk differentiation and requires refinement.