In [11]:
import pandas as pd
import numpy as np
from scipy import stats

# =============================================================================
# COMPREHENSIVE STATISTICAL REPORT: RELIABILITY AND NORMALITY
# Version: 7.1 (Expanded Battery for Open Science)
# This script standardizes headers, cleans numeric data, and computes
# measurement consistency and distribution assumptions for the full dataset.
# =============================================================================

def load_and_preprocess(file_path):
    """Load the semicolon-separated CSV and coerce every text column to numeric.

    Headers are normalized first (runs of whitespace collapsed to a single
    space, leading/trailing whitespace removed). Every text column is then
    stripped, has decimal commas replaced by dots, and is converted with
    ``pd.to_numeric(errors='coerce')``; any value that is not a number
    (placeholders, labels, identifiers) therefore becomes NaN.

    Args:
        file_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        DataFrame with normalized headers and numeric columns.
    """
    df = pd.read_csv(file_path, sep=';')

    # 1. Standardize headers: collapse whitespace runs and strip the ends.
    df.columns = df.columns.str.replace(r'\s+', ' ', regex=True).str.strip()

    # 2. Convert text columns to numeric.
    for col in df.columns:
        column = df[col]
        # Detect text by dtype kind instead of `dtype == 'object'`: pandas >= 3
        # infers a dedicated string dtype for text columns, for which the
        # equality test is False and the conversion would be silently skipped.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            cleaned = column.astype(str).str.strip().str.replace(',', '.', regex=False)
            df[col] = pd.to_numeric(cleaned, errors='coerce')

    return df

def calculate_reliability(data, trial_cols, test_name):
    """Compute between-trial reliability: typical-error CV% and ICC (3,1).

    Rows with a missing value in any of ``trial_cols`` are dropped. The
    remaining subjects x trials matrix is decomposed as a two-way ANOVA
    without replication:

    * typical error = sqrt(MS_error); for two trials this is identical to
      SD(trial 1 - trial 2) / sqrt(2),
    * CV% = typical error / grand mean * 100,
    * ICC (3,1) = (MS_subjects - MS_error) / (MS_subjects + (k - 1) * MS_error).

    Args:
        data: DataFrame holding the trial columns.
        trial_cols: List of column names, one per trial (at least two).
        test_name: Label copied into the 'Test' field of the result.

    Returns:
        dict with keys 'Test', 'n', 'CV (%)' and 'ICC (3,1)'. The two
        statistics are formatted strings with a qualitative rating, or
        'N/A' when they cannot be computed (fewer than two complete
        subjects or trials, zero grand mean, or zero total variance).
    """
    trials = data[trial_cols].dropna()
    n = len(trials)
    # Number of trials comes from the data instead of being hard-coded to 2,
    # so the mean squares stay correct if more than two trials are supplied.
    k = trials.shape[1]

    if n < 2 or k < 2:
        return {'Test': test_name, 'n': n, 'CV (%)': 'N/A', 'ICC (3,1)': 'N/A'}

    scores = trials.to_numpy(dtype=float)
    grand_mean = scores.mean()
    subject_means = scores.mean(axis=1, keepdims=True)
    trial_means = scores.mean(axis=0, keepdims=True)

    ms_between_subjects = k * np.sum((subject_means - grand_mean) ** 2) / (n - 1)
    # Residuals are computed directly (not as SS_total - SS_rows - SS_cols) so
    # rounding can never produce a slightly negative error sum of squares.
    residuals = scores - subject_means - trial_means + grand_mean
    ms_error = np.sum(residuals ** 2) / ((n - 1) * (k - 1))

    typical_error = np.sqrt(ms_error)

    cv_text = 'N/A'
    if grand_mean != 0 and np.isfinite(grand_mean):
        cv = (typical_error / grand_mean) * 100
        if np.isfinite(cv):
            cv_eval = "Good (<10%)" if cv < 10 else "Moderate (10-20%)" if cv < 20 else "Poor (>20%)"
            cv_text = f"{cv:.2f} ({cv_eval})"

    icc_text = 'N/A'
    icc_denominator = ms_between_subjects + (k - 1) * ms_error
    # A zero denominator means the data carry no variance at all: the ICC is
    # undefined and must not be reported as "Poor".
    if icc_denominator != 0 and np.isfinite(icc_denominator):
        icc = (ms_between_subjects - ms_error) / icc_denominator
        icc_eval = ("Excellent (>0.9)" if icc > 0.9
                    else "Good (0.75-0.9)" if icc > 0.75
                    else "Moderate (0.5-0.75)" if icc > 0.5
                    else "Poor (<0.5)")
        icc_text = f"{icc:.3f} ({icc_eval})"

    return {
        'Test': test_name, 'n': n,
        'CV (%)': cv_text,
        'ICC (3,1)': icc_text
    }

def perform_normality_test(data, variables, group_col, raw_df):
    """Run a Shapiro-Wilk normality test for each variable within each group.

    Group labels are taken from ``raw_df`` (stripped of surrounding
    whitespace) and written into ``data[group_col]``; note that ``data`` is
    therefore modified in place. A group/variable combination is skipped
    when the variable is not a column of ``data``, when fewer than three
    non-missing values are available, or when all values are identical
    (Shapiro-Wilk is not meaningful on zero-range data and would otherwise
    be reported as "normal").

    Args:
        data: DataFrame with the numeric variables; row index aligned with
            ``raw_df``.
        variables: Iterable of column names to test.
        group_col: Name of the grouping column.
        raw_df: DataFrame holding the textual group labels.

    Returns:
        DataFrame with columns 'Group', 'Variable', 'W Stat', 'p-value',
        'Normal?' and 'Recommended Analysis' (empty, but with those columns,
        when nothing could be tested).

    Raises:
        KeyError: If ``group_col`` cannot be found in ``raw_df``.
    """
    report_columns = ['Group', 'Variable', 'W Stat', 'p-value',
                      'Normal?', 'Recommended Analysis']

    # raw_df may still carry un-normalized headers (e.g. 'Group '), so fall
    # back to matching on the whitespace-normalized header name.
    raw_group_col = group_col
    if raw_group_col not in raw_df.columns:
        normalized = raw_df.columns.str.replace(r'\s+', ' ', regex=True).str.strip()
        matches = raw_df.columns[normalized == group_col]
        if len(matches) == 0:
            raise KeyError(group_col)
        raw_group_col = matches[0]

    data[group_col] = raw_df[raw_group_col].str.strip()
    groups = data[group_col].dropna().unique()

    report = []
    for group in groups:
        group_data = data[data[group_col] == group]
        for var in variables:
            if var not in group_data.columns:
                continue
            values = group_data[var].dropna()
            if len(values) < 3 or values.nunique() < 2:
                continue
            w_stat, p_val = stats.shapiro(values)
            is_normal = p_val > 0.05
            report.append({
                'Group': group, 'Variable': var,
                'W Stat': round(w_stat, 3), 'p-value': round(p_val, 4),
                'Normal?': 'Yes' if is_normal else 'No',
                'Recommended Analysis': 'Parametric' if is_normal else 'Non-Parametric'
            })
    return pd.DataFrame(report, columns=report_columns)

# --- EXECUTION ---
# Two views of the same file: `raw_df` keeps the untouched text (it supplies
# the group labels for the normality report), `df` is the cleaned table.
raw_df = pd.read_csv('Data_TableTennis.csv', sep=';')
df = load_and_preprocess('Data_TableTennis.csv')

# Reliability mapping: report label -> [trial 1 column, trial 2 column]
reliability_vars = {
    'SJ (cm)':                ['SJ 1', 'SJ 2'],
    'CMJ (cm)':               ['CMJ 1', 'CMJ 2'],
    'RSI 15cm':               ['Drop 15 1 RSI', 'Drop 15 2 RSI'],
    'RSI 30cm':               ['Drop 30 1 RSI', 'Drop 30 2 RSI'],
    'RSI 45cm':               ['Drop 45 1 RSI', 'Drop 45 2 RSI'],
    'Sprint 5m (s)':          ['Sprint 5m 1', 'Sprint 5m 2'],
    'Sprint 10m (s)':         ['Sprint 10m 1', 'Sprint 10m 2'],
    'T-Test (s)':             ['Test T 1', 'Test T 2'],
    'Square Test (s)':        ['Square 1', 'Square 2'],
    'Handgrip Dom (kgf)':     ['Handgrip D 1', 'Handgrip D 2'],
    'Handgrip Non-Dom (kgf)': ['Handgrip E 1', 'Handgrip E 2'],
}

# Variables checked for normality (the 'Best' value of each test)
normality_vars = [
    'SJ Best',
    'CMJ Best',
    'Drop 30 RSI Best',
    'Sprint 5m Best',
    'Sprint 10m Best',
    'Test T Best',
    'Square Best',
    'Handgrip Total Best',
    'Radar Vel Best',
    'Target Efficiency',
]

# 1. OUTPUT RELIABILITY REPORT
print(
    "======================================================================",
    "             RELIABILITY REPORT: CONSISTENCY & PRECISION",
    "======================================================================",
    sep="\n",
)
rel_results = [
    calculate_reliability(df, trial_cols, label)
    for label, trial_cols in reliability_vars.items()
]
print(pd.DataFrame(rel_results).to_string(index=False))
print("-" * 70)

# 2. OUTPUT NORMALITY REPORT
print(
    "\n======================================================================",
    "             NORMALITY REPORT: SHAPIRO-WILK BY GROUP",
    "======================================================================",
    sep="\n",
)
norm_df = perform_normality_test(df, normality_vars, 'Group', raw_df)
print(norm_df.to_string(index=False))
print("=" * 70)

             RELIABILITY REPORT: CONSISTENCY & PRECISION
                  Test  n                    CV (%)                   ICC (3,1)
               SJ (cm) 30        6.64 (Good (<10%))    0.937 (Excellent (>0.9))
              CMJ (cm) 30        4.04 (Good (<10%))    0.972 (Excellent (>0.9))
              RSI 15cm 30 13.41 (Moderate (10-20%))     0.893 (Good (0.75-0.9))
              RSI 30cm 30 16.86 (Moderate (10-20%))     0.816 (Good (0.75-0.9))
              RSI 45cm 29       20.45 (Poor (>20%)) 0.735 (Moderate (0.5-0.75))
         Sprint 5m (s) 32        3.69 (Good (<10%))    0.953 (Excellent (>0.9))
        Sprint 10m (s) 32        2.52 (Good (<10%))    0.981 (Excellent (>0.9))
            T-Test (s) 30        4.37 (Good (<10%))     0.899 (Good (0.75-0.9))
       Square Test (s) 30        3.10 (Good (<10%))     0.900 (Good (0.75-0.9))
    Handgrip Dom (kgf) 32        7.70 (Good (<10%))    0.966 (Excellent (>0.9))
Handgrip Non-Dom (kgf) 30        8.15 (Good (<10%))    0.934 (Excellent (>0.9))

In [15]:
import pandas as pd
import numpy as np

# =============================================================================
# TABLE 1 GENERATOR: DESCRIPTIVE CHARACTERISTICS (Mean ± SD)
# Version: 2.0 (Robust Group Identification)
# =============================================================================

def load_and_clean_descriptive(file_path):
    """Load the semicolon-separated CSV for the descriptive table.

    Headers are whitespace-normalized, the 'Group' column (when present) is
    kept as stripped text, and every other text column is converted to
    numeric, accepting a decimal comma. Values that cannot be parsed (for
    example a '-' placeholder) become NaN.

    Args:
        file_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        Cleaned DataFrame.
    """
    df = pd.read_csv(file_path, sep=';')

    # 1. Strict header clean-up.
    df.columns = df.columns.str.replace(r'\s+', ' ', regex=True).str.strip()

    # 2. Standardize the Group column so later label lookups match exactly.
    if 'Group' in df.columns:
        df['Group'] = df['Group'].astype(str).str.strip()

    # 3. Numeric conversion (comma -> dot; unparseable text such as '-' -> NaN).
    for col in df.columns:
        if col == 'Group':
            continue
        column = df[col]
        # Detect text by dtype kind instead of `dtype == 'object'`: pandas >= 3
        # infers a dedicated string dtype for text columns, for which the
        # equality test is False and the conversion would be silently skipped.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            cleaned = column.astype(str).str.strip().str.replace(',', '.', regex=False)
            df[col] = pd.to_numeric(cleaned, errors='coerce')

    return df

# --- EXECUTION ---
df = load_and_clean_descriptive('Data_TableTennis.csv')

# Mapping: {'Label in the table': 'Exact column name in the spreadsheet'}
variables_map = {
    'Anthropometry': {
        'Body Mass (kg)': 'Mass',
        'Stature (m)': 'Height',
        'Wingspan (m)': 'Wingspan'
    },
    'Neuromuscular Power': {
        'Squat Jump (cm)': 'SJ Best',
        'Countermovement Jump (cm)': 'CMJ Best',
        'CMJ Free Arms (cm)': 'CMJ Free Best',
        'Standing Long Jump (cm)': 'Hor Jump Best',
        'Handgrip Total (kgf)': 'Handgrip Total Best'
    },
    'Speed and Agility': {
        '5m Sprint (s)': 'Sprint 5m Best',
        '10m Sprint (s)': 'Sprint 10m Best',
        'Modified T-Test (s)': 'Test T Best',
        'Square Test (s)': 'Square Best'
    },
    'Technical Performance': {
        'Peak Ball Velocity (km/h)': 'Radar Vel Best',
        'Technical Efficiency (%)': 'Target Efficiency'
    }
}

# Resolve the actual group labels from the data. A missing label is reported
# explicitly instead of surfacing as an opaque IndexError from `[...][0]`.
available_groups = df['Group'].unique()
group_conv = next((g for g in available_groups if 'Conv' in g), None)
group_para = next((g for g in available_groups if 'Para' in g or 'Atleta' in g), None)
if group_conv is None or group_para is None:
    raise ValueError(
        "Could not identify both groups in the 'Group' column "
        f"(labels found: {list(available_groups)})"
    )

print("======================================================================")
print("             CONFIRMATION REPORT: TABLE 1 VALUES")
print(f"             Conv: {group_conv} | Para: {group_para}")
print("======================================================================")

for category, vars_dict in variables_map.items():
    print(f"\n[{category}]")
    for table_label, csv_col in vars_dict.items():
        if csv_col in df.columns:
            # Per-group mean and sample standard deviation.
            res = df.groupby('Group')[csv_col].agg(['mean', 'std'])

            # Conventional group
            m_c = res.loc[group_conv, 'mean']
            s_c = res.loc[group_conv, 'std']

            # Para-athlete group
            m_p = res.loc[group_para, 'mean']
            s_p = res.loc[group_para, 'std']

            # Efficiency columns are rescaled by 100 to print as a percentage.
            mult = 100 if 'Efficiency' in csv_col else 1

            print(f"{table_label:<30} | Conv: {m_c*mult:>6.2f} ± {s_c*mult:<5.2f} | Para: {m_p*mult:>6.2f} ± {s_p*mult:<5.2f}")
        else:
            print(f"!!! Column '{csv_col}' not found in CSV !!!")

print("\n" + "="*70)

             CONFIRMATION REPORT: TABLE 1 VALUES
             Conv: Convencional | Para: Para-athlete

[Anthropometry]
Body Mass (kg)                 | Conv:  49.53 ± 8.93  | Para:  56.89 ± 7.24 
Stature (m)                    | Conv:   1.58 ± 0.09  | Para:   1.63 ± 0.18 
Wingspan (m)                   | Conv:   1.63 ± 0.11  | Para:   1.66 ± 0.12 

[Neuromuscular Power]
Squat Jump (cm)                | Conv:  23.07 ± 5.89  | Para:  21.47 ± 5.32 
Countermovement Jump (cm)      | Conv:  25.56 ± 5.54  | Para:  23.30 ± 6.27 
CMJ Free Arms (cm)             | Conv:  30.48 ± 7.30  | Para:  27.55 ± 6.92 
Standing Long Jump (cm)        | Conv: 170.13 ± 19.84 | Para: 156.31 ± 29.09
Handgrip Total (kgf)           | Conv:  29.00 ± 8.07  | Para:  33.15 ± 8.82 

[Speed and Agility]
5m Sprint (s)                  | Conv:   1.27 ± 0.11  | Para:   1.43 ± 0.33 
10m Sprint (s)                 | Conv:   2.14 ± 0.17  | Para:   2.46 ± 0.61 
Modified T-Test (s)            | Conv:   6.65 ± 0.77  | Para:   7.6

In [33]:
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.anova import AnovaRM

# =============================================================================
# STATISTICAL MASTER REPORT: RSI DROP HEIGHT ANALYSIS
# Version: 5.0 (Group Correction & Robust Statistics)
# =============================================================================

def load_and_preprocess(file_path):
    """Load the semicolon-separated CSV, clean labels and coerce text to numeric.

    Steps:
      1. Normalize headers (collapse whitespace runs, strip the ends).
      2. If a 'Group' column exists, replace missing labels with 'Unknown'
         and strip surrounding whitespace.
      3. Convert every text column other than 'Group' and 'ID' to numeric,
         accepting a decimal comma; unparseable values become NaN.

    Args:
        file_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        Cleaned DataFrame.
    """
    df = pd.read_csv(file_path, sep=';')
    # Header clean-up.
    df.columns = df.columns.str.replace(r'\s+', ' ', regex=True).str.strip()

    # Group clean-up: missing labels become 'Unknown' so they can be filtered
    # out later instead of turning into the literal string 'nan'.
    if 'Group' in df.columns:
        df['Group'] = df['Group'].fillna('Unknown').astype(str).str.strip()

    # Numeric conversion.
    for col in df.columns:
        if col in ('Group', 'ID'):
            continue
        column = df[col]
        # Detect text by dtype kind instead of `dtype == 'object'`: pandas >= 3
        # infers a dedicated string dtype for text columns, for which the
        # equality test is False and the conversion would be silently skipped.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            # Strip padding before parsing, as the other loaders in this notebook do.
            cleaned = column.astype(str).str.strip().str.replace(',', '.', regex=False)
            df[col] = pd.to_numeric(cleaned, errors='coerce')
    return df

def calculate_cohens_d(group1, group2):
    """Return Cohen's d for two samples using the pooled standard deviation.

    The pooled SD weights each sample variance (ddof=1) by its degrees of
    freedom; the sign of the result follows mean(group1) - mean(group2).
    """
    size_a = len(group1)
    size_b = len(group2)
    var_a = np.var(group1, ddof=1)
    var_b = np.var(group2, ddof=1)

    weighted_variance = ((size_a - 1) * var_a + (size_b - 1) * var_b) / (size_a + size_b - 2)
    pooled_sd = np.sqrt(weighted_variance)

    mean_gap = np.mean(group1) - np.mean(group2)
    return mean_gap / pooled_sd

# --- EXECUTION ---
df = load_and_preprocess('Data_TableTennis.csv')
rsi_cols = ['Drop 15 RSI Best', 'Drop 30 RSI Best', 'Drop 45 RSI Best']
df['ID'] = df['ID'].astype(str)

print("======================================================================")
print("             RSI ANALYSIS MASTER REPORT (INTRA & INTER GROUP)")
print(f"             Groups found in data: {df['Group'].unique()}")
print("======================================================================")

# --- PART 1: INTRA-GROUP ANALYSIS (RM-ANOVA) ---
# Keep only real groups (ignore the 'Unknown' / 'nan' placeholders).
valid_groups = [g for g in df['Group'].unique() if g != 'Unknown' and g != 'nan']

for group_name in valid_groups:
    # Balanced design: only subjects with all three drop heights, one row per ID.
    group_df = df[df['Group'] == group_name][['ID'] + rsi_cols].dropna().drop_duplicates(subset=['ID'])
    n_final = len(group_df)

    if n_final < 2:
        print(f"\n[!] Group {group_name} skipped: Insufficient balanced data (n={n_final})")
        continue

    df_long = pd.melt(group_df, id_vars=['ID'], value_vars=rsi_cols, var_name='Height', value_name='RSI')
    model = AnovaRM(data=df_long, depvar='RSI', subject='ID', within=['Height']).fit()

    means = group_df[rsi_cols].mean()
    stds = group_df[rsi_cols].std()

    print(f"\n>>> STRATIFIED ANALYSIS: {group_name.upper()} (n = {n_final})")
    print("DESCRIPTIVES (Mean ± SD):")
    for col in rsi_cols:
        print(f"  {col:<20}: {means[col]:.3f} ± {stds[col]:.3f}")

    f_val = model.anova_table['F Value'].iloc[0]
    p_anova = model.anova_table['Pr > F'].iloc[0]
    print(f"\nRM-ANOVA: F = {f_val:.2f}, p = {p_anova:.4f}")

    print("BONFERRONI POST-HOC:")
    pairs = [('Drop 15 RSI Best', 'Drop 30 RSI Best'),
             ('Drop 30 RSI Best', 'Drop 45 RSI Best')]
    for p1, p2 in pairs:
        t_stat, p_pair = stats.ttest_rel(group_df[p1], group_df[p2])
        # Bonferroni factor of 2: only the two adjacent-height comparisons are tested.
        p_corr = min(p_pair * 2, 1.0)
        print(f"  {p1} vs {p2}: p-corr = {p_corr:.4f}")

# --- PART 2: INTER-GROUP COMPARISON (the 30 cm gap) ---
print("\n" + "="*70)
print("             STEP 4: INTER-GROUP COMPARISON AT 30CM")
print("======================================================================")

target = 'Drop 30 RSI Best'
# Match group labels by substring so spelling variants are still found.
conv_30 = df[df['Group'].str.contains('Conv', case=False, na=False)][target].dropna()
para_30 = df[df['Group'].str.contains('Para', case=False, na=False)][target].dropna()

if len(conv_30) == 0 or len(para_30) == 0:
    print("Error: Could not find distinct groups to compare. Check 'Group' column labels.")
elif len(conv_30) < 2 or len(para_30) < 2:
    # A sample variance (ddof=1) needs at least two observations per group;
    # with fewer, the t-test and Cohen's d would silently evaluate to NaN.
    print(f"Error: Need at least 2 observations per group (Conv n={len(conv_30)}, Para n={len(para_30)}).")
else:
    t_stat, p_inter = stats.ttest_ind(conv_30, para_30)
    d = calculate_cohens_d(conv_30, para_30)

    # Classify on |d|: the sign only encodes which group is higher, so a
    # large negative effect must not be labelled 'Small'.
    magnitude = abs(d)
    effect_label = 'Large' if magnitude > 0.8 else 'Medium' if magnitude > 0.5 else 'Small'

    print(f"Conventional (n={len(conv_30)}) vs Para-athlete (n={len(para_30)}):")
    print(f"  T-statistic: {t_stat:.2f}")
    print(f"  p-value    : {p_inter:.4f}")
    print(f"  Cohen's d  : {d:.2f} ({effect_label} effect)")

print("\n" + "="*70)

             RSI ANALYSIS MASTER REPORT (INTRA & INTER GROUP)
             Groups found in data: ['Convencional' 'Para-athlete']

>>> STRATIFIED ANALYSIS: CONVENCIONAL (n = 19)
DESCRIPTIVES (Mean ± SD):
  Drop 15 RSI Best    : 0.595 ± 0.187
  Drop 30 RSI Best    : 0.637 ± 0.209
  Drop 45 RSI Best    : 0.599 ± 0.226

RM-ANOVA: F = 1.36, p = 0.2689
BONFERRONI POST-HOC:
  Drop 15 RSI Best vs Drop 30 RSI Best: p-corr = 0.2135
  Drop 30 RSI Best vs Drop 45 RSI Best: p-corr = 0.4246

>>> STRATIFIED ANALYSIS: PARA-ATHLETE (n = 11)
DESCRIPTIVES (Mean ± SD):
  Drop 15 RSI Best    : 0.387 ± 0.135
  Drop 30 RSI Best    : 0.432 ± 0.134
  Drop 45 RSI Best    : 0.374 ± 0.161

RM-ANOVA: F = 1.71, p = 0.2063
BONFERRONI POST-HOC:
  Drop 15 RSI Best vs Drop 30 RSI Best: p-corr = 0.2961
  Drop 30 RSI Best vs Drop 45 RSI Best: p-corr = 0.2615

             STEP 4: INTER-GROUP COMPARISON AT 30CM
Conventional (n=19) vs Para-athlete (n=11):
  T-statistic: 2.92
  p-value    : 0.0068
  Cohen's d  : 1.11 (Large effect)

In [39]:
import pandas as pd
import numpy as np
from scipy import stats

# =============================================================================
# BIVARIATE CORRELATIONS: TABLE 2 GENERATOR
# Version: 8.0 (Full Predictor Battery)
# Method: Spearman's Rho (ρ) with Pairwise Deletion
# =============================================================================

def load_and_preprocess(file_path):
    """Load the semicolon-separated CSV for the correlation analysis.

    Headers are whitespace-normalized, the 'Group' column (when present) is
    kept as stripped text, and every other text column is converted to
    numeric, accepting a decimal comma; unparseable values become NaN.

    Args:
        file_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        Cleaned DataFrame.
    """
    df = pd.read_csv(file_path, sep=';')
    # Header clean-up and group-label normalization.
    df.columns = df.columns.str.replace(r'\s+', ' ', regex=True).str.strip()
    if 'Group' in df.columns:
        df['Group'] = df['Group'].astype(str).str.strip()

    # Numeric conversion (comma -> dot; unparseable text -> NaN).
    for col in df.columns:
        if col == 'Group':
            continue
        column = df[col]
        # Detect text by dtype kind instead of `dtype == 'object'`: pandas >= 3
        # infers a dedicated string dtype for text columns, for which the
        # equality test is False and the conversion would be silently skipped.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            # Strip padding before parsing, as the other loaders in this notebook do.
            cleaned = column.astype(str).str.strip().str.replace(',', '.', regex=False)
            df[col] = pd.to_numeric(cleaned, errors='coerce')
    return df

# --- EXECUTION ---
df = load_and_preprocess('Data_TableTennis.csv')

# Variable mapping: label shown in the table -> column name in the spreadsheet
predictors_map = {
    'SJ (cm)': 'SJ Best',
    'CMJ (cm)': 'CMJ Best',
    'CMJ Free Arms (cm)': 'CMJ Free Best',
    'Hor. Jump (cm)': 'Hor Jump Best',
    'Handgrip Total (kgf)': 'Handgrip Total Best',
    'RSI 15 cm': 'Drop 15 RSI Best',
    'RSI 30 cm': 'Drop 30 RSI Best',
    'RSI 45 cm': 'Drop 45 RSI Best',
    '5m Sprint (s)': 'Sprint 5m Best',
    '10m Sprint (s)': 'Sprint 10m Best',
    'Modified T-Test (s)': 'Test T Best',
    'Square Test (s)': 'Square Best',
}

outcomes_map = {
    'Ball Velocity': 'Radar Vel Best',
    'Efficiency': 'Target Efficiency',
}

groups = ['Convencional', 'Para-athlete']

print(
    "======================================================================================",
    "             CORRELATION MATRIX: PHYSICAL PREDICTORS VS TECHNICAL PERFORMANCE",
    "             Values reported as Spearman's Rho (ρ)",
    "======================================================================================",
    sep="\n",
)
print(f"{'Predictor':<25} | {'Conv: Vel':<12} | {'Conv: Eff':<12} | {'Para: Vel':<12} | {'Para: Eff':<12}")
print("-" * 86)

for label, col_phys in predictors_map.items():
    row_results = []

    for group_name in groups:
        in_group = df[df['Group'] == group_name]
        for col_tech in outcomes_map.values():
            # Pairwise deletion: drop rows missing either member of this pair.
            pair_df = in_group[[col_phys, col_tech]].dropna()

            if len(pair_df) <= 3:
                row_results.append(f"{'n/a':<8}")
                continue

            rho, p_val = stats.spearmanr(pair_df[col_phys], pair_df[col_tech])
            sig = "*" if p_val < 0.05 else ""
            row_results.append(f"{rho:>6.2f}{sig:<2}")

    # One table row: the predictor label followed by the first four cells.
    print(f"{label:<25} | " + " | ".join(f"{cell:<12}" for cell in row_results[:4]))

print("=" * 86)
print("* p < 0.05")

             CORRELATION MATRIX: PHYSICAL PREDICTORS VS TECHNICAL PERFORMANCE
             Values reported as Spearman's Rho (ρ)
Predictor                 | Conv: Vel    | Conv: Eff    | Para: Vel    | Para: Eff   
--------------------------------------------------------------------------------------
SJ (cm)                   |   0.45       |  -0.28       |  -0.14       |   0.45      
CMJ (cm)                  |   0.45       |  -0.26       |  -0.25       |   0.45      
CMJ Free Arms (cm)        |   0.41       |  -0.26       |   0.13       |   0.28      
Hor. Jump (cm)            |   0.24       |  -0.26       |   0.06       |   0.35      
Handgrip Total (kgf)      |   0.32       |  -0.24       |   0.59*      |   0.34      
RSI 15 cm                 |   0.39       |  -0.06       |   0.24       |   0.10      
RSI 30 cm                 |   0.25       |   0.15       |   0.11       |   0.55      
RSI 45 cm                 |   0.37       |  -0.12       |  -0.15       |   0.28      
5m Sprint 

In [6]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# =============================================================================
# STEP 6: SIMPLE LINEAR REGRESSION MODELS (Updated with F-statistic)
# Version: 9.2 (Clean Output - No Warnings)
# =============================================================================

def load_and_preprocess(file_path):
    """Load the semicolon-separated CSV for the regression analysis.

    Headers are whitespace-normalized, the 'Group' column (when present) is
    kept as stripped text, 'ID' is left untouched, and every other text
    column is converted to numeric, accepting a decimal comma; unparseable
    values become NaN.

    Args:
        file_path: Path or file-like object accepted by ``pd.read_csv``.

    Returns:
        Cleaned DataFrame.
    """
    df = pd.read_csv(file_path, sep=';')
    df.columns = df.columns.str.replace(r'\s+', ' ', regex=True).str.strip()
    if 'Group' in df.columns:
        df['Group'] = df['Group'].astype(str).str.strip()
    for col in df.columns:
        if col in ['Group', 'ID']:
            continue
        column = df[col]
        # Detect text by dtype kind instead of `dtype == 'object'`: pandas >= 3
        # infers a dedicated string dtype for text columns, for which the
        # equality test is False and the conversion would be silently skipped.
        if pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column):
            # Strip padding before parsing, as the other loaders in this notebook do.
            cleaned = column.astype(str).str.strip().str.replace(',', '.', regex=False)
            df[col] = pd.to_numeric(cleaned, errors='coerce')
    return df

# --- EXECUTION ---
df = load_and_preprocess('Data_TableTennis.csv')

# Selected predictors and outcomes: report label -> spreadsheet column
predictors = {
    '10m Sprint (s)': 'Sprint 10m Best',
    'RSI 30 cm': 'Drop 30 RSI Best',
}
outcomes = {
    'Ball Velocity (km/h)': 'Radar Vel Best',
    'Efficiency (%)': 'Target Efficiency',
}
groups = ['Convencional', 'Para-athlete']

print(
    "======================================================================",
    "             SIMPLE LINEAR REGRESSION ANALYSIS",
    "======================================================================",
    sep="\n",
)

for group in groups:
    print(f"\n>>> GROUP: {group.upper()}")
    group_rows = df[df['Group'] == group]

    for out_label, col_tech in outcomes.items():
        print(f"\n--- Predicting {out_label} ---")

        for pred_label, col_phys in predictors.items():
            # Complete cases for this predictor/outcome pair only.
            subset = group_rows[[col_phys, col_tech]].dropna()

            if len(subset) <= 5:
                print(f"  Predictor: {pred_label:<15} | Insufficient data (n={len(subset)})")
                continue

            # Simple regression: outcome ~ intercept + predictor.
            X = sm.add_constant(subset[col_phys])
            y = subset[col_tech]
            model = sm.OLS(y, X).fit()

            # Index 1 is the predictor term; index 0 is the intercept.
            r2 = model.rsquared
            f_val = model.fvalue
            p_val = model.pvalues.iloc[1]
            beta = model.params.iloc[1]

            sig = "SIGNIFICANT" if p_val < 0.05 else "non-significant"

            print(f"  Predictor: {pred_label:<15} | R² = {r2:.3f} | F = {f_val:.2f} | p = {p_val:.4f} | Beta = {beta:.3f} ({sig})")

print("\n" + "="*70)

             SIMPLE LINEAR REGRESSION ANALYSIS

>>> GROUP: CONVENCIONAL

--- Predicting Ball Velocity (km/h) ---
  Predictor: 10m Sprint (s)  | R² = 0.360 | F = 9.57 | p = 0.0066 | Beta = -20.143 (SIGNIFICANT)
  Predictor: RSI 30 cm       | R² = 0.074 | F = 1.37 | p = 0.2584 | Beta = 7.472 (non-significant)

--- Predicting Efficiency (%) ---
  Predictor: 10m Sprint (s)  | R² = 0.002 | F = 0.03 | p = 0.8676 | Beta = 0.081 (non-significant)
  Predictor: RSI 30 cm       | R² = 0.049 | F = 0.87 | p = 0.3647 | Beta = 0.356 (non-significant)

>>> GROUP: PARA-ATHLETE

--- Predicting Ball Velocity (km/h) ---
  Predictor: 10m Sprint (s)  | R² = 0.007 | F = 0.08 | p = 0.7847 | Beta = -1.495 (non-significant)
  Predictor: RSI 30 cm       | R² = 0.061 | F = 0.58 | p = 0.4652 | Beta = 18.221 (non-significant)

--- Predicting Efficiency (%) ---
  Predictor: 10m Sprint (s)  | R² = 0.092 | F = 1.11 | p = 0.3142 | Beta = -0.108 (non-significant)
  Predictor: RSI 30 cm       | R² = 0.264 | F = 3.23 | p 

In [2]:
import numpy as np
from statsmodels.stats.power import TTestIndPower
from scipy.stats import f, ncf

# =============================================================================
# STATISTICAL POWER AND SENSITIVITY ANALYSIS
# Version: 1.0 (G*Power Logic Adaptation)
# This script calculates post-hoc power for T-Tests and Linear Regression models
# based on the observed effect sizes and sample sizes from the study.
# =============================================================================

def calculate_regression_power(n, r2, predictors, alpha=0.05):
    """
    Calculates post-hoc power for Linear Regression using the F-test distribution.
    Follows the G*Power convention for 'Linear multiple regression: Fixed model,
    R2 deviation from zero' (non-centrality parameter lambda = f2 * n).

    Parameters:
    - n: Total sample size
    - r2: Observed R-squared
    - predictors: Number of predictors (k)
    - alpha: Significance level (default 0.05)

    Returns:
    - power: Statistical power (1 - beta)
    - f2: Cohen's f2 effect size

    Raises:
    - ValueError: if the degrees of freedom are not positive (predictors < 1
      or n <= predictors + 1), if alpha is not strictly between 0 and 1, or
      if r2 is negative. These inputs previously produced a silent NaN.
    """
    df_num = predictors
    df_denom = n - predictors - 1

    if df_num < 1 or df_denom < 1:
        raise ValueError(
            f"Need predictors >= 1 and n > predictors + 1 (got n={n}, predictors={predictors})"
        )
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1 (got {alpha})")
    if r2 < 0:
        raise ValueError(f"r2 must be non-negative (got {r2})")

    # A perfect fit has an unbounded effect size; power is 1 by definition.
    if r2 >= 1.0:
        return 1.0, float('inf')

    # Cohen's f2 from R2
    f2 = r2 / (1 - r2)

    # Non-centrality parameter (lambda); G*Power definition: lambda = f2 * n
    ncp = f2 * n

    # With no effect at all, the rejection probability equals alpha.
    if ncp == 0:
        return float(alpha), f2

    # Critical F value under the null hypothesis
    f_crit = f.ppf(1 - alpha, df_num, df_denom)

    # Power = upper-tail probability of the non-central F at the critical
    # value; the survival function avoids the cancellation in 1 - cdf.
    power_val = ncf.sf(f_crit, df_num, df_denom, ncp)

    return power_val, f2

# --- EXECUTION PARAMETERS ---

# 1. Independent Samples T-Test (Group Comparison)
n_conv = 19
n_para = 11
effect_size_d = 1.11  # Observed Cohen's d for RSI 30cm
alpha = 0.05
ratio = n_para / n_conv  # nobs2 / nobs1, with the conventional group as nobs1

# 2. Regression Models (Observed R2)
r2_conv_sprint = 0.360  # 10m Sprint -> Ball Vel (Conventional)
r2_para_rsi    = 0.264  # RSI 30cm -> Efficiency (Para)

# Power threshold used for the sensitivity analysis and the qualitative labels.
target_power = 0.80

# --- CALCULATIONS ---

# A. T-Test Power
ttest_analysis = TTestIndPower()
power_ttest = ttest_analysis.solve_power(
    effect_size=effect_size_d,
    nobs1=n_conv,
    ratio=ratio,
    alpha=alpha,
    alternative='two-sided'
)

# B. Sensitivity Analysis (Min Effect for 80% Power)
min_effect_80 = ttest_analysis.solve_power(
    effect_size=None,
    nobs1=n_conv,
    ratio=ratio,
    alpha=alpha,
    power=target_power,
    alternative='two-sided'
)

# C. Regression Power
power_reg_conv, f2_conv = calculate_regression_power(n_conv, r2_conv_sprint, predictors=1)
power_reg_para, f2_para = calculate_regression_power(n_para, r2_para_rsi, predictors=1)

# Qualitative labels are derived from the computed power, so the report cannot
# claim "Robust" for a model that falls below the target (or the reverse)
# when the inputs above are edited.
label_reg_conv = "Robust" if power_reg_conv >= target_power else "Low Sensitivity"
label_reg_para = "Robust" if power_reg_para >= target_power else "Low Sensitivity"

# --- REPORT GENERATION ---

print("======================================================================")
print("             POST-HOC STATISTICAL POWER ANALYSIS REPORT")
print("======================================================================")
print(f"Global Alpha Level: {alpha}")
print("-" * 70)

print("\n>>> 1. INDEPENDENT SAMPLES T-TEST (RSI 30cm Comparison)")
print(f"  Sample Sizes        : Conv n={n_conv} | Para n={n_para} (Ratio={ratio:.2f})")
print(f"  Observed Effect (d) : {effect_size_d}")
print(f"  ACHIEVED POWER      : {power_ttest:.1%}")
print(f"  * Sensitivity Check : Min effect size for 80% power was d={min_effect_80:.2f}")

print("\n>>> 2. LINEAR REGRESSION MODELS (F-Test)")
print("  [Model A] Conventional: 10m Sprint -> Ball Velocity")
print(f"    Sample Size (n)   : {n_conv}")
print(f"    Observed R²       : {r2_conv_sprint}")
print(f"    Effect Size (f²)  : {f2_conv:.3f}")
print(f"    ACHIEVED POWER    : {power_reg_conv:.1%} ({label_reg_conv})")

print("\n  [Model B] Para-athlete: RSI 30cm -> Technical Efficiency")
print(f"    Sample Size (n)   : {n_para}")
print(f"    Observed R²       : {r2_para_rsi}")
print(f"    Effect Size (f²)  : {f2_para:.3f}")
print(f"    ACHIEVED POWER    : {power_reg_para:.1%} ({label_reg_para})")

print("======================================================================")

             POST-HOC STATISTICAL POWER ANALYSIS REPORT
Global Alpha Level: 0.05
----------------------------------------------------------------------

>>> 1. INDEPENDENT SAMPLES T-TEST (RSI 30cm Comparison)
  Sample Sizes        : Conv n=19 | Para n=11 (Ratio=0.58)
  Observed Effect (d) : 1.11
  ACHIEVED POWER      : 80.7%
  * Sensitivity Check : Min effect size for 80% power was d=1.10

>>> 2. LINEAR REGRESSION MODELS (F-Test)
  [Model A] Conventional: 10m Sprint -> Ball Velocity
    Sample Size (n)   : 19
    Observed R²       : 0.36
    Effect Size (f²)  : 0.562
    ACHIEVED POWER    : 86.9% (Robust)

  [Model B] Para-athlete: RSI 30cm -> Technical Efficiency
    Sample Size (n)   : 11
    Observed R²       : 0.264
    Effect Size (f²)  : 0.359
    ACHIEVED POWER    : 42.7% (Low Sensitivity)
