In [6]:
import os
import pandas as pd
from scipy.stats import wilcoxon

In [2]:
def get_data(direct_file_path=''):
    """Load the exercise-in-psychiatry CSV and return a cleaned DataFrame.

    Parameters
    ----------
    direct_file_path : str, optional
        Path of the ';'-separated CSV file. When empty (the default), the file
        'DATASET_ExerciseInPsychiatry.csv' is read from the current working
        directory.

    Returns
    -------
    pandas.DataFrame
        Rows 1..40 of the parsed file (the original row labels are kept), with:
        * column names taken from the first parsed row,
        * decimal commas turned into dots in SDS, BMI, VO2MAX and their
          '2' counterparts,
        * 'Group' codes '1' / '2' relabelled as 'control' / 'exercise',
        * every column from the 4th up to (but excluding) the last converted
          with ``pd.to_numeric(errors='coerce')``,
        * one extra 'r: <post>/<pre>' ratio column per pre/post pair.
    """
    if direct_file_path:
        filepath = direct_file_path
    else:
        # Default location: the dataset file inside the current working directory.
        filepath = os.path.join(os.getcwd(), 'DATASET_ExerciseInPsychiatry.csv')

    # pandas opens and closes the file itself; no separate open() is needed.
    data = pd.read_csv(filepath, sep=';')

    # The first parsed row carries the real column names.
    data.columns = data.iloc[0]

    # Keep only the raw data rows. The explicit copy makes the column
    # assignments below act on an independent frame rather than on a slice
    # of the parsed one.
    data = data.iloc[1:40 + 1].copy()

    # Replace ',' with '.' in the SDS, BMI, VO2MAX columns and their '2'
    # counterparts. The result is assigned back to the frame: an in-place
    # replace on a column pulled out of the frame is chained assignment and
    # does not reliably update the frame.
    for column in ('SDS', 'SDS2', 'BMI', 'BMI2', 'VO2MAX', 'VO2MAX2'):
        data[column] = data[column].replace(to_replace=',', value='.', regex=True)

    # Replace group numbers with labels.
    data['Group'] = data['Group'].replace('1', 'control').replace('2', 'exercise')

    # Headers to be converted to numeric values: 4th column up to, but not
    # including, the last one.
    numeric_headers = data.columns.values[3:-1]
    for header in numeric_headers:
        # Anything that cannot be parsed as a number becomes NaN.
        data[header] = pd.to_numeric(data[header], errors='coerce')

    # Create new features: post/pre ratio for every pre/post measure pair.
    # Positions 1..11 of the numeric headers are paired, in order, with
    # positions 12 onwards.
    num_headers_pre = numeric_headers[1:11 + 1]
    num_headers_post = numeric_headers[12:]
    for pre, post in zip(num_headers_pre, num_headers_post):
        data['r: ' + str(post) + '/' + str(pre)] = data[post] / data[pre]

    return data

In [13]:
def calculate_wilcoxon(df, param1, param2, title='Comparison of Wilcoxon values'):
    """Print Wilcoxon signed-rank results for the control and exercise rows.

    For each group the `param1` and `param2` columns are selected by row
    label with ``df.loc`` (labels 1..20 for control, 21..40 for exercise,
    both ends included), coerced to numeric (unparseable entries become NaN)
    and compared with ``scipy.stats.wilcoxon`` using
    ``zero_method="wilcox"`` and ``nan_policy='omit'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose row labels cover the two label ranges above.
    param1, param2 : column labels
        The two paired columns to compare.
    title : str, optional
        Accepted but not used by this function.

    Returns
    -------
    None
        The test statistic and p-value of each group are printed.
    """
    # (row labels, caption for the statistic, caption for the p-value)
    group_specs = (
        (slice(1, 20),
         "Test Statistic for control group:",
         "P-value for control group:"),
        (slice(21, 40),
         "Test Statistic for exercise group:",
         "P-value for exercise group:"),
    )

    for rows, statistic_caption, p_value_caption in group_specs:
        # Pull both columns for this group before converting anything.
        first_raw = df.loc[rows, param1]
        second_raw = df.loc[rows, param2]

        # Non-numeric placeholders turn into NaN so the test can omit them.
        first = pd.to_numeric(first_raw, errors='coerce')
        second = pd.to_numeric(second_raw, errors='coerce')

        # Paired signed-rank test on the two measurements.
        statistic, p_value = wilcoxon(
            first, second, zero_method="wilcox", nan_policy='omit'
        )

        print(statistic_caption, statistic)
        print(p_value_caption, p_value)