In [1]:
# Importing required libraries 
import pandas as pd
import numpy as np
from scipy.stats import mstats, mannwhitneyu, chi2_contingency, spearmanr, kendalltau

In [2]:
# Importing data
data = pd.read_csv('base_data.csv')
data.head(5)

Unnamed: 0,Name,Gender,CompSports_Experience,Sports,TrainingHoursPerweek,Thour_Bins,Education,Disability_pct,Socioeconomic_Status,Socioeconomic_Status_Ord,School_Type,Self_Esteem,Quality_of_Life,Achievement_Motivation_PerceivedCompetence,BodyImage_Autonomy,Empowerment,Family_Peer_Influence,Social_Inclusion,Barriers
0,Goutam Dey,M,2,2,6.0,2,3,100,Low,1,1,6,2,5,4,6,3,4,32
1,Saheb Hussain,M,2,1,42.0,3,1,75,Low,1,2,6,3,5,4,6,3,4,30
2,Sandhya Mondol,F,2,1,5.0,1,3,65,Low,1,2,6,2,5,4,5,3,4,30
3,Ujjwal Ghosh,M,2,1,7.0,2,2,50,Low,1,2,6,2,5,4,6,3,4,33
4,Tashkura Khatoon,F,1,1,4.0,1,1,80,Low,1,1,6,2,5,3,4,3,4,32


In [3]:
# Getting the distinct values of all variables
Var_List = list(data.columns.values)
for var in Var_List:
    print(var)
    print(list(set(data[var])))

Name
['Pratima Ghosh', 'Mantu Das', 'Santana Ghosh', 'Atar Ali', 'Sanjay Das', 'Indrajit Pal', 'Harilal Tudu', 'Sourav Purakait', 'Joydev Mondal', 'Tashkura Khatoon', 'Rubia Chatterjee Das', 'Md. Sukur Ali Mondal', 'Goutam Dey', 'Asutosh kumar', 'Saheb Hussain', 'Aloke Mondal', 'Papia Khatoon', 'Md. Kutubuddin Mondal', 'Sandhya Mondol', 'Pinky Khatoon', 'Arabinda Patra*', 'Majuri Bisoi', 'Md Rafique', 'Sukanta Mondal', 'Tarak Ram', 'Rita Ghosh', 'Ujjwal Ghosh', 'Sufia Molla', 'Priyobrata Sadhu', 'Mani Hussain Singh', 'Ajibur Rahaman Molla']
Gender
['M', 'F']
CompSports_Experience
[1, 2, 3]
Sports
[1, 2, 3]
TrainingHoursPerweek
[3.0, 4.0, 5.0, 6.0, 7.0, 36.0, 9.0, 42.0, 11.0, 8.0, 14.0, 15.0, 3.5, 21.0, 24.0, 28.0]
Thour_Bins
[1, 2, 3]
Education
[1, 2, 3]
Disability_pct
[65, 100, 70, 40, 42, 75, 80, 50, 55, 90, 60]
Socioeconomic_Status
['High', 'Medium', 'Low']
Socioeconomic_Status_Ord
[1, 2, 3]
School_Type
[1, 2]
Self_Esteem
[0, 1, 2, 3, 4, 5, 6]
Quality_of_Life
[0, 1, 2, 3]
Achievemen

In [4]:
# Missing Value Summary
print('Number of rows with missing values:', data.isnull().sum(axis=1).sum())

Number of rows with missing values: 0


In [5]:
# Seggregating down different variables in different groups
# Note: TrainingHoursPerweek has been marked both as Independent and Dependent
Independent_nom_var = ["Gender", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status", "School_Type", "Thour_Bins"]
Independent_int_var = ["TrainingHoursPerweek", "Disability_pct"]
Dependent_var = ["TrainingHoursPerweek", "Self_Esteem", "Quality_of_Life", "Achievement_Motivation_PerceivedCompetence", "BodyImage_Autonomy", "Empowerment", "Family_Peer_Influence", "Social_Inclusion", "Barriers"]

## Mann-Whitney U Test
#### The Mann-Whitney U test is the non-parametric alternative test to the independent sample t-test.  
#### It is a non-parametric test that is used to compare two sample means that come from the same population, and used to test whether two sample means are equal or not.
#### Usually, the Mann-Whitney U test is used when the data is ordinal or when the assumptions of the t-test are not met.

## Kruskal Wallis H test
#### The Kruskal-Wallis test is a nonparametric test, and is used when the assumptions of one-way ANOVA are not met.
#### Both the Kruskal-Wallis test and one-way ANOVA assess for significant differences on a continuous dependent variable by a categorical independent variable (with two or more groups).
#### The Kruskal-Wallis test can be used for both continuous and ordinal-level dependent variables.

### Null Hypothesis and Alternative Hypothesis
#### Null hypothesis: Null hypothesis assumes that the samples (groups) are from identical populations.
#### Alternative hypothesis: Alternative hypothesis assumes that at least one of the samples (groups) comes from a different population than the others.

In [6]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: Gender
for dep in Dependent_var:
    M_data = np.array(data[data["Gender"]=='M'][dep])
    F_data = np.array(data[data["Gender"]=='F'][dep])

    stat, pval = mannwhitneyu(M_data, F_data, use_continuity=True, alternative=None)
    print('Dependent: ', dep)
    print('Independent: M & F')
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(M_data, F_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")


Dependent:  TrainingHoursPerweek
Independent: M & F
mannwhitneyu: 61.5
P-Value: 0.034181853194364016
*****************************REJECT*******************************
 
kruskalwallis: 61.5
P-Value: 0.06521302840643113
ACCEPT
 
Dependent:  Self_Esteem
Independent: M & F
mannwhitneyu: 105.0
P-Value: 0.49140973339929744
ACCEPT
 
kruskalwallis: 105.0
P-Value: 1.0
ACCEPT
 
Dependent:  Quality_of_Life
Independent: M & F
mannwhitneyu: 82.0
P-Value: 0.15932984527395977
ACCEPT
 
kruskalwallis: 82.0
P-Value: 0.3080243411243011
ACCEPT
 
Dependent:  Achievement_Motivation_PerceivedCompetence
Independent: M & F
mannwhitneyu: 96.0
P-Value: 0.3547920106464564
ACCEPT
 
kruskalwallis: 96.0
P-Value: 0.6933436369653692
ACCEPT
 
Dependent:  BodyImage_Autonomy
Independent: M & F
mannwhitneyu: 98.5
P-Value: 0.3937825960338971
ACCEPT
 
kruskalwallis: 98.5
P-Value: 0.7703402813121609
ACCEPT
 
Dependent:  Empowerment
Independent: M & F
mannwhitneyu: 85.5
P-Value: 0.2020909189834863
ACCEPT
 
kruskalwallis: 85.

### Findings:
#### Gender affects Training Hours per Week
#### It does not affect any other dependent variables

In [7]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: CompSports_Experience
for dep in Dependent_var:
    Exp1_data = np.array(data[data["CompSports_Experience"]==1][dep])
    Exp2_data = np.array(data[data["CompSports_Experience"]==2][dep])
    Exp3_data = np.array(data[data["CompSports_Experience"]==3][dep])

    print('Dependent: ', dep)
    print('Independent: Exp1 & Exp2')

    stat, pval = mannwhitneyu(Exp1_data, Exp2_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Exp1_data, Exp2_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Exp1 & Exp3')

    stat, pval = mannwhitneyu(Exp1_data, Exp3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Exp1_data, Exp3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Exp2 & Exp3')

    stat, pval = mannwhitneyu(Exp2_data, Exp3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(Exp2_data, Exp3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")


Dependent:  TrainingHoursPerweek
Independent: Exp1 & Exp2
mannwhitneyu: 45.0
P-Value: 0.4850877950185951
ACCEPT
 
kruskalwallis: 45.0
P-Value: 1.0
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Exp1 & Exp3
mannwhitneyu: 15.0
P-Value: 0.25327164811004454
ACCEPT
 
kruskalwallis: 15.0
P-Value: 0.46049438439732326
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Exp2 & Exp3
mannwhitneyu: 61.5
P-Value: 0.2886377517771679
ACCEPT
 
kruskalwallis: 61.5
P-Value: 0.5583882345192112
ACCEPT
 
Dependent:  Self_Esteem
Independent: Exp1 & Exp2
mannwhitneyu: 35.5
P-Value: 0.24571614570383715
ACCEPT
 
kruskalwallis: 35.5
P-Value: 0.4676814994741296
ACCEPT
 
Dependent:  Self_Esteem
Independent: Exp1 & Exp3
mannwhitneyu: 17.5
P-Value: 0.383280439551282
ACCEPT
 
kruskalwallis: 17.5
P-Value: 0.7105664083336254
ACCEPT
 
Dependent:  Self_Esteem
Independent: Exp2 & Exp3
mannwhitneyu: 70.0
P-Value: 0.46599517163804377
ACCEPT
 
kruskalwallis: 70.0
P-Value: 0.9094059127845183
ACCEPT
 
Dependent:  Q

### Findings:
#### Experience impacts Quality of Life; Empowerment; Family Peer Influence; and Barriers

In [8]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: Sports
for dep in Dependent_var:
    Sports1_data = np.array(data[data["Sports"]==1][dep])
    Sports2_data = np.array(data[data["Sports"]==2][dep])
    Sports3_data = np.array(data[data["Sports"]==3][dep])

    print('Dependent: ', dep)
    print('Independent: Sports1 & Sports2')

    stat, pval = mannwhitneyu(Sports1_data, Sports2_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Sports1_data, Sports2_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Sports1 & Sports3')

    stat, pval = mannwhitneyu(Sports1_data, Sports3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Sports1_data, Sports3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Sports2 & Sports3')

    stat, pval = mannwhitneyu(Sports2_data, Sports3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(Sports2_data, Sports3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")


Dependent:  TrainingHoursPerweek
Independent: Sports1 & Sports2
mannwhitneyu: 85.0
P-Value: 0.4507814406498258
ACCEPT
 
kruskalwallis: 85.0
P-Value: 0.8820075772456184
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Sports1 & Sports3
mannwhitneyu: 27.5
P-Value: 0.3520910379930105
ACCEPT
 
kruskalwallis: 27.5
P-Value: 0.6692763753682653
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Sports2 & Sports3
mannwhitneyu: 19.0
P-Value: 0.37060501018687975
ACCEPT
 
kruskalwallis: 19.0
P-Value: 0.6918824980449587
ACCEPT
 
Dependent:  Self_Esteem
Independent: Sports1 & Sports2
mannwhitneyu: 56.5
P-Value: 0.05922978601551799
ACCEPT
 
kruskalwallis: 56.5
P-Value: 0.11263638821595624
ACCEPT
 
Dependent:  Self_Esteem
Independent: Sports1 & Sports3
mannwhitneyu: 27.0
P-Value: 0.33097992487728656
ACCEPT
 
kruskalwallis: 27.0
P-Value: 0.627117713001798
ACCEPT
 
Dependent:  Self_Esteem
Independent: Sports2 & Sports3
mannwhitneyu: 10.5
P-Value: 0.07087405708173071
ACCEPT
 
kruskalwallis: 10.

### Findings:
#### Sports type affects Quality of Life; and Barriers; 

In [9]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: Education
for dep in Dependent_var:
    Education1_data = np.array(data[data["Education"]==1][dep])
    Education2_data = np.array(data[data["Education"]==2][dep])
    Education3_data = np.array(data[data["Education"]==3][dep])

    print('Dependent: ', dep)
    print('Independent: Education1 & Education2')

    stat, pval = mannwhitneyu(Education1_data, Education2_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Education1_data, Education2_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Education1 & Education3')

    stat, pval = mannwhitneyu(Education1_data, Education3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Education1_data, Education3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Education2 & Education3')

    stat, pval = mannwhitneyu(Education2_data, Education3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(Education2_data, Education3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")


Dependent:  TrainingHoursPerweek
Independent: Education1 & Education2
mannwhitneyu: 49.5
P-Value: 0.2725241555002342
ACCEPT
 
kruskalwallis: 49.5
P-Value: 0.5240928269263511
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Education1 & Education3
mannwhitneyu: 48.0
P-Value: 0.24163253260201523
ACCEPT
 
kruskalwallis: 48.0
P-Value: 0.4636028248984493
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Education2 & Education3
mannwhitneyu: 24.0
P-Value: 0.5
ACCEPT
 
kruskalwallis: 24.0
P-Value: 0.9486064859860214
ACCEPT
 
Dependent:  Self_Esteem
Independent: Education1 & Education2
mannwhitneyu: 24.5
P-Value: 0.012915431681665063
*****************************REJECT*******************************
 
kruskalwallis: 24.5
P-Value: 0.02375641805671798
*****************************REJECT*******************************
 
Dependent:  Self_Esteem
Independent: Education1 & Education3
mannwhitneyu: 44.0
P-Value: 0.16561341353012504
ACCEPT
 
kruskalwallis: 44.0
P-Value: 0.3153622667498417
AC

### Findings:
#### Education affects Self Esteem; and Empowerment

In [10]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: Socioeconomic_Status
for dep in Dependent_var:
    Socioeconomic_StatusLOW_data = np.array(data[data["Socioeconomic_Status"]=="Low"][dep])
    Socioeconomic_StatusMEDIUM_data = np.array(data[data["Socioeconomic_Status"]=="Medium"][dep])
    Socioeconomic_StatusHIGH_data = np.array(data[data["Socioeconomic_Status"]=="High"][dep])

    print('Dependent: ', dep)
    print('Independent: Socioeconomic_StatusLOW & Socioeconomic_StatusMEDIUM')

    stat, pval = mannwhitneyu(Socioeconomic_StatusLOW_data, Socioeconomic_StatusMEDIUM_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Socioeconomic_StatusLOW_data, Socioeconomic_StatusMEDIUM_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Socioeconomic_StatusLOW & Socioeconomic_StatusHIGH')

    stat, pval = mannwhitneyu(Socioeconomic_StatusLOW_data, Socioeconomic_StatusHIGH_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Socioeconomic_StatusLOW_data, Socioeconomic_StatusHIGH_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Socioeconomic_StatusMEDIUM & Socioeconomic_StatusHIGH')

    stat, pval = mannwhitneyu(Socioeconomic_StatusMEDIUM_data, Socioeconomic_StatusHIGH_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(Socioeconomic_StatusMEDIUM_data, Socioeconomic_StatusHIGH_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")


Dependent:  TrainingHoursPerweek
Independent: Socioeconomic_StatusLOW & Socioeconomic_StatusMEDIUM
mannwhitneyu: 32.5
P-Value: 0.3685915595575724
ACCEPT
 
kruskalwallis: 32.5
P-Value: 0.7092427598427863
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Socioeconomic_StatusLOW & Socioeconomic_StatusHIGH
mannwhitneyu: 22.0
P-Value: 0.13194718865129623
ACCEPT
 
kruskalwallis: 22.0
P-Value: 0.2483055173438374
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: Socioeconomic_StatusMEDIUM & Socioeconomic_StatusHIGH
mannwhitneyu: 1.5
P-Value: 0.13414294183855868
ACCEPT
 
kruskalwallis: 1.5
P-Value: 0.18403862719642194
ACCEPT
 
Dependent:  Self_Esteem
Independent: Socioeconomic_StatusLOW & Socioeconomic_StatusMEDIUM
mannwhitneyu: 36.5
P-Value: 0.4848279638956057
ACCEPT
 
kruskalwallis: 36.5
P-Value: 0.9393557272773577
ACCEPT
 
Dependent:  Self_Esteem
Independent: Socioeconomic_StatusLOW & Socioeconomic_StatusHIGH
mannwhitneyu: 26.5
P-Value: 0.21367765694890384
ACCEPT
 
kruskalwallis: 2

### Findings:
#### Socioeconomic Status affects Quality of Life; and Body Image and Autonomy

In [11]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: School_Type
for dep in Dependent_var:
    School1_data = np.array(data[data["School_Type"]==1][dep])
    School2_data = np.array(data[data["School_Type"]==2][dep])

    stat, pval = mannwhitneyu(School1_data, School2_data, use_continuity=True, alternative=None)
    print('Dependent: ', dep)
    print('Independent: School Type 1 & 2')
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(School1_data, School2_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")


Dependent:  TrainingHoursPerweek
Independent: School Type 1 & 2
mannwhitneyu: 47.0
P-Value: 0.002629880168198153
*****************************REJECT*******************************
 
kruskalwallis: 47.0
P-Value: 0.004942527479674767
*****************************REJECT*******************************
 
Dependent:  Self_Esteem
Independent: School Type 1 & 2
mannwhitneyu: 107.5
P-Value: 0.35673438602911944
ACCEPT
 
kruskalwallis: 107.5
P-Value: 0.6983109510077168
ACCEPT
 
Dependent:  Quality_of_Life
Independent: School Type 1 & 2
mannwhitneyu: 115.0
P-Value: 0.4748913867633021
ACCEPT
 
kruskalwallis: 115.0
P-Value: 0.9330780905963729
ACCEPT
 
Dependent:  Achievement_Motivation_PerceivedCompetence
Independent: School Type 1 & 2
mannwhitneyu: 108.0
P-Value: 0.36211916217501394
ACCEPT
 
kruskalwallis: 108.0
P-Value: 0.708736888271257
ACCEPT
 
Dependent:  BodyImage_Autonomy
Independent: School Type 1 & 2
mannwhitneyu: 107.0
P-Value: 0.3430348385056349
ACCEPT
 
kruskalwallis: 107.0
P-Value: 0.67

### Findings:
#### School Type affects Training Hours per week only

In [12]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: Training Hour
for dep in Dependent_var:
    Thour1_data = np.array(data[data["Thour_Bins"]==1][dep])
    Thour2_data = np.array(data[data["Thour_Bins"]==2][dep])
    Thour3_data = np.array(data[data["Thour_Bins"]==3][dep])

    print('Dependent: ', dep)
    print('Independent: Thour1 & Thour2')

    stat, pval = mannwhitneyu(Thour1_data, Thour2_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Thour1_data, Thour2_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Thour1 & Thour3')

    stat, pval = mannwhitneyu(Thour1_data, Thour3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")

    H, kwpval = mstats.kruskalwallis(Thour1_data, Thour3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    print('Dependent: ', dep)
    print('Independent: Thour2 & Thour3')

    stat, pval = mannwhitneyu(Thour2_data, Thour3_data, use_continuity=True, alternative=None)
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(Thour2_data, Thour3_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")


Dependent:  TrainingHoursPerweek
Independent: Thour1 & Thour2
mannwhitneyu: 0.0
P-Value: 0.00012491271473813085
*****************************REJECT*******************************
 
kruskalwallis: 0.0
P-Value: 0.00021256987033375528
*****************************REJECT*******************************
 
Dependent:  TrainingHoursPerweek
Independent: Thour1 & Thour3
mannwhitneyu: 0.0
P-Value: 3.904771959697366e-05
*****************************REJECT*******************************
 
kruskalwallis: 0.0
P-Value: 6.794612564495497e-05
*****************************REJECT*******************************
 
Dependent:  TrainingHoursPerweek
Independent: Thour2 & Thour3
mannwhitneyu: 0.0
P-Value: 6.551991316787982e-05
*****************************REJECT*******************************
 
kruskalwallis: 0.0
P-Value: 0.00011327615658809215
*****************************REJECT*******************************
 
Dependent:  Self_Esteem
Independent: Thour1 & Thour2
mannwhitneyu: 39.5
P-Value: 0.33637431568595455

In [13]:
# Mann-Whitney U Test and Kruskal Wallis H test
# Independent Variable: School_Type
Dependent_var = ["TrainingHoursPerweek"]
for dep in Dependent_var:
    S1_M_data = np.array(data[(data["School_Type"]==1) & (data["Gender"]=='M')][dep])
    S2_M_data = np.array(data[(data["School_Type"]==2) & (data["Gender"]=='M')][dep])
    S1_F_data = np.array(data[(data["School_Type"]==1) & (data["Gender"]=='F')][dep])
    S2_F_data = np.array(data[(data["School_Type"]==2) & (data["Gender"]=='F')][dep])

    stat, pval = mannwhitneyu(S1_M_data, S1_F_data, use_continuity=True, alternative=None)
    print('Dependent: ', dep)
    print('Independent: School Type 1 among Male vs Female')
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(S1_M_data, S1_F_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")
    
    stat, pval = mannwhitneyu(S2_M_data, S2_F_data, use_continuity=True, alternative=None)
    print('Dependent: ', dep)
    print('Independent: School Type 2 among Male vs Female')
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(S2_M_data, S2_F_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

    stat, pval = mannwhitneyu(S1_M_data, S2_M_data, use_continuity=True, alternative=None)
    print('Dependent: ', dep)
    print('Independent: School Type 1 vs School Type 2 for Male')
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(S1_M_data, S2_M_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")
    
    stat, pval = mannwhitneyu(S1_F_data, S2_F_data, use_continuity=True, alternative=None)
    print('Dependent: ', dep)
    print('Independent: School Type 1 vs School Type 2 for Female')
    print("mannwhitneyu:", stat)
    print("P-Value:", pval)
    if pval < 0.05:
        print("*****************************REJECT*******************************")
    if pval > 0.05:
        print("ACCEPT")
    print(" ")
    
    H, kwpval = mstats.kruskalwallis(S1_F_data, S2_F_data)
    print("kruskalwallis:", stat)
    print("P-Value:", kwpval)
    if kwpval < 0.05:
        print("*****************************REJECT*******************************")
    if kwpval > 0.05:
        print("ACCEPT")
    print(" ")

Dependent:  TrainingHoursPerweek
Independent: School Type 1 among Male vs Female
mannwhitneyu: 7.5
P-Value: 0.02983724322102582
*****************************REJECT*******************************
 
kruskalwallis: 7.5
P-Value: 0.050509765651485496
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: School Type 2 among Male vs Female
mannwhitneyu: 15.5
P-Value: 0.2201615108971412
ACCEPT
 
kruskalwallis: 15.5
P-Value: 0.40597058785452866
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: School Type 1 vs School Type 2 for Male
mannwhitneyu: 29.5
P-Value: 0.12045633057167365
ACCEPT
 
kruskalwallis: 29.5
P-Value: 0.2255893545217041
ACCEPT
 
Dependent:  TrainingHoursPerweek
Independent: School Type 1 vs School Type 2 for Female
mannwhitneyu: 2.0
P-Value: 0.03203875322552976
*****************************REJECT*******************************
 
kruskalwallis: 2.0
P-Value: 0.049140833911850854
*****************************REJECT*******************************
 


## Chi-Square test of independence
#### The Chi-Square test of independence is used to determine if there is a significant relationship between two nominal (categorical) variables.
#### The frequency of each category for one nominal variable is compared across the categories of the second nominal variable.  
#### The data can be displayed in a contingency table where each row represents a category for one variable and each column represents a category for the other variable.

### Null Hypothesis and Alternate Hypothesis:
#### Null hypothesis: Assumes that there is no association between the two variables.
#### Alternative hypothesis: Assumes that there is an association between the two variables.

### Disadvantage:
#### We have too small data, and we need a non-parametric test for robust results

In [13]:
# Chi Square Test of independence
for indep in Independent_nom_var:
    for dep in Dependent_var:
        contingency = pd.crosstab(data[indep], data[dep])
        c, pval, dof, expected = chi2_contingency(contingency)
        print("Var1:", indep, "Var2:", dep)
        #print(contingency)
        print("chisquare-statistic:", c)
        print("P-Value:", pval)
        if pval < 0.05:
            print("*****************************REJECT*******************************")
        if pval > 0.05:
            print("ACCEPT")
        print(" ")

Var1: Gender Var2: TrainingHoursPerweek
chisquare-statistic: 14.983333333333333
P-Value: 0.45261841380142676
ACCEPT
 
Var1: Gender Var2: Self_Esteem
chisquare-statistic: 3.7444274376417233
P-Value: 0.7112158328333938
ACCEPT
 
Var1: Gender Var2: Quality_of_Life
chisquare-statistic: 2.525925925925926
P-Value: 0.47062390334283466
ACCEPT
 
Var1: Gender Var2: Achievement_Motivation_PerceivedCompetence
chisquare-statistic: 0.36131519274376417
P-Value: 0.9855198327270881
ACCEPT
 
Var1: Gender Var2: BodyImage_Autonomy
chisquare-statistic: 1.107670068027211
P-Value: 0.7752228287958638
ACCEPT
 
Var1: Gender Var2: Empowerment
chisquare-statistic: 3.0667484367484374
P-Value: 0.6896957994148502
ACCEPT
 
Var1: Gender Var2: Family_Peer_Influence
chisquare-statistic: 4.213107263107264
P-Value: 0.12165651796251392
ACCEPT
 
Var1: Gender Var2: Social_Inclusion
chisquare-statistic: 6.246969696969698
P-Value: 0.100193707810732
ACCEPT
 
Var1: Gender Var2: Barriers
chisquare-statistic: 9.644444444444446
P-Va

### Findings:
#### Because of the small size of the data, the test could not detect any association
#### However, it could detect an association between Sports and Social Inclusion

## Spearman Rank correlation
#### The Spearman correlation is a nonparametric measure of the monotonicity of the relationship between two datasets.
#### Unlike the Pearson correlation, the Spearman correlation does not assume that both datasets are normally distributed.
#### Like other correlation coefficients, this one varies between -1 and +1 with 0 implying no correlation. 

## Kendall's Tau 
#### Kendall’s tau is a measure of the correspondence between two rankings.

### Null Hypothesis and Alternate Hypothesis:
#### Null hypothesis: Assumes that there is no association between the two variables.
#### Alternative hypothesis: Assumes that there is an association between the two variables.

In [38]:
Independent_nom_var = ["Gender", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status"]
Independent_int_var = ["TrainingHoursPerweek", "Disability_pct", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status_Ord", "Thour_Bins"]
Dependent_var = ["Self_Esteem", "Quality_of_Life", "Achievement_Motivation_PerceivedCompetence", "BodyImage_Autonomy", "Empowerment", "Family_Peer_Influence", "Social_Inclusion"]
for indep in Independent_int_var:
    for dep in Dependent_var:
        print("Independent Var:", indep, "Dependent Var:", dep)
        subdf = data[[indep, dep]]
        ind = np.array(data[indep])
        d = np.array(data[dep])    
        rho, sprmanpval = spearmanr(subdf)
        print("Spearman Rho-statistic:", rho)
        print("P-Value:", sprmanpval)
        if sprmanpval < 0.05:
            print("*****************************REJECT*******************************")
        if sprmanpval > 0.05:
            print("ACCEPT")
        print(" ")
        
        tau, kendallpval = kendalltau(ind, d)
        print("Kendall Tau-statistic:", tau)
        print("P-Value:", kendallpval)
        if kendallpval < 0.05:
            print("*****************************REJECT*******************************")
        if kendallpval > 0.05:
            print("ACCEPT")
        print(" ")

Independent Var: TrainingHoursPerweek Dependent Var: Self_Esteem
Spearman Rho-statistic: -0.07584687135675594
P-Value: 0.6850890454569704
ACCEPT
 
Kendall Tau-statistic: -0.04079472639863475
P-Value: 0.766947526337236
ACCEPT
 
Independent Var: TrainingHoursPerweek Dependent Var: Quality_of_Life
Spearman Rho-statistic: 0.09639850757126346
P-Value: 0.6059438069776777
ACCEPT
 
Kendall Tau-statistic: 0.06936635082021989
P-Value: 0.627660515488224
ACCEPT
 
Independent Var: TrainingHoursPerweek Dependent Var: Achievement_Motivation_PerceivedCompetence
Spearman Rho-statistic: 0.13429408339670879
P-Value: 0.47136271443806155
ACCEPT
 
Kendall Tau-statistic: 0.10280986037024684
P-Value: 0.46674275130121323
ACCEPT
 
Independent Var: TrainingHoursPerweek Dependent Var: BodyImage_Autonomy
Spearman Rho-statistic: 0.06243684628692454
P-Value: 0.7386258030375616
ACCEPT
 
Kendall Tau-statistic: 0.04969294870607627
P-Value: 0.7294044393154606
ACCEPT
 
Independent Var: TrainingHoursPerweek Dependent Var:

### Findings:
#### Neither Spearman Rank correlation nor Kendall's Tau measure could find any association between Training Hours per Week and the dependent variables;
#### They could not detect any association between Disability percentage and the dependent variables as well.
#### However, they could find a relationship between the following
##### --> Education and Empowerment; 
##### --> Sports and Quality of Life; 
##### --> CompSports Experience and Quality of Life
##### --> Socioeconomic_Status and  BodyImage_Autonomy

In [14]:
Independent_nom_var = ["Gender", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status"]
Independent_int_var = ["TrainingHoursPerweek", "Disability_pct", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status_Ord", "Thour_Bins", "Self_Esteem", "Quality_of_Life", "Achievement_Motivation_PerceivedCompetence", "BodyImage_Autonomy", "Empowerment", "Family_Peer_Influence", "Social_Inclusion"]
Dependent_var = ["Barriers"]
for indep in Independent_int_var:
    for dep in Dependent_var:
        print("Independent Var:", indep, "Dependent Var:", dep)
        subdf = data[[indep, dep]]
        ind = np.array(data[indep])
        d = np.array(data[dep])    
        rho, sprmanpval = spearmanr(subdf)
        print("Spearman Rho-statistic:", rho)
        print("P-Value:", sprmanpval)
        if sprmanpval < 0.05:
            print("*****************************REJECT*******************************")
        if sprmanpval > 0.05:
            print("ACCEPT")
        print(" ")
        
        tau, kendallpval = kendalltau(ind, d)
        print("Kendall Tau-statistic:", tau)
        print("P-Value:", kendallpval)
        if kendallpval < 0.05:
            print("*****************************REJECT*******************************")
        if kendallpval > 0.05:
            print("ACCEPT")
        print(" ")

Independent Var: TrainingHoursPerweek Dependent Var: Barriers
Spearman Rho-statistic: -0.11739041226019038
P-Value: 0.5293992430561422
ACCEPT
 
Kendall Tau-statistic: -0.0689771830177026
P-Value: 0.6057456814405743
ACCEPT
 
Independent Var: Disability_pct Dependent Var: Barriers
Spearman Rho-statistic: 0.02047581547762198
P-Value: 0.9129400852891215
ACCEPT
 
Kendall Tau-statistic: 0.009548279930104703
P-Value: 0.9445021232650929
ACCEPT
 
Independent Var: CompSports_Experience Dependent Var: Barriers
Spearman Rho-statistic: -0.4184016352268291
P-Value: 0.01915779851751189
*****************************REJECT*******************************
 
Kendall Tau-statistic: -0.33328489593769883
P-Value: 0.02662586899819955
*****************************REJECT*******************************
 
Independent Var: Sports Dependent Var: Barriers
Spearman Rho-statistic: -0.3730509447233091
P-Value: 0.03873499150267392
*****************************REJECT*******************************
 
Kendall Tau-statistic

In [23]:
Independent_nom_var = ["Gender", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status", "School_Type"]
Independent_int_var = ["TrainingHoursPerweek", "Disability_pct", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status_Ord"]
Dependent_var = ["Self_Esteem", "Quality_of_Life", "Achievement_Motivation_PerceivedCompetence", "BodyImage_Autonomy", "Empowerment", "Family_Peer_Influence", "Social_Inclusion"]

writer = pd.ExcelWriter('pandas_multiple1.xlsx', engine='xlsxwriter')

for indep in Independent_nom_var:
    for dep in Dependent_var:
        nm = indep[:10] + " " + dep[:10]
        print (nm)
        contingency = pd.crosstab(data[indep], data[dep])
        print(contingency)
        print(" ")
        contingency.to_excel(writer, sheet_name=nm)
        
writer.save()


School_Typ Self_Estee
Self_Esteem  0  1  2  3  4  5  6
School_Type                     
1            0  0  5  1  3  1  3
2            1  2  2  2  2  4  5
 
School_Typ Quality_of
Quality_of_Life  0  1  2  3
School_Type                
1                2  6  3  2
2                4  6  6  2
 
School_Typ Achievemen
Achievement_Motivation_PerceivedCompetence  1  2  3  4  5
School_Type                                              
1                                           1  1  5  3  3
2                                           2  1  4  7  4
 
School_Typ BodyImage_
BodyImage_Autonomy  1  2  3  4
School_Type                   
1                   3  5  4  1
2                   5  9  0  4
 
School_Typ Empowermen
Empowerment  0  2  3  4  5  6
School_Type                  
1            0  4  3  4  1  1
2            1  5  2  7  1  2
 
School_Typ Family_Pee
Family_Peer_Influence  1  2   3
School_Type                    
1                      2  3   8
2                      0  8  10
 
School_T

In [18]:
Independent_nom_var = ["Gender", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status", "School_Type", "Thour_Bins"]
Independent_int_var = ["TrainingHoursPerweek", "Disability_pct", "CompSports_Experience", "Sports", "Education", "Socioeconomic_Status_Ord", "Self_Esteem", "Quality_of_Life", "Achievement_Motivation_PerceivedCompetence", "BodyImage_Autonomy", "Empowerment", "Family_Peer_Influence", "Social_Inclusion"]
Independent_var = Independent_int_var + Independent_nom_var
Dependent_var = ["Barriers"]

writer = pd.ExcelWriter('pandas_multiple1.xlsx', engine='xlsxwriter')

for indep in Independent_var:
    for dep in Dependent_var:
        nm = indep[:10] + " " + dep[:10]
        print (nm)
        contingency = pd.crosstab(data[indep], data[dep])
        print(contingency)
        print(" ")
        contingency.to_excel(writer, sheet_name=nm)
        
writer.save()


TrainingHo Barriers
Barriers              19  20  21  22  23  24  25  28  30  32  33
TrainingHoursPerweek                                            
3.0                    0   0   0   0   2   0   0   0   1   0   0
3.5                    0   0   1   0   0   0   0   0   0   0   0
4.0                    0   0   0   2   0   0   0   0   0   1   0
5.0                    0   1   0   0   0   1   0   0   1   0   0
6.0                    1   0   1   0   0   0   0   0   0   1   0
7.0                    0   0   0   0   0   0   0   0   0   0   1
8.0                    0   0   0   1   0   0   0   0   0   0   0
9.0                    0   0   1   0   0   1   0   0   0   0   0
11.0                   0   0   0   1   0   0   1   0   0   0   0
14.0                   2   0   0   0   0   0   1   1   0   0   0
15.0                   0   0   0   1   0   0   0   0   0   0   0
21.0                   0   0   0   0   1   0   0   0   0   0   0
24.0                   0   0   0   1   0   0   0   0   0   0   0
28.0 

In [17]:
indep[:10]

'Thour_Bins'