In [None]:
'''
Part 1: Descriptive analysis
Use descriptive statistics to get an impression of the data, using:

1) A cross table with the percentages for each score from each variable
2) A visualisation of the data with a multiple stacked bar-chart
3) Some statistical measures for central tendency (the median) and dispersion (consensus)

Part 2: Inferential statistics
After the first impression determine what can be said about the population based on your sample data by:

1) Determine if there are any differences between any of the variables in the population (Friedman test)
2) If there are, then determine which variables differ significantly, and determine the effect sizes (Kendall's W)

Part 3: Reporting
As the last step, you will need to write up all the results.
'''

In [None]:
import pandas
import numpy
import matplotlib.pyplot as plt

In [None]:
# Read the semicolon-separated student survey file into a DataFrame and show it.
data = pandas.read_csv('/content/StudentStatistics.csv', delimiter=';')
data

In [None]:
'''
'Teach_Motivate', 'Teach_LinkTheory', 'Teach_StimAsk', 'Teach_Avail', 'Teach_StimDisc', 'Teach_Partic', 'Teach_Comp'
'''

In [None]:
# Keep only the seven teacher-related items and drop every row that has a
# missing value in any of them, so all remaining rows are complete.
selected_data = data.loc[:, [
    'Teach_Motivate',
    'Teach_LinkTheory',
    'Teach_StimAsk',
    'Teach_Avail',
    'Teach_StimDisc',
    'Teach_Partic',
    'Teach_Comp',
]].dropna()
selected_data.head()

In [None]:
# Count how often each distinct answer occurs in one item, to see the raw labels.
selected_data.loc[:, 'Teach_Motivate'].value_counts()

In [None]:
# Map each answer label to its ordinal score (1 = lowest agreement, 5 = highest).
# The labels are listed in ascending order, so the score is simply the position.
# NOTE(review): 'Fully Disagree' is capitalised differently from 'Fully agree' —
# confirm both spellings against the raw labels in the data.
coding = {
    label: score
    for score, label in enumerate(
        [
            'Fully Disagree',
            'Disagree',
            'Neither disagree nor agree',
            'Agree',
            'Fully agree',
        ],
        start=1,
    )
}

In [None]:
# Swap every answer label for its numeric score as defined in `coding`.
selected_data = selected_data.replace(coding)

In [None]:
# Display selected_data so the values can be inspected after the replace step.
selected_data

In [None]:
# For every selected item, count how many times each value occurs.
# Result: {column name -> Series of counts indexed by value}.
freq = {
    column: selected_data[column].value_counts()
    for column in selected_data.columns
}
freq

In [None]:
# Combine the per-item counts into one table: one column per item,
# one row per distinct value found in the data.
freq_table = pandas.DataFrame(data=freq)
freq_table

In [None]:
# Stacked horizontal bar chart: one bar per survey item, one segment per score.
#
# The legend labels are derived from the scores that are actually present in
# freq_table (its index), translated back to their answer labels. Passing the
# `coding` dict directly would assign its five keys to the segments purely by
# position, which mislabels the chart whenever a score level is absent from
# the data or the index is not ordered 1..5.
score_labels = {score: label for label, score in coding.items()}
ax = freq_table.T.plot(kind='barh', stacked=True)
ax.set_xlabel('Number of responses')
plt.legend(
    [score_labels.get(score, score) for score in freq_table.index],
    bbox_to_anchor=(1.05, 1),
)

In [None]:
'''
H0 - there are no differences between the columns: the scores (1,2,3,4,5) are distributed the same way in every column
Ha - at least one column differs from the others
'''

# Friedman Test
*By P. Stikker*<br>


When we have multiple paired ordinal variables, we could ask: are there any differences between the scores?

The test used to answer the question is usually a Friedman test (Friedman, 1937, 1939). This can be seen as an extension of the Wilcoxon signed rank test (although it is more an extension of the sign test). Where the Wilcoxon signed rank test is limited to two variables, the Friedman test is an extension which will test all variables in one go.

In [None]:
from scipy.stats import friedmanchisquare

In [None]:
# Run the Friedman test with each survey item as one related sample.
friedman_columns = [
    'Teach_Motivate',
    'Teach_LinkTheory',
    'Teach_StimAsk',
    'Teach_Avail',
    'Teach_StimDisc',
    'Teach_Partic',
    'Teach_Comp',
]
spRes = friedmanchisquare(*[selected_data[name] for name in friedman_columns])
spRes

In [None]:
# Take the test statistic and p-value directly from the Friedman result
# instead of pasting numbers from a previous run, so they stay in sync
# with the data whenever the notebook is re-executed.
statistic = spRes.statistic
pvalue = spRes.pvalue

# Kendall's W

In [None]:
# Kendall's W as an effect size for the Friedman test:
#     W = chi-square statistic / (n * (k - 1))
# where n is the number of rows and k the number of columns in selected_data.
n, k = selected_data.shape

W = statistic / (n * (k - 1))
W

In [None]:
'''
0-0.3 : weak
0.3-0.5: moderate
0.5-0.7 : good
0.7 - 1: strong
'''