In [None]:
'''
If you have two ordinal variables, you can check if there is a relationship between them.

To analyse a potential relationship between two ordinal variables, the following steps are done.

1) Get an impression of the sample data by creating a cross table
2) Visualise the sample data by using a heat map.
3) Test if the relation might still be present in the population, and how strong this effect is by using Goodman-Kruskal Gamma.
4) Write up the report.
'''

In [None]:
import pandas
import numpy
import matplotlib.pyplot as plt

In [None]:
# Load the survey data; the file uses ';' as its column separator.
# NOTE(review): '/content/...' is an absolute path (looks like a Colab upload
# location) - confirm/adjust it when running in another environment.
mydata = pandas.read_csv('/content/StudentStatistics.csv', sep=';')
# Bare last expression so the notebook renders the loaded frame.
mydata

In [None]:
'''
The two ordinal variables used in this analysis are
'Teach_Motivate' (how well the teacher was able to motivate the student),
and 'Teach_LinkTheory' (how well the teacher was able to link theory to practice).
'''

'Teach_Motivate'
'Teach_LinkTheory'

In [None]:
# Frequency of each answer given for 'Teach_Motivate'.
mydata['Teach_Motivate'].value_counts()

In [None]:
# Frequency of each answer given for 'Teach_LinkTheory'.
mydata['Teach_LinkTheory'].value_counts()

In [None]:
# Cross table of the two variables: rows are 'Teach_Motivate' answers,
# columns are 'Teach_LinkTheory' answers, cells are counts.
# NOTE(review): if these columns hold plain strings, crosstab presumably lists
# the labels alphabetically rather than in their ordinal order - confirm
# whether the table should be reindexed with the ordered labels.
dataviz = pandas.crosstab(mydata['Teach_Motivate'], mydata['Teach_LinkTheory'])
# Bare last expression so the notebook renders the table.
dataviz

In [None]:
# Draw the cross table as a bar chart.
# NOTE(review): the introduction of this notebook lists a heat map for the
# visualisation step, while this cell draws bars - confirm which is intended.
dataviz.plot(kind='bar')

In [None]:
'''
H0 - no relationship or no influence of Teach_Motivate on Teach_LinkTheory
Ha - yes there is an influence
'''

# Goodman-Kruskal gamma

There are various tests that could be used to check if there is a relation between two ordinal variables. I will use Goodman-Kruskal gamma (γ) (Goodman & Kruskal, 1954). This will test if a so-called monotonic relationship exists between two ordinal variables. Gamma uses so-called concordant and discordant pairs to check for this.

Unfortunately there is no formal way to determine if a particular gamma value is high or low, and the rules of thumb floating around on the internet vary quite a lot, often depending on the field (e.g. biology, medicine, business, etc.).  I will use the rule of thumb from Rea and Parker (2014) that they use for correlation coefficients (not sure if this would also apply for gamma):

|\|γ\||Interpretation|
|-------|---------------|
|0.00 < 0.10|Negligible|
|0.10 < 0.20|Weak|
|0.20 < 0.40|Moderate|
|0.40 < 0.60|Relatively strong|
|0.60 < 0.80|Strong|
|0.80 <= 1.00|Very Strong|

A positive Gamma (i.e. above 0) indicates a positive relation, which means that if someone scores high on one variable, s/he will most likely also score high on the other. A negative Gamma (i.e. below 0) indicates that if someone scores high on one variable, s/he will most likely score low on the other.

In [None]:
from scipy.stats import norm

def _signedRatio(numerator, denominator):
    """Divide two numbers without raising on a zero denominator.

    Returns ``numerator / denominator`` when the denominator is non-zero,
    NaN for 0/0 (or a NaN numerator), and otherwise an infinity carrying the
    sign of the numerator.
    """
    if denominator != 0:
        return numerator / denominator
    if numerator == 0 or numerator != numerator:
        return float('nan')
    return float('inf') if numerator > 0 else float('-inf')


def goodmanKruskalgamma(data, ordinal1, ordinal2, orderLabels1, orderLabels2=None,
                        alternative='greater'):
    """Goodman-Kruskal gamma for two ordinal columns, with three z-tests.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding both variables.
    ordinal1, ordinal2 : column labels
        Columns used for the rows and the columns of the cross table.
    orderLabels1 : sequence
        Categories of ``ordinal1`` from lowest to highest. Values in the data
        that are not listed are left out; listed categories that never occur
        are kept as all-zero rows.
    orderLabels2 : sequence, optional
        Same for ``ordinal2``; defaults to ``orderLabels1``.
    alternative : {'greater', 'less', 'two-sided'}, optional
        Tail used for the p-values. The default 'greater' is the upper tail
        ``norm.sf(z)``, i.e. it tests for a positive association only.
        Use 'two-sided' for a non-directional hypothesis and 'less' for a
        negative association.

    Returns
    -------
    ((gamma, qualification), zvalues, pvalues)
        ``qualification`` is the rule-of-thumb label for ``abs(gamma)``
        (thresholds .10/.20/.40/.60/.80). ``zvalues`` and ``pvalues`` are
        three-element lists: the quick approximation, gamma divided by the
        standard error built from ``(Q*C - P*D)**2``, and gamma divided by
        the standard error built from ``(C - D)**2``.
        When the table has no concordant and no discordant pairs gamma is
        undefined; the result is then NaN with qualification 'Undefined'.

    Raises
    ------
    ValueError
        If ``alternative`` is not one of the three accepted values.
    """
    if alternative not in ('greater', 'less', 'two-sided'):
        raise ValueError("alternative must be 'greater', 'less' or 'two-sided'")
    if orderLabels2 is None:
        orderLabels2 = orderLabels1

    # Order rows and columns in one step; fill_value=0 keeps categories that
    # do not occur in the data as empty rows/columns instead of NaN/KeyError.
    crossTable = pandas.crosstab(data[ordinal1], data[ordinal2]).reindex(
        index=list(orderLabels1), columns=list(orderLabels2), fill_value=0)

    # Plain Python ints: exact arithmetic, so the squared sums further down
    # cannot overflow the way fixed-width integers can.
    counts = [[int(value) for value in row] for row in crossTable.to_numpy().tolist()]
    nRows = len(counts)
    nCols = len(counts[0]) if counts else 0

    def blockTotal(rowRange, colRange):
        # Sum of the counts in the rectangular part of the table.
        return sum(counts[p][q] for p in rowRange for q in colRange)

    # C[i][j]: counts above-left plus below-right of cell (i, j)  -> concordant
    C = [[blockTotal(range(i), range(j))
          + blockTotal(range(i + 1, nRows), range(j + 1, nCols))
          for j in range(nCols)] for i in range(nRows)]
    # D[i][j]: counts below-left plus above-right of cell (i, j)  -> discordant
    D = [[blockTotal(range(i + 1, nRows), range(j))
          + blockTotal(range(i), range(j + 1, nCols))
          for j in range(nCols)] for i in range(nRows)]

    cells = [(counts[i][j], C[i][j], D[i][j])
             for i in range(nRows) for j in range(nCols)]
    P = sum(f * c for f, c, d in cells)
    Q = sum(f * d for f, c, d in cells)
    n = sum(f for f, c, d in cells)

    if P + Q == 0:
        # No concordant or discordant pairs at all: gamma is 0/0.
        nan = float('nan')
        return (nan, 'Undefined'), [nan, nan, nan], [nan, nan, nan]

    GKgamma = (P - Q) / (P + Q)

    strength = abs(GKgamma)
    if strength < .10:
        qual = 'Negligible'
    elif strength < .20:
        qual = 'Weak'
    elif strength < .40:
        qual = 'Moderate'
    elif strength < .60:
        qual = 'Relatively strong'
    elif strength < .80:
        qual = 'Strong'
    else:
        qual = 'Very strong'

    # Quick approximation; a gamma of exactly +/-1 makes the denominator zero,
    # which yields an infinite z instead of an exception.
    Z1 = GKgamma * _signedRatio(P + Q, n * (1 - GKgamma**2))**0.5

    forASE0 = sum(f * (Q * c - P * d)**2 for f, c, d in cells)
    forASE1 = sum(f * (c - d)**2 for f, c, d in cells)

    ASE0 = 4 * forASE0**0.5 / (P + Q)**2
    # Same value as forASE1 - (P - Q)**2 / n, but the numerator is an exact
    # non-negative integer, so rounding can never push it below zero.
    ASE1 = 2 * ((n * forASE1 - (P - Q)**2) / n)**0.5 / (P + Q)
    Z2 = _signedRatio(GKgamma, ASE0)
    Z3 = _signedRatio(GKgamma, ASE1)

    zvalues = [Z1, Z2, Z3]

    if alternative == 'greater':
        pvalues = [float(norm.sf(z)) for z in zvalues]
    elif alternative == 'less':
        pvalues = [float(norm.cdf(z)) for z in zvalues]
    else:
        pvalues = [float(2 * norm.sf(abs(z))) for z in zvalues]

    return (GKgamma, qual), zvalues, pvalues

In [None]:
# List the distinct answers, to get the exact spelling of each label.
mydata['Teach_Motivate'].unique()

In [None]:
# List the distinct answers, to get the exact spelling of each label.
mydata['Teach_LinkTheory'].unique()

In [None]:
# Answer labels in the order used for the rows (correctOrder1) and the
# columns (correctOrder2) of the cross table; both variables use the same labels.
correctOrder1 = [
    'Fully Disagree',
    'Disagree',
    'Neither disagree nor agree',
    'Agree',
    'Fully agree',
]
correctOrder2 = list(correctOrder1)

In [None]:
# Run the test; the result is ((gamma, qualification), z-values, p-values).
goodmanKruskalgamma(mydata, 'Teach_Motivate', 'Teach_LinkTheory', correctOrder1, correctOrder2)

In [None]:
# NOTE(review): these look like values transcribed by hand from the output of
# the goodmanKruskalgamma call - confirm, and confirm which of the three
# returned p-values this one is. They will not update if the data changes.
pvalue = 1.104473066321668e-40

# Gamma itself (the effect size).
effectsize = 0.8768577494692145

# Rule-of-thumb label for the absolute value of gamma.
interpretation = 'Very strong'