# Correlations with Simpson et al. (2012)

The table in `csv/other-studies/Simpsonetal2012_subset-renamed.csv` is based on the original data available from <https://link.springer.com/article/10.3758/s13428-012-0271-4#SecESM1>. It has been subset, and its labels have been renamed, to make it comparable to this study.

In [2]:
import os

import numpy as np
import pandas as pd
from scipy import stats

In [5]:
# compare the similarity matrix with matrix by Simpson et al.

import glob

# prepare matrices from other studies

# Semicolon-separated table; the first row and the first column hold the labels.
Simpson = pd.read_csv("csv/other-studies/Simpsonetal2012_subset-renamed.csv", header=0, index_col=0, sep=";")
# erase diagonal (self-similarity must not enter the correlation)
for c in Simpson.columns:
    # A column without a matching row would make .loc[c, c] silently append
    # a new row, so fail loudly instead.
    if c not in Simpson.index:
        raise KeyError(c)
    # Single .loc call: chained indexing (.loc[c][c]) writes to a temporary
    # object under pandas Copy-on-Write. np.NaN was removed in NumPy 2.0.
    Simpson.loc[c, c] = np.nan

#display(Simpson)

# prepare data from the present study
# and statistical coefficients

# One similarity matrix per file, sorted by file path.
matrix_files = sorted(glob.glob("csv/similarity-matrices/latin/*.csv"))
# add Sparse Latin to the end
matrix_files.append("csv/similarity-matrices/latin.csv")
# alpha after Bonferroni correction: one hypothesis per matrix file
alpha = 0.05 / len(matrix_files)
print("Corrected alpha is:", round(alpha, 5), ", for the number of hypotheses:", len(matrix_files))

# compile results
studyname = "Simpson"
# Map a display name to each file path. os.path.basename is used instead of
# split("/") because glob joins the file name with the platform separator,
# which is not "/" on Windows.
paths = {
    os.path.basename(path).replace(".csv", "").replace("_latin", "").replace("-", " ").title().replace("Pt", "PT"): path
    for path in matrix_files
}
# Empty result table: one row per display name, filled in by the loop below.
e = pd.DataFrame(index=list(paths), columns=["Size", "%s (coef)" % studyname, "%s (p-val)" % studyname])

# Reference matrices keyed by study name, so the matrix for `studyname`
# is found by an explicit lookup rather than eval().
reference_matrices = {"Simpson": Simpson}
reference = reference_matrices[studyname]

for font in e.index:
    # NOTE(review): the file appended as "Sparse Latin" (latin.csv) gets the
    # display name "Latin", so this test may never match -- confirm intent.
    e.loc[font, "Size"] = "26 x 26" if "Sparse" in font else "8 x 8"
    # Column 0 holds the row labels; the second line of the file (the first
    # data row) and file column 1 are left out.
    sm = pd.read_csv(paths[font], header=0, index_col=0, skiprows=[1], usecols=[0, 2, 3, 4, 5, 6, 7, 8, 9])
    # erase diagonal
    for c in sm.columns:
        # Fail loudly rather than let .loc[c, c] append a new row.
        if c not in sm.index:
            raise KeyError(c)
        # Single .loc call: chained indexing does not write back under
        # pandas Copy-on-Write. np.NaN was removed in NumPy 2.0.
        sm.loc[c, c] = np.nan
    s1 = sm.to_numpy().ravel()

    # calculate Spearman's rank correlation coefficient
    # subset the reference matrix to the labels of sm and flatten it
    s2 = pd.DataFrame(reference, index=sm.index, columns=sm.columns).to_numpy().ravel()
    # NaN cells (erased diagonal, labels missing from the reference) are dropped
    coef, p_val = stats.spearmanr(s1, s2, nan_policy="omit")
    e.loc[font, studyname + " (coef)"] = round(coef, 5)
    e.loc[font, studyname + " (p-val)"] = round(p_val, 5)
display(e)

Corrected alpha is: 0.00385 , for the number of hypotheses: 13


Unnamed: 0,Size,Simpson (coef),Simpson (p-val)
Arial,8 x 8,0.88471,0.0
Calibri,8 x 8,0.76863,0.0
Cambria,8 x 8,0.79248,0.0
Candara,8 x 8,0.6202,0.0
Century Schoolbook,8 x 8,0.81436,0.0
Courier New,8 x 8,0.80678,0.0
Futura,8 x 8,0.82552,0.0
Georgia,8 x 8,0.74279,0.0
PT Sans,8 x 8,0.89305,0.0
PT Serif,8 x 8,0.81641,0.0
