In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors
import numpy as np
import itertools

In [2]:
# Values each demographic attribute can take, in display order.
grade_list = ["7th","8th","9th","10th","11th","12th"]
race_list = ["White","Black","Hispanic","Asian","Mixed/other"]
sex_list = ["Female","Male"]

# Attribute name -> list of its values.
all_attributes_dict = {
    "grade": grade_list,
    "race": race_list,
    "sex": sex_list,
}

# Dimensions combined into multidimensional groups, and how many there are.
dimensions_list = ['grade','race','sex']
num_dimensions = len(dimensions_list)

# Cartesian product of the attribute values, one tuple per combination,
# with tuple positions following dimensions_list.
value_lists = [all_attributes_dict[dim] for dim in dimensions_list]
multidim_groups = list(itertools.product(*value_lists))

# Merged preference results; later cells filter this frame by its `school` column.
results_df = pd.read_csv("preference_results_v4_merged.csv")

In [3]:
## https://stackoverflow.com/questions/47391948/pandas-style-background-gradient-using-other-dataframe
def b_g(s, df_ref, cmap='viridis', vmin=0, vmax=1):
    """Build ``background-color`` CSS for one column, coloured by a reference frame.

    Intended for ``Styler.apply``: the styled frame may hold strings, while the
    colours are derived from the numeric column of the same name in ``df_ref``.

    Parameters
    ----------
    s : pandas.Series
        Column being styled; only ``s.name`` is used, to look up ``df_ref``.
    df_ref : pandas.DataFrame
        Frame whose column ``s.name`` supplies the values to colour by.
    cmap : str
        Name of the Matplotlib colormap.
    vmin, vmax : float
        Limits passed to ``colors.Normalize`` before the colormap is applied.

    Returns
    -------
    list of str
        One ``'background-color: #rrggbb'`` entry per row of the column.
    """
    a = df_ref.loc[:, s.name].copy()
    norm = colors.Normalize(vmin, vmax)
    normed = norm(a.values)
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # plt.get_cmap is the supported way to look a colormap up by name.
    rgba = plt.get_cmap(cmap)(normed)
    c = [colors.rgb2hex(x) for x in rgba]

    return ['background-color: %s' % color for color in c]

def font_color(s, df_ref, cmap='viridis', vmin=0,vmax=1):
    """Build ``color`` CSS (white or black text) for one column of a Styler.

    The colormap colour of each value in ``df_ref[s.name]`` is computed the
    same way as for the background, and the text is made white when that
    colour's HSV value (brightness) is below 0.7, black otherwise. For the text
    to contrast with the drawn background, ``cmap``, ``vmin`` and ``vmax`` must
    match the ones given to the background helper.

    Parameters
    ----------
    s : pandas.Series
        Column being styled; only ``s.name`` is used, to look up ``df_ref``.
    df_ref : pandas.DataFrame
        Frame whose column ``s.name`` supplies the values to colour by.
    cmap : str
        Name of the Matplotlib colormap.
    vmin, vmax : float
        Limits passed to ``colors.Normalize`` before the colormap is applied.

    Returns
    -------
    list of str
        One ``'color: #ffffff'`` or ``'color: #000000'`` entry per row.
    """
    a = df_ref.loc[:, s.name].copy()
    norm = colors.Normalize(vmin, vmax)
    normed = norm(a.values)
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # plt.get_cmap is the supported way to look a colormap up by name.
    rgba = plt.get_cmap(cmap)(normed)
    brightness_cutoff = 0.7
    # x[:-1] drops the alpha channel; the last HSV component is the brightness.
    c2 = [
        "#ffffff" if colors.rgb_to_hsv(x[:-1])[-1] < brightness_cutoff else "#000000"
        for x in rgba
    ]

    return ['color: %s' % color for color in c2]

In [4]:
## Select school
school = 61
# Rows of results_df belonging to the chosen school. The variable name
# (spelling included) is the one the cells below read from.
school_mask = results_df["school"] == school
resutls_df_school = results_df[school_mask]

## Race

In [13]:
## Race
# Colour-scale limits, given to both the background and the font helper so the
# text contrast is always computed for the shade that is actually drawn.
color_range = dict(vmin=0, vmax=1)

pvals_dfs = {}
h_dfs = {}  # never filled in this cell; left defined as before
h_norm_dfs = {}
for gi in race_list:
    for gj in race_list:
        if gi == gj:
            continue
        col = "MRQAP_pval1s_h_and_1d-simple_race_" + gi + "-" + gj
        vals = resutls_df_school[col].values
        assert len(vals)==1, "expected exactly one row for the selected school"
        vals = vals[0]
        if np.isnan(vals):
            # A missing p-value is skipped only when the pval2s entry of the
            # reversed pair is missing too; otherwise it is reported and the
            # NaN is still stored below.
            if np.isnan(resutls_df_school["MRQAP_pval2s_h_and_1d-simple_race_" + gj + "-" + gi].values[0]):
                continue
            else:
                print ("We have a problem in", gi,gj)
        if gi not in pvals_dfs:
            pvals_dfs[gi] = {}
            h_norm_dfs[gi] = {}
        pvals_dfs[gi][gj] = vals
        h_norm_dfs[gi][gj] = resutls_df_school["h_norm_and_1d-simple_race_" + gi + "-" + gj].values[0]

# Rows = gi, columns = gj.
t1 = pd.DataFrame.from_dict(h_norm_dfs,orient="index")
t2 = pd.DataFrame.from_dict(pvals_dfs,orient="index")

## https://stackoverflow.com/questions/25571882/pandas-columns-correlation-with-statistical-significance
# One star per threshold the p-value does not exceed; NaN yields no stars.
p = t2.applymap(lambda x: ''.join(['*' for t in [.05, .01, .001] if x<=t]))
t1str = t1.round(2).astype(str) + p
t1str[t1str=="nan"] = "-"

# Groups that made it into the table, in race_list order, for rows and columns.
present = [g for g in race_list if g in h_norm_dfs]
t1str = t1str.loc[present, present]

## https://stackoverflow.com/questions/55929264/using-a-dataframe-to-format-the-style-of-another-dataframe
# Uncoloured variant of the table; not displayed by this cell.
t1str_style = (
    t1str.style
    .format(na_rep='-')
    .set_properties(**{'text-align': 'center'})
    .applymap_index(lambda v: "font-weight: bold;", axis="index")
    .applymap_index(lambda v: "font-weight: bold;", axis="columns")
)

# Numeric values in the same row/column order, used only to pick the colours.
t1 = t1.loc[present, present]
styled_result = (
    t1str.style
    .apply(lambda x: b_g(x, t1, **color_range))
    .apply(lambda x: font_color(x, t1, **color_range))
    .set_properties(**{'text-align': 'center'})
    .applymap_index(lambda v: "font-weight: bold;", axis="index")
    .applymap_index(lambda v: "font-weight: bold;", axis="columns")
)
styled_result

  c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]
  c2 = ["#ffffff" if colors.rgb_to_hsv(x[:-1])[-1]<0.7 else "#000000" for x in plt.cm.get_cmap(cmap)(normed) ]


Unnamed: 0,White,Black,Hispanic,Asian,Mixed/other
White,-,0.09***,0.31***,0.1***,0.35*
Black,0.1***,-,0.21***,0.08***,0.77*
Hispanic,0.5***,0.18***,-,0.19***,0.36***
Asian,0.03***,0.05***,0.09***,-,0.31***
Mixed/other,1.27,0.95,0.44***,0.62**,-


## Grade

In [15]:
## Grade
# Colour-scale limits, given to both the background and the font helper.
# Previously only the background used vmax=0.4 while the font helper fell back
# to vmax=1, so the text contrast was computed for a different shade than the
# one actually drawn.
color_range = dict(vmin=0, vmax=0.4)

pvals_dfs = {}
h_dfs = {}  # never filled in this cell; left defined as before
h_norm_dfs = {}
for gi in grade_list:
    for gj in grade_list:
        if gi == gj:
            continue
        col = "MRQAP_pval1s_h_and_1d-simple_grade_" + gi + "-" + gj
        vals = resutls_df_school[col].values
        assert len(vals)==1, "expected exactly one row for the selected school"
        vals = vals[0]
        if np.isnan(vals):
            # A missing p-value is skipped only when the pval2s entry of the
            # reversed pair is missing too; otherwise it is reported and the
            # NaN is still stored below.
            if np.isnan(resutls_df_school["MRQAP_pval2s_h_and_1d-simple_grade_" + gj + "-" + gi].values[0]):
                continue
            else:
                print ("We have a problem in", gi,gj)
        if gi not in pvals_dfs:
            pvals_dfs[gi] = {}
            h_norm_dfs[gi] = {}
        pvals_dfs[gi][gj] = vals
        h_norm_dfs[gi][gj] = resutls_df_school["h_norm_and_1d-simple_grade_" + gi + "-" + gj].values[0]
print (h_norm_dfs)

# Rows = gi, columns = gj.
t1 = pd.DataFrame.from_dict(h_norm_dfs,orient="index")
t2 = pd.DataFrame.from_dict(pvals_dfs,orient="index")

## https://stackoverflow.com/questions/25571882/pandas-columns-correlation-with-statistical-significance
# One star per threshold the p-value does not exceed; NaN yields no stars.
p = t2.applymap(lambda x: ''.join(['*' for t in [.05, .01, .001] if x<=t]))
t1str = t1.round(2).astype(str) + p
t1str[t1str=="nan"] = "-"

# Groups that made it into the table, in grade_list order, for rows and columns.
present = [g for g in grade_list if g in h_norm_dfs]
t1str = t1str.loc[present, present]

## https://stackoverflow.com/questions/55929264/using-a-dataframe-to-format-the-style-of-another-dataframe
# Uncoloured variant of the table; not displayed by this cell.
t1str_style = (
    t1str.style
    .format(na_rep='-')
    .set_properties(**{'text-align': 'center'})
    .applymap_index(lambda v: "font-weight: bold;", axis="index")
    .applymap_index(lambda v: "font-weight: bold;", axis="columns")
)

# Numeric values in the same row/column order, used only to pick the colours.
t1 = t1.loc[present, present]
styled_result = (
    t1str.style
    .apply(lambda x: b_g(x, t1, **color_range))
    .apply(lambda x: font_color(x, t1, **color_range))
    .set_properties(**{'text-align': 'center'})
    .applymap_index(lambda v: "font-weight: bold;", axis="index")
    .applymap_index(lambda v: "font-weight: bold;", axis="columns")
)
styled_result

{'7th': {'8th': 1.7640748115038327e-10, '9th': 0.0433672773334746, '10th': 1.7640748115038327e-10, '11th': 1.7640748115038327e-10, '12th': 1.7640748115038327e-10}, '8th': {'7th': 0.0146330671949283, '9th': 0.0653940286237804, '10th': 0.0043420798864322, '11th': 2.21965719691007e-10, '12th': 2.21965719691007e-10}, '9th': {'7th': 0.0957045373178224, '8th': 0.0441803504424266, '10th': 0.0148044854998004, '11th': 0.0121922908926148, '12th': 0.0113644098798525}, '10th': {'7th': 6.217364450996021e-10, '8th': 6.217364450996021e-10, '9th': 6.217364450996021e-10, '11th': 0.1474189216129636, '12th': 0.1174349858125715}, '11th': {'7th': 6.426115327110866e-10, '8th': 6.426115327110866e-10, '9th': 0.0133122202653665, '10th': 0.1703057412945039, '12th': 0.2245671167396079}, '12th': {'7th': 4.922840595587987e-10, '8th': 4.922840595587987e-10, '9th': 4.922840595587987e-10, '10th': 0.0636326985202053, '11th': 0.1470399292139637}}


  c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]
  c2 = ["#ffffff" if colors.rgb_to_hsv(x[:-1])[-1]<0.7 else "#000000" for x in plt.cm.get_cmap(cmap)(normed) ]


Unnamed: 0,7th,8th,9th,10th,11th,12th
7th,-,0.0***,0.04***,0.0***,0.0***,0.0***
8th,0.01***,-,0.07***,0.0***,0.0***,0.0***
9th,0.1***,0.04***,-,0.01***,0.01***,0.01***
10th,0.0***,0.0***,0.0***,-,0.15***,0.12***
11th,0.0***,0.0***,0.01***,0.17***,-,0.22***
12th,0.0***,0.0***,0.0***,0.06***,0.15***,-


## Sex

In [17]:
## Sex
# Colour-scale limits, given to both the background and the font helper so the
# text contrast is always computed for the shade that is actually drawn.
color_range = dict(vmin=0.4, vmax=1.0)

pvals_dfs = {}
h_dfs = {}  # never filled in this cell; left defined as before
h_norm_dfs = {}
for gi in sex_list:
    for gj in sex_list:
        if gi == gj:
            continue
        col = "MRQAP_pval1s_h_and_1d-simple_sex_" + gi + "-" + gj
        vals = resutls_df_school[col].values
        assert len(vals)==1, "expected exactly one row for the selected school"
        vals = vals[0]
        if np.isnan(vals):
            # A missing p-value is skipped only when the pval2s entry of the
            # reversed pair is missing too; otherwise it is reported and the
            # NaN is still stored below.
            if np.isnan(resutls_df_school["MRQAP_pval2s_h_and_1d-simple_sex_" + gj + "-" + gi].values[0]):
                continue
            else:
                print ("We have a problem in", gi,gj)
        if gi not in pvals_dfs:
            pvals_dfs[gi] = {}
            h_norm_dfs[gi] = {}
        pvals_dfs[gi][gj] = vals
        h_norm_dfs[gi][gj] = resutls_df_school["h_norm_and_1d-simple_sex_" + gi + "-" + gj].values[0]
print (h_norm_dfs)
print (pvals_dfs)

# Rows = gi, columns = gj.
t1 = pd.DataFrame.from_dict(h_norm_dfs,orient='index')
t2 = pd.DataFrame.from_dict(pvals_dfs,orient="index")

## https://stackoverflow.com/questions/25571882/pandas-columns-correlation-with-statistical-significance
# One star per threshold the p-value does not exceed; NaN yields no stars.
p = t2.applymap(lambda x: ''.join(['*' for t in [.05, .01, .001] if x<=t]))
t1str = t1.round(2).astype(str) + p
t1str[t1str=="nan"] = "-"

# Groups that made it into the table, in sex_list order, for rows and columns.
present = [g for g in sex_list if g in h_norm_dfs]
t1str = t1str.loc[present, present]

## https://stackoverflow.com/questions/55929264/using-a-dataframe-to-format-the-style-of-another-dataframe
# Uncoloured variant of the table; not displayed by this cell.
t1str_style = (
    t1str.style
    .format(na_rep='-')
    .set_properties(**{'text-align': 'center'})
    .applymap_index(lambda v: "font-weight: bold;", axis="index")
    .applymap_index(lambda v: "font-weight: bold;", axis="columns")
)

# Numeric values in the same row/column order, used only to pick the colours.
t1 = t1.loc[present, present]
styled_result = (
    t1str.style
    .apply(lambda x: b_g(x, t1, **color_range))
    .apply(lambda x: font_color(x, t1, **color_range))
    .set_properties(**{'text-align': 'center'})
    .applymap_index(lambda v: "font-weight: bold;", axis="index")
    .applymap_index(lambda v: "font-weight: bold;", axis="columns")
)
styled_result

{'Female': {'Male': 0.5032928141412708}, 'Male': {'Female': 0.6020329003562032}}
{'Female': {'Male': 0.0}, 'Male': {'Female': 0.0}}


  c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]
  c2 = ["#ffffff" if colors.rgb_to_hsv(x[:-1])[-1]<0.7 else "#000000" for x in plt.cm.get_cmap(cmap)(normed) ]


Unnamed: 0,Female,Male
Female,-,0.5***
Male,0.6***,-
