In [1]:
import os
from os.path import join

import numpy as np
import pandas as pd
import seaborn as sns
from nilearn import plotting
from scipy.spatial.distance import jaccard, dice

In [2]:
# Folder that holds the CSV files read by the cells below. The original
# author's local path is kept as the default so existing runs are unchanged;
# set the NBS_DIR environment variable to point the notebook at another
# location without editing this cell.
nbs_dir = os.environ.get('NBS_DIR',
                         '/Users/katherine/Dropbox/Projects/physics-retrieval/data/output/nbs')

In [3]:
def _load_retr_matrix(filename):
    """Read one CSV from `nbs_dir` into an integer-typed DataFrame.

    The first CSV column is used as the index and the first row as the
    column header.
    """
    return pd.read_csv(join(nbs_dir, filename), index_col=0, header=0, dtype=int)


# One '-retr' file per subgroup of students.
all_retr = _load_retr_matrix('all_students-retr.csv')
fml_retr = _load_retr_matrix('female_students-retr.csv')
mle_retr = _load_retr_matrix('male_students-retr.csv')
lec_retr = _load_retr_matrix('lecture_students-retr.csv')
lf_retr = _load_retr_matrix('female_lecture_students-retr.csv')
lm_retr = _load_retr_matrix('male_lecture_students-retr.csv')
mod_retr = _load_retr_matrix('modeling_students-retr.csv')
mf_retr = _load_retr_matrix('female_modeling_students-retr.csv')
mm_retr = _load_retr_matrix('male_modeling_students-retr.csv')

In [4]:
# Label -> DataFrame lookup over the nine '-retr' frames; the labels are
# later joined pairwise into '<group1>-<group2>' keys.
subject_groups = dict([
    ('all', all_retr),
    ('female', fml_retr),
    ('male', mle_retr),
    ('lecture', lec_retr),
    ('modeling', mod_retr),
    ('female_lecture', lf_retr),
    ('female_modeling', mf_retr),
    ('male_lecture', lm_retr),
    ('male_modeling', mm_retr),
])

In [5]:
# Empty accumulators for the pairwise scores (despite the `_df` suffix both
# are pandas Series). The dtype is given explicitly because a bare
# `pd.Series()` has a version-dependent default dtype and emits a warning on
# pandas 1.x; the values stored here are floats.
dice_df = pd.Series(dtype=float)
jaccard_df = pd.Series(dtype=float)

In [6]:
# Flatten each group's matrix once, up front, rather than re-ravelling it
# for every ordered pair inside the nested loop. scipy's `jaccard` and `dice`
# are specified for boolean vectors, while the CSVs were loaded with
# dtype=int, so the cast is made explicit here (any nonzero entry -> True).
flat_masks = {name: np.ravel(frame.values, order='F').astype(bool)
              for name, frame in subject_groups.items()}

# Score every ordered pair of distinct groups. Both scipy functions return
# dissimilarities (0 means the two vectors are identical), and both are
# symmetric, so 'a-b' and 'b-a' receive the same value.
for group1, vec1 in flat_masks.items():
    for group2, vec2 in flat_masks.items():
        if group1 == group2:
            continue
        pair_label = '{0}-{1}'.format(group1, group2)
        jaccard_df[pair_label] = jaccard(vec1, vec2)
        dice_df[pair_label] = dice(vec1, vec2)


In [10]:
# Order the Jaccard values from largest to smallest, then print them with
# pandas' row/column display limits lifted for this statement only.
jaccard_ranked = jaccard_df.sort_values(ascending=False)
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None):
    print(jaccard_ranked)

male_modeling-female_modeling     0.911729
female_modeling-male_modeling     0.911729
male_lecture-female_modeling      0.902695
female_modeling-male_lecture      0.902695
male-female_modeling              0.898912
female_modeling-male              0.898912
male_modeling-female_lecture      0.895918
female_lecture-male_modeling      0.895918
female_lecture-male               0.893787
male-female_lecture               0.893787
female_lecture-male_lecture       0.892004
male_lecture-female_lecture       0.892004
female_lecture-female_modeling    0.891269
female_modeling-female_lecture    0.891269
female-male_modeling              0.882790
male_modeling-female              0.882790
modeling-female_lecture           0.880488
female_lecture-modeling           0.880488
female_modeling-lecture           0.876215
lecture-female_modeling           0.876215
male_lecture-female               0.861032
female-male_lecture               0.861032
female-male                       0.860630
male-female

In [9]:
# Order the Dice values from largest to smallest, then print them with
# pandas' row/column display limits lifted for this statement only.
dice_ranked = dice_df.sort_values(ascending=False)
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None):
    print(dice_ranked)


male_modeling-female_modeling     0.837778
female_modeling-male_modeling     0.837778
male_lecture-female_modeling      0.822648
female_modeling-male_lecture      0.822648
male-female_modeling              0.816386
female_modeling-male              0.816386
male_modeling-female_lecture      0.811460
female_lecture-male_modeling      0.811460
female_lecture-male               0.807970
male-female_lecture               0.807970
female_lecture-male_lecture       0.805060
male_lecture-female_lecture       0.805060
female_lecture-female_modeling    0.803864
female_modeling-female_lecture    0.803864
female-male_modeling              0.790174
male_modeling-female              0.790174
modeling-female_lecture           0.786492
female_lecture-modeling           0.786492
female_modeling-lecture           0.779700
lecture-female_modeling           0.779700
male_lecture-female               0.755975
female-male_lecture               0.755975
female-male                       0.755357
male-female

In [11]:
def _load_fci_matrix(filename):
    """Read one CSV from `nbs_dir` into an integer-typed DataFrame.

    The first CSV column is used as the index and the first row as the
    column header.
    """
    return pd.read_csv(join(nbs_dir, filename), index_col=0, header=0, dtype=int)


# One '-fci' file per subgroup of students.
all_fci = _load_fci_matrix('all_students-fci.csv')
fml_fci = _load_fci_matrix('female_students-fci.csv')
mle_fci = _load_fci_matrix('male_students-fci.csv')
lec_fci = _load_fci_matrix('lecture_students-fci.csv')
lf_fci = _load_fci_matrix('female_lecture_students-fci.csv')
lm_fci = _load_fci_matrix('male_lecture_students-fci.csv')
mod_fci = _load_fci_matrix('modeling_students-fci.csv')
mf_fci = _load_fci_matrix('female_modeling_students-fci.csv')
mm_fci = _load_fci_matrix('male_modeling_students-fci.csv')

In [12]:
# Label -> DataFrame lookup over the nine '-fci' frames. NOTE: this rebinds
# `subject_groups`, which held the '-retr' frames earlier in the notebook.
subject_groups = dict([
    ('all', all_fci),
    ('female', fml_fci),
    ('male', mle_fci),
    ('lecture', lec_fci),
    ('modeling', mod_fci),
    ('female_lecture', lf_fci),
    ('female_modeling', mf_fci),
    ('male_lecture', lm_fci),
    ('male_modeling', mm_fci),
])

In [13]:
# Reset both accumulators to empty Series before the '-fci' comparison
# (despite the `_df` suffix both are pandas Series). The dtype is given
# explicitly because a bare `pd.Series()` has a version-dependent default
# dtype and emits a warning on pandas 1.x; the values stored are floats.
dice_df = pd.Series(dtype=float)
jaccard_df = pd.Series(dtype=float)

In [14]:
# Flatten each group's matrix once, up front, rather than re-ravelling it
# for every ordered pair inside the nested loop. scipy's `jaccard` and `dice`
# are specified for boolean vectors, while the CSVs were loaded with
# dtype=int, so the cast is made explicit here (any nonzero entry -> True).
flat_masks = {name: np.ravel(frame.values, order='F').astype(bool)
              for name, frame in subject_groups.items()}

# Score every ordered pair of distinct groups. Both scipy functions return
# dissimilarities (0 means the two vectors are identical), and both are
# symmetric, so 'a-b' and 'b-a' receive the same value.
for group1, vec1 in flat_masks.items():
    for group2, vec2 in flat_masks.items():
        if group1 == group2:
            continue
        pair_label = '{0}-{1}'.format(group1, group2)
        jaccard_df[pair_label] = jaccard(vec1, vec2)
        dice_df[pair_label] = dice(vec1, vec2)


In [15]:
# Order the Jaccard values from largest to smallest, then print them with
# pandas' row/column display limits lifted for this statement only.
jaccard_ranked = jaccard_df.sort_values(ascending=False)
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None):
    print(jaccard_ranked)

male_modeling-female_modeling     0.892596
female_modeling-male_modeling     0.892596
male_lecture-female_modeling      0.881364
female_modeling-male_lecture      0.881364
female_modeling-female_lecture    0.871838
female_lecture-female_modeling    0.871838
female_modeling-male              0.861599
male-female_modeling              0.861599
male_lecture-male_modeling        0.860974
male_modeling-male_lecture        0.860974
female_lecture-male_modeling      0.855840
male_modeling-female_lecture      0.855840
male_lecture-female_lecture       0.844652
female_lecture-male_lecture       0.844652
female_modeling-lecture           0.844301
lecture-female_modeling           0.844301
male_modeling-female              0.842278
female-male_modeling              0.842278
male_lecture-modeling             0.829131
modeling-male_lecture             0.829131
male_modeling-lecture             0.824794
lecture-male_modeling             0.824794
female_lecture-modeling           0.821092
modeling-fe

In [17]:
# Order the Dice values from largest to smallest, then show them through the
# notebook's `display` with pandas' row/column display limits lifted for
# this statement only.
dice_ranked = dice_df.sort_values(ascending=False)
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None):
    display(dice_ranked)

male_modeling-female_modeling     0.806026
female_modeling-male_modeling     0.806026
male_lecture-female_modeling      0.787892
female_modeling-male_lecture      0.787892
female_modeling-female_lecture    0.772796
female_lecture-female_modeling    0.772796
female_modeling-male              0.756850
male-female_modeling              0.756850
male_lecture-male_modeling        0.755886
male_modeling-male_lecture        0.755886
female_lecture-male_modeling      0.748007
male_modeling-female_lecture      0.748007
male_lecture-female_lecture       0.731081
female_lecture-male_lecture       0.731081
female_modeling-lecture           0.730555
lecture-female_modeling           0.730555
male_modeling-female              0.727531
female-male_modeling              0.727531
male_lecture-modeling             0.708134
modeling-male_lecture             0.708134
male_modeling-lecture             0.701830
lecture-male_modeling             0.701830
female_lecture-modeling           0.696485
modeling-fe