# Create bar charts showing, for each role, the percentage of people who volunteered to participate in the network

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load the name list (duplicates were removed by hand in Excel beforehand)
namelist = pd.read_csv('cleaned_data/namelist_unique.csv')

# Rename the roles in one pass so they match the wording used in our survey;
# any role not listed here is left untouched.
namelist['Role'] = namelist['Role'].replace({
    'Postdocs': 'Postdoc',
    'Graduate Students': 'Graduate student',
    'Undergraduate': 'Undergraduate student / Post-Bac',
})

# show the table in the notebook output
namelist

In [None]:
# Read the mentor and mentee tables from the cleaned data folder
mentors, mentees = (
    pd.read_csv(csv_path)
    for csv_path in ('cleaned_data/mentors.csv', 'cleaned_data/mentees.csv')
)

In [None]:
def createFigure(data, title = ''):
    """Draw a horizontal bar chart with one annotated bar per row of ``data``.

    Bars are ordered by ascending 'pct' and each is annotated with
    ``"<pct>% (<n>/<total>)"`` just to the right of the bar.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns 'label' (y tick text), 'pct' (bar length),
        'n' and 'total' (counts shown in the annotation).  The frame is not
        modified.
    title : str, optional
        Title placed above the axes.

    Returns
    -------
    f, ax
        The Matplotlib figure and axes that were created.
    """
    f, ax = plt.subplots(figsize=(5,4))

    # sort into a new frame so the caller's DataFrame keeps its row order
    data = data.sort_values('pct')

    ind = np.arange(len(data))  # the y locations for the bars
    width = 0.75  # the thickness of the bars
    rects = ax.barh(ind, data['pct'], width, zorder=2)

    # title and one y tick label per bar
    ax.set_title(title, fontsize = 24)
    ax.set_yticks(ind)
    ax.set_yticklabels(data['label'], fontsize = 16)

    # remove the frame, the tick marks and the x tick labels
    for side in ['right', 'left', 'top', 'bottom']:
        ax.spines[side].set_visible(False)
    # labelbottom=False hides the x labels without having to guess how many
    # ticks the automatic locator produced
    ax.tick_params(length=0, labelbottom=False)

    for rect, pct, n, total in zip(rects, data['pct'], data['n'], data['total']):
        rect.set_color('gray')
        # place the annotation just past the end of the bar, vertically
        # centred on it
        x = rect.get_width() + 1
        y = rect.get_y() + rect.get_height()/2
        # int() lets the counts be formatted with :d even if the columns
        # were stored as floats
        text = f'{pct:.1f}% ({int(n):d}/{int(total):d})'
        ax.text(x, y, text, ha='left', va='center', zorder = 3, fontsize = 16)

    return f, ax

In [None]:
# Distinct roles present in the name list, in order of first appearance
unique_roles = pd.unique(namelist['Role'])

# show the roles in the notebook output
unique_roles

In [None]:
# mentors

# Tally people per role in one pass: value_counts() counts each role and
# reindex() lines the counts up with unique_roles, using 0 for any role that
# has no rows.  Explicit dtypes keep the columns numeric (np.zeros_like on the
# role array would inherit that array's non-numeric dtype).
tot_volunteered = namelist['Role'].value_counts().reindex(unique_roles, fill_value=0).to_numpy(dtype=int)
n_volunteered = mentors['Role'].value_counts().reindex(unique_roles, fill_value=0).to_numpy(dtype=int)

# percentage of each role that volunteered; a role whose total is 0 is left
# at 0 instead of dividing by zero
pct_volunteered = np.divide(
    n_volunteered, tot_volunteered,
    out=np.zeros(len(unique_roles), dtype=float),
    where=tot_volunteered > 0,
) * 100.

data = pd.DataFrame({'label':unique_roles,'pct':pct_volunteered, 'n':n_volunteered, 'total':tot_volunteered})
print(data)

f, ax = createFigure(data, title = 'Percent Participation as Mentors')
f.savefig('fall2022_mentor_participation.png', bbox_inches = 'tight')

In [None]:
# mentees

# Tally people per role in one pass: value_counts() counts each role and
# reindex() lines the counts up with unique_roles, using 0 for any role that
# has no rows.  Explicit dtypes keep the columns numeric (np.zeros_like on the
# role array would inherit that array's non-numeric dtype).
tot_volunteered = namelist['Role'].value_counts().reindex(unique_roles, fill_value=0).to_numpy(dtype=int)
n_volunteered = mentees['Role'].value_counts().reindex(unique_roles, fill_value=0).to_numpy(dtype=int)

# percentage of each role that volunteered; a role whose total is 0 is left
# at 0 instead of dividing by zero
pct_volunteered = np.divide(
    n_volunteered, tot_volunteered,
    out=np.zeros(len(unique_roles), dtype=float),
    where=tot_volunteered > 0,
) * 100.

data = pd.DataFrame({'label':unique_roles,'pct':pct_volunteered, 'n':n_volunteered, 'total':tot_volunteered})
print(data)

f, ax = createFigure(data, title = 'Percent Participation as Mentees')
f.savefig('fall2022_mentee_participation.png', bbox_inches = 'tight')