In [None]:
import os
import pandas as pd
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 1000)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.transforms
plt.style.use('fivethirtyeight')

In [None]:
# Import relevant dataframes.
# All three CSV files live under one root folder; edit DATA_DIR to point at another copy of the data.
DATA_DIR = r"C:\Users\Public\_Data\_Data\Latest_Version"

main_table = pd.read_csv(os.path.join(DATA_DIR, "Main_Data", "Main_Table_10182019.csv"))
print(len(main_table))
faculty_table = pd.read_csv(os.path.join(DATA_DIR, "Faculty_Data", "Faculty_Table_08272019.csv"))
print(len(faculty_table))
skill_table = pd.read_csv(os.path.join(DATA_DIR, "Skills_Data", "Skill_Table_06072019.csv"))
print(len(skill_table))

In [None]:
def title_swap(string):
    """Translate a raw Career Area / Occupation label into its display label.

    Labels that have no entry in the replacement table are returned unchanged.
    """
    replacements = {
        'Community and Social Services': 'Counseling and Religious Life',
        'Customer and Client Support': 'Online Support and University Information',
        'Hospitality, Food, and Tourism': 'Event Management and Hospitality',
        'Planning and Analysis': 'Analysis',
        'Curriculum and Instructional Designer / Developer': 'Curriculum and Instructional Designer',
        'Special Education Teacher': 'Accessibility and Disability Services',
        'Teaching Assistant': 'Faculty Support',
        'Tutor': 'Academic Tutor',
        'Clerical and Administrative': 'Administrative',
        'na': 'Uncategorized',
    }
    # Fall back to the input itself when there is no replacement.
    return replacements.get(string, string)
    
def val_year(year1, year2):
    """Return the two years as an (earlier, later) tuple.

    Raises:
        ValueError: if both years are the same.
    """
    if year1 > year2:
        # Given in reverse order: hand them back swapped.
        return (year2, year1)
    if year1 >= year2:
        # Not greater, but greater-or-equal: the two years are equal.
        raise ValueError('Years cannot be the same.')
    return (year1, year2)

def cat_validate(actual, desired_list):
    """Return True when `actual` is one of the entries of `desired_list`, else False."""
    is_wanted = actual in desired_list
    return is_wanted

In [None]:
# Frequency of every skill whose name contains "Health" (rows with a missing skill name are skipped).
skill_table.loc[skill_table['Skill Name'].str.contains('Health', na=False), 'Skill Name'].value_counts()

#  Policy & Research Bucket

In [None]:
# Occupation counts for the postings whose Job ID appears in `temp`.
# NOTE(review): `temp` is not assigned anywhere above this cell; it is only assigned further down
# in the "Begin Breakdowns Section" cell, so this cell fails on a fresh top-to-bottom run.
main_table.merge(temp, on='Job ID', how='inner')['Occupation'].value_counts()

In [None]:
# Skills listed on Policy Analyst postings (all years).
# NOTE(review): get_colocated_skills is defined in a later cell; that cell must be run first.
get_colocated_skills(main_table[main_table['Occupation']=='Policy Analyst'])

# Isolating the skills of Nurse Practitioners

In [None]:
# Every posting whose occupation is exactly 'Nurse Practitioner'.
nurse = main_table.loc[main_table['Occupation'] == 'Nurse Practitioner']

In [None]:
# Skills co-located with Nurse Practitioner postings in 2010.
# NOTE(review): get_colocated_skills is defined in a later cell; that cell must be run first.
get_colocated_skills(nurse, year=2010)

In [None]:
# Skills co-located with Nurse Practitioner postings in 2017.
# NOTE(review): get_colocated_skills is defined in a later cell; that cell must be run first.
get_colocated_skills(nurse, year=2017)

# Other Social Skills for Selection 

In [None]:
social_list = ['Social Work', 'Social Studies', 'Social Services', 'Social Services Industry Knowledge']

# Distinct Job IDs of postings that list at least one of the social skills.
mask = skill_table[skill_table['Skill Name'].isin(social_list)][['Job ID']]
mask = mask.drop_duplicates('Job ID')
# print(len(mask))

# Keep the merged result as a DataFrame: selecting ['Skill Name'] here and again on the next
# line indexed a Series by the label 'Skill Name', which raises KeyError.
table = skill_table.merge(mask, on='Job ID', how='inner')
table['Skill Name'].value_counts()

In [None]:
# Frequency of every skill whose name contains "Research" (rows with a missing skill name are skipped).
skill_table.loc[skill_table['Skill Name'].str.contains('Research', na=False), 'Skill Name'].value_counts()

# Begin Breakdowns Section 

In [None]:
# Build three skill buckets. Each bucket is the set of distinct Job IDs whose postings list
# at least one skill from the bucket's list.
social_list = ['Social Work', 'Social Studies', 'Social Services', 'Social Services Industry Knowledge']

social = skill_table[skill_table['Skill Name'].apply(cat_validate, args=(social_list,))][['Job ID']]
social = social.drop_duplicates('Job ID')
print("Number of jobs in Social Skill Bucket:")
print("\t" + str(len(social)))

policy_list = ['Policy Analysis', 'Policy Implementation', 'Policy Research', 'Policy Evaluation',
              'Policy Development', 'Public Policy Development', 'Policy Recommendation', 
               'Policy Establishment', 'Health Care Industry Knowledge', 'Health and Human Services']

policy = skill_table[skill_table['Skill Name'].apply(cat_validate, args=(policy_list,))][['Job ID']]
policy = policy.drop_duplicates('Job ID')
print("Number of jobs in Policy Skill Bucket:")
print("\t" + str(len(policy)))

research_list = ['Research', 'Qualitative Research', 'Quantitative Research', 'SPSS', 'SAS', 'STATA',
                 'Data Collection', 'Data Analysis', 'Data Management', 'Statistical Analysis', 'Regression Analysis']

research = skill_table[skill_table['Skill Name'].apply(cat_validate, args=(research_list,))][['Job ID']]
research = research.drop_duplicates('Job ID')
print("Number of jobs in Research Skill Bucket:")
print("\t" + str(len(research)))

print()

# Pairwise intersections of the buckets. `mask` accumulates their union, i.e. the Job IDs
# that fall in at least two of the three buckets.
temp = social.merge(policy, on='Job ID', how='inner')
temp = temp.drop_duplicates('Job ID')
print("Number of jobs in Social and Policy:")
print("\t" + str(len(temp)))

other = social.merge(research, on='Job ID', how='inner')
other = other.drop_duplicates('Job ID')
print("Number of jobs in Social and Research:")
print("\t" + str(len(other)))

mask = temp.merge(other, on='Job ID', how='outer')
mask = mask.drop_duplicates('Job ID')

# `temp` is reused here and ends the cell holding the Policy-and-Research intersection.
temp = policy.merge(research, on='Job ID', how='inner')
temp = temp.drop_duplicates('Job ID')
print("Number of jobs in Research and Policy:")
print("\t" + str(len(temp)))

mask = mask.merge(temp, on='Job ID', how='outer')
mask = mask.drop_duplicates('Job ID')

print()

# Communication Skills
# Teamwork / Collaboration
# Scheduling
# Project Management
# Supervisory Skills
# Budgeting
# Planning
# Grant Writing
# Data Management
# Quantitative Analysis

# to add another filter: | (skill_table['Skill Name']=='')

# print(len(mask))

# Remove postings from institutions whose name mentions nursing, medical, medicine or pharmacy.
# One alternation pattern replaces the four separate substring checks.
main = main_table[~main_table['IPEDS Institution Name'].str.contains('Nurs|Medical|Medicine|Pharm', na=False)]

table = main.merge(mask, on='Job ID', how='inner')

# Omit nurse and attorney occupations
table = table[~table['Occupation'].str.contains('Nurse|Attorney', na=False)]

print('Number of total Jobs:\n\t' + str(len(table)))

# Remove na jobs. Copy so the column assignments below act on an independent frame rather
# than on a slice of the previous one (avoids pandas' chained-assignment warning).
table = table[~(table['Occupation']=='na')].copy()

# Apply title swap
table['Career Area'] = table['Career Area'].apply(title_swap)
table['Occupation'] = table['Occupation'].apply(title_swap)

# Keep postings whose minimum education code is at least 18 (reported below as "MA or Above").
table = table[(table['Minimum EDU Requirements']>=18)]

print('Number of Jobs with MA or Above:\n\t' + str(len(table)))

# print('Number of total Jobs:\n\t', len(table))

# Split into non-faculty and faculty postings. `mask` is rebound to the Job IDs flagged as
# neither Faculty nor Post-Doctoral; rows of `table` with no match in it form table_fac.
mask = faculty_table[(faculty_table['Faculty']==0)&(faculty_table['Post-Doctoral']==0)]['Job ID']
table_non = table.merge(mask, on='Job ID', how='inner')
table_fac = table.merge(mask, on='Job ID', how='outer', indicator=True)
table_fac = table_fac[table_fac['_merge']=='left_only'].drop(columns='_merge')
print('Number of non-faculty Jobs:\n\t' + str(len(table_non)))
print('Number of faculty Jobs:\n\t' + str(len(table_fac)))

# Social Services and Social Work Only:
# Number of total Jobs:
# 	81817
# Number of Jobs with MA or Above:
# 	25126
# Number of non-faculty Jobs:
# 	17701

# Adding Policy Analysis, Policy Research, Social Studies, Policy Evaluation, and Policy Development
# Number of total Jobs:
# 	81817
# Number of Jobs with MA or Above:
# 	25126
# Number of non-faculty Jobs:
# 	18674

# Newest Isolation Method:
# Number of jobs in Social Skill Bucket:
# 	39044
# Number of jobs in Policy Skill Bucket:
# 	77379
# Number of jobs in Research Skill Bucket:
# 	1153855

# Number of jobs in Social and Policy:
# 	1280
# Number of jobs in Social and Research:
# 	10782
# Number of jobs in Research and Policy:
# 	38691

# 3776562
# Number of total Jobs:
# 	48136
# Number of Jobs with MA or Above:
# 	13452
# Number of non-faculty Jobs:
# 	9463
# Number of faculty Jobs:
# 	3989

In [None]:
# Skill frequencies across the postings currently held in `table`, shown as a one-column frame.
table[['Job ID']].merge(skill_table, on='Job ID', how='inner')['Skill Name'].value_counts().to_frame()

In [None]:
# Number of rows with a Job ID for each (Career Area, Occupation) pair.
display(table.groupby(['Career Area', 'Occupation'])[['Job ID']].count())

In [None]:
# Inspect the Librarian postings in `table` and the skills listed on them.
# NOTE(review): get_colocated_skills is defined in a later cell; that cell must be run first.
lib = table[table['Occupation']=='Librarian']
display(lib)
get_colocated_skills(lib)

# Social Policy Job Analysis

### The following visualizations are based on the space of jobs identified as being in the "Social Policy" sector based on a set of skills. Here are some examples of that skillspace:
##### Social Work
##### Social Services
##### Policy Analysis
##### Policy Research
##### Social Studies
##### Policy Evaluation
##### Policy Development
### All postings that request at least one of these skills are then used as the set of postings for further division. We are attempting to describe the Higher Education job space for those with PhDs in social policy or related fields. Therefore, the posting space is also limited to postings that require a master's degree or greater.

In [None]:
def get_categorical_sizes(df, category):
    """Count how often each value of `category` occurs and its rate per distinct job.

    Args:
        df: DataFrame with a 'Job ID' column and a `category` column.
        category: name of the column to tabulate.

    Returns:
        DataFrame with columns 'index' (the category value), 'count' (number of rows with
        that value) and 'inc' (count divided by the number of distinct Job IDs in `df`),
        ordered by descending count.
    """
    df_size = df['Job ID'].nunique()
    counts = df[category].value_counts()
    # Name the columns explicitly: the names produced by value_counts().reset_index()
    # changed in pandas 2.0, and get_growth / show_added merge and select on 'index'.
    cat = pd.DataFrame({'index': counts.index, 'count': counts.values})
    cat['inc'] = np.true_divide(cat['count'], df_size)
    return cat

def get_colocated_skills(df, year=0):
    """Tabulate the skills listed on the postings in `df`.

    Skills are looked up in the notebook-level `skill_table` by 'Job ID'.

    Args:
        df: DataFrame of postings with a 'Job ID' column (and a 'Year' column when `year` > 0).
        year: when greater than 0, restrict both `df` and `skill_table` to that year.

    Returns:
        DataFrame with columns 'index' (skill name), 'count' (matching skill rows) and
        'inc' (count divided by the number of distinct Job IDs in `df`), ordered by
        descending count.
    """
    if year > 0:
        df = df[df['Year']==year]
        skill_source = skill_table[skill_table['Year']==year]
    else:
        skill_source = skill_table

    job_count = df['Job ID'].nunique()
    skill_counts = df[['Job ID']].merge(
        skill_source, on='Job ID', how='inner')['Skill Name'].value_counts()
    # Name the columns explicitly: the names produced by value_counts().reset_index()
    # changed in pandas 2.0, and get_growth / show_added rely on a column called 'index'.
    co_loc = pd.DataFrame({'index': skill_counts.index, 'count': skill_counts.values})
    co_loc['inc'] = np.true_divide(co_loc['count'], job_count)
    return co_loc

def get_growth(df1, df2, category, cutoff = 30, min_rate=0.025):
    """Join two count/rate tables on 'index' and compute the relative change in rate.

    Args:
        df1: earlier table with columns 'index', 'count', 'inc'.
        df2: later table with the same columns.
        category: not used by this function.
        cutoff: minimum later-period count ('count_y') for a row to be kept.
        min_rate: minimum 'inc' a row needs in its own table to take part in the join.

    Returns:
        The outer-joined table (suffixes _x / _y, plus the '_merge' indicator) with a
        'growth' column equal to (inc_y - inc_x) / inc_x.
    """
    earlier = df1.loc[df1['inc'] >= min_rate]
    later = df2.loc[df2['inc'] >= min_rate]

    joined = earlier.merge(later, on='index', how='outer', indicator=True)
    joined = joined.loc[joined['count_y'] >= cutoff]

    relative_change = np.true_divide(joined['inc_y'] - joined['inc_x'], joined['inc_x'])
    return joined.assign(growth=relative_change)

def graph_categories(df, category, title, top=10):
    """Draw a bar chart of the 'inc' rate for the first `top` rows of `df`, then display those rows.

    Args:
        df: table whose first column holds the bar labels and whose 'inc' column holds the rates.
        category: label used (with an 's' appended) for the axis captions.
        title: chart title.
        top: number of leading rows to plot.
    """
    shown = df[:top]

    bar_heights = shown['inc'].values
    bar_labels = shown.iloc[:, 0].values
    positions = np.array(list(range(len(bar_labels))))

    plural = category + 's'

    plt.bar(positions, bar_heights, label='Incidence Rate')
    plt.xticks(positions, bar_labels, rotation='vertical')
    plt.xlabel(plural)
    plt.ylabel('Incidence Rate of\n' + plural + ' in Social Policy')
    plt.legend(loc='upper right')
    plt.title(title)

    # Show the plotted rows as a table before rendering the figure.
    display(shown)
    plt.show()

def graph_growth(df, year1, year2, category, title, min_rate=0.025, top=10):
    """Draw a bar chart of the 'growth' column for the first `top` rows with inc_y >= min_rate.

    Args:
        df: table whose first column holds the bar labels, with 'inc_y' and 'growth' columns.
        year1, year2: years used only in the y-axis caption.
        category: label used in the axis captions.
        title: chart title.
        min_rate: minimum 'inc_y' for a row to be plotted.
        top: number of leading rows (after the rate filter) to plot.
    """
    shown = df.loc[df['inc_y'] >= min_rate][:top]

    bar_heights = shown['growth'].values
    bar_labels = shown.iloc[:, 0].values
    positions = np.array(list(range(len(bar_labels))))

    plt.bar(positions, bar_heights, label='Growth rate')
    plt.xticks(positions, bar_labels, rotation='vertical')
    plt.xlabel(category + 's')
    plt.ylabel('Growth Rate of\n' + category + ' from ' + str(year1) + ' to ' + str(year2))
    plt.legend(loc='upper right')
    plt.title(title)

    plt.show()
    
def show_skill_details(df, year1, year2, title, name, cutoff=30, min_rate=0.025):
    """Chart the top skills in the later year, the fastest-growing skills, and newly added skills.

    Args:
        df: postings to analyse (passed to get_colocated_skills).
        year1, year2: the two years to compare; they are put in ascending order by val_year.
        title: printed heading, also used in the growth chart title.
        name: label for the group of postings, used in the chart title and the
            "Top Added Skills" heading.
        cutoff: minimum later-year count passed to get_growth.
        min_rate: minimum incidence rate passed to get_growth, graph_growth and show_added.

    Raises:
        ValueError: from val_year when both years are the same.
    """
    year1, year2 = val_year(year1, year2)

    print(title)
    skill_1 = get_colocated_skills(df, year1)
    skill_2 = get_colocated_skills(df, year2)
    # Use `name` in the chart title; it was hard-coded to "Faculty" even when this function
    # was called for the non-faculty or special-bucket postings.
    graph_categories(skill_2, 'Skill Name', 'Top 10 ' + name + ' Skills in ' + str(year2))
    grown = get_growth(skill_1, skill_2, 'Skill Name', cutoff=cutoff, min_rate=min_rate)
    grown = grown.sort_values('growth', ascending=False)
    graph_growth(grown, year1, year2, 'Skill Name', 'Fastest Growing ' + title, min_rate=min_rate)

    print('Top Added Skills in ' + name + ': ')
    show_added(grown, min_rate=min_rate)
    
def show_added(df, top=10, min_rate = 0.025):
    """Display the entries that appear only in the later table of a get_growth result.

    Args:
        df: table with '_merge', 'index', 'count_y' and 'inc_y' columns.
        top: maximum number of rows to display.
        min_rate: minimum 'inc_y' for a row to be displayed.
    """
    only_in_later = df['_merge'] == 'right_only'
    added = df.loc[only_in_later, ['index', 'count_y', 'inc_y']]
    added = added.loc[added['inc_y'] >= min_rate]
    display(added[:top])

In [None]:
# Categorical Breakdowns
def occupational_breakdowns(df, category, year1, year2, title, verbose=True, skill_2nd=''):
    """Compare the distribution of `category` between two years and compute percentage growth.

    Args:
        df: postings with 'Year', 'Job ID' and `category` columns.
        category: column to break down (for example 'Occupation').
        year1, year2: the two years to compare; they are put in ascending order by val_year.
        title: heading printed when `verbose` is true.
        verbose: when true, print the heading and chart the top categories of each year.
        skill_2nd: optional skill name; when non-empty, only postings that list this skill
            in the notebook-level `skill_table` are kept.

    Returns:
        DataFrame of categories present in both years with a later-year count of at least 30,
        with columns 'index', 'count_x', 'inc_x', 'count_y', 'inc_y' and 'growth' (percentage
        change in rate, rounded to 2 decimals), sorted by descending growth.

    Raises:
        ValueError: from val_year when both years are the same.
    """
    # Unpack both normalised years. The later year used to be assigned to a misspelled name
    # ('yaer2'), so year2 was left un-normalised when the arguments arrived in reverse order.
    year1, year2 = val_year(year1, year2)

    df = df[(df['Year']==year1)|(df['Year']==year2)]

    if(verbose):
        print(title)

    if (skill_2nd != ''):
        # One row per Job ID so the inner merge filters postings without duplicating them.
        mask = skill_table[skill_table['Skill Name']==skill_2nd][['Job ID']].drop_duplicates('Job ID')
        df = df.merge(mask, on='Job ID', how='inner')

    cat_1 = get_categorical_sizes(df[df['Year']==year1], category)

    cat_2 = get_categorical_sizes(df[df['Year']==year2], category)

    if(verbose):
        graph_categories(cat_1, category, title='Top ' + category + 's in ' + str(year1))
        graph_categories(cat_2, category, title='Top ' + category + 's in ' + str(year2))

    # Categories that only appear in the later year are displayed regardless of `verbose`.
    growth = get_growth(cat_1, cat_2, category, cutoff=5, min_rate=0.01)
    show_added(growth, min_rate=0.01)

    cat = cat_1.merge(cat_2, on='index', how='inner')
    # Copy so the 'growth' assignment acts on an independent frame, not a slice.
    cat = cat[cat['count_y']>=30].copy()
    cat['growth'] = round(np.true_divide(cat['inc_y']-cat['inc_x'], cat['inc_x'])*100, 2)
    cat = cat.sort_values(by='growth', ascending=False)
    cat = cat.reset_index(drop=True)
#     cat = cat.rename(columns={'count_x':'count_'+str(year1), 'inc_x':'inc_'+str(year1),
#                               'count_y':'count_'+str(year2), 'inc_y':'inc_'+str(year2)})
    return cat


In [None]:
# Occupation counts and rates over all non-faculty postings, then a bar chart of the
# leading rows (graph_categories plots its default top 10).
cat = get_categorical_sizes(table_non, 'Occupation')
graph_categories(cat, 'Occupation', title='Top 10 Non-Faculty Occupations in All Years')

In [None]:
# Breakdowns of Faculty Occupational Information
# The two comparison years are also read by the non-faculty breakdown cell that follows.
year1, year2 = 2010, 2017

cat = occupational_breakdowns(
    df=table_fac,
    category='Occupation',
    year1=year1,
    year2=year2,
    title='Growth of Faculty Social Policy Jobs from ' +  str(year1) + ' to ' + str(year2))
display(cat)
graph_growth(
    df=cat,
    year1=year1,
    year2=year2,
    category='Occupation',
    title='Growth of Faculty Social Policy Occupations from ' + str(year1) + ' to ' + str(year2),
    min_rate=0.01)

In [None]:
# Breakdowns of Non-Faculty Occupational Information
# Uses year1 / year2 as set in the faculty breakdown cell.
# The heading was missing a space after "from", which printed e.g. "from2010".
cat = occupational_breakdowns(table_non, 'Occupation', year1, year2, 
                              'Growth of Non-Faculty Social Policy Jobs from ' +  str(year1) + ' to ' + str(year2))
display(cat)
graph_growth(cat, year1, year2, 'Occupation', 'Growth of non-Faculty Social Policy Occupations from '
             + str(year1) + ' to ' + str(year2), min_rate = 0.01)

# Occupations to keep from the Fastest growing slide above:
## Dean of Academic Affairs
## Social / Human Services Manager (no medical)
## Policy Analyst
## Librarian
## Administrative Manager
## General Manager

In [None]:
# Skill charts for the faculty postings, then for the non-faculty postings, comparing the same two years.
year1, year2 = 2010, 2017

show_skill_details(
    df=table_fac,
    year1=year1,
    year2=year2,
    title='Co-Located Faculty Skills in ' + str(year1) + ' and ' + str(year2),
    name='Faculty')

show_skill_details(
    df=table_non,
    year1=year1,
    year2=year2,
    title='Co-Located Non-Faculty Skills in ' + str(year1) + ' and ' + str(year2),
    name='Non-Faculty')


In [None]:
# Break out Occupations of particular interest in the non_faculty sector

# Removed:
#     (table_non['Occupation']=='Librarian')|
#     (table_non['Occupation']=='Research Associate')|
#     (table_non['Occupation']=='Research Scientist')|
#     (table_non['Occupation']=='Human Reseources Specialist')|
#     (table_non['Occupation']=='Social Science Researcher')|
    
# Occupations that make up the "Special Bucket" of non-faculty postings analysed below.
occ_list = ['Administrator, Higher Education',
            'Dean of Academic Division',
            'Director of Student Affairs', 
            'Dean of Academic Affairs',
            'Program Director, Higher Education',
            'Administrative Manager', 
            'Dean of Students',
            'General Manager',
            'Policy Analyst',
            'Education Program Manager',
            'Academic / Guidance Counselor',
            'Associate Dean, Higher Education', 
            'Program Manager (General)',
            'Curriculum and Instructional Designer',
            'Admissions Director', 
            'Compliance Manager',
            'Fundraising Manager',
            'Student Affairs Administrator', 
            'Grants Manager / Administrator',
            'Social / Human Services Manager', 
            'Librarian']

# The bare string below is not assigned to anything; it has no effect on the cell.
"""
Administrator, Higher Education
Assocaite Dean, Higher Education
Dean of Academic Division
Program Director, Higher Education
Education Program Manager
Director of Student Affairs
Administrative Manager
"""

# Non-faculty postings whose occupation is one of the listed occupations.
occ_break = table_non[table_non['Occupation'].apply(cat_validate, args=(occ_list, ))]

In [None]:
# Special Bucket of 'Administrative/Academic' type jobs
# Years compared in the growth and skill-detail charts of this cell.
year1 = 2010
year2 = 2017

# Occupation sizes across all years in the special bucket.
category = 'Occupation'
cat = get_categorical_sizes(occ_break, category)
graph_categories(cat, category, title='Top ' + category + 's in Special Bucket')

# Occupation growth between the two years (verbose=False skips the heading and per-year charts).
growth = occupational_breakdowns(occ_break, category, year1, year2, '', verbose=False)
graph_growth(growth, year1, year2, 'Occupation', 
             'Growth of Special Bucket Occupations from ' + str(year1) + ' to ' + str(year2))

# Skills listed on the special-bucket postings, all years.
category = 'Skill Name'
co_loc = get_colocated_skills(occ_break)
graph_categories(co_loc, category, title='Top ' + category + 's in Special Bucket')

show_skill_details(occ_break, year1, year2, 
                   'Skills in Special Bucket from ' + str(year1) + ' to ' + str(year2), 'Special Bucket')


# Share of distinct institution names in main_table that have at least one special-bucket posting.
schools = occ_break['IPEDS Institution Name'].nunique()
total = main_table['IPEDS Institution Name'].nunique()
print('Selected Non-Faculty Occupations appear in ' + str(schools) + 
      ' different institutions across the sample.\n')
print('This represents ' + str(round((schools/total)*100, 2))
      + '% of the institutions within the dataset.\n')
# NOTE(review): the next sentence is a fixed string, not computed from the data; check it
# against the table displayed below whenever the inputs change.
print('Most of the postings come from 4-Year institutions with many coming from R1 Institutions.')

# Posting counts for each combination of the institution-type flag columns.
display(occ_break[['R1', 'Public', 'Private', '4-year', '2-year', 'Job ID']].groupby(['R1', 'Public', 'Private', '4-year', '2-year']).count())


In [None]:
# Identifying the Occupations within the Special Bucket with the highest rate of entry level positions
def find_entry_level(df, entry_level=1, cutoff=10):
    """Rank occupations by the share of experience-reporting postings that are entry level.

    A posting "reports experience" when 'Minimum Experience Requirements' is greater than 0,
    and is "entry level" when that value is also at most `entry_level`.

    Args:
        df: postings with 'Occupation', 'Job ID' and 'Minimum Experience Requirements' columns.
        entry_level: largest requested experience that still counts as entry level.
        cutoff: minimum number of experience-reporting postings an occupation needs to be kept.

    Returns:
        DataFrame with columns 'index' (occupation), 'count' (rows for that occupation),
        'Number Reporting Experience', 'Number at Entry Level' (both distinct Job ID counts,
        stored as floats) and 'Rate of Entry Level', sorted by descending rate.
    """
    # Name the columns explicitly: the names produced by value_counts().reset_index()
    # changed in pandas 2.0 and the code below needs a column called 'index'.
    occupation_counts = df['Occupation'].value_counts()
    exp = pd.DataFrame({'index': occupation_counts.index, 'count': occupation_counts.values})

    # One grouped pass per measure instead of re-filtering the whole frame for every occupation.
    reporting = df[df['Minimum Experience Requirements'] > 0]
    entry = reporting[reporting['Minimum Experience Requirements'] <= entry_level]
    reporting_jobs = reporting.groupby('Occupation')['Job ID'].nunique()
    entry_jobs = entry.groupby('Occupation')['Job ID'].nunique()

    # Occupations with no matching postings are absent from the grouped results: count them as 0.
    exp['Number Reporting Experience'] = exp['index'].map(reporting_jobs).fillna(0).astype(float)
    exp['Number at Entry Level'] = exp['index'].map(entry_jobs).fillna(0).astype(float)

    exp = exp[exp['Number Reporting Experience']>=cutoff].copy()
    exp['Rate of Entry Level'] = np.true_divide(exp['Number at Entry Level'], exp['Number Reporting Experience'])
    exp = exp.sort_values('Rate of Entry Level', ascending=False).reset_index(drop=True)
    return exp
    
# Entry-level rates for the special-bucket occupations, shown largest occupation first
# (`exp` itself stays sorted by rate as returned by find_entry_level).
exp = find_entry_level(occ_break)
display(exp.sort_values('count', ascending=False).reset_index(drop=True))

In [None]:
Potential Entry Level Positions:
    Policy analyst
                                14.7%
    Education Program Manager
                                7.41%
    Administrative manager
                                3.14%
    General manager
                                0.00%
    Director of student affairs
                                6.13%
    
Actual Top 5 Entry Levels:
    Policy analyst
                                14.7%
    Curriculum and Instructional Designer
                                11.8%
    Compliance Manager
                                11.7%
    Student Affairs Administrator
                                7.69%
    Education Program Manager
                                7.41%


In [None]:
# Division of Special Bucket with a Focus on "Entry Level" type positions
occ_list =['Librarian']
# occ_list = ['Policy Analyst', 'Education Program Manager', 'Administrative Manager',
#             'General Manager', 'Director of Student Affairs']
# occ_list = list(exp['index'][:12].values)

# Largest requested experience (in years) for a posting to be listed as entry level below.
# The heading used to say "<= 1 Year" while the filter used 3; both now read this value.
# NOTE(review): find_entry_level defaults to entry_level=1 — confirm which threshold is intended.
entry_level_years = 3

df = table_non[table_non['Occupation'].apply(cat_validate, args=(occ_list,))]

for occ in occ_list:
    entry = df[df['Occupation']==occ]
    num = (entry['Job ID'].nunique())
    print('#########################################')
    print(occ)
    print('\tNumber of Jobs: \t' + str(num))
    # Postings that request a positive amount of experience.
    entry_w = entry[entry['Minimum Experience Requirements']>0]
    num_exp = entry_w['Job ID'].nunique()
    print('\tNumber of Jobs w/ Exp:\t' + str(num_exp))
    print('\tNumber of Jobs w/o Exp:\t' + str(num - num_exp) + '\n')
    
    print('Breakdown based on Requested Years of Experience:', end='')
    levels = pd.DataFrame(entry_w[['Job ID', 'Minimum Experience Requirements']].groupby(
        ['Minimum Experience Requirements']).count()).rename(columns={'Job ID':'count'})
    levels['inc'] = np.true_divide(levels['count'], num_exp)
    display(levels)
    
    print('Top 10 Job Titles:')
    display(pd.DataFrame(entry['Job Title'].value_counts())[:10])
    
#     print('Top 10 Job Titles with Experience Information:')
#     display(pd.DataFrame(entry_w['Job Title'].value_counts())[:10])
    
    print('Top 10 Job Titles for Entry Level Poisitions: (Experience Requested <= '
          + str(entry_level_years) + ' Years)')
    display(pd.DataFrame(entry_w[entry_w['Minimum Experience Requirements']<=entry_level_years][
        'Job Title'].value_counts())[:10])
    print()
    
    

In [None]:
# Fastest / biggest added skills in the Experimental Bucket.
# `df` is the frame built in the previous cell (table_non restricted to that cell's occ_list).

year1 = 2010
year2 = 2017
category = 'Skill Name'

co_loc = get_colocated_skills(df)
graph_categories(co_loc, category, title='Top ' + category + 's in Experiemntal Bucket')

# NOTE(review): the heading passed below says "Special Bucket" although `df` is the
# experimental bucket — confirm whether the heading should be updated.
show_skill_details(df, year1, year2, 
                   'Skills in Special Bucket from ' + str(year1) + ' to ' + str(year2),
                   'Experiemntal Bucket', cutoff=10, min_rate=0.01)

In [None]:
# Entry-level rates for every occupation in the non-faculty table,
# using find_entry_level's default entry_level and cutoff.
find_entry_level(table_non)


In [None]:
# Exploration of Social / Human Services Manager
occ = 'Social / Human Services Manager'
shsm = occ_break[occ_break['Occupation']==occ]

# Count distinct postings by 'Job ID', as the other job counts in this notebook do.
# Counting distinct 'Job Title' values reported the number of titles, not the number of jobs.
num = shsm['Job ID'].nunique()
print('Number of Jobs ' + str(num))

display(pd.DataFrame(shsm['IPEDS Institution Name'].value_counts()[:10]))

# Split the postings by whether the institution name mentions "Nursing" or "Medical".
# NOTE(review): institutions whose name contains 'Nurs' or 'Medical' are already removed when
# `main` is built in the breakdowns cell, so `health` is expected to be empty when the
# notebook is run top to bottom.
health = shsm[shsm['IPEDS Institution Name'].str.contains('Nursing', na=False)|shsm['IPEDS Institution Name'].str.contains('Medical', na=False)]
non_health = shsm.drop(health.index)

display(get_colocated_skills(health)[:10])
display(get_colocated_skills(non_health)[:10])

In [None]:
# Clarification and Exploration

In [None]:
# Join every special-bucket posting to its skills (one row per posting/skill pair), then count
# the occupations of the rows whose skill is 'Positive Disposition'.
# NOTE(review): this rebinds the notebook-level `table`; later cells that read `table`
# receive this skill-joined frame, with several rows per Job ID.
table = occ_break.merge(skill_table[['Job ID', 'Skill Name']], on='Job ID', how='inner')
table[table['Skill Name']=='Positive Disposition']['Occupation'].value_counts()


In [None]:
# Special-bucket postings that list the skill 'Teaching'.
teach = skill_table[skill_table['Skill Name']=='Teaching'][['Job ID']].merge(occ_break, on='Job ID', how='inner')
teach

In [None]:
# Number of distinct institution names in the full main table.
main_table['IPEDS Institution Name'].nunique()

In [None]:
# Rows of `table` for Social / Human Services Managers.
# NOTE(review): this rebinds `social`, which earlier held the Social skill bucket's Job IDs.
# If the "Positive Disposition" cell above has been run, `table` is the skill-joined frame
# and each posting appears once per skill.
social = table[table['Occupation']=='Social / Human Services Manager']
social

In [None]:
# Skill frequencies for the Social / Human Services Manager postings.
# Reduce `social` to one row per Job ID before joining: when `social` comes from the
# skill-joined `table` it has one row per posting/skill pair, and joining those rows to
# skill_table again would multiply every skill count by the posting's number of skills.
s_skill = social[['Job ID']].drop_duplicates('Job ID').merge(skill_table, on='Job ID', how='inner')
s_skill['Skill Name'].value_counts()

In [None]:
# Dean of Academic Affairs postings, one row per Job ID.
# `table` may be the skill-joined frame (one row per posting/skill pair); without the
# de-duplication every value_counts() on `deans` would count a posting once per skill.
deans = table[table['Occupation']=='Dean of Academic Affairs'].drop_duplicates('Job ID')
display(deans['IPEDS Institution Name'].value_counts())


In [None]:
# Row counts of `deans` per Metropolitan Statistical Area.
deans['Metropolitan Statistical Area'].value_counts()

In [None]:
# Row counts of `deans` per BEA_Zone value.
deans['BEA_Zone'].value_counts()

In [None]:
# Row counts of `deans` per job title.
deans['Job Title'].value_counts()

In [None]:
# Number of rows with a Job ID for each (institution name, state) pair in the main table.
main_table[['Job ID', 'IPEDS Institution Name', 'State']].groupby(['IPEDS Institution Name', 'State']).count()

In [None]:
# 2017 postings from rows flagged 4-year, joined to their 2017 faculty_table rows.
# NOTE(review): this rebinds the notebook-level `table` and `fac`.
table = main_table[(main_table['Year']==2017)&(main_table['4-year']==1)]
fac = faculty_table[(faculty_table['Year']==2017)]

table = table.merge(fac, on='Job ID', how='inner')

In [None]:
total = len(table)
# Postings flagged neither Faculty nor Post-Doctoral, then postings flagged Faculty.
print(len(table[(table['Faculty']==0)&(table['Post-Doctoral']==0)]))
print(len(table[(table['Faculty']==1)]))
# Faculty postings flagged for physical/earth sciences or agricultural sciences and natural resources.
# NOTE(review): the variable is called `medicine` although neither flag column names a medical field.
medicine = table[((table['FS_Physical_sciences_and_earth_sciences']==1)|(table['Agricultural sciences and natural resources']==1))&(table['Faculty']==1)]
print(len(medicine))
# Each comparison needs its own parentheses: `&` binds tighter than `==`, so the previous
# `(table['Faculty']==0)&table['Post-Doctoral']==0` was evaluated as
# `((table['Faculty']==0) & table['Post-Doctoral']) == 0`.
science = table[(table['Career Area']=='Science and Research')&(table['Faculty']==0)&(table['Post-Doctoral']==0)]
print(len(science))

In [None]:
# 2016 postings from rows flagged R1, joined to the 2016 faculty_table rows flagged Faculty.
# NOTE(review): this rebinds the notebook-level `table` and `fac`.
table = main_table[(main_table['Year']==2016)&(main_table['R1']==1)]
fac = faculty_table[(faculty_table['Year']==2016)&(faculty_table['Faculty']==1)]

table = table.merge(fac, on='Job ID', how='inner')

In [None]:
# Row counts of the faculty postings overall and for each tenure flag column.
# `table` was already joined to Faculty==1 rows in the previous cell, so this filter
# re-applies the same condition.
fac = table[table['Faculty']==1]
print(len(fac))
ten = fac[fac['Tenured']==1]
print(len(ten))
track = fac[fac['Tenured_Track']==1]
print(len(track))
line = fac[fac['Tenure_Line']==1]
print(len(line))

In [None]:
# Non-null counts of every other column for each combination of the three tenure flags.
fac.groupby(['Tenure_Line', 'Tenured', 'Tenured_Track']).count()

In [None]:
# List the columns available in faculty_table.
faculty_table.columns