## Analyze departure rate first post-election quarters

This notebook loads federal payroll data, and uses that data to estimate the departure rates during the past three presidents' first post-election fiscal quarters — overall, by pay plan, by agency, by occupational category, by occupation, and by education.

In [1]:
import pandas as pd
from collections import OrderedDict
import sys
import os

## Load data

In [2]:
fwf_columns = OrderedDict([
    ('Pseudo ID', (0, 9)),
    ('Name', (9, 32)),
    ('File Date', (32, 40)),
    ('SubAgency', (40, 44)),
    ('Duty Station', (44, 53)),
    ('Age Range', (53, 59)),
    ('Education Level', (59, 61)),
    ('Pay Plan', (61, 63)),
    ('Grade', (63, 65)),
    ('LOS Level', (65, 71)),
    ('Occupation', (71, 75)),
    ('PATCO', (75, 76)),
    ('Adjusted Basic Pay', (76, 82)),
    ('Supervisory Status', (82, 83)),
    ('TOA', (83, 85)),
    ('Work Schedule', (85, 86)),
    ('NSFTP Indicator', (86, 87))
])

In [3]:
BASE_DATA_PATH = "../inputs/fedscope-data/"

In [4]:
def process_fwf(path):
    slices = fwf_columns.values()
    with open(BASE_DATA_PATH + path) as f:
        rows = []
        for line in f:
            row = [ line[start:end].strip() for start, end in slices ]
            rows.append(row)
        return pd.DataFrame(rows, columns=list(fwf_columns.keys()))\
            .drop_duplicates(subset=["Pseudo ID"])

In [5]:
employees = pd.concat([
    process_fwf("1973-09-to-2014-06/non-dod/status/Status_Non_DoD_2000_12.txt")\
        .assign(**{"President Elect": "Bush"}),
    
    process_fwf("1973-09-to-2014-06/non-dod/status/Status_Non_DoD_2001_03.txt")\
        .assign(**{"President Elect": "Bush"}),
    
    process_fwf("1973-09-to-2014-06/non-dod/status/Status_Non_DoD_2008_12.txt")\
        .assign(**{"President Elect": "Obama"}),
    
    process_fwf("1973-09-to-2014-06/non-dod/status/Status_Non_DoD_2009_03.txt")\
        .assign(**{"President Elect": "Obama"}),
    
    pd.read_csv(
        BASE_DATA_PATH + "2016-12-to-2017-03/non-dod/status/Non-DoD FOIA 2017-04762 201612.txt",
        sep=";",
        low_memory=False
    ).assign(**{
        "President Elect": "Trump",
        "Name": lambda x: x[[ "Last Name", "First Name" ]].fillna("").apply(",".join, axis=1)
    }),
    
    pd.read_csv(
        BASE_DATA_PATH + "2016-12-to-2017-03/non-dod/status/Non-DoD FOIA 2017-04762 201703.txt",
        sep=";",
        low_memory=False
    ).assign(**{
        "President Elect": "Trump",
        "Name": lambda x: x[[ "Last Name", "First Name" ]].fillna("").apply(",".join, axis=1)
    }),
], axis=0)

In [6]:
employees["File Date"] = employees["File Date"].astype(str).str.slice(0, 6).astype("category")

In [7]:
CODED_COLS = [
    "SubAgency",
    "Work Schedule",
    "Education Level",
    "Pay Plan",
    "Grade",
    "NSFTP Indicator",
    "Supervisory Status",
    "TOA"
]

In [8]:
for col in CODED_COLS:
    employees[col] = employees[col].apply(lambda x: x.split("-")[0])

In [9]:
cateogory_cols = [
    "File Date",
    "President Elect",
    "SubAgency",
    "Age Range",
    "Education Level",
    "Pay Plan",
    "Grade",
    "LOS Level",
    "Occupation",
    "PATCO",
    "Supervisory Status",
    "TOA",
    "Work Schedule",
    "NSFTP Indicator",
]

In [10]:
for col in employees.columns:
    if col in cateogory_cols:
        employees[col] = employees[col].astype("category")

In [11]:
employees.dtypes

Adjusted Basic Pay      object
Age Range             category
Agency                  object
Duty Station            object
Education Level       category
File Date             category
First Name              object
Grade                 category
LOS Level             category
Last Name               object
NSFTP Indicator       category
Name                    object
Occupation            category
PATCO                 category
Pay Plan              category
President Elect       category
Pseudo ID               object
State                   object
SubAgency             category
Supervisory Status    category
TOA                   category
Work Schedule         category
YSD Range               object
dtype: object

### Derive name-is-provided, agency, and education columns

In [12]:
employees["Name Is Provided"] = ~employees["Name"].str.contains("NAME WITHHELD|NAME UNKNOWN", na=True)

In [13]:
employees[
    employees["Name Is Provided"] == True
]["Name"].value_counts().head(10)

SMITH,MICHAEL       298
JOHNSON,MICHAEL     275
SMITH,DAVID         273
SMITH,JAMES         268
SMITH,ROBERT        220
BROWN,MICHAEL       212
WILLIAMS,MICHAEL    212
JOHNSON,DAVID       202
JOHNSON,JAMES       186
WILLIAMS,JAMES      184
Name: Name, dtype: int64

In [14]:
employees["Agency"] = employees["SubAgency"].str.slice(0, 2).astype("category")

In [15]:
def categorize_education(ed_code):
    if "*" in ed_code: return None
    if ed_code < "07": return "00-No college"
    if ed_code < "13": return "01-Some college"
    if ed_code == "13": return "02-Bachelor's degree only"
    if ed_code < "21": return "03-Post-bachelor's degree"
    if ed_code >= "21": return "04-Doctorate"
    raise Exception("Don't recognize educational code")

In [16]:
employees["Education Category"] = employees["Education Level"]\
    .apply(categorize_education).astype("category")

In [17]:
employees["Has College Degree"] = employees["Education Category"].astype(str) >= "02-Bachelor's degree only"

## Flag employees with unique names

In [18]:
def determine_uniqueness(names):
    counts = names.value_counts()
    df = pd.DataFrame({
        "Name Is Unique": counts == 1
    })
    df.index.name = "Name"
    return df

In [19]:
def append_uniqueness(df):
    quarterly_uniqueness = df\
        .groupby("File Date")["Name"].apply(determine_uniqueness)\
        .reset_index()
    return pd.merge(
        df,
        quarterly_uniqueness,
        on=[ "File Date", "Name" ],
        how="left",
    )

## Select only named, non-seasonal, full-time, permanent employees

In [20]:
employees[
    employees["NSFTP Indicator"] == "1"
].groupby([
    "Name Is Provided",
    "File Date",
]).size().unstack().T.assign(total=lambda x: x.sum(axis=1))

Name Is Provided,False,True,total
File Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
200012,330904,582843,913747
200103,331130,583408,914538
200812,421907,651334,1073241
200903,424616,656482,1081098
201612,298255,900370,1198625
201703,297526,900955,1198481


In [21]:
named_nsftp_employees = employees[
    (employees["NSFTP Indicator"] == "1") &
    (employees["Name Is Provided"] == True)
].pipe(append_uniqueness)

### Approx. 25% of recent names are ambiguous in recent data

Before mid-2014, middle initials were often included in the data, resulting in a smaller proportion of ambiguous names. The shift happened toward the end of the Obama administration, and so isn't reflected in the rates below for the quarters after Obama's (2008) or Bush's (2000) initial elections.

In [22]:
named_nsftp_employees\
    .groupby("President Elect")["Name Is Unique"].mean()

President Elect
Bush     0.952351
Obama    0.951326
Trump    0.753645
Name: Name Is Unique, dtype: float64

## Compare pre-inauguration December to post-inauguration March employees

In [23]:
def append_whether_found(q1, q2):
    q1_copy = q1.copy()

    # Determine if name/Pseudo ID appears in the March data
    q1_copy["Name Found Next Quarter"] = (
        q1_copy["Name"].isin(q2["Name"].unique())
    ).astype(int)
    
    q1_copy["Pseudo ID Found Next Quarter"] = (
        q1_copy["Pseudo ID"].isin(q2["Pseudo ID"].unique())
    ).astype(int)
    
    q1_copy.loc[
        q1_copy["Pseudo ID"].isnull(),
        "Pseudo ID Found Next Quarter"
    ] = None

    return q1_copy

In [24]:
PREVIEW_COLUMNS = [
    "SubAgency",
    "Grade",
    "Name Is Unique",
    "Name Found Next Quarter",
    "Pseudo ID Found Next Quarter"
]

In [25]:
trump_findings = append_whether_found(
    named_nsftp_employees[
        named_nsftp_employees["File Date"] == "201612"
    ], named_nsftp_employees[
        named_nsftp_employees["File Date"] == "201703"
    ]
)
trump_findings.head()[PREVIEW_COLUMNS]

Unnamed: 0,SubAgency,Grade,Name Is Unique,Name Found Next Quarter,Pseudo ID Found Next Quarter
2474067,VALA,5,True,1,
2474068,VALA,9,True,1,
2474069,VALA,11,False,1,
2474070,VALA,13,True,1,
2474071,VALA,9,True,1,


In [26]:
obama_findings = append_whether_found(
    named_nsftp_employees[
        named_nsftp_employees["File Date"] == "200812"
    ], named_nsftp_employees[
        named_nsftp_employees["File Date"] == "200903"
    ]
)
obama_findings.head()[PREVIEW_COLUMNS]

Unnamed: 0,SubAgency,Grade,Name Is Unique,Name Found Next Quarter,Pseudo ID Found Next Quarter
1166251,AB00,15,True,1,1.0
1166252,AB00,12,True,1,1.0
1166253,AB00,9,True,1,1.0
1166254,AB00,14,True,1,1.0
1166255,AB00,12,True,1,1.0


In [27]:
bush_findings = append_whether_found(
    named_nsftp_employees[
        named_nsftp_employees["File Date"] == "200012"
    ], named_nsftp_employees[
        named_nsftp_employees["File Date"] == "200103"
    ]
)
bush_findings.head()[PREVIEW_COLUMNS]

Unnamed: 0,SubAgency,Grade,Name Is Unique,Name Found Next Quarter,Pseudo ID Found Next Quarter
0,AB00,13,True,1,1.0
1,AB00,11,True,1,1.0
2,AB00,12,True,1,1.0
3,AB00,11,True,1,1.0
4,AB00,11,True,1,1.0


In [28]:
all_findings = pd.concat([
    bush_findings,
    obama_findings,
    trump_findings
], axis=0)

In [29]:
print("{:,d}".format(len(all_findings)))

2,134,547


## Select only employees with unique names that quarter, and who weren't political appointees

... where political appointees are identified as those with a "type of appointment" (TOA) of "55-Senior Executive Service - Non-Career". (More than 90% of political appointees departed during the timeframes in question.)

In [30]:
1 - all_findings[
    (all_findings["Name Is Unique"] == True)
].assign(is_political_appointee=lambda x: x["TOA"] == "55")\
    .groupby("is_political_appointee")["Name Found Next Quarter"].mean()

is_political_appointee
False    0.019367
True     0.909353
Name: Name Found Next Quarter, dtype: float64

In [31]:
unique_name_findings = all_findings[
    (all_findings["Name Is Unique"] == True) &
    (all_findings["TOA"] != "55")
]

In [32]:
unique_name_findings["President Elect"].value_counts().sort_index()

Bush     554607
Obama    619198
Trump    677676
Name: President Elect, dtype: int64

In [33]:
def classify_employees(df):
    counts = df.groupby("Name Found Next Quarter")\
        .size()\
        .rename(index={
            1: "found",
            0: "not_found",
        }).loc[["found", "not_found"]].fillna(0)
    counts["sample_size"] = counts[[ "found", "not_found" ]].sum()
    counts["prop_missing"] = counts["not_found"] / counts["sample_size"]
    return counts

In [34]:
def save(df, *args, **kwargs):
    df.to_csv(*args, **kwargs)
    return df

In [35]:
def summarize_prop_missing(df):
    if type(df.index) == pd.MultiIndex:
        comparison = df["prop_missing"].unstack()
        comparison.columns = comparison.columns.astype(str)
        return comparison
    else:
        return df["prop_missing"].to_frame("prop_missing")

# Estimate turnover rates

### Overall, by president elect

In [36]:
turnover_rate_overall = unique_name_findings.groupby([
    "President Elect",
]).apply(classify_employees)
turnover_rate_overall

Name Found Next Quarter,found,not_found,sample_size,prop_missing
President Elect,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bush,544870.0,9737.0,554607.0,0.017557
Obama,608003.0,11195.0,619198.0,0.01808
Trump,662750.0,14926.0,677676.0,0.022025


In [37]:
turnover_rate_overall\
    .pipe(summarize_prop_missing)\
    .pipe(save, "../outputs/turnover-overall.csv")

Unnamed: 0_level_0,prop_missing
President Elect,Unnamed: 1_level_1
Bush,0.017557
Obama,0.01808
Trump,0.022025


### By pay plan

In [38]:
pay_plans = pd.read_csv("../inputs/plan-codes.csv", sep="|").set_index("plan_code")["plan_name"]
pay_plans.head()

plan_code
AA    ADMINISTRATIVE APPEALS JUDGES           
AC    GAO ADMINISTRATIVE POSITIONS            
AD    ADMINISTRATIVELY DETERMINED RATES, NOT E
AE    SMITHSONIAN UNCLASSIFIED NON-FEDERAL PO 
AF    AMERICAN FAMILY MEMBERS, PL 96-465, SEC 
Name: plan_name, dtype: object

In [39]:
turnover_by_pay_plan = unique_name_findings.groupby([
    "Pay Plan",
    "President Elect",
]).apply(classify_employees)    \
    .unstack()\
    .pipe(lambda x: x[x[("sample_size", "Trump")] >= 3000])\
    .stack()
turnover_by_pay_plan

Unnamed: 0_level_0,Name Found Next Quarter,found,not_found,sample_size,prop_missing
Pay Plan,President Elect,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AT,Bush,16925.0,137.0,17062.0,0.00803
AT,Obama,16313.0,268.0,16581.0,0.016163
AT,Trump,12801.0,215.0,13016.0,0.016518
CG,Bush,4594.0,85.0,4679.0,0.018166
CG,Obama,3098.0,64.0,3162.0,0.02024
CG,Trump,3421.0,72.0,3493.0,0.020613
ES,Bush,3440.0,115.0,3555.0,0.032349
ES,Obama,3511.0,90.0,3601.0,0.024993
ES,Trump,3818.0,120.0,3938.0,0.030472
FG,Bush,9057.0,145.0,9202.0,0.015757


In [40]:
turnover_by_pay_plan\
    .pipe(summarize_prop_missing)\
    .sort_values("Trump", ascending=False)\
    .join(pay_plans)\
    .pipe(save, "../outputs/turnover-by-pay-plan.csv")

Unnamed: 0_level_0,Bush,Obama,Trump,plan_name
Pay Plan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
WG,0.018193,0.024461,0.032761,NONSUPERVISORY PAY SCHEDULES - FEDERAL W
ES,0.032349,0.024993,0.030472,SENIOR EXECUTIVE SERVICE
GL,,0.017179,0.025912,GS EMP GRADES 3-10 PAID LEO SPECIAL RATE
VN,0.019862,0.017947,0.023505,NURSING
VM,0.015921,0.017307,0.022975,MEDICAL AND DENTAL
WS,0.014561,0.021366,0.022514,SUPERVISORY PAY SCHEDULES - FEDERAL WAGE
SV,,0.007063,0.021846,TRANS SECURITY ADMIN EMP OTHER THAN EXEC
GS,0.016975,0.017605,0.021556,GENERAL SCHEDULE
CG,0.018166,0.02024,0.020613,CORPORATE GRADED
FG,0.015757,0.013315,0.016749,SIMILAR TO THE GENERAL SCHEDULE


### By (large) agency

In [41]:
agency_names = pd.read_csv("../inputs/DTagy.txt")\
    .rename(columns={"AGYT": "Agency Name" })\
    .groupby("AGY")["Agency Name"].first()
agency_names.head()

AGY
AA    AA-ADMINISTRATIVE CONFERENCE OF THE UNITED STATES
AB              AB-AMERICAN BATTLE MONUMENTS COMMISSION
AF                       AF-DEPARTMENT OF THE AIR FORCE
AG                         AG-DEPARTMENT OF AGRICULTURE
AH    AH-NATIONAL FOUNDATION ON THE ARTS AND THE HUM...
Name: Agency Name, dtype: object

In [42]:
unique_name_findings.groupby([
    "Agency",
    "President Elect",
]).apply(classify_employees)\
    .unstack()\
    .pipe(lambda x: x[x[("sample_size", "Trump")] >= 5000])\
    .stack()\
    .pipe(summarize_prop_missing)\
    .join(agency_names)\
    .sort_values("Trump", ascending=False)\
    .pipe(save, "../outputs/turnover-by-agency.csv")

Unnamed: 0_level_0,Bush,Obama,Trump,Agency Name
Agency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DN,0.013306,0.018637,0.02721,DN-DEPARTMENT OF ENERGY
DJ,0.015755,0.014509,0.026065,DJ-DEPARTMENT OF JUSTICE
IN,0.016039,0.021097,0.0257,IN-DEPARTMENT OF THE INTERIOR
ST,0.02131,0.013694,0.024631,ST-DEPARTMENT OF STATE
HU,0.011835,0.017549,0.024487,HU-DEPARTMENT OF HOUSING AND URBAN DEVELOPMENT
HS,,0.013504,0.023925,HS-DEPARTMENT OF HOMELAND SECURITY
VA,0.019486,0.018274,0.023474,VA-DEPARTMENT OF VETERANS AFFAIRS
AG,0.016602,0.022509,0.02319,AG-DEPARTMENT OF AGRICULTURE
DL,0.019744,0.018868,0.022321,DL-DEPARTMENT OF LABOR
NN,0.018977,0.017532,0.02145,NN-NATIONAL AERONAUTICS AND SPACE ADMINISTRATION


### By PATCOB occupation classification

In [43]:
turnover_rate_by_patco = unique_name_findings.groupby([
    "PATCO",
    "President Elect",
]).apply(classify_employees)
turnover_rate_by_patco

Unnamed: 0_level_0,Name Found Next Quarter,found,not_found,sample_size,prop_missing
PATCO,President Elect,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
*,Bush,70.0,2.0,72.0,0.027778
*,Obama,106.0,2.0,108.0,0.018519
*,Trump,93.0,3.0,96.0,0.03125
A,Bush,186494.0,2870.0,189364.0,0.015156
A,Obama,217929.0,3776.0,221705.0,0.017032
A,Trump,246245.0,5098.0,251343.0,0.020283
B,Bush,38276.0,695.0,38971.0,0.017834
B,Obama,36358.0,907.0,37265.0,0.024339
B,Trump,32393.0,1030.0,33423.0,0.030817
C,Bush,39559.0,823.0,40382.0,0.02038


In [44]:
turnover_rate_by_patco\
    .pipe(summarize_prop_missing).loc[list("PATCB")]\
    .pipe(save, "../outputs/turnover-by-patco.csv")

President Elect,Bush,Obama,Trump
PATCO,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
P,0.017257,0.015757,0.019628
A,0.015156,0.017032,0.020283
T,0.018052,0.018945,0.024461
C,0.02038,0.019714,0.029211
B,0.017834,0.024339,0.030817


### By occupationn, among "professional" employees

In [45]:
occupation_names = pd.read_csv("../inputs/DTocc.txt")\
    .rename(columns={"OCCT": "Occupation Name" })\
    .set_index("OCC")["Occupation Name"]
occupation_names.head()

OCC
0006      0006-CORRECTIONAL INSTITUTION ADMINISTRATION
0007                         0007-CORRECTIONAL OFFICER
0017                            0017-EXPLOSIVES SAFETY
0018    0018-SAFETY AND OCCUPATIONAL HEALTH MANAGEMENT
0019                            0019-SAFETY TECHNICIAN
Name: Occupation Name, dtype: object

In [46]:
unique_name_findings[
    unique_name_findings["PATCO"] == "P"
].pipe(lambda x: x.groupby([
    "Occupation",
    "President Elect",
]).apply(classify_employees)).unstack()\
    .pipe(lambda x: x[x[("sample_size", "Trump")] >= 5000])\
    .stack()\
    .pipe(summarize_prop_missing)\
    .join(occupation_names)\
    .sort_values("Trump", ascending=False)\
    .pipe(save, "../outputs/turnover-professionals-by-occupation.csv")

Unnamed: 0_level_0,Bush,Obama,Trump,Occupation Name
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
610,0.021016,0.018472,0.023743,0610-NURSE
602,0.01892,0.017948,0.022806,0602-MEDICAL OFFICER
801,0.013557,0.012457,0.021805,0801-GENERAL ENGINEERING
1102,0.01826,0.016176,0.020982,1102-CONTRACTING
905,0.019886,0.012693,0.020264,0905-GENERAL ATTORNEY
101,0.018032,0.016361,0.01973,0101-SOCIAL SCIENCE
185,0.016812,0.013959,0.018444,0185-SOCIAL WORK
601,0.023667,0.011406,0.018184,0601-GENERAL HEALTH SCIENCE
401,0.015156,0.017075,0.015739,0401-GENERAL NATURAL RESOURCES MANAGEMENT AND ...
510,0.014094,0.012831,0.014928,0510-ACCOUNTING


In [47]:
unique_name_findings[
    unique_name_findings["Occupation"] == "0905"
].pipe(lambda x: x.groupby([
    "Agency",
    "President Elect",
]).apply(classify_employees))\
    .unstack()\
    .pipe(lambda x: x[x[("sample_size", "Trump")] >= 1000])\
    .stack()\
        .pipe(summarize_prop_missing)\
    .join(agency_names)\
    .sort_values("Trump", ascending=False)\
    .pipe(save, "../outputs/turnover-attorneys-by-agency.csv")

Unnamed: 0_level_0,Bush,Obama,Trump,Agency Name
Agency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DJ,0.021247,0.014255,0.025592,DJ-DEPARTMENT OF JUSTICE
HS,,0.009524,0.01963,HS-DEPARTMENT OF HOMELAND SECURITY
SE,0.015842,0.012739,0.017796,SE-SECURITIES AND EXCHANGE COMMISSION
SZ,0.012729,0.008796,0.015343,SZ-SOCIAL SECURITY ADMINISTRATION


### By education level

In [48]:
turnover_rate_by_education = unique_name_findings.groupby([
    "Education Category",
    "President Elect",
]).apply(classify_employees)

turnover_rate_by_education

Unnamed: 0_level_0,Name Found Next Quarter,found,not_found,sample_size,prop_missing
Education Category,President Elect,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
00-No college,Bush,149411.0,2682.0,152093.0,0.017634
00-No college,Obama,147054.0,3070.0,150124.0,0.02045
00-No college,Trump,140280.0,3454.0,143734.0,0.024031
01-Some college,Bush,133193.0,2429.0,135622.0,0.01791
01-Some college,Obama,137288.0,2782.0,140070.0,0.019861
01-Some college,Trump,133356.0,3541.0,136897.0,0.025866
02-Bachelor's degree only,Bush,133451.0,2102.0,135553.0,0.015507
02-Bachelor's degree only,Obama,163549.0,2682.0,166231.0,0.016134
02-Bachelor's degree only,Trump,185131.0,3689.0,188820.0,0.019537
03-Post-bachelor's degree,Bush,108266.0,2152.0,110418.0,0.01949


In [49]:
turnover_rate_by_education\
    .pipe(summarize_prop_missing)\
    .pipe(save, "../outputs/turnover-by-education.csv")

President Elect,Bush,Obama,Trump
Education Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
00-No college,0.017634,0.02045,0.024031
01-Some college,0.01791,0.019861,0.025866
02-Bachelor's degree only,0.015507,0.016134,0.019537
03-Post-bachelor's degree,0.01949,0.016743,0.020826
04-Doctorate,0.017326,0.014326,0.017807


In [50]:
unique_name_findings.groupby([
    "Has College Degree",
    "President Elect",
]).apply(classify_employees)\
    .pipe(summarize_prop_missing)\
    .pipe(save, "../outputs/turnover-by-college-degree.csv")

President Elect,Bush,Obama,Trump
Has College Degree,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,0.017764,0.020166,0.024926
True,0.017333,0.01624,0.019975


---

---

---