In [11]:
import pandas as pd
from collections import OrderedDict
import os
import statsmodels

In [12]:
# The "dynamic" and "status" OPM extracts live side by side under the same
# non-DoD data folder (relative path); only the final path component differs.
opm_dynamic_dir, opm_status_dir = (
    os.path.join(
        '..', 'rawdata', 'opm-federal-employment-data', 'data',
        '1973-09-to-2014-06', 'non-dod', extract_kind,
    )
    for extract_kind in ('dynamic', 'status')
)

In [13]:
# Fixed-width layout of the OPM "status" files: column name -> (start, end)
# character offsets. Insertion order is the left-to-right order of the fields
# in each record, and each field starts where the previous one ends.
opm_status_fwf_columns = OrderedDict()
opm_status_fwf_columns['Pseudo-ID'] = (0, 9)
opm_status_fwf_columns['Employee Name'] = (9, 32)
opm_status_fwf_columns['File Date (yyyymmdd)'] = (32, 40)
opm_status_fwf_columns['Agency/Subelement'] = (40, 44)
opm_status_fwf_columns['Duty Station'] = (44, 53)
opm_status_fwf_columns['Age Range'] = (53, 59)
opm_status_fwf_columns['Education Level'] = (59, 61)
opm_status_fwf_columns['Pay Plan'] = (61, 63)
opm_status_fwf_columns['Grade'] = (63, 65)
opm_status_fwf_columns['LOS Level'] = (65, 71)
opm_status_fwf_columns['Occupation'] = (71, 75)
opm_status_fwf_columns['Occupational Category (PATCO)'] = (75, 76)
opm_status_fwf_columns['Adjusted Basic Pay'] = (76, 82)
opm_status_fwf_columns['Supervisory Status'] = (82, 83)
opm_status_fwf_columns['Type of Appointment'] = (83, 85)
opm_status_fwf_columns['Work Schedule'] = (85, 86)
opm_status_fwf_columns['NSFTP Indicator'] = (86, 87)

In [14]:
# Fixed-width layout of the OPM "dynamic" (accession/separation) files:
# column name -> (start, end) character offsets, listed in the left-to-right
# order of the fields in each record. Column names shared with the status
# layout are spelled identically so the two frames line up on those columns.
opm_dynamic_fwf_columns = OrderedDict([
    ('Pseudo-ID', (0, 9)),
    ('Employee Name', (9, 32)),
    ('Agency/Subelement', (32, 36)),
    ('Accession/Separation Indicator', (36, 38)),
    ('Effective Date', (38, 46)),
    ('Age', (46, 52)),
    ('Pay Plan', (52, 54)),
    ('Grade', (54, 56)),
    ('LOS Level', (56, 62)),
    ('Duty Station', (62, 71)),
    ('Occupation', (71, 75)),
    ('Occupational Category (PATCO)', (75, 76)),  # was misspelled "Cateogy"
    ('Adjusted Basic Pay', (76, 82)),
    ('Type of Appointment', (82, 84)),
    ('Work Schedule', (84, 85))
])

In [15]:
# Inclusive range of file dates to load, written as 'YYYY-MM' strings.
start_date, end_date = '1982-03', '1982-06'

In [16]:
def read_opm_dynamic_fwf(start_date, end_date, path, fwf_dict):
    """Load the fixed-width files in ``path`` dated within a range into one frame.

    A file is selected when the first seven characters of its name parse as a
    date that falls between ``start_date`` and ``end_date`` (both inclusive).
    Selected files are read in chronological order and stacked row-wise.

    Parameters
    ----------
    start_date, end_date : str or datetime-like
        Bounds of the date range; anything ``pd.to_datetime`` accepts.
    path : str
        Directory containing the fixed-width files.
    fwf_dict : mapping
        Ordered mapping of column name -> ``(start, end)`` character offsets.
        Its keys are passed to ``pd.read_fwf`` as ``names`` and its values as
        ``colspecs``.

    Returns
    -------
    pandas.DataFrame
        Rows of every selected file, oldest file first. Each file's own row
        index is kept (the index is not renumbered across files). If no file
        matches, an empty frame with the columns of ``fwf_dict`` is returned.
    """
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # Collect (date, file name) pairs for the files inside the requested range.
    # Listing is sorted first so files sharing a date load in a stable order.
    dated_files = []
    for file_name in sorted(os.listdir(path)):
        if not os.path.isfile(os.path.join(path, file_name)):
            continue  # sub-directories cannot be read as data files
        try:
            file_date = pd.to_datetime(file_name[0:7])
        except (ValueError, TypeError):
            continue  # name does not start with a date (README, .DS_Store, ...)
        if start_date <= file_date <= end_date:
            dated_files.append((file_date, file_name))

    dated_files.sort(key=lambda pair: pair[0])  # chronological order

    column_names = list(fwf_dict.keys())
    column_specs = list(fwf_dict.values())

    if not dated_files:
        return pd.DataFrame(columns=column_names)

    # Read every file with the caller-supplied layout and concatenate once;
    # DataFrame.append was removed in pandas 2.0 and was quadratic in a loop.
    frames = [
        pd.read_fwf(os.path.join(path, file_name), colspecs=column_specs, names=column_names)
        for _, file_name in dated_files
    ]
    return pd.concat(frames)