In [2]:
import pandas as pd
import os

In [None]:
# Root of the local poseidon checkout that holds the data/ tree used below.
# Set the POSEIDON_DIR environment variable to run this notebook on another
# machine; the original location stays as the fallback so existing runs are
# unchanged.
dir_path = os.environ.get('POSEIDON_DIR', '/Users/andrellbower/Code/sd_airflow/poseidon/')

In [None]:
# Collects the code/description lookup frames, in the order the cells below
# append them. Later cells address them by position, so the order matters.
codes_dataframes = list()

# Chart of Accounts workbook that the following cells read sheet by sheet.
accounts_xlsx = os.path.join(
    dir_path,
    'data/base/budget/chart_of_accounts.xlsx',
)

Open the Chart of Accounts Excel workbook and read each sheet twice: once to build a code-to-description subset for joining, and once to build a subset with renamed columns for publishing as a reference dataset.

#### Funds codes

In [None]:
# Pull in the tab with Fund numbers.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and later removed.
funds_file = pd.read_excel(accounts_xlsx, sheet_name='Funds')
funds_out_fname = os.path.join(dir_path, 'data/temp/budget_reference_funds_datasd.csv')

# Create subset with just the code and one descriptor column. The descriptor is
# taken by position (two columns to the left of "(code)"), expected to be
# Fund Type -- NOTE(review): confirm against the sheet layout.
code_index = funds_file.columns.get_loc("(code)")
name_index = code_index - 2
# .copy() makes this an independent frame, so the column assignments below do
# not trigger SettingWithCopyWarning or touch funds_file.
xlsx_subset = funds_file.iloc[:, [name_index, code_index]].copy()
xlsx_subset.columns = ['description', 'code']
# Codes are compared as strings when the budget files are merged later.
xlsx_subset['code'] = xlsx_subset['code'].astype(str)
codes_dataframes.append(xlsx_subset)

# Create subset with all relevant columns for the publishable dataset.
xlsx_subset_2 = funds_file[['Fund Type', 'Funds', '(code)']].copy()
xlsx_subset_2['(code)'] = xlsx_subset_2['(code)'].astype(str)
xlsx_subset_2.columns = ['fund_type', 'fund_name', 'fund_number']
xlsx_subset_2.to_csv(funds_out_fname, index=False, encoding='utf-8')

#### Project codes

In [None]:
# Pull in the tab with asset owning department.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and later removed.
depts_file = pd.read_excel(accounts_xlsx, sheet_name='Asset Owning Department')
# Make a subset that is only the code and one descriptor column (taken by
# position, two columns to the left of "(code)").
depts_code_index = depts_file.columns.get_loc("(code)")
depts_desc_index = depts_code_index - 2
# .copy() so the column assignments below act on an independent frame.
depts_subset = depts_file.iloc[:, [depts_code_index, depts_desc_index]].copy()
depts_subset.columns = ['code', 'owning_department']
depts_subset['code'] = depts_subset['code'].astype(str)

# Pull in the tab with asset project details.
types_file = pd.read_excel(accounts_xlsx, sheet_name='Asset Type Project')
# Make a subset that is only the code and one descriptor column (the column
# immediately to the left of "(code)").
types_code_index = types_file.columns.get_loc("(code)")
types_desc_index = types_code_index - 1
types_subset = types_file.iloc[:, [types_code_index, types_desc_index]].copy()
types_subset.columns = ['code', 'description']
types_subset['code'] = types_subset['code'].astype(str)

# Merge the two subsets to result in a code column, plus the department and
# the description. This is the subset for joining; its column order is
# code, owning_department, description.
projects_join_merge = pd.merge(depts_subset, types_subset, on='code', how='outer')
codes_dataframes.append(projects_join_merge)

# Now make a second subset from the asset project details.
# This is to create the publishable reference dataset.
types_subset_2 = types_file[['Asset Type/Project',
                             'Asset Type',
                             'Asset Type Project',
                             '(code)']].copy()
types_subset_2['(code)'] = types_subset_2['(code)'].astype(str)
types_subset_2.columns = ['project_type', 'asset_type', 'project_description', 'code']

# Merge the depts subset with the second project details subset for the
# reference dataset.
projects_df_merge = pd.merge(depts_subset, types_subset_2, on='code', how='outer')
# The merge returns the left frame's columns first, i.e.
#   code, owning_department, project_type, asset_type, project_description.
# Assigning the five published names positionally therefore labelled the code
# column 'asset_owning_department' and shifted every other column by one.
# Rename by name and then select the published order explicitly instead.
# NOTE(review): the source-to-published mapping below follows the order of the
# original name list -- confirm it matches the intended schema.
projects_df_merge = projects_df_merge.rename(columns={
    'owning_department': 'asset_owning_department',
    'project_type': 'asset_type',
    'asset_type': 'asset_subtype',
    'project_description': 'project_name',
    'code': 'project_number',
})
projects_df_merge = projects_df_merge[['asset_owning_department',
                                       'asset_type',
                                       'asset_subtype',
                                       'project_name',
                                       'project_number']]
project_out_fname = os.path.join(dir_path, 'data/temp/budget_reference_projects_datasd.csv')
projects_df_merge.to_csv(project_out_fname, index=False, encoding='utf-8')


#### Department codes

In [None]:
# Pull in tab with departments.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and later removed.
depts_file = pd.read_excel(accounts_xlsx, sheet_name='Departments Programs')
depts_out_fname = os.path.join(dir_path, 'data/temp/budget_reference_depts_datasd.csv')

# Make a subset with just the code and one descriptor column. The descriptor is
# taken by position (five columns to the left of "(code)"), expected to be
# Department -- NOTE(review): confirm against the sheet layout.
code_index = depts_file.columns.get_loc("(code)")
name_index = code_index - 5
# .copy() so the column assignments below act on an independent frame.
xlsx_subset = depts_file.iloc[:, [name_index, code_index]].copy()
xlsx_subset.columns = ['description', 'code']
xlsx_subset['code'] = xlsx_subset['code'].astype(str)
codes_dataframes.append(xlsx_subset)

# Make a new subset with multiple columns for the publishable dataset.
xlsx_subset_2 = depts_file[['Department Group',
                            'Department',
                            'Division',
                            'Section',
                            'Fund Center',
                            '(code)']].copy()
xlsx_subset_2['(code)'] = xlsx_subset_2['(code)'].astype(str)
xlsx_subset_2.columns = ['dept_group',
                         'dept_name',
                         'dept_division',
                         'dept_section',
                         'funds_center',
                         'funds_center_number']
xlsx_subset_2.to_csv(depts_out_fname, index=False, encoding='utf-8')

#### Account codes

In [None]:
# Pull in tabs with Expenses and Revenues.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was
# deprecated in pandas 0.21 and later removed.
expenses_file = pd.read_excel(accounts_xlsx, sheet_name='Expenses')
revenues_file = pd.read_excel(accounts_xlsx, sheet_name='Revenues')

# Make a subset of expenses with a code column and just one descriptor column
# (the column immediately to the left of "(code)", expected to be Object).
exp_code_index = expenses_file.columns.get_loc("(code)")
exp_name_index = exp_code_index - 1
# .copy() so the column assignments below act on an independent frame.
exp_subset = expenses_file.iloc[:, [exp_name_index, exp_code_index]].copy()
exp_subset.columns = ['description', 'code']
exp_subset['code'] = exp_subset['code'].astype(str)

# Same subset for revenues.
rev_code_index = revenues_file.columns.get_loc("(code)")
rev_name_index = rev_code_index - 1
rev_subset = revenues_file.iloc[:, [rev_name_index, rev_code_index]].copy()
rev_subset.columns = ['description', 'code']
rev_subset['code'] = rev_subset['code'].astype(str)

# Concat the two subsets to result in a single code lookup.
# This is the subset for joining.
items_join_merge = pd.concat([rev_subset, exp_subset])
codes_dataframes.append(items_join_merge)

# Now make second subsets to concat for the publishable dataset. Both sheets
# are read with the same source columns and published under the same names.
account_source_columns = ['Object Type', 'Object Class', 'Object Group', 'Object', '(code)']
account_published_columns = ['account_type',
                             'account_class',
                             'account_group',
                             'account',
                             'account_number']

exp_subset_2 = expenses_file[account_source_columns].copy()
rev_subset_2 = revenues_file[account_source_columns].copy()

exp_subset_2['(code)'] = exp_subset_2['(code)'].astype(str)
rev_subset_2['(code)'] = rev_subset_2['(code)'].astype(str)

exp_subset_2.columns = account_published_columns
rev_subset_2.columns = account_published_columns

items_df = pd.concat([exp_subset_2, rev_subset_2])
items_out_fname = os.path.join(dir_path, 'data/temp/budget_reference_accounts_datasd.csv')
items_df.to_csv(items_out_fname, index=False, encoding='utf-8')

In [None]:
# Build one project-to-date capital budget CSV per workbook found under the
# P-T-D folder, labelling each row with fund, project and account descriptions.
capital_ptd_xlsx = os.path.join(dir_path, 'data/base/budget/Budget/Capital/P-T-D/')

# Lookup frames built by the chart-of-accounts cells, relabelled so that each
# merge joins on a named key instead of relying on positional column renames.
#   [0] funds    -> description, code
#   [1] projects -> code, owning_department, description
#   [3] accounts -> description, code
fund_lookup = codes_dataframes[0].rename(
    columns={'code': 'fund_number', 'description': 'fund_type'})
project_lookup = codes_dataframes[1].rename(
    columns={'code': 'project_number',
             'owning_department': 'asset_owning_dept',
             'description': 'project_name'})
account_lookup = codes_dataframes[3].rename(
    columns={'code': 'account_number', 'description': 'account'})

for root, dirs_list, files_list in os.walk(capital_ptd_xlsx):
    # The fiscal year is assigned by counting files from FY11, so the files
    # must be visited in a deterministic order. os.walk gives no ordering
    # guarantee, hence the sort. Non-workbook entries (e.g. .DS_Store) and
    # Excel lock files (~$...) are skipped so they neither crash read_excel
    # nor consume a year.
    # NOTE(review): assumes file names sort in fiscal-year order -- confirm.
    workbook_names = sorted(
        name for name in files_list
        if name.lower().endswith(('.xls', '.xlsx')) and not name.startswith('~$')
    )
    year = 11
    for file_name in workbook_names:
        capital_ptd = pd.read_excel(os.path.join(root, file_name))
        # Only the first four columns are used; .copy() detaches the slice.
        capital_ptd = capital_ptd.iloc[:, [0, 1, 2, 3]].copy()
        capital_ptd.columns = ['amount', 'fund_number', 'project_number', 'account_number']
        # Lookup codes are strings, so the join keys must be strings too.
        for key_column in ('fund_number', 'project_number', 'account_number'):
            capital_ptd[key_column] = capital_ptd[key_column].astype(str)

        capital_ptd_3 = (
            capital_ptd
            .merge(fund_lookup, on='fund_number', how='left')
            .merge(project_lookup, on='project_number', how='left')
            .merge(account_lookup, on='account_number', how='left')
        )
        capital_ptd_3 = capital_ptd_3[['amount',
                                       'fund_type',
                                       'fund_number',
                                       'asset_owning_dept',
                                       'project_name',
                                       'project_number',
                                       'account',
                                       'account_number'
                                      ]]
        capital_ptd_out = os.path.join(dir_path, 'data/temp/capital_budget_ptd_FY'+str(year)+'_datasd.csv')
        capital_ptd_3.to_csv(capital_ptd_out, index=False, encoding='utf-8')
        year += 1

In [None]:
# Build one fiscal-year capital budget CSV per workbook found under the FY
# folder, labelling each row with fund, project and account descriptions.
capital_fy_xlsx = os.path.join(dir_path, 'data/base/budget/Budget/Capital/FY/')

# Lookup frames built by the chart-of-accounts cells, relabelled so that each
# merge joins on a named key instead of relying on positional column renames.
#   [0] funds    -> description, code
#   [1] projects -> code, owning_department, description
#   [3] accounts -> description, code
fund_lookup = codes_dataframes[0].rename(
    columns={'code': 'fund_number', 'description': 'fund_type'})
project_lookup = codes_dataframes[1].rename(
    columns={'code': 'project_number',
             'owning_department': 'asset_owning_dept',
             'description': 'project_name'})
account_lookup = codes_dataframes[3].rename(
    columns={'code': 'account_number', 'description': 'account'})

for root, dirs_list, files_list in os.walk(capital_fy_xlsx):
    # The fiscal year is assigned by counting files from FY11, so the files
    # must be visited in a deterministic order. os.walk gives no ordering
    # guarantee, hence the sort. Non-workbook entries (e.g. .DS_Store) and
    # Excel lock files (~$...) are skipped so they neither crash read_excel
    # nor consume a year.
    # NOTE(review): assumes file names sort in fiscal-year order -- confirm.
    workbook_names = sorted(
        name for name in files_list
        if name.lower().endswith(('.xls', '.xlsx')) and not name.startswith('~$')
    )
    year = 11
    for file_name in workbook_names:
        capital_fy = pd.read_excel(os.path.join(root, file_name))
        # Only the first four columns are used; .copy() detaches the slice.
        capital_fy = capital_fy.iloc[:, [0, 1, 2, 3]].copy()
        capital_fy.columns = ['amount', 'fund_number', 'project_number', 'account_number']
        # Lookup codes are strings, so the join keys must be strings too.
        for key_column in ('fund_number', 'project_number', 'account_number'):
            capital_fy[key_column] = capital_fy[key_column].astype(str)

        capital_fy_3 = (
            capital_fy
            .merge(fund_lookup, on='fund_number', how='left')
            .merge(project_lookup, on='project_number', how='left')
            .merge(account_lookup, on='account_number', how='left')
        )
        capital_fy_3 = capital_fy_3[['amount',
                                     'fund_type',
                                     'fund_number',
                                     'asset_owning_dept',
                                     'project_name',
                                     'project_number',
                                     'account',
                                     'account_number'
                                    ]]
        capital_fy_out = os.path.join(dir_path, 'data/temp/capital_budget_FY'+str(year)+'_datasd.csv')
        capital_fy_3.to_csv(capital_fy_out, index=False, encoding='utf-8')
        year += 1

In [None]:
# Build one operating budget CSV per workbook found under the Operating
# folder, labelling each row with fund, department and account descriptions.
operating_xlsx = os.path.join(dir_path, 'data/base/budget/Budget/Operating/')

# Lookup frames built by the chart-of-accounts cells, relabelled so that each
# merge joins on a named key instead of relying on positional column renames.
#   [0] funds       -> description, code
#   [2] departments -> description, code
#   [3] accounts    -> description, code
fund_lookup = codes_dataframes[0].rename(
    columns={'code': 'fund_number', 'description': 'fund_type'})
dept_lookup = codes_dataframes[2].rename(
    columns={'code': 'funds_center_number', 'description': 'dept_name'})
account_lookup = codes_dataframes[3].rename(
    columns={'code': 'account_number', 'description': 'account'})

for root, dirs_list, files_list in os.walk(operating_xlsx):
    # The fiscal year is assigned by counting files from FY11, so the files
    # must be visited in a deterministic order. os.walk gives no ordering
    # guarantee, hence the sort. Non-workbook entries (e.g. .DS_Store) and
    # Excel lock files (~$...) are skipped so they neither crash read_excel
    # nor consume a year.
    # NOTE(review): assumes file names sort in fiscal-year order -- confirm.
    workbook_names = sorted(
        name for name in files_list
        if name.lower().endswith(('.xls', '.xlsx')) and not name.startswith('~$')
    )
    year = 11
    for file_name in workbook_names:
        operating = pd.read_excel(os.path.join(root, file_name))
        # Only the first four columns are used; .copy() detaches the slice.
        operating = operating.iloc[:, [0, 1, 2, 3]].copy()
        operating.columns = ['amount',
                             'fund_number',
                             'funds_center_number',
                             'account_number']
        # Lookup codes are strings, so the join keys must be strings too.
        for key_column in ('fund_number', 'funds_center_number', 'account_number'):
            operating[key_column] = operating[key_column].astype(str)

        operating_3 = (
            operating
            .merge(fund_lookup, on='fund_number', how='left')
            .merge(dept_lookup, on='funds_center_number', how='left')
            .merge(account_lookup, on='account_number', how='left')
        )
        operating_3 = operating_3[['amount',
                                   'fund_type',
                                   'fund_number',
                                   'dept_name',
                                   'funds_center_number',
                                   'account',
                                   'account_number'
                                  ]]
        operating_out = os.path.join(dir_path, 'data/temp/operating_budget_FY'+str(year)+'_datasd.csv')
        operating_3.to_csv(operating_out, index=False, encoding='utf-8')
        year += 1

import getpass
import os
from subprocess import Popen, PIPE

# SECURITY: an Active Directory password was previously hard-coded in this
# cell (in both the share URL and the smbclient command). It has been removed
# here, but it remains in the file history and must be treated as compromised
# and rotated.

# Share location, kept for reference only; never embed credentials in it.
path = 'smb://ad.sannet.gov/dfs/FMGT-Shared/Shared/BUDGET/Open Data/Open Data Portal/Shared with Performance and Analytics'

# Credentials come from the environment (AD_USERNAME / AD_PASSWORD); when a
# variable is unset the user is prompted instead, so nothing secret is stored
# in the notebook.
adname = os.environ.get('AD_USERNAME') or input('AD username: ')
adpass = os.environ.get('AD_PASSWORD') or getpass.getpass('AD password: ')
out_f = 'chart'

# Commands executed inside the smbclient session: change into the Operating
# folder, list it, and download the FY11 workbook as <out_f>.xlsx.
# The directory path is double-quoted for smbclient, so its spaces need no
# escaping. The original mixed "\ " escapes with plain spaces in the same
# quoted path; "\ " is also an invalid Python string escape.
# NOTE(review): confirm the unescaped path resolves on the share.
smb_script = (
    'cd "FMGT-Shared/Shared/BUDGET/Open Data/Open Data Portal/'
    'Shared with Performance and Analytics/Budget/Operating";'
    ' ls; get FY11_ADOPT_OM_BUDGET.xlsx {out_f}.xlsx;'
).format(out_f=out_f)

# Argument list with no shell: values containing quotes or spaces cannot be
# re-interpreted by /bin/sh.
command = [
    'smbclient', '//ad.sannet.gov/dfs',
    '--user={adname}%{adpass}'.format(adname=adname, adpass=adpass),
    '-W', 'ad',
    '-c', smb_script,
]

p = Popen(command, stdout=PIPE, stderr=PIPE)
output, error = p.communicate()
if p.returncode != 0:
    # Report the exit code and stderr only. Printing p.__dict__ would echo
    # the argument list, which contains the password.
    print('smbclient failed with exit code {}'.format(p.returncode))
    print(error.decode('utf-8', errors='replace'))
else:
    print('Success')

In [10]:
# Folder the FY19 workbook is read from in the next cell. The path is relative,
# so it resolves against the kernel's current working directory.
temp_path = './././data/temp'

In [11]:
# Load the FY19 adopted CIP budget workbook and keep its first four columns.
df = pd.read_excel(os.path.join(temp_path, 'FY19_ADOPT_CIP_BUDGET.xlsx'))
# The original sliced an undefined name (`capital`), which raises NameError;
# the frame that was just read is the one to slice. .copy() detaches the slice
# so the column assignments below act on an independent frame.
df = df.iloc[:, [0, 1, 2, 3]].copy()
df.columns = ['amount', 'code', 'project_number', 'object_number']
# Codes are joined against reference tables as strings.
for code_column in ('code', 'project_number', 'object_number'):
    df[code_column] = df[code_column].astype(str)

FileNotFoundError: [Errno 2] No such file or directory: './././data/temp/FY19_ADOPT_CIP_BUDGET.xlsx'

In [None]:
# Attach fund, project and account descriptions to the FY19 capital rows.
# NOTE(review): fund_ref, proj_ref and accounts_ref are not defined in the
# visible part of this notebook -- they must be loaded by a cell that runs
# before this one. Their key columns are presumably strings like df's keys;
# confirm, since a dtype mismatch would leave every description empty.

# Give the budget columns their published names first so every join can use a
# single shared key. The original merged with left_on='code' /
# right_on='fund_number' and renamed 'code' afterwards, which produced two
# 'fund_number' columns in the final selection. It also took 'account_number'
# from the reference table, which is empty for unmatched rows.
df = df.rename(columns={'code': 'fund_number', 'object_number': 'account_number'})

df = pd.merge(df, fund_ref[['fund_type', 'fund_number']],
              on='fund_number', how='left')
df = pd.merge(df, proj_ref[['asset_owning_dept', 'project_name', 'project_number']],
              on='project_number', how='left')
df = pd.merge(df, accounts_ref[['account', 'account_number']],
              on='account_number', how='left')

df = df[['amount',
         'fund_type',
         'fund_number',
         'asset_owning_dept',
         'project_name',
         'project_number',
         'account',
         'account_number']]