In [1]:
import requests
import pandas as pd
import json

# Sample raw JSON data from the Louisville Metro KY Expenditures Open Data API

### From:
https://services1.arcgis.com/79kfd2K6fskCAkyg/arcgis/rest/services/Louisville_Metro_KY_Expenditures_Data_For_Fiscal_Year_2022/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson

### Show data


In [2]:
# Download the FY2022 expenditures features as GeoJSON and load the "features"
# array into a DataFrame (one row per feature).
EXPENDITURES_URL = (
    'https://services1.arcgis.com/79kfd2K6fskCAkyg/arcgis/rest/services/'
    'Louisville_Metro_KY_Expenditures_Data_For_Fiscal_Year_2022/FeatureServer/0/'
    'query?outFields=*&where=1%3D1&f=geojson'
)

# A timeout keeps the cell from hanging indefinitely if the service stalls.
response = requests.get(EXPENDITURES_URL, timeout=30)
# Stop here on an HTTP error status instead of trying to parse an error body.
response.raise_for_status()

# Keep the parsed JSON under its own name so `data` always refers to a DataFrame.
payload = response.json()
data = pd.DataFrame.from_dict(payload['features'])
print(f'Type from response: {type(data)}')
print('')
print(f'data shape {data.shape}')
print('')
print('data head() shown below')
data.head()

Type from response: <class 'pandas.core.frame.DataFrame'>

data shape (1000, 4)

data head() shown below


Unnamed: 0,type,id,geometry,properties
0,Feature,1,,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
1,Feature,2,,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
2,Feature,3,,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
3,Feature,4,,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
4,Feature,5,,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."


#### Clean up 
##### Step down into the JSON to get the inner "properties" dictionary

In [3]:
# List the column labels of the features DataFrame to locate the "properties" column.
data.columns

Index(['type', 'id', 'geometry', 'properties'], dtype='object')

In [4]:
# Keep only the "properties" column, as a single-column DataFrame whose cells
# each hold one record's dictionary.
data = data["properties"].to_frame()
data


Unnamed: 0,properties
0,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
1,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
2,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
3,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
4,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
...,...
995,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
996,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
997,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."
998,"{'Fiscal_Year': 2022, 'Budget_Type': 'Metro Go..."


# Create DataFrame
## Clean up

- Limit the DataFrame to the first 100 of the 1000 JSON object literals
- Capture the inner dictionary ("properties") from the features object literals
    - Create a list of "properties" dictionaries from the features object literals
    - Create a DataFrame from that list of "properties" object literals
- Drop columns from the "properties" dictionary that contain 'None'/'null' values or are not needed


In [5]:
# Build a flat DataFrame from the first SAMPLE_ROWS "properties" dictionaries,
# report it, drop the columns that are not needed, and report it again.
SAMPLE_ROWS = 100  # number of leading records kept for this sample

# data1 maps each column label of `data` to the list of that column's values.
data1 = data.to_dict('list')
# Every element of data1["properties"] is one record's dictionary; a list of
# dictionaries expands into one DataFrame column per dictionary key.
data2 = pd.DataFrame.from_dict(data1["properties"][:SAMPLE_ROWS])
print(f'Type from response: {type(data2)}')
print(f'data2 shape {data2.shape}')
print('data2 *** Before column drop ***')
print('--------------------')
print(data2.head())

# Columns removed from the sample; only the remaining columns are used below.
drop_columns = [
    'Budget_Type',
    'Agency_Name',
    'Sub_Agency_Name',
    'DepartmentName',
    'Sub_DepartmentName',
    'Category',
    'Sub_Category',
    'Stimulus_Type',
    'Funding_Source',
    'InvoiceDt',
    'DistributionAmt',
    'CheckDt',
    'CheckVoidDt',
    'ObjectId',
]
# Rebind to the reduced frame rather than mutating in place; naming the axis
# with `columns=` makes the intent explicit.
data2 = data2.drop(columns=drop_columns)
print('')
print('--------------------')
print(f'Type from response: {type(data2)}')
print(f'data2 shape {data2.shape}')
print('data2 *** After column drop ***')
print('')
print(data2)
print('')
print('--------------------')



Type from response: <class 'pandas.core.frame.DataFrame'>
data2 shape (100, 20)
data2 *** Before column drop ***
--------------------
   Fiscal_Year               Budget_Type                       Agency_Name  \
0         2022  Metro Government Capital  Public Works & Assets Department   
1         2022  Metro Government Capital                      Metro Police   
2         2022  Metro Government Capital                      Metro Police   
3         2022  Metro Government Capital                      Metro Police   
4         2022  Metro Government Capital  Public Works & Assets Department   

                                     Sub_Agency_Name DepartmentName  \
0                        18th & Broadway Realignment           None   
1                            1st Division, Council 6           None   
2                            1st Division, Council 6           None   
3                            1st Division, Council 6           None   
4  3800-3900 Blk W Market St MSD Green Inf

#### Clean up
##### Rename the "Fiscal_Year" column and shorten long "Vendor_Name" values for better viewing of the DataFrame.

In [6]:
# Show the frame's type and shape, then the two columns that the next cell
# relabels / shortens, so the before-and-after can be compared.
print(
    f'Type from response: {type(data2)}',
    f'data2 shape {data2.shape}',
    'data2 *** Before rename of "Fiscal_Year" and "Vendor names" ***',
    '--------------------',
    sep='\n',
)
data2.loc[:, ['Fiscal_Year', 'Vendor_Name']]


Type from response: <class 'pandas.core.frame.DataFrame'>
data2 shape (100, 6)
data2 *** Before rename of "Fiscal_Year" and "Vendor names" ***
--------------------


Unnamed: 0,Fiscal_Year,Vendor_Name
0,2022,LOUISVILLE GAS & ELECTRIC COMPANY
1,2022,ALL TRAFFIC SOLUTIONS INC
2,2022,ALL TRAFFIC SOLUTIONS INC
3,2022,ALL TRAFFIC SOLUTIONS INC
4,2022,ACTION LANDSCAPE INC
...,...,...
95,2022,YANCEY LIVING TRUST
96,2022,HALL CONTRACTING OF KENTUCKY INC
97,2022,HALL CONTRACTING OF KENTUCKY INC
98,2022,HALL CONTRACTING OF KENTUCKY INC


In [7]:
# Long vendor names mapped to the shorter labels used in the displayed tables.
replace_vendor_names = {
    'LOUISVILLE GAS & ELECTRIC COMPANY': 'LG&E',
    'ALL TRAFFIC SOLUTIONS INC': 'ALL TRAFFIC SOL',
    'PPG ARCHITECTURAL FINISHES INC DBA PITTSBURGH PAINTS': 'PITTSBURGH PAINTS',
    'WATERFRONT DEVELOPMENT CORPORATION': 'WATERFRONT DEV CORP',
    'ROBIN MARIE MELTON IRREVOCABLE TRUST DATED AUGUST 17 1983': 'ROBIN MARIE MELTON',
    'HALL CONTRACTING OF KENTUCKY INC': 'HALL CONT OF KY INC',
    'CONSOLIDATED ELECTRICAL DISTRIBUTORS': 'CONSOLIDATED ELEC DIST',
    'FOREST VILLAGE HOMEOWNERS ASSOCIATION INC': 'FOREST VILLAGE HOMEOWNERS ASSOC INC',
}

# Relabel the fiscal-year column on the existing frame, then swap in the short
# vendor labels; names without an entry in the mapping are left unchanged.
data2.rename(columns={'Fiscal_Year': 'FY'}, inplace=True)
data2['Vendor_Name'] = data2['Vendor_Name'].replace(to_replace=replace_vendor_names)

# Neither step changes the frame's type or shape, so the banner reads the same
# whether it is printed before or after them.
print(
    f'Type from response: {type(data2)}',
    f'data2 shape {data2.shape}',
    'data2 *** after rename of "Fiscal_Year" and "Vendor names" ***',
    '--------------------',
    sep='\n',
)
data2.loc[:, ['FY', 'Vendor_Name']]

Type from response: <class 'pandas.core.frame.DataFrame'>
data2 shape (100, 6)
data2 *** after rename of "Fiscal_Year" and "Vendor names" ***
--------------------


Unnamed: 0,FY,Vendor_Name
0,2022,LG&E
1,2022,ALL TRAFFIC SOL
2,2022,ALL TRAFFIC SOL
3,2022,ALL TRAFFIC SOL
4,2022,ACTION LANDSCAPE INC
...,...,...
95,2022,YANCEY LIVING TRUST
96,2022,HALL CONT OF KY INC
97,2022,HALL CONT OF KY INC
98,2022,HALL CONT OF KY INC


In [8]:
# Print a short banner, then let the notebook render the cleaned frame as the
# cell's result.
print('dataframe after cleanup', '------------------------', sep='\n')
data2

dataframe after cleanup
------------------------


Unnamed: 0,FY,Vendor_Name,InvoiceID,InvoiceAmt,CheckID,CheckAmt
0,2022,LG&E,2458376,664.00,1210984,664.00
1,2022,ALL TRAFFIC SOL,2406773,10193.44,1191356,10193.44
2,2022,ALL TRAFFIC SOL,2406773,10193.44,1191356,10193.44
3,2022,ALL TRAFFIC SOL,2406773,10193.44,1191356,10193.44
4,2022,ACTION LANDSCAPE INC,2431857,958.33,1201291,958.33
...,...,...,...,...,...,...
95,2022,YANCEY LIVING TRUST,2324144,187300.00,1158481,187300.00
96,2022,HALL CONT OF KY INC,2343641,32355.32,1167899,1437480.61
97,2022,HALL CONT OF KY INC,2343643,27358.52,1167899,1437480.61
98,2022,HALL CONT OF KY INC,2343643,27358.52,1167899,1437480.61
