In [1]:
import requests
import pandas as pd
import json

# Sample raw JSON data from the Louisville Metro Open Data API

### From:
https://services1.arcgis.com/79kfd2K6fskCAkyg/arcgis/rest/services/Louisville_Metro_KY_Expenditures_Data_For_Fiscal_Year_2022/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson

### Show sample data from the API in JSON format


In [2]:
# Download the FY2022 expenditures feature collection and preview the raw GeoJSON.
response = requests.get(
    'https://services1.arcgis.com/79kfd2K6fskCAkyg/arcgis/rest/services/Louisville_Metro_KY_Expenditures_Data_For_Fiscal_Year_2022/FeatureServer/0/query?outFields=*&where=1%3D1&f=geojson',
    timeout=30,  # requests never times out by default; a stalled server would hang the cell
)
# Stop here on an HTTP error instead of failing later while parsing an error page.
response.raise_for_status()
raw_data = response.json()
print(type(raw_data))
print('----------')
# Pretty-print the payload, then show only the first 1300 characters to keep the output short.
# NOTE(review): the payload reports "exceededTransferLimit": true, so this is presumably
# only one page of the full dataset — confirm before treating totals as complete.
formated_response = json.dumps(raw_data, indent=4)
print(formated_response[:1300])

<class 'dict'>
----------
{
    "type": "FeatureCollection",
    "properties": {
        "exceededTransferLimit": true
    },
    "features": [
        {
            "type": "Feature",
            "id": 1,
            "geometry": null,
            "properties": {
                "Fiscal_Year": 2022,
                "Budget_Type": "Metro Government Capital",
                "Agency_Name": "Public Works & Assets Department",
                "Sub_Agency_Name": "18th & Broadway Realignment",
                "DepartmentName": null,
                "Sub_DepartmentName": null,
                "Category": "Contractual Services",
                "Sub_Category": "Road Construction/ Paving Services",
                "Stimulus_Type": null,
                "Funding_Source": "FY19 Metro GO Bond - 10 yr",
                "Vendor_Name": "LOUISVILLE GAS & ELECTRIC COMPANY",
                "InvoiceID": 2458376,
                "InvoiceDt": 1652673600000,
                "InvoiceAmt": 664,
             

# Create DataFrame
## Clean up 

- Limit the DataFrame to the first 100 of the 1000 JSON object literals
- Capture inner dictionary (properties) from features object literals
    - NOT utilizing Pandas json_normalize()
- Create list of "properties" object literals
- Create DataFrame from list of "properties" object literals
- Drop columns not needed


In [3]:
# Build the working DataFrame from the first 100 features of the payload fetched earlier.
# Each GeoJSON feature stores its record in a nested "properties" dict, so collect those
# dicts into a list and let pandas turn them into rows in one step. This yields the same
# frame as a DataFrame -> dict -> DataFrame round trip, without a second download.
data1 = raw_data  # reuse the already-parsed response instead of requesting the same URL again
properties_records = [feature['properties'] for feature in data1['features'][:100]]

# Columns that are not needed for the vendor / invoice / check analysis below.
dropped_columns = [
    'Budget_Type',
    'Agency_Name',
    'Sub_Agency_Name',
    'DepartmentName',
    'Sub_DepartmentName',
    'Category',
    'Sub_Category',
    'Stimulus_Type',
    'Funding_Source',
    'InvoiceDt',
    'DistributionAmt',
    'CheckDt',
    'CheckVoidDt',
    'ObjectId',
]
# drop() raises KeyError if the API stops returning one of these columns.
data5 = pd.DataFrame(properties_records).drop(columns=dropped_columns)
print(data5)

    Fiscal_Year                        Vendor_Name  InvoiceID  InvoiceAmt  \
0          2022  LOUISVILLE GAS & ELECTRIC COMPANY    2458376      664.00   
1          2022          ALL TRAFFIC SOLUTIONS INC    2406773    10193.44   
2          2022          ALL TRAFFIC SOLUTIONS INC    2406773    10193.44   
3          2022          ALL TRAFFIC SOLUTIONS INC    2406773    10193.44   
4          2022               ACTION LANDSCAPE INC    2431857      958.33   
..          ...                                ...        ...         ...   
95         2022                YANCEY LIVING TRUST    2324144   187300.00   
96         2022   HALL CONTRACTING OF KENTUCKY INC    2343641    32355.32   
97         2022   HALL CONTRACTING OF KENTUCKY INC    2343643    27358.52   
98         2022   HALL CONTRACTING OF KENTUCKY INC    2343643    27358.52   
99         2022   HALL CONTRACTING OF KENTUCKY INC    2343641    32355.32   

    CheckID    CheckAmt  
0   1210984      664.00  
1   1191356    10193.44

### Show DataFrame type and pandas.DataFrame.head (.head()) function 

In [4]:
# Confirm that data5 is a pandas DataFrame, then preview its first five rows.
print('Type from response: ' + str(type(data5)))
data5.head(n=5)

Type from response: <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Fiscal_Year,Vendor_Name,InvoiceID,InvoiceAmt,CheckID,CheckAmt
0,2022,LOUISVILLE GAS & ELECTRIC COMPANY,2458376,664.0,1210984,664.0
1,2022,ALL TRAFFIC SOLUTIONS INC,2406773,10193.44,1191356,10193.44
2,2022,ALL TRAFFIC SOLUTIONS INC,2406773,10193.44,1191356,10193.44
3,2022,ALL TRAFFIC SOLUTIONS INC,2406773,10193.44,1191356,10193.44
4,2022,ACTION LANDSCAPE INC,2431857,958.33,1201291,958.33


### Show descriptive statistics with pandas.DataFrame.describe (.describe)

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data5.describe()

Unnamed: 0,Fiscal_Year,InvoiceID,InvoiceAmt,CheckID,CheckAmt
count,100.0,100.0,100.0,100.0,100.0
mean,2022.0,2397876.0,19186.7606,1187942.0,79327.02
std,0.0,47752.02,50027.042761,19098.75,283272.0
min,2022.0,2322714.0,-1556.5,1156652.0,46.0
25%,2022.0,2366576.0,844.2,1174004.0,1081.697
50%,2022.0,2391352.0,2728.0,1186628.0,3785.695
75%,2022.0,2445905.0,16725.0,1205449.0,25424.66
max,2022.0,2486033.0,347000.0,1221082.0,1437481.0


### Show the tuple representing the dimensionality of the DataFrame with pandas.DataFrame.shape (.shape)

In [6]:
# (number of rows, number of columns) of the trimmed frame.
data5.shape

(100, 6)

### Using Series.mean() on the "CheckAmt" column to get its mean

In [7]:
# Average of the CheckAmt column across all rows of data5.
chk_amount_avg = data5.loc[:, 'CheckAmt'].mean()
print(chk_amount_avg)

79327.02200000001


### Using Series.sum() on the "CheckAmt" column to get its sum

In [8]:
# Total of the CheckAmt column across all rows of data5.
# NOTE(review): the preview shows several rows sharing one CheckID/CheckAmt, so this
# total may count the same check more than once — confirm whether that is intended.
chk_amount_sum = data5.loc[:, 'CheckAmt'].sum()
print(chk_amount_sum)

7932702.200000001


### Find the maximum value of a column (CheckAmt) and return its corresponding row

In [9]:
# Locate the row that holds the largest value in the chosen column and print that row.
col = "CheckAmt"
row_label_of_max = data5[col].idxmax()  # index label of the row with the largest value
max_x = data5.loc[row_label_of_max]
print(max_x)

Fiscal_Year                                2022
Vendor_Name    HALL CONTRACTING OF KENTUCKY INC
InvoiceID                               2343641
InvoiceAmt                             32355.32
CheckID                                 1167899
CheckAmt                             1437480.61
Name: 96, dtype: object


### Pull the subset of rows for one vendor (ALL TRAFFIC SOLUTIONS INC) from Vendor_Name, first with .loc and then with .query

In [10]:
# Keep only one vendor's rows using a boolean mask with .loc, then renumber them from 0.
is_target_vendor = data5['Vendor_Name'] == 'ALL TRAFFIC SOLUTIONS INC'
data6 = data5.loc[is_target_vendor].reset_index(drop=True)
print(data6)

   Fiscal_Year                Vendor_Name  InvoiceID  InvoiceAmt  CheckID  \
0         2022  ALL TRAFFIC SOLUTIONS INC    2406773    10193.44  1191356   
1         2022  ALL TRAFFIC SOLUTIONS INC    2406773    10193.44  1191356   
2         2022  ALL TRAFFIC SOLUTIONS INC    2406773    10193.44  1191356   
3         2022  ALL TRAFFIC SOLUTIONS INC    2409768    11645.00  1193275   
4         2022  ALL TRAFFIC SOLUTIONS INC    2409768    11645.00  1193275   
5         2022  ALL TRAFFIC SOLUTIONS INC    2409768    11645.00  1193275   
6         2022  ALL TRAFFIC SOLUTIONS INC    2397777    11745.00  1200593   

   CheckAmt  
0  10193.44  
1  10193.44  
2  10193.44  
3  11645.00  
4  11645.00  
5  11645.00  
6  11745.00  


In [11]:
# Same vendor filter expressed as a .query() string; the index is not reset here,
# so the rows keep their labels from data5.
vendor_filter = "Vendor_Name == 'ALL TRAFFIC SOLUTIONS INC'"
data7 = data5.query(vendor_filter)
print(data7)

    Fiscal_Year                Vendor_Name  InvoiceID  InvoiceAmt  CheckID  \
1          2022  ALL TRAFFIC SOLUTIONS INC    2406773    10193.44  1191356   
2          2022  ALL TRAFFIC SOLUTIONS INC    2406773    10193.44  1191356   
3          2022  ALL TRAFFIC SOLUTIONS INC    2406773    10193.44  1191356   
17         2022  ALL TRAFFIC SOLUTIONS INC    2409768    11645.00  1193275   
18         2022  ALL TRAFFIC SOLUTIONS INC    2409768    11645.00  1193275   
19         2022  ALL TRAFFIC SOLUTIONS INC    2409768    11645.00  1193275   
21         2022  ALL TRAFFIC SOLUTIONS INC    2397777    11745.00  1200593   

    CheckAmt  
1   10193.44  
2   10193.44  
3   10193.44  
17  11645.00  
18  11645.00  
19  11645.00  
21  11745.00  


### Show second and third columns

In [12]:
# Select the vendor name and invoice id columns by label, keeping every row.
data5.loc[:, ['Vendor_Name', 'InvoiceID']]

Unnamed: 0,Vendor_Name,InvoiceID
0,LOUISVILLE GAS & ELECTRIC COMPANY,2458376
1,ALL TRAFFIC SOLUTIONS INC,2406773
2,ALL TRAFFIC SOLUTIONS INC,2406773
3,ALL TRAFFIC SOLUTIONS INC,2406773
4,ACTION LANDSCAPE INC,2431857
...,...,...
95,YANCEY LIVING TRUST,2324144
96,HALL CONTRACTING OF KENTUCKY INC,2343641
97,HALL CONTRACTING OF KENTUCKY INC,2343643
98,HALL CONTRACTING OF KENTUCKY INC,2343643


### Show first 4 rows of DataFrame

In [13]:
# First four rows of the frame, taken by position.
data5.head(4)

Unnamed: 0,Fiscal_Year,Vendor_Name,InvoiceID,InvoiceAmt,CheckID,CheckAmt
0,2022,LOUISVILLE GAS & ELECTRIC COMPANY,2458376,664.0,1210984,664.0
1,2022,ALL TRAFFIC SOLUTIONS INC,2406773,10193.44,1191356,10193.44
2,2022,ALL TRAFFIC SOLUTIONS INC,2406773,10193.44,1191356,10193.44
3,2022,ALL TRAFFIC SOLUTIONS INC,2406773,10193.44,1191356,10193.44
