# Demo 5.2 acs5 Data Profile (*acs/acs5/profile*)    

- Census Dataset:  *acs/acs5/profile*   

- **Vintage: 2022**    
 
- **Examples**: https://api.census.gov/data/2022/acs/acs5/profile/examples.html  
- **Variables**:  https://api.census.gov/data/2022/acs/acs5/profile/variables.html 
- **Home Page**: https://www.census.gov/data/developers/data-sets/acs-5year.html  
 




- **Question**:  What percent of households in each state have a broadband Internet subscription?

  


In [1]:
import pandas as pd
import json
import requests
import pprint

import plotly.express as px

# Get Data from API Call  

### 1. Build the API Request URL

### 1. How to Start your Data Request  
- Requests always begin with: https://api.census.gov/data  

In [2]:
# Every Census Data API request starts from this root endpoint.
base_url = "https://api.census.gov/data"

### 2. Add the Dataset Name


In [3]:
# The vintage (survey year) and the dataset path are kept apart so the year is easy to spot.
vintage = "2022"
dataset_path = "acs/acs5/profile"
dataset_name = "/" + vintage + "/" + dataset_path

### 3. Start your Variable Request


In [4]:
# "?" opens the query string and "get=" introduces the comma-separated variable list.
get_start = "?get="

### 4. Add your Variables  
- **DP02_0154E**: Number of Households with a broadband Internet subscription
- **DP02_0154PE**: Percent of Households with a broadband Internet subscription


In [5]:
# NAME        -> geography name
# DP02_0154E  -> number of households with a broadband Internet subscription
# DP02_0154PE -> percent of households with a broadband Internet subscription
variable_codes = ["NAME", "DP02_0154E", "DP02_0154PE"]
get_variables = ",".join(variable_codes)

###   5. Add your Geography  


In [6]:
# "state:*" uses the wildcard to request one row for every state-level geography.
geography = "&for=state:*"

###   6. Put it all Together 

In [7]:
# Assemble the pieces in request order: root, dataset, "?get=", variables, geography.
url_parts = [base_url, dataset_name, get_start, get_variables, geography]
request_url = "".join(url_parts)
print("request_url = ", request_url)

request_url =  https://api.census.gov/data/2022/acs/acs5/profile?get=NAME,DP02_0154E,DP02_0154PE&for=state:*


### 2. Use *requests* library to make the API call

In [8]:
# Make the API call.
# A timeout keeps the cell from hanging indefinitely if the server never answers.
r = requests.get(request_url, timeout=30)

# Stop here with the HTTP status if the request failed, instead of letting
# r.json() choke on (or silently parse) an error response.
r.raise_for_status()

# The response body is JSON; decode it into Python objects.
api_results = r.json()

In [9]:
# Raw dump of the decoded response: everything prints on a single line.
print(api_results)

[['NAME', 'DP02_0154E', 'DP02_0154PE', 'state'], ['Alabama', '1619717', '83.8', '01'], ['Alaska', '236265', '89.4', '02'], ['Arizona', '2443288', '89.2', '04'], ['Arkansas', '965688', '82.4', '05'], ['California', '12177584', '91.5', '06'], ['Colorado', '2091383', '91.8', '08'], ['Connecticut', '1269855', '90.1', '09'], ['Delaware', '351432', '90.3', '10'], ['District of Columbia', '281132', '89.0', '11'], ['Florida', '7417320', '88.8', '12'], ['Georgia', '3464027', '87.8', '13'], ['Hawaii', '434531', '89.8', '15'], ['Idaho', '606182', '89.8', '16'], ['Illinois', '4379346', '88.1', '17'], ['Indiana', '2301440', '86.7', '18'], ['Iowa', '1112880', '86.3', '19'], ['Kansas', '1010155', '87.9', '20'], ['Kentucky', '1513993', '85.6', '21'], ['Louisiana', '1464943', '83.0', '22'], ['Maine', '506690', '87.3', '23'], ['Maryland', '2099376', '90.6', '24'], ['Massachusetts', '2485271', '90.7', '25'], ['Michigan', '3520966', '87.8', '26'], ['Minnesota', '2024014', '89.7', '27'], ['Mississippi', '8

In [10]:
# pprint puts each inner list on its own line, which makes the structure of the
# returned data visible -- but the output can be very, very long!
pprint.pprint(api_results)

[['NAME', 'DP02_0154E', 'DP02_0154PE', 'state'],
 ['Alabama', '1619717', '83.8', '01'],
 ['Alaska', '236265', '89.4', '02'],
 ['Arizona', '2443288', '89.2', '04'],
 ['Arkansas', '965688', '82.4', '05'],
 ['California', '12177584', '91.5', '06'],
 ['Colorado', '2091383', '91.8', '08'],
 ['Connecticut', '1269855', '90.1', '09'],
 ['Delaware', '351432', '90.3', '10'],
 ['District of Columbia', '281132', '89.0', '11'],
 ['Florida', '7417320', '88.8', '12'],
 ['Georgia', '3464027', '87.8', '13'],
 ['Hawaii', '434531', '89.8', '15'],
 ['Idaho', '606182', '89.8', '16'],
 ['Illinois', '4379346', '88.1', '17'],
 ['Indiana', '2301440', '86.7', '18'],
 ['Iowa', '1112880', '86.3', '19'],
 ['Kansas', '1010155', '87.9', '20'],
 ['Kentucky', '1513993', '85.6', '21'],
 ['Louisiana', '1464943', '83.0', '22'],
 ['Maine', '506690', '87.3', '23'],
 ['Maryland', '2099376', '90.6', '24'],
 ['Massachusetts', '2485271', '90.7', '25'],
 ['Michigan', '3520966', '87.8', '26'],
 ['Minnesota', '2024014', '89.7', '

In [11]:
# Confirm the container type of the decoded response before loading it into pandas.
type(api_results)

list

### 3. Get the data into a Dataframe  
- These Census Data results are in a list and have a specific form:  
  - The first element is a list of column names  
  - The remaining list elements are data  
  
  

In [12]:
# Load the list of lists as-is: the header list lands in row 0 and the columns
# get default integer labels, so the frame still needs its header promoted.
df = pd.DataFrame(api_results)

# The row count still includes the header row at this point.
print(df.shape)
df.head()

(53, 4)


Unnamed: 0,0,1,2,3
0,NAME,DP02_0154E,DP02_0154PE,state
1,Alabama,1619717,83.8,01
2,Alaska,236265,89.4,02
3,Arizona,2443288,89.2,04
4,Arkansas,965688,82.4,05


### 4. Promote the first row to column names, then drop it

In [13]:
# Promote the header row to column names exactly once.
# A freshly built frame still has the default 0..n-1 column labels; once the API
# names are in place the guard is False, so re-running this cell cannot swallow
# the first data row as a new header.
if isinstance(df.columns, pd.RangeIndex):
    header_row = df.iloc[0]
    # .copy() makes df an independent frame rather than a slice of the original.
    df = df.iloc[1:].copy()
    # A plain list avoids carrying the header row's label (0) over as the name
    # of the columns index.
    df.columns = header_row.tolist()

print("5.2 Demo acs5 - 2022 Vintage")
print(df.shape)
df.head()

5.2 Demo acs5 - 2022 Vintage
(52, 4)


Unnamed: 0,NAME,DP02_0154E,DP02_0154PE,state
1,Alabama,1619717,83.8,1
2,Alaska,236265,89.4,2
3,Arizona,2443288,89.2,4
4,Arkansas,965688,82.4,5
5,California,12177584,91.5,6


# Change Data Types as Needed

In [14]:
# The API returns every value as text, so all columns start out as object dtype.
df.dtypes

0
NAME           object
DP02_0154E     object
DP02_0154PE    object
state          object
dtype: object

In [15]:
# Preview the data under its original API variable names.
df.head()

Unnamed: 0,NAME,DP02_0154E,DP02_0154PE,state
1,Alabama,1619717,83.8,1
2,Alaska,236265,89.4,2
3,Arizona,2443288,89.2,4
4,Arkansas,965688,82.4,5
5,California,12177584,91.5,6


# Rename Columns

In [16]:
# Map the API variable codes to human-readable column names.
cols_to_rename = {
    'DP02_0154E': 'Number of Households with a broadband Internet subscription',
    'DP02_0154PE': 'Percent of Households with a broadband Internet subscription',
    'NAME': 'State Name',
    'state': 'GEOID',
}

# Reassign instead of renaming in place: the result is an explicit new frame
# and the cell behaves the same every time it is run.
df = df.rename(columns=cols_to_rename)

print(df.shape)
df

(52, 4)


Unnamed: 0,State Name,Number of Households with a broadband Internet subscription,Percent of Households with a broadband Internet subscription,GEOID
1,Alabama,1619717.0,83.8,1
2,Alaska,236265.0,89.4,2
3,Arizona,2443288.0,89.2,4
4,Arkansas,965688.0,82.4,5
5,California,12177584.0,91.5,6
6,Colorado,2091383.0,91.8,8
7,Connecticut,1269855.0,90.1,9
8,Delaware,351432.0,90.3,10
9,District of Columbia,281132.0,89.0,11
10,Florida,7417320.0,88.8,12


In [17]:
# Confirm the readable column names are in place.
df.head()

Unnamed: 0,State Name,Number of Households with a broadband Internet subscription,Percent of Households with a broadband Internet subscription,GEOID
1,Alabama,1619717,83.8,1
2,Alaska,236265,89.4,2
3,Arizona,2443288,89.2,4
4,Arkansas,965688,82.4,5
5,California,12177584,91.5,6


# Load the Merged Data (State Data with Abbreviations)

In [26]:
# State-level broadband figures with a state-abbreviation column added.
merged_csv = 'Data/StateDatawAbbrev.csv'
df_yes = pd.read_csv(merged_csv)
df_yes.head()

Unnamed: 0,State Name,Number of Households with a broadband Internet subscription,Percent of Households with a broadband Internet subscription,Abbrev,GEOID
0,Alabama,1619717.0,83.8,AL,1
1,Alaska,236265.0,89.4,AK,2
2,Arizona,2443288.0,89.2,AZ,4
3,Arkansas,965688.0,82.4,AR,5
4,California,12177584.0,91.5,CA,6


In [31]:
# List the distinct abbreviations to spot formatting problems such as stray whitespace.
df_yes['Abbrev'].unique()

array([' AL', ' AK', ' AZ', ' AR', ' CA', ' CO', ' CT', ' DE', ' DC',
       ' FL', ' GA', ' HI', ' ID', ' IL', ' IN', ' IA', ' KS', ' KY',
       ' LA', ' ME', ' MD', ' MA', ' MI', ' MN', ' MS', ' MO', ' MT',
       ' NE', ' NV', ' NH', ' NJ', ' NM', ' NY', ' NC', ' ND', ' OH',
       ' OK', ' OR', ' PA', ' RI', ' SC', ' SD', ' TN', ' TX', ' UT',
       ' VT', ' VA', ' WA', ' WV', ' WI', ' WY', nan], dtype=object)

### Strip whitespace from the abbreviations

In [32]:
# Remove the leading space on each abbreviation (e.g. ' AL' -> 'AL').
df_yes['Abbrev'] = df_yes['Abbrev'].str.strip( )


In [19]:
# Check the dtypes pandas inferred when reading the CSV.
df_yes.dtypes

State Name                                                       object
Number of Households with a broadband Internet subscription     float64
Percent of Households with a broadband Internet subscription    float64
Abbrev                                                           object
GEOID                                                             int64
dtype: object

In [20]:
# Set explicit dtypes for the merged frame.
percent_col = 'Percent of Households with a broadband Internet subscription'
count_col = 'Number of Households with a broadband Internet subscription'

# Percentages are the values mapped to color on the map, so they must be floats.
df_yes[percent_col] = df_yes[percent_col].astype(float)

# Household counts are whole numbers; keep them numeric (casting to str would
# produce text such as '1619717.0'). Nullable Int64 tolerates a missing count.
df_yes[count_col] = df_yes[count_col].astype('Int64')

# 'Abbrev' already holds text after the CSV read, so it is left untouched:
# astype(str) would turn a missing abbreviation (NaN) into the literal string 'nan'.

# Display the Broadband Map by State

In [33]:
# Choropleth of broadband subscription rates by state.
fig = px.choropleth(
    df_yes,
    locationmode='USA-states',   # match 'Abbrev' against two-letter state codes
    scope='usa',
    locations='Abbrev',
    labels={'Abbrev': 'State Abbreviation'},
    color='Percent of Households with a broadband Internet subscription',
    color_continuous_scale='rdylgn',
    title="Percent of Households with a Broadband Internet Subscription (2022)"
)

# The scope is already set in the call above, so only the zoom is adjusted here.
fig.update_geos(fitbounds='locations')
# Trim the margins; the top margin is kept so the title still has room.
fig.update_layout(margin={"r":0, "t":40, "l":0, "b":0})
fig.show()

In [None]:
# Save the state-level frame under Data/ without writing the index column.
csv_file_to_create = "States.csv"
filename_with_path = f"Data/{csv_file_to_create}"

df.to_csv(filename_with_path, index=False)