In [28]:
import requests
import csv
import pandas
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from encodings.aliases import aliases


r = requests.get("https://api.census.gov/data/2020/acs/acs5?get=NAME,B08303_001E&for=state:*")
r_json = r.json ()
print(r_json)

with open("new_census_api.csv", mode = "w", newline = "") as file:
    writer = csv.writer(file)
    writer.writerows(r_json)


[['NAME', 'B08303_001E', 'state'], ['Pennsylvania', '5652158', '42'], ['California', '16710195', '06'], ['West Virginia', '697042', '54'], ['Utah', '1378826', '49'], ['New York', '8584828', '36'], ['District of Columbia', '332618', '11'], ['Alaska', '330203', '02'], ['Florida', '8817718', '12'], ['South Carolina', '2174285', '45'], ['North Dakota', '380227', '38'], ['Maine', '608861', '23'], ['Georgia', '4475685', '13'], ['Alabama', '2002359', '01'], ['New Hampshire', '656277', '33'], ['Oregon', '1790252', '41'], ['Wyoming', '267249', '56'], ['Arizona', '2897654', '04'], ['Louisiana', '1917930', '22'], ['Indiana', '2985939', '18'], ['Idaho', '744565', '16'], ['Connecticut', '1641807', '09'], ['Hawaii', '657643', '15'], ['Illinois', '5686627', '17'], ['Massachusetts', '3251466', '25'], ['Texas', '12371575', '48'], ['Montana', '471094', '30'], ['Nebraska', '928064', '31'], ['Ohio', '5163570', '39'], ['Colorado', '2604698', '08'], ['New Jersey', '4016070', '34'], ['Maryland', '2799889', '

# Description of Script Output

The script makes an HTTP GET request to the U.S. Census Bureau's API to retrieve data from the 2020 American Community Survey (ACS) for states. Specifically, it requests data for the variable "B08303_001E" (the total number of workers who commute) and the name of each state.

Upon running the script, the following actions occur:

1. The `requests` library is used to send an HTTP GET request to the specified API endpoint, which contains the desired data query. The query includes the variables to retrieve (name and B08303_001E) and specifies that data should be retrieved for all states.

2. The response from the API is stored in the variable `r`.

3. The `r.json` method is called to parse the response data as JSON and store it in the variable `r_json`. This JSON data likely contains an array of arrays, with each inner array representing data for a specific state.

4. Finally, the script prints the content of `r_json`.

The expected output of the script is a JSON structure containing data from the 2020 ACS, specifically the name of each state and the total number of workers who commute in each state. The format of the output will look like an array of arrays or a list of lists.


In [32]:
# Read the CSV file into a DataFrame
census_df = pandas.read_csv("new_census_api.csv")

census_df.columns = ["CITY_NAME", "B08303_001E", "STATE"]
print(census_df.head())

       CITY_NAME  B08303_001E  STATE
0   Pennsylvania      5652158     42
1     California     16710195      6
2  West Virginia       697042     54
3           Utah      1378826     49
4       New York      8584828     36


### Reading and Renaming Columns in a Census DataFrame

In the following code, we read a CSV file named "new_census_api.csv" into a Pandas DataFrame and rename the columns. We then display the first few rows of the DataFrame using the `head()` function.

In [34]:
# we need to find the encodings that work for us
alias_values = set(aliases.values())

for encoding in set(aliases.values()):
    try:
        df = pandas.read_csv("new_census_api.csv", nrows=10, encoding=encoding)
        print('sucesseful', encoding)
    except:
        pass

sucesseful cp861
sucesseful iso8859_8
sucesseful cp949
sucesseful cp1252
sucesseful iso2022_jp_2
sucesseful cp1256
sucesseful latin_1
sucesseful mac_latin2
sucesseful iso2022_jp_1
sucesseful cp850
sucesseful ptcp154
sucesseful cp855
sucesseful shift_jis_2004
sucesseful gb18030
sucesseful iso2022_jp_ext
sucesseful cp857
sucesseful iso8859_2
sucesseful cp775
sucesseful euc_jp
sucesseful mac_cyrillic
sucesseful cp1258
sucesseful cp932
sucesseful gbk
sucesseful iso2022_jp_3
sucesseful cp863
sucesseful cp437
sucesseful mbcs
sucesseful cp1254
sucesseful iso8859_16
sucesseful mac_iceland
sucesseful mac_turkish
sucesseful ascii
sucesseful iso2022_jp
sucesseful iso8859_3
sucesseful gb2312
sucesseful iso8859_13
sucesseful iso8859_5
sucesseful shift_jis
sucesseful euc_jis_2004
sucesseful cp860
sucesseful cp1250
sucesseful cp862
sucesseful cp864
sucesseful big5
sucesseful cp858
sucesseful cp1257
sucesseful iso2022_jp_2004
sucesseful cp869
sucesseful big5hkscs
sucesseful kz1048
sucesseful cp1255
su

In [36]:
new_census_api = pandas.read_csv("new_census_api.csv", encoding="ISO-8859-11")
new_census_api.head()

Unnamed: 0,NAME,B08303_001E,state
0,Pennsylvania,5652158,42
1,California,16710195,6
2,West Virginia,697042,54
3,Utah,1378826,49
4,New York,8584828,36


In [37]:
new_census_api.shape # 52 rows and 3 columns

(52, 3)

In [38]:
new_census_api.duplicated().sum() # counting the duplicates rows 

0

Exoloring the dataset

In [39]:
new_census_api.head() # checking the begining of the dataframe

Unnamed: 0,NAME,B08303_001E,state
0,Pennsylvania,5652158,42
1,California,16710195,6
2,West Virginia,697042,54
3,Utah,1378826,49
4,New York,8584828,36


In [40]:
new_census_api.tail() # checking the tail of the dataframe

Unnamed: 0,NAME,B08303_001E,state
47,Delaware,419600,10
48,Puerto Rico,969379,72
49,Kentucky,1858458,21
50,South Dakota,411590,46
51,Tennessee,2909026,47


In [52]:
pandas.options.display.max_rows = 8
new_census_api

Unnamed: 0,NAME,B08303_001E,state
0,Pennsylvania,5652158,42
1,California,16710195,6
2,West Virginia,697042,54
3,Utah,1378826,49
...,...,...,...
48,Puerto Rico,969379,72
49,Kentucky,1858458,21
50,South Dakota,411590,46
51,Tennessee,2909026,47


In [53]:
new_census_api.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   NAME         52 non-null     object
 1   B08303_001E  52 non-null     int64 
 2   state        52 non-null     int64 
dtypes: int64(2), object(1)
memory usage: 1.3+ KB


In [54]:
new_census_api.describe()

Unnamed: 0,B08303_001E,state
count,52.0,52.0
mean,2759268.0,29.788462
std,3110342.0,16.774557
min,267249.0,1.0
25%,732684.2,16.75
50%,1824355.0,29.5
75%,3264259.0,42.5
max,16710200.0,72.0


In [55]:
new_census_api.describe(include='object')

Unnamed: 0,NAME
count,52
unique,52
top,Pennsylvania
freq,1
