In [None]:
'''
This notebook fetches and processes tract-level census data from the 2010 Census API,
making it ready for integration with other datasets or for direct analysis.
'''

In [2]:
# Importing necessary libraries 

import pandas as pd
import requests


In [3]:
# Defining the API endpoint and parameters
api_url = "https://api.census.gov/data/2010/dec/sf1"
variables = [
    "P003001",  # Total
    "P003002",  # White alone
    "P003003",  # Black or African American alone
    "P003004",  # American Indian and Alaska Native alone
    "P003005",  # Asian alone
    "P003006",  # Native Hawaiian and Other Pacific Islander alone
    "P003007",  # Some Other Race alone
    "P003008",  # Two or More Races
    "P004001",  # Total Hispanic or Latino origin
    "P004002",  # Not Hispanic or Latino
    "P004003",  # Hispanic or Latino
]
# Request every tract ("tract:*") inside state 25 / county 025; NAME is appended
# so each row also carries the tract's descriptive name.
params = {
    "get": ",".join(variables + ["NAME"]),
    "for": "tract:*",
    "in": "county:025 state:25"
}

# Making the API request. The timeout keeps a stalled connection from
# hanging the kernel indefinitely.
response = requests.get(api_url, params=params, timeout=30)

# Checking if the request was successful. Anything other than 200 is raised
# instead of silently skipped: otherwise `df` and census2010.csv would be
# missing (or stale from an earlier run) and later cells would fail confusingly.
if response.status_code != 200:
    raise RuntimeError(
        f"Census API request failed with HTTP {response.status_code}: "
        f"{response.text[:200]}"
    )

# Parsing the JSON response: the first row holds the column names and the
# remaining rows are the records.
data = response.json()
# Converting to a pandas DataFrame
df = pd.DataFrame(data[1:], columns=data[0])
# Displaying the DataFrame
print(df)

# Saving the DataFrame to a CSV file so later cells can reload it
df.to_csv("census2010.csv", index=False)


    P003001 P003002 P003003 P003004 P003005 P003006 P003007 P003008 P004001  \
0      4254    2748     300       6     469       0     513     218    4254   
1      3854    2918     276       8     327       2     209     114    3854   
2      3885    2620     330      17     527       1     270     120    3885   
3      2740    2150     131       6     298       0      84      71    2740   
4      3142    2472      96       2     453       0      56      63    3142   
..      ...     ...     ...     ...     ...     ...     ...     ...     ...   
199       9       9       0       0       0       0       0       0       9   
200       0       0       0       0       0       0       0       0       0   
201       0       0       0       0       0       0       0       0       0   
202      82      73       5       0       3       0       0       1      82   
203       0       0       0       0       0       0       0       0       0   

    P004002 P004003                                

In [4]:
# Read the CSV written by the fetch cell back into a dataframe and preview
# its first five rows.
census_tract_data = pd.read_csv(filepath_or_buffer="census2010.csv")
census_tract_data.head(n=5)

Unnamed: 0,P003001,P003002,P003003,P003004,P003005,P003006,P003007,P003008,P004001,P004002,P004003,NAME,state,county,tract
0,4254,2748,300,6,469,0,513,218,4254,3633,621,"Census Tract 1, Suffolk County, Massachusetts",25,25,100
1,3854,2918,276,8,327,2,209,114,3854,3447,407,"Census Tract 2.01, Suffolk County, Massachusetts",25,25,201
2,3885,2620,330,17,527,1,270,120,3885,3289,596,"Census Tract 2.02, Suffolk County, Massachusetts",25,25,202
3,2740,2150,131,6,298,0,84,71,2740,2550,190,"Census Tract 3.01, Suffolk County, Massachusetts",25,25,301
4,3142,2472,96,2,453,0,56,63,3142,2913,229,"Census Tract 3.02, Suffolk County, Massachusetts",25,25,302


In [5]:
# Left-pad the tract codes with zeros to a fixed width of 6 characters
# (e.g. 100 -> "000100") so they line up with the tract codes used in the
# Boston Globe dataset.
tract_codes = census_tract_data['tract'].astype(str)
census_tract_data['tract'] = tract_codes.str.zfill(6)

In [6]:
# Removing the NAME, state and county columns.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: executing it a second time no longer raises
# KeyError because the columns have already been removed.
census_tract_data = census_tract_data.drop(
    columns=['NAME', 'state', 'county'],
    errors='ignore',
)

In [7]:
# Map each Census variable code to a human-readable column label, then apply
# the mapping to the dataframe in place.
census_column_labels = {
    "P003001": "Total",
    "P003002": "White alone",
    "P003003": "Black or African American alone",
    "P003004": "American Indian and Alaska Native alone",
    "P003005": "Asian alone",
    "P003006": "Native Hawaiian and Other Pacific Islander alone",
    "P003007": "Some Other Race alone",
    "P003008": "Population of two or more races",
    "P004001": "Total Hispanic or Latino origin",
    "P004002": "Not Hispanic or Latino",
    "P004003": "Hispanic or Latino",
}
census_tract_data.rename(columns=census_column_labels, inplace=True)

In [13]:
# Keep only the tract codes, as a single-column dataframe.
# NOTE: this rebinds `df`, which the fetch cell used for the raw API table.
df = census_tract_data.loc[:, ['tract']]

# Write the tract codes to their own CSV file, without the index column
df.to_csv(path_or_buf='df.csv', index=False)

In [None]:
# Write the full cleaned census dataframe to a new CSV file, without the index column
census_tract_data.to_csv(path_or_buf='census2010data.csv', index=False)