### Import Libraries



- To run this script we need the libraries below
- Pandas : For creating DataFrames
- Numpy:  For computations 
- OS : For modifying working directories 
- censusdata: A library for accessing US Census Bureau data; we use some of its functions to preview variables *
- Census: Main library for accessing the US Census Bureau API *
- pd.set_option allows us to view our entire output instead of it being truncated in jupyter notebook



 * You need to install these packages before you can use them: `pip install census` (for Census) and `pip install CensusData` (for censusdata).

In [1]:
import pandas as pd
import numpy as np
import os
import censusdata as cs
from census import Census
import warnings

# Lift pandas' display limits so frames are rendered in full rather than truncated:
# every row, every column, and the complete contents of each cell.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Ignore any warning whose message mentions a "highly fragmented" DataFrame.
warnings.filterwarnings(action="ignore", message=".*DataFrame is highly fragmented.*")

In [2]:
# Show the current working directory; the CSV exports below are written relative to it.
# os.getcwd() is used instead of the bare `pwd` automagic so the cell does not depend on
# IPython's automagic setting (or break if a variable named `pwd` is ever defined).
os.getcwd()

'C:\\Users\\Thepr\\Downloads'

##### Pass your API Key

- Go to this [website](https://api.census.gov/data/key_signup.html 'sign up for your personal API key') and sign up for your personal API key.

- Once you have your API key, pass it to the `Census` constructor, e.g. `Census("API Key")`.

In [3]:
# Read the Census API key from the CENSUS_API_KEY environment variable instead of
# embedding it in the notebook, so the secret is not shared or committed with the file.
# Set it before starting Jupyter, e.g.
#   Windows:      set CENSUS_API_KEY=<your key>
#   macOS/Linux:  export CENSUS_API_KEY=<your key>
# NOTE(review): the key that used to be hard-coded here has been exposed in this
# notebook's history and should be replaced with a newly requested key.
census_api_key = os.environ.get("CENSUS_API_KEY")
if not census_api_key:
    raise RuntimeError(
        "CENSUS_API_KEY is not set. Request a key at "
        "https://api.census.gov/data/key_signup.html and export it as an "
        "environment variable before running this notebook."
    )

# Client used for every ACS request below.
c = Census(census_api_key)

#### Preview ACS variables

In [4]:
# Look up the variables of table C15002A in the 2019 ACS 5-year data, then print them.
preview_table = cs.censustable('acs5', 2019, 'C15002A')
cs.printtable(preview_table)

Variable     | Table                          | Label                                                    | Type 
-------------------------------------------------------------------------------------------------------------------
C15002A_001E | SEX BY EDUCATIONAL ATTAINMENT  | !! Estimate Total:                                       | int  
C15002A_002E | SEX BY EDUCATIONAL ATTAINMENT  | !! !! Estimate Total: Male:                              | int  
C15002A_003E | SEX BY EDUCATIONAL ATTAINMENT  | !! !! !! Estimate Total: Male: Less than high school dip | int  
C15002A_004E | SEX BY EDUCATIONAL ATTAINMENT  | !! !! !! Estimate Total: Male: High school graduate (inc | int  
C15002A_005E | SEX BY EDUCATIONAL ATTAINMENT  | !! !! !! Estimate Total: Male: Some college or associate | int  
C15002A_006E | SEX BY EDUCATIONAL ATTAINMENT  | !! !! !! Estimate Total: Male: Bachelor's degree or high | int  
C15002A_007E | SEX BY EDUCATIONAL ATTAINMENT  | !! !! Estimate Total: Female:                

## Set Geographies
- Arlington
- Tarrant County
- Dallas Fort Worth
- Texas

In [5]:

# Geography filters keyed by the label used in the output tables.
# Each value is passed unchanged as the geography argument of `c.acs5.get`.
texas_state = 'state:48'

geographies = dict([
    ('Arlington, TX', {'for': 'place:04000', 'in': texas_state}),
    ('Tarrant County, TX', {'for': 'county:439', 'in': texas_state}),
    ('Dallas-Fort Worth-Arlington MSA',
     {'for': 'metropolitan statistical area/micropolitan statistical area:19100'}),
    ('Texas State', {'for': texas_state}),
])


## Get Data 

- **ACS 5-Year Estimate**

In [6]:
# Long-format records: one row per (year, geography, race, metric).
AllData = []
Years = [2017, 2022]

# ACS table ID (C15002A-C15002I) for each race / Hispanic-origin group reported below.
Ethnicity = {
    'White alone': 'C15002A',
    'Black or African American alone': 'C15002B',
    'American Indian and Alaska Native alone': 'C15002C',
    'Asian alone': 'C15002D',
    'Native Hawaiian and Other Pacific Islander alone': 'C15002E',
    'Some other race alone': 'C15002F',
    'Two or more races': 'C15002G',
    'White alone, not Hispanic or Latino': 'C15002H',
    'Hispanic or Latino': 'C15002I',
}

# Variable suffixes summed for each metric. In the C15002A preview above, _004E-_006E are
# the male "high school graduate" through "bachelor's degree or higher" rows; _009E-_011E
# are presumably the matching female rows (the preview output is cut off) - TODO confirm.
HighSchoolCodes = ['004E', '005E', '006E', '009E', '010E', '011E']
BachelorsCodes = ['006E', '011E']

for year in Years:
    for geo, juris in geographies.items():
        for race, racialcategory in Ethnicity.items():

            highschool = [f'{racialcategory}_{code}' for code in HighSchoolCodes]
            bachelors = [f'{racialcategory}_{code}' for code in BachelorsCodes]

            # The bachelor's variables are also part of the high-school list, so drop the
            # duplicates (order preserved) and request each variable only once.
            fields = list(dict.fromkeys(['NAME'] + highschool + bachelors))

            # Get data: one request per (year, geography, table).
            response = c.acs5.get(fields, juris, year=year)
            if not response:
                # Fail with context instead of an IndexError on response[0].
                raise ValueError(
                    f"No ACS 5-year data returned for {geo!r}, table {racialcategory}, year {year}"
                )
            ACS = response[0]

            # Compute totals; a missing (None) estimate counts as 0.
            highschooltotal = sum(int(ACS[row] or 0) for row in highschool)
            bachelorstotal = sum(int(ACS[row] or 0) for row in bachelors)

            # Append to master list
            AllData.append({'Year': year, 'Geography': geo, 'Race': race,
                            'Metric': 'High School or Higher', 'Value': highschooltotal})

            AllData.append({'Year': year, 'Geography': geo, 'Race': race,
                            'Metric': "Bachelor's or Higher", 'Value': bachelorstotal})


Data = pd.DataFrame(AllData)




# Column labels for the first and last year of the comparison, taken from `Years`.
FirstYear, LastYear = str(Years[0]), str(Years[-1])

# Wide table: one row per (Geography, Race, Metric), one column per year.
# aggfunc='sum' replaces pivot_table's default mean, which turned the integer counts into
# floats (exported as "12,345.0"). Each cell comes from a single record, so the values
# themselves are unchanged.
Datapivot = Data.pivot_table(index=['Geography', 'Race', 'Metric'], columns='Year',
                             values='Value', aggfunc='sum').reset_index()

Datapivot.columns.name = None
Datapivot = Datapivot.rename(columns={year: str(year) for year in Years})

Datapivot['Change'] = Datapivot[LastYear] - Datapivot[FirstYear]

# Percent change is undefined when the base-year count is 0 or missing. Mask those rows
# (left blank in the output) instead of letting inf/NaN crash the integer conversion.
BaseCount = Datapivot[FirstYear]
PercentChange = (Datapivot['Change'] / BaseCount.where(BaseCount != 0) * 100).round()
Datapivot['Percent Change'] = PercentChange.apply(
    lambda p: f"{int(p)}%" if pd.notnull(p) else "")

# Thousands separators for the count columns; missing values become empty strings.
for col in [FirstYear, LastYear, 'Change']:
    Datapivot[col] = Datapivot[col].apply(lambda x: f"{int(x):,}" if pd.notnull(x) else "")

# Metric label indented by four spaces for presentation.
# NOTE(review): 'Eduction Attainment' is misspelled; the header is kept unchanged because it
# is part of the exported CSV - confirm nothing depends on it before renaming.
Datapivot['Eduction Attainment'] = '    ' + Datapivot['Metric']

# Final table
FinalData = Datapivot[['Geography', 'Race', 'Eduction Attainment',
                       FirstYear, LastYear, 'Change', 'Percent Change']]

# Save result
FinalData.to_csv('Arlington Education Attainment by Race ACS 5-Year Estimates.csv', index=False)




- **Summary by Geography**

In [7]:
# The Hispanic-origin tables (C15002H, C15002I) overlap the race tables C15002A-G: people in
# them are also counted under one of the race groups. They are left out of the geography
# totals so nobody is counted twice.
OverlappingGroups = ['White alone, not Hispanic or Latino', 'Hispanic or Latino']

Bachelors = Data[(Data['Metric'] == "Bachelor's or Higher")
                 & ~Data['Race'].isin(OverlappingGroups)]

# Total bachelor's-or-higher count per geography and year.
DataBachelors = Bachelors.groupby(['Geography', 'Year'])['Value'].sum().reset_index()

# One row per geography, one column per year.
BachcelorPivot = DataBachelors.pivot(index='Geography', columns='Year', values='Value').reset_index()

BachcelorPivot.columns.name = None
BachcelorPivot = BachcelorPivot.rename(columns={year: str(year) for year in Years})

# Column labels for the first and last year of the comparison, taken from `Years`.
SummaryFirstYear, SummaryLastYear = str(Years[0]), str(Years[-1])

BachcelorPivot['Change'] = BachcelorPivot[SummaryLastYear] - BachcelorPivot[SummaryFirstYear]

# Percent change is undefined when the base-year count is 0 or missing. Mask those rows
# (left blank in the output) instead of letting inf/NaN crash the integer conversion.
SummaryBase = BachcelorPivot[SummaryFirstYear]
SummaryPercent = (BachcelorPivot['Change'] / SummaryBase.where(SummaryBase != 0) * 100).round()
BachcelorPivot['Percent Change'] = SummaryPercent.apply(
    lambda p: f"{int(p)}%" if pd.notnull(p) else "")

# Thousands separators for the count columns; missing values become empty strings.
for col in [SummaryFirstYear, SummaryLastYear, 'Change']:
    BachcelorPivot[col] = BachcelorPivot[col].apply(lambda x: f"{int(x):,}" if pd.notnull(x) else "")

BachcelorPivot = BachcelorPivot.rename(columns={'Geography': 'Geography Name'})

BachcelorPivot.to_csv('Education Attainment Bachelor Degree or Higher by Geography.csv', index=False)
