# JSON of Counties

The dataset parsed is from [this github](https://github.com/evangambit/JsonOfCounties)

In [100]:
import json
import csv

In [101]:
with open('counties.json') as json_file:
   jsondata = json.load(json_file)

# List of States

Each state dictionary holds several counties which holds several values themselves. 

`county_attributes.txt` will be used to only select a few of these values for the data visualization project.

In [102]:
#states
print(jsondata.keys())

# To add to headers
county_attributes = []

dict_keys(['Nebraska', 'Washington', 'New Mexico', 'South Dakota', 'Texas', 'California', 'Kentucky', 'Ohio', 'Alabama', 'Georgia', 'Wisconsin', 'Arkansas', 'Oregon', 'Pennsylvania', 'Mississippi', 'Missouri', 'Colorado', 'North Carolina', 'Utah', 'Oklahoma', 'Virginia', 'Tennessee', 'Wyoming', 'West Virginia', 'Louisiana', 'New York', 'Michigan', 'Indiana', 'Massachusetts', 'Kansas', 'Idaho', 'Florida', 'Alaska', 'Nevada', 'Illinois', 'Vermont', 'Connecticut', 'Montana', 'New Jersey', 'Minnesota', 'North Dakota', 'Maryland', 'Iowa', 'South Carolina', 'Maine', 'Hawaii', 'New Hampshire', 'Arizona', 'Delaware', 'District of Columbia', 'Rhode Island'])


# Cost of Living

The cost of living dict is broken down into 5 categories.

<b>For 1 adult and 0 children</b>

`living_wage`

`food_costs`

`medical_costs`

`housing_costs`

`tax_costs`

In [103]:
for state in jsondata.keys():
	for county in jsondata[state].keys():
		# print(jsondata[state][county]['cost-of-living'])
		for attr in jsondata[state][county]['cost-of-living']:
			county_attributes.append(attr)
		break
	break

print(county_attributes)

['living_wage', 'food_costs', 'medical_costs', 'housing_costs', 'tax_costs']


# Poverty Rate

In [104]:
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['poverty-rate'])
		break
	break

county_attributes.append('poverty-rate')

8.9


# Election Data

Per year:

`total`

`dem`

`gop`


### Note for Project:

I chose not to include this in my data visualization. Maybe I will consider it for the future.

In [105]:
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['elections'])
		break
	break

{'2008': {'total': 4087, 'dem': 1274, 'gop': 2732}, '2012': {'total': 3928, 'dem': 1015, 'gop': 2858}, '2016': {'total': 3988, 'dem': 712, 'gop': 3095}, '2020': {'total': 4451, 'dem': 870, 'gop': 3507}}


# Average Income

In [106]:
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['avg_income'])
		break
	break

county_attributes.append('avg_income')

58610


# Population per Year

`YYYY`

In [107]:
# By year - 2010 to 2019
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['population'])
		break
	break

{'2010': 9160, '2011': 9117, '2012': 9082, '2013': 9010, '2014': 9017, '2015': 9075, '2016': 8957, '2017': 8947, '2018': 8935, '2019': 8846}


# Male Population

In [108]:
# Most current male pop.
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['male'])
		break
	break

county_attributes.append('male')

4435


# Female Population

In [109]:
# Most current female pop.
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['female'])
		break
	break

county_attributes.append('female')

4411


# Race Demographics

Values are in percentages. This could be used for a pie chart visual.

We can sum male and female per race to simplify the data for the pie chart. 

We can create an `other` for races not included so we will have a total of 5 variables used instead of the several below:
<br><br>

`non_hispanic_white_alone_male`

`non_hispanic_white_alone_female`

`black_alone_male`

`black_alone_female`

`asian_alone_male`

`asian_alone_female`

`hispanic_male`

`hispanic_female`

In [110]:
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['race_demographics'])
		break
	break

# for headers
county_attributes.append('non_hispanic_white_alone')
county_attributes.append('black_alone')
county_attributes.append('asian_alone')
county_attributes.append('hispanic')
county_attributes.append('other')

{'non_hispanic_white_alone_male': 0.4379380510965408, 'non_hispanic_white_alone_female': 0.4378250056522722, 'black_alone_male': 0.0036174542165950713, 'black_alone_female': 0.0016956816640289396, 'asian_alone_male': 0.0013565453312231518, 'asian_alone_female': 0.0035044087723264754, 'hispanic_male': 0.05403572236038888, 'hispanic_female': 0.049287813701107845}


In [111]:
new_demographics = dict()

for state in jsondata.keys():
	new_demographics[state] = dict()

	for county in jsondata[state].keys():
			new_demographics[state][county] = dict()

			new_demographics[state][county]['non_hispanic_white_alone'] = jsondata[state][county]['race_demographics']['non_hispanic_white_alone_male'] + jsondata[state][county]['race_demographics']['non_hispanic_white_alone_female']
			new_demographics[state][county]['black_alone'] = jsondata[state][county]['race_demographics']['black_alone_male'] + jsondata[state][county]['race_demographics']['black_alone_female']
			new_demographics[state][county]['asian_alone'] = jsondata[state][county]['race_demographics']['asian_alone_male'] + jsondata[state][county]['race_demographics']['asian_alone_female']
			new_demographics[state][county]['hispanic'] = jsondata[state][county]['race_demographics']['hispanic_male'] + jsondata[state][county]['race_demographics']['hispanic_female']

			# Calculate 'other' category for race demographics
			percent_non_other = 0
			for val in new_demographics[state][county].values():
				percent_non_other += val
			
			new_demographics[state][county]['other'] = 1 - percent_non_other

			# For updating json counties copy of race demographics to the above simplified version
			jsondata[state][county]['race_demographics'] = new_demographics[state][county]

Update `county copy.json` with updated demographics.

In [112]:
with open("counties copy.json", "w") as jsonFile:
    json.dump(jsondata, jsonFile, indent=2)

# Confirmed Covid Cases

Recorded in `YYY-MM-DD` format

Simplify the data by making it monthly.

`covid-confirmed.YYYY-MM` format

In [113]:
# Can be used for slider
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['covid-confirmed'])
		break
	break

{'2020-05-04': 9, '2020-05-18': 17, '2020-06-01': 32, '2020-06-15': 34, '2020-06-29': 36, '2020-07-13': 38, '2020-07-27': 48, '2020-08-10': 67, '2020-08-24': 74, '2020-09-07': 95, '2020-09-21': 120, '2020-10-05': 189, '2020-10-19': 243, '2020-11-02': 329, '2020-11-16': 467, '2020-11-30': 603, '2020-12-14': 690, '2020-12-28': 786, '2021-01-11': 886, '2021-01-25': 941, '2021-02-08': 974, '2021-02-22': 984, '2021-03-08': 988, '2021-03-22': 1003, '2021-04-05': 1027, '2021-04-19': 1033, '2021-05-03': 1039, '2021-05-17': 1051, '2021-05-31': 1056, '2021-06-14': 1056, '2021-06-28': 1056, '2021-07-12': 1056, '2021-07-26': 1056, '2021-08-09': 1056, '2021-08-23': 1056}


Use `counties_updates.json` to overwrite dates into `covid-confirmed.YYYY-MM` format

In [114]:
formatted_cc_data = {}

for state in jsondata.keys():
    for county in jsondata[state].keys():
        
		# loop through county YYYY-MM-DD formats
        for date in jsondata[state][county]['covid-confirmed']:
			# sum if YYYY-MM key exists in formatted dictionary
            if 'covid-confirmed.'+date[0:7] in formatted_cc_data:
                formatted_cc_data['covid-confirmed.' + date[0:7]] += jsondata[state][county]['covid-confirmed'][date]
                jsondata[state][county]['covid-confirmed.' + date[0:7]] = formatted_cc_data['covid-confirmed.' + date[0:7]]
            else:
                # creating YYYY-MM key
                formatted_cc_data['covid-confirmed.' + date[0:7]] = jsondata[state][county]['covid-confirmed'][date]
                jsondata[state][county]['covid-confirmed.' + date[0:7]] = formatted_cc_data['covid-confirmed.' + date[0:7]]

        # delete county 'covid-confirmed' key for this particular statee and county to replace with new format
        del jsondata[state][county]['covid-confirmed']
        
        # clear county data for next county
        formatted_cc_data.clear()

# overwrite the 'counties copy.json' with the new format
with open("counties copy.json", "w") as jsonFile:
     json.dump(jsondata, jsonFile, indent=2)

Verify the new copy has the correct format `covid-confirmed.YYYY-MM`

In [115]:
with open("counties copy.json", "r") as jsonFile:
    jsoncopy = json.load(jsonFile)
    print(jsoncopy['New Jersey']['essex county']['covid-confirmed.2020-05'])

31221


# Covid Deaths

In `YYYY-MM-DD` format. Parse into `YYYY-MM` format.

Use same header as confirmed cases above if earliest/latest dates fall are the same.

In [116]:
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['covid-deaths']) # Should add all covid deaths for each county
		break
	break

{'2020-05-04': 0, '2020-05-18': 0, '2020-06-01': 0, '2020-06-15': 0, '2020-06-29': 0, '2020-07-13': 0, '2020-07-27': 0, '2020-08-10': 0, '2020-08-24': 0, '2020-09-07': 0, '2020-09-21': 0, '2020-10-05': 1, '2020-10-19': 2, '2020-11-02': 3, '2020-11-16': 3, '2020-11-30': 5, '2020-12-14': 8, '2020-12-28': 9, '2021-01-11': 9, '2021-01-25': 9, '2021-02-08': 11, '2021-02-22': 11, '2021-03-08': 11, '2021-03-22': 11, '2021-04-05': 11, '2021-04-19': 11, '2021-05-03': 11, '2021-05-17': 11, '2021-05-31': 15, '2021-06-14': 15, '2021-06-28': 15, '2021-07-12': 15, '2021-07-26': 15, '2021-08-09': 15, '2021-08-23': 15}


In [117]:
formatted_cc_data = {}

for state in jsondata.keys():
    for county in jsondata[state].keys():
        
		# loop through county YYYY-MM-DD formats
        for date in jsondata[state][county]['covid-deaths']:
			# sum if YYYY-MM exists in formatted dictionary
            if 'covid-deaths'+date[0:7] in formatted_cc_data:
                formatted_cc_data['covid-deaths.' + date[0:7]] += jsondata[state][county]['covid-deaths'][date]
                jsondata[state][county]['covid-deaths.' + date[0:7]] = formatted_cc_data['covid-deaths.' + date[0:7]]
            else:
                # creating 'covid-deaths.YYYY-MM'
                formatted_cc_data['covid-deaths.' + date[0:7]] = jsondata[state][county]['covid-deaths'][date]
                jsondata[state][county]['covid-deaths.' + date[0:7]] = formatted_cc_data['covid-deaths.' + date[0:7]]

        # delete this county's 'covid-deaths' key for this particular state to replace with new format
        del jsondata[state][county]['covid-deaths']

        # clear county data for next county
        formatted_cc_data.clear()

# overwrite the 'counties copy.json' with the new format
with open("counties copy.json", "w") as jsonFile:
    json.dump(jsondata, jsonFile, indent=2)

In [118]:
with open("counties copy.json", "r") as jsonFile:
    jsoncopy = json.load(jsonFile)
    print(jsoncopy['New Jersey']['essex county']['covid-deaths.2020-05'])

1546


# Number of Police Deaths

In [12]:
# Theres lots of 0s here, but it can be used maybe? Or maybe for another project?
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['police_deaths'])
		break
	break

0


# Fatal Police Shootings

`total-YYYY`

`unarmed-YYYY`

`firearmed-YYYY`

In [13]:
# for state in jsondata.keys():
# 	for county in jsondata[state].keys():
# 		print(jsondata[state][county]['fatal_police_shootings'])
# 		break
# 	break

{'total-2017': 0, 'unarmed-2017': 0, 'firearmed-2017': 0, 'total-2018': 0, 'unarmed-2018': 0, 'firearmed-2018': 0, 'total-2019': 0, 'unarmed-2019': 0, 'firearmed-2019': 0, 'total-2020': 0, 'unarmed-2020': 0, 'firearmed-2020': 0}


# Number of Police

In [16]:
# for state in jsondata.keys():
# 	for county in jsondata[state].keys():
# 		print(jsondata[state][county]['num_police'])
# 		break
# 	break

# Bureau of Labor Statistics (BLS)

{ `YYYY` : { `labor_force`, `employed`, `unemployed`} }

In [17]:
# This would be great to find out the unemployment trends up until 2020

# for state in jsondata.keys():
# 	for county in jsondata[state].keys():
# 		print(jsondata[state][county]['bls'])
# 		break
# 	break

{'2004': {'labor_force': 5538.0, 'employed': 5378.0, 'unemployed': 160.0}, '2008': {'labor_force': 4938.0, 'employed': 4794.0, 'unemployed': 144.0}, '2012': {'labor_force': 4915.0, 'employed': 4763.0, 'unemployed': 152.0}, '2016': {'labor_force': 4711.0, 'employed': 4575.0, 'unemployed': 136.0}, '2020': {'labor_force': 4830.0, 'employed': 4708.0, 'unemployed': 122.0}}


# Number of Deaths

`suicides`

`firearm suicides`

`homicides`

`vehicle`

In [18]:
# suicides, firearm suicides, homicides, vehicle
for state in jsondata.keys():
	for county in jsondata[state].keys():
		print(jsondata[state][county]['deaths'])
		break
	break

{'suicides': None, 'firearm suicides': None, 'homicides': None, 'vehicle': 13}


# Age Demographics

Ages are grouped up into ranges. Values are percentages

`0-4`

`5-9`

`10-14`

`15-19`

`20-24`

`25-29`

`30-34`

`35-39`

`40-44`

`45-49`

`50-54`

`55-59`

`60-64`

`65-69`

`70-74`

`75-79`

`80-84`

`85+`

In [22]:
# Ages are grouped by several range of values

# for state in jsondata.keys():
# 	for county in jsondata[state].keys():
# 		print(jsondata[state][county]['age_demographics'])
# 		break
# 	break

{'0-4': 0.06647072122993443, '5-9': 0.06307935790187655, '10-14': 0.06940990278091792, '15-19': 0.06703594845127742, '20-24': 0.05527922224734343, '25-29': 0.05188785891928555, '30-34': 0.0505313135880624, '35-39': 0.0561835858014922, '40-44': 0.047139950260004525, '45-49': 0.047592132037078905, '50-54': 0.0588966764639385, '55-59': 0.07200994799909563, '60-64': 0.07087949355640967, '65-69': 0.056861858467103774, '70-74': 0.05007913181098802, '75-79': 0.03979199638254578, '80-84': 0.03199186072801266, '85+': 0.044879041374632604}


## Additional Notes:

### There are <strong>several</strong> more variables of data that can be used. These are just a few that can be used for the Data Visualization project.

# Building Header for CSV file

In [119]:
# initial header
header = ['state', 'county']

# add other headers by using attributes found above
header.extend(county_attributes)


print(header)

['state', 'county', 'living_wage', 'food_costs', 'medical_costs', 'housing_costs', 'tax_costs', 'poverty-rate', 'avg_income', 'male', 'female', 'non_hispanic_white_alone', 'black_alone', 'asian_alone', 'hispanic', 'other']


In [124]:
with open('counties copy.json') as f:
    data = json.load(f)

for state in data:
    for county in data[state]:
        for attr in data[state][county].keys():
            if 'covid-deaths' in attr or 'covid-confirmed' in attr:
                # print("{}: {}".format(attr, jsondata[state][county][attr]))
                header.append(attr)
        break
    break

print(header)

['state', 'county', 'living_wage', 'food_costs', 'medical_costs', 'housing_costs', 'tax_costs', 'poverty-rate', 'avg_income', 'male', 'female', 'non_hispanic_white_alone', 'black_alone', 'asian_alone', 'hispanic', 'other', 'covid-confirmed.2020-05', 'covid-confirmed.2020-06', 'covid-confirmed.2020-07', 'covid-confirmed.2020-08', 'covid-confirmed.2020-09', 'covid-confirmed.2020-10', 'covid-confirmed.2020-11', 'covid-confirmed.2020-12', 'covid-confirmed.2021-01', 'covid-confirmed.2021-02', 'covid-confirmed.2021-03', 'covid-confirmed.2021-04', 'covid-confirmed.2021-05', 'covid-confirmed.2021-06', 'covid-confirmed.2021-07', 'covid-confirmed.2021-08', 'covid-deaths.2020-05', 'covid-deaths.2020-06', 'covid-deaths.2020-07', 'covid-deaths.2020-08', 'covid-deaths.2020-09', 'covid-deaths.2020-10', 'covid-deaths.2020-11', 'covid-deaths.2020-12', 'covid-deaths.2021-01', 'covid-deaths.2021-02', 'covid-deaths.2021-03', 'covid-deaths.2021-04', 'covid-deaths.2021-05', 'covid-deaths.2021-06', 'covid-de

# Inserting Headers and Values into csv

In [None]:
data_file = open('counties.csv', 'w', newline='')
csv_writer = csv.writer(data_file)

