# Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
import plotly as py
import plotly.express as px
import json
from urllib.request import urlopen
import wget

# Load Dataset


In [2]:
df = pd.read_csv("sorted_health_factors.csv", dtype={"FIPS":str, "County": str})
    
############# Or ##############
# We can also load the dataset as shown below even though it's not necessary:-
    
# with open("sorted_health_factors.csv", "r") as file:
#     df = pd.read_csv(file, dtype={"FIPS":str})

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 102 entries, 0 to 101
Data columns (total 12 columns):
 #   Column                                       Non-Null Count  Dtype 
---  ------                                       --------------  ----- 
 0   FIPS                                         102 non-null    object
 1   State                                        102 non-null    object
 2   County                                       102 non-null    object
 3   Infant Motality Rate                         102 non-null    int64 
 4   % Limited Access to Healthy Foods            102 non-null    int64 
 5   % Disconnected Youth                         102 non-null    int64 
 6   Average Grade Performance in Reading Scores  102 non-null    int64 
 7   Average Grade Performance in Math Scores     102 non-null    int64 
 8   % Enrolled in Free or Reduced Lunch          102 non-null    int64 
 9   Juvenile Arrest Rate                         102 non-null    int64 
 10  % Broadband Ac

# Create an Intensity column to conve the County Ranks to percentage

In [3]:
df["Intensity (%)"] = ((df["County Rank"]/np.max(df["County Rank"]))*100).round().astype(int)
df

Unnamed: 0,FIPS,State,County,Infant Motality Rate,% Limited Access to Healthy Foods,% Disconnected Youth,Average Grade Performance in Reading Scores,Average Grade Performance in Math Scores,% Enrolled in Free or Reduced Lunch,Juvenile Arrest Rate,% Broadband Acess,County Rank,Intensity (%)
0,17043,Illinois,DuPage,4,3,3,3,3,27,6,92,1,1
1,17111,Illinois,McHenry,4,6,5,3,3,27,5,94,2,2
2,17097,Illinois,Lake,4,6,5,3,3,32,8,92,3,3
3,17089,Illinois,Kane,5,4,5,3,3,42,5,89,4,4
4,17195,Illinois,Whiteside,5,6,6,3,3,50,7,82,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,17115,Illinois,Macon,9,8,10,3,2,55,16,81,98,96
98,17081,Illinois,Jefferson,9,10,5,3,3,58,12,82,99,97
99,17143,Illinois,Peoria,9,13,8,3,3,54,25,82,100,98
100,17183,Illinois,Vermilion,10,8,10,3,3,62,15,79,101,99


In [4]:
df.corr()

Unnamed: 0,Infant Motality Rate,% Limited Access to Healthy Foods,% Disconnected Youth,Average Grade Performance in Reading Scores,Average Grade Performance in Math Scores,% Enrolled in Free or Reduced Lunch,Juvenile Arrest Rate,% Broadband Acess,County Rank,Intensity (%)
Infant Motality Rate,1.0,0.11343,0.435813,-0.044119,-0.114441,0.386013,0.214534,-0.317663,0.745778,0.745702
% Limited Access to Healthy Foods,0.11343,1.0,-0.035142,-0.50708,-0.21653,0.348197,-0.114825,-0.321319,0.570327,0.570059
% Disconnected Youth,0.435813,-0.035142,1.0,0.009162,-0.03785,0.192286,0.028279,-0.302133,0.337296,0.338084
Average Grade Performance in Reading Scores,-0.044119,-0.50708,0.009162,1.0,0.473288,-0.464382,0.135569,0.285122,-0.260784,-0.259776
Average Grade Performance in Math Scores,-0.114441,-0.21653,-0.03785,0.473288,1.0,-0.444719,0.019018,0.310707,-0.214398,-0.215225
% Enrolled in Free or Reduced Lunch,0.386013,0.348197,0.192286,-0.464382,-0.444719,1.0,-0.013788,-0.632919,0.406541,0.405038
Juvenile Arrest Rate,0.214534,-0.114825,0.028279,0.135569,0.019018,-0.013788,1.0,0.102537,0.078513,0.077012
% Broadband Acess,-0.317663,-0.321319,-0.302133,0.285122,0.310707,-0.632919,0.102537,1.0,-0.344857,-0.343388
County Rank,0.745778,0.570327,0.337296,-0.260784,-0.214398,0.406541,0.078513,-0.344857,1.0,0.999953
Intensity (%),0.745702,0.570059,0.338084,-0.259776,-0.215225,0.405038,0.077012,-0.343388,0.999953,1.0


# Download the GeoJSON file of Illinois Counties

**GeoJSON is an open standard format designed for representing simple geographical features, along with their non-spatial attributes. It is based on the JSON format**.

**_Ref. Wikipedia_**

In [5]:
# url = "https://raw.githubusercontent.com/codeforgermany/click_that_hood/main/public/data/illinois-counties.geojson"
# wget.download(url)

# Load the Illinois Counties GeoJSON file

In [6]:
with open("illinois-counties.geojson", mode="r") as infile:
    illinois = json.load(infile)

############# Or ###############
# We can load the GeoJSON file directly from the url without downloding it into our local computer as shown below:-

# with urlopen(url) as infile:
#     illinois = json.load(infile)

illinois

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'MultiPolygon',
    'coordinates': [[[[-90.301945, 39.520635],
       [-90.283193, 39.52053],
       [-90.264517, 39.520269],
       [-90.247067, 39.520241],
       [-90.245748, 39.520191],
       [-90.228083, 39.520462],
       [-90.226581, 39.520295],
       [-90.20938, 39.520348],
       [-90.20758, 39.520295],
       [-90.190861, 39.520375],
       [-90.189374, 39.520296],
       [-90.172049, 39.520341],
       [-90.171438, 39.520346],
       [-90.153534, 39.520526],
       [-90.153532, 39.52025],
       [-90.153362, 39.506621],
       [-90.153368, 39.50607],
       [-90.153573, 39.491709],
       [-90.153013, 39.477446],
       [-90.152696, 39.46284],
       [-90.152287, 39.448344],
       [-90.152096, 39.433866],
       [-90.151539, 39.419311],
       [-90.151535, 39.418791],
       [-90.151406, 39.404688],
       [-90.151396, 39.404385],
       [-90.150918, 39.390065],
       [-90.150657, 39.

In [7]:
illinois["features"][0].keys()

dict_keys(['type', 'geometry', 'properties'])

**Observe that there is no `id keys` in the feature. Because the `feature id keys` will be used to link the dataset to the GeoJSON file, we have to create them. Then, we create a dictionary with `county names` as keys and the `feature id keys` as values. This dictionary will be used to map the GeoJSON file to the dataset**.

# 1. Create an id key in the GeoJSON file  
# 2. Store the names of the Illinois Counties in the GeoJSON file into a dictionary

In [8]:
county_id_map = {}
for feature in illinois["features"]:
    feature["id"] = feature["properties"]["cartodb_id"]
    county_id_map[feature["properties"]["name"]] = feature["id"]
    
# An alternative way to create the id key in the geojson file:-
# for k in range(1, len(illinois["features"])-1, 1):
#     illinois["features"][k]["id"] = k

illinois["features"][0].keys()

dict_keys(['type', 'geometry', 'properties', 'id'])

**Observe now that there is feature id key**

# Store the names of the Illinois Counties in the GeoJSON file into a list

In [9]:
mylist_geojson = []
for feature in illinois["features"]:
    mylist_geojson.append(feature["properties"]["name"])

############## Or ##################
# We can do this directly from the mylist_geojson dictionary already created as shown below:-

# mylist_geojson = []
# for i in county_id_map.keys():
#     mylist_geojson.append(i)

mylist_geojson

['Greene',
 'Clinton',
 'Tazewell',
 'Fulton',
 'Adams',
 'Bureau',
 'Henry',
 'Mercer',
 'Brown',
 'Lake',
 'Gallatin',
 'Johnson',
 'Union',
 'Randolph',
 'Perry',
 'Franklin',
 'Calhoun',
 'Clay',
 'Jersey',
 'Hardin',
 'Christian',
 'Piatt',
 'Cook',
 'Rock Island',
 'Jackson',
 'Pope',
 'Henderson',
 'Carroll',
 'Madison',
 'St. Clair',
 'Monroe',
 'Mason',
 'Hancock',
 'Crawford',
 'Whiteside',
 'Pike',
 'Cass',
 'Wabash',
 'Edwards',
 'Washington',
 'Richland',
 'Jo Daviess',
 'White',
 'Pulaski',
 'Alexander',
 'Peoria',
 'Menard',
 'Schuyler',
 'Ogle',
 'Will',
 'Mclean',
 'Saline',
 'Cumberland',
 'Champaign',
 'Mchenry',
 'Lasalle',
 'Putnam',
 'Iroquois',
 'Kankakee',
 'Ford',
 'Lawrence',
 'Dekalb',
 'Woodford',
 'Edgar',
 'Vermilion',
 'Grundy',
 'Massac',
 'Macon',
 'Morgan',
 'Sangamon',
 'Logan',
 'Shelby',
 'Scott',
 'Montgomery',
 'Coles',
 'Macoupin',
 'Boone',
 'Fayette',
 'Clark',
 'Wayne',
 'Marshall',
 'Livingston',
 'Jasper',
 'Knox',
 'Moultrie',
 'Stark',
 'B

# Store the names of the Illinois Counties in the dataset into a list

In [10]:
mylist_df = []
for i in df["County"]:
    mylist_df.append(i.title())

# Compare the names of the counties from the GeoJSON and the dataframe
**Observe that `Dewitt` in the JSON file is spelled `De Wittt` in the dataframe everthing is uniform**

In [11]:
mylist_df.sort()
mylist_geojson.sort()

print(mylist_df)
print("")
print(mylist_geojson)

['Adams', 'Alexander', 'Bond', 'Boone', 'Brown', 'Bureau', 'Calhoun', 'Carroll', 'Cass', 'Champaign', 'Christian', 'Clark', 'Clay', 'Clinton', 'Coles', 'Cook', 'Crawford', 'Cumberland', 'De Witt', 'Dekalb', 'Douglas', 'Dupage', 'Edgar', 'Edwards', 'Effingham', 'Fayette', 'Ford', 'Franklin', 'Fulton', 'Gallatin', 'Greene', 'Grundy', 'Hamilton', 'Hancock', 'Hardin', 'Henderson', 'Henry', 'Iroquois', 'Jackson', 'Jasper', 'Jefferson', 'Jersey', 'Jo Daviess', 'Johnson', 'Kane', 'Kankakee', 'Kendall', 'Knox', 'Lake', 'Lasalle', 'Lawrence', 'Lee', 'Livingston', 'Logan', 'Macon', 'Macoupin', 'Madison', 'Marion', 'Marshall', 'Mason', 'Massac', 'Mcdonough', 'Mchenry', 'Mclean', 'Menard', 'Mercer', 'Monroe', 'Montgomery', 'Morgan', 'Moultrie', 'Ogle', 'Peoria', 'Perry', 'Piatt', 'Pike', 'Pope', 'Pulaski', 'Putnam', 'Randolph', 'Richland', 'Rock Island', 'Saline', 'Sangamon', 'Schuyler', 'Scott', 'Shelby', 'St. Clair', 'Stark', 'Stephenson', 'Tazewell', 'Union', 'Vermilion', 'Wabash', 'Warren', 'W

# Change `De Witt` to `Dewitt` in the dataframe

In [12]:
df["County"] = np.char.title(df["County"].to_numpy(dtype=str))  # to make each county name start with capital letter    
df["County"].mask(df["County"]=="De Witt", "Dewitt", axis=0, inplace=True)

################# Or ####################
# This is an alternative way to impliment the change
# for i in range(len(mylist_df)):
#     if mylist_df[i] == "De Witt":
#         mylist_df[i] = "Dewitt"
# df["County"] = mylist_df

# Compare the names of the counties from the GeoJSON and the dataframe again
**Observe now that everthing is uniform**

In [13]:
mylist_df.sort()
mylist_geojson.sort()

print(mylist_df)
print("")
print(mylist_geojson)

['Adams', 'Alexander', 'Bond', 'Boone', 'Brown', 'Bureau', 'Calhoun', 'Carroll', 'Cass', 'Champaign', 'Christian', 'Clark', 'Clay', 'Clinton', 'Coles', 'Cook', 'Crawford', 'Cumberland', 'De Witt', 'Dekalb', 'Douglas', 'Dupage', 'Edgar', 'Edwards', 'Effingham', 'Fayette', 'Ford', 'Franklin', 'Fulton', 'Gallatin', 'Greene', 'Grundy', 'Hamilton', 'Hancock', 'Hardin', 'Henderson', 'Henry', 'Iroquois', 'Jackson', 'Jasper', 'Jefferson', 'Jersey', 'Jo Daviess', 'Johnson', 'Kane', 'Kankakee', 'Kendall', 'Knox', 'Lake', 'Lasalle', 'Lawrence', 'Lee', 'Livingston', 'Logan', 'Macon', 'Macoupin', 'Madison', 'Marion', 'Marshall', 'Mason', 'Massac', 'Mcdonough', 'Mchenry', 'Mclean', 'Menard', 'Mercer', 'Monroe', 'Montgomery', 'Morgan', 'Moultrie', 'Ogle', 'Peoria', 'Perry', 'Piatt', 'Pike', 'Pope', 'Pulaski', 'Putnam', 'Randolph', 'Richland', 'Rock Island', 'Saline', 'Sangamon', 'Schuyler', 'Scott', 'Shelby', 'St. Clair', 'Stark', 'Stephenson', 'Tazewell', 'Union', 'Vermilion', 'Wabash', 'Warren', 'W

# Create an id column in the dataset to link the GeoJSON file to the dataset using the `feature id keys` of the GeoJSON file

In [14]:
df["id"] = df["County"].apply(lambda x: county_id_map[x])
df

Unnamed: 0,FIPS,State,County,Infant Motality Rate,% Limited Access to Healthy Foods,% Disconnected Youth,Average Grade Performance in Reading Scores,Average Grade Performance in Math Scores,% Enrolled in Free or Reduced Lunch,Juvenile Arrest Rate,% Broadband Acess,County Rank,Intensity (%),id
0,17043,Illinois,Dupage,4,3,3,3,3,27,6,92,1,1,72
1,17111,Illinois,Mchenry,4,6,5,3,3,27,5,94,2,2,19
2,17097,Illinois,Lake,4,6,5,3,3,32,8,92,3,3,16
3,17089,Illinois,Kane,5,4,5,3,3,42,5,89,4,4,76
4,17195,Illinois,Whiteside,5,6,6,3,3,50,7,82,5,5,73
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,17115,Illinois,Macon,9,8,10,3,2,55,16,81,98,96,38
98,17081,Illinois,Jefferson,9,10,5,3,3,58,12,82,99,97,101
99,17143,Illinois,Peoria,9,13,8,3,3,54,25,82,100,98,25
100,17183,Illinois,Vermilion,10,8,10,3,3,62,15,79,101,99,37


# Make the Distribution plot
### Using choropleth_mapbox

In [15]:
fig = px.choropleth_mapbox(df, 
                           title="Distribution of Health Related factors of Illinois Counties Infant School Children",
                           geojson=illinois, 
                           featureidkey="id",
                           locations='id', 
                           color="Intensity (%)",
                           hover_data=[
                               "Infant Motality Rate",  
                               "Juvenile Arrest Rate",
                               "% Limited Access to Healthy Foods"],
                           hover_name="County",
                           mapbox_style="carto-positron",
                           zoom=6.06, 
                           center={"lat": 39.8, "lon": -89},
                           opacity=0.8,
                           color_continuous_scale="Earth",
                           color_continuous_midpoint=50,
                           labels={
                               "Infant Motality Rate": "Infant Motality Rate(%)",
                               "Juvenile Arrest Rate": "Juvenile Arrest Rate (%)",
                               "% Limited Access to Healthy Foods": "Limited Access to Healthy Foods (%)"},
                          )

fig.update_layout(margin={"r":0, "t":0, "l":0, "b":0})
fig.write_html("Distribution of Health Related factors2.html", auto_open=True)