# Heat Map Weighted by County Population Using gmaps
# Markers for the Top 10 Most Populated Counties

### Import necessary dependencies

In [35]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import json

# Import the Google Maps API key (`gkey`) from the local `config` module,
# so the key itself is not written in this notebook
from config import gkey

# Register the API key with gmaps for the figures and layers built below
gmaps.configure(api_key=gkey)

## Import Dataframe with coordinates to plot the Heat Map

In [36]:
# Load the county-level table (including the 'Lat'/'Lng' coordinates used
# for mapping) and preview its first rows
coordinates_csv = '../3_coordinates/output/coordinates_pd.csv'
heat_map_df = pd.read_csv(coordinates_csv)
heat_map_df.head()

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate,Households with Internet,Households with Just Cell Data Plan,Households with DSL or Fiber Optic,Households with Broadband Any Type,Households with Celular Data Plan,Households with Just DSL or Fiber Optic,Households with Satellite Internet,Households with Just Satellite Internet,Households with No Internet,Lat,Lng
0,72,"Corozal Municipio, Puerto Rico",34933.0,38.1,14752.0,7887.0,19146.0,54.807775,9.435205,11026.0,2381.0,2268.0,5021.0,4088.0,706.0,361.0,161.0,5686.0,18.341062,-66.316836
1,72,"Maunabo Municipio, Puerto Rico",11297.0,43.5,17636.0,8830.0,5945.0,52.624591,6.506152,3976.0,434.0,1078.0,1577.0,987.0,512.0,77.0,37.0,2157.0,18.007189,-65.899329
2,72,"Peñuelas Municipio, Puerto Rico",21661.0,36.4,16868.0,7983.0,12379.0,57.148793,7.391164,7132.0,1361.0,2329.0,3877.0,3090.0,736.0,181.0,27.0,2972.0,18.063358,-66.72739
3,72,"Ponce Municipio, Puerto Rico",148863.0,39.5,16561.0,10775.0,75187.0,50.507514,4.711043,53341.0,5613.0,18366.0,25211.0,17691.0,6802.0,1210.0,510.0,25042.0,18.011077,-66.614062
4,72,"San Sebastián Municipio, Puerto Rico",38970.0,42.0,14275.0,8072.0,21472.0,55.098794,8.486015,14278.0,1525.0,2446.0,4743.0,2737.0,1335.0,829.0,456.0,8622.0,18.335476,-66.994679


## Heat Map Weighted by Population with Markers for the Ten Most Populated Counties

### Heat Map Weighted by Population

In [37]:
def _midrange(values):
    """Return the point halfway between the smallest and largest value."""
    return (values.max() + values.min()) / 2


# Coordinates of every row, as floats, in (latitude, longitude) column order
locations = heat_map_df[["Lat", "Lng"]].astype(float)

# Centre the map halfway between the extreme latitudes and longitudes
center_lat = _midrange(heat_map_df["Lat"])
center_lng = _midrange(heat_map_df["Lng"])

# Population is the heat-map weight; the intensity ceiling is set to
# 2.5 times the largest population in the table
population = heat_map_df["Population"].astype(float)
max_intensity = 2.5 * population.max()

# Base figure centred on the computed midpoint
fig = gmaps.figure(center=[center_lat, center_lng], zoom_level=4)

# Heat layer weighted by population, attached to the figure
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=population,
    max_intensity=max_intensity,
    point_radius=5,
    dissipating=False,
)
fig.add_layer(heat_layer)

# Display the map
fig

Figure(layout=FigureLayout(height='420px'))

### Selection of the Top Ten Most Populated Counties

In [38]:
# Rank counties from most to least populated, renumber the rows from 0
# (discarding the old index), and keep only the first ten
top_ten = (
    heat_map_df
    .sort_values(by=['Population'], ascending=False)
    .reset_index(drop=True)
    .head(10)
)
# Display the resulting dataframe
top_ten

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate,Households with Internet,Households with Just Cell Data Plan,Households with DSL or Fiber Optic,Households with Broadband Any Type,Households with Celular Data Plan,Households with Just DSL or Fiber Optic,Households with Satellite Internet,Households with Just Satellite Internet,Households with No Internet,Lat,Lng
0,6,"Los Angeles County, California",10105722.0,36.0,61015.0,30798.0,1688505.0,16.708405,4.021741,3295198.0,220563.0,2306545.0,2633021.0,1680314.0,865663.0,199009.0,31351.0,538359.0,34.052227,-118.24366
1,17,"Cook County, Illinois",5238541.0,36.4,59426.0,33722.0,821572.0,15.683222,4.592977,1956561.0,135811.0,1319255.0,1500290.0,986543.0,465829.0,89105.0,13312.0,359116.0,41.737659,-87.697554
2,48,"Harris County, Texas",4525519.0,33.1,57791.0,30856.0,751985.0,16.616547,3.296683,1562813.0,183317.0,999605.0,1225570.0,848627.0,345502.0,82925.0,12605.0,275630.0,29.775182,-95.31025
3,4,"Maricopa County, Arizona",4155501.0,36.0,58580.0,30186.0,644476.0,15.508984,2.98905,1489533.0,109769.0,1058044.0,1216868.0,773917.0,400895.0,94168.0,18396.0,216082.0,33.291797,-112.429146
4,6,"San Diego County, California",3283665.0,35.4,70588.0,34350.0,427031.0,13.004707,3.555722,1111739.0,62443.0,876765.0,966430.0,645331.0,293588.0,70951.0,9144.0,118026.0,32.71573,-117.161097
5,6,"Orange County, California",3155816.0,37.5,81851.0,37603.0,378459.0,11.992429,3.043238,1024976.0,65346.0,809341.0,899047.0,595764.0,281067.0,55099.0,6522.0,97075.0,33.717471,-117.831143
6,12,"Miami-Dade County, Florida",2702602.0,39.5,46338.0,25481.0,505182.0,18.69243,3.735955,858289.0,58131.0,551091.0,627883.0,342068.0,263935.0,44337.0,8004.0,193906.0,25.551603,-80.632692
7,36,"Kings County, New York",2635121.0,34.8,52782.0,29928.0,570731.0,21.658626,4.059624,944650.0,51781.0,645169.0,711897.0,411665.0,281626.0,26654.0,3888.0,200226.0,40.678178,-73.944158
8,48,"Dallas County, Texas",2552213.0,33.3,53626.0,29810.0,446611.0,17.498971,3.081287,906179.0,92944.0,571741.0,692870.0,457642.0,201750.0,41337.0,6080.0,182720.0,32.802468,-96.8351
9,6,"Riverside County, California",2355002.0,35.0,60807.0,25700.0,362215.0,15.380666,4.559784,711724.0,49560.0,515473.0,588200.0,369086.0,195954.0,45073.0,7686.0,101093.0,33.953297,-117.396119


### Add a Layer with Markers for the Ten Most Populated Counties

In [39]:
# HTML template for each marker's info box.
# The placeholders are DataFrame column names and must match the data exactly
# (including the "Celular" spelling of the column). The `:,.0f` format spec
# renders the counts as whole numbers with thousands separators
# (e.g. 10,105,722) instead of raw floats (10105722.0).
info_box_template = """
<dl>
<dt>County</dt><dd>{Name}</dd>
<dt>Population</dt><dd>{Population:,.0f}</dd>
<dt>Households with Cellular Data</dt><dd>{Households with Celular Data Plan:,.0f}</dd>
</dl>
"""
# Build one info box per top-ten county by filling the template from each row;
# extra columns in the row are simply ignored by str.format
county_info = [info_box_template.format(**row) for _, row in top_ten.iterrows()]
# Marker coordinates for the top-ten counties.
# NOTE: this rebinds `locations`, which the heat-map cell assigned to the
# coordinates of every county.
locations = top_ten[["Lat", "Lng"]]

In [40]:
# Add a marker layer on top of the heat map.
# Coordinates are taken straight from `top_ten` rather than from the shared
# `locations` variable: that name is also assigned the coordinates of every
# county by the heat-map cell, so depending on it would make the number of
# markers depend on which cell ran last and could mismatch the ten info boxes.
marker_layer = gmaps.marker_layer(
    top_ten[["Lat", "Lng"]],
    hover_text="",
    label="",
    info_box_content=county_info,
    display_info_box=None,
)
# Attach the markers to the existing figure
# NOTE(review): each run of this cell calls add_layer again on the same `fig`;
# presumably that stacks another marker layer — rebuild `fig` before re-running.
fig.add_layer(marker_layer)
# Show the figure with the marker layer added
fig

Figure(layout=FigureLayout(height='420px'))

### Cleaning Up Top 10 Most Populated Counties Dataframe

In [41]:
# Columns kept in the cleaned-up summary: county name, population,
# cellular-data households, and coordinates
summary_columns = [
    'Name',
    'Population',
    'Households with Celular Data Plan',
    'Lat',
    'Lng',
]
top_ten_clean = top_ten[summary_columns]
# Display the trimmed dataframe
top_ten_clean

Unnamed: 0,Name,Population,Households with Celular Data Plan,Lat,Lng
0,"Los Angeles County, California",10105722.0,1680314.0,34.052227,-118.24366
1,"Cook County, Illinois",5238541.0,986543.0,41.737659,-87.697554
2,"Harris County, Texas",4525519.0,848627.0,29.775182,-95.31025
3,"Maricopa County, Arizona",4155501.0,773917.0,33.291797,-112.429146
4,"San Diego County, California",3283665.0,645331.0,32.71573,-117.161097
5,"Orange County, California",3155816.0,595764.0,33.717471,-117.831143
6,"Miami-Dade County, Florida",2702602.0,342068.0,25.551603,-80.632692
7,"Kings County, New York",2635121.0,411665.0,40.678178,-73.944158
8,"Dallas County, Texas",2552213.0,457642.0,32.802468,-96.8351
9,"Riverside County, California",2355002.0,369086.0,33.953297,-117.396119


### Store the resulting dataframe

In [42]:
# Make sure the destination folder exists first: to_csv does not create
# missing directories, so a fresh checkout without `output/` would fail here
os.makedirs("output", exist_ok=True)
# Write the summary without the positional index column
top_ten_clean.to_csv("output/top_ten_clean.csv", encoding="utf-8", index=False)