# Data Analysis of Data Analysts

Project 1

In [1]:
# Dependencies and Setup
import os
import csv
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt
from scipy.stats import sem
from scipy.stats import linregress
import scipy.stats as st
import numpy as np
import requests
import json

#Import Heatmap Library
import hvplot.pandas
import folium
from folium.plugins import HeatMap
import cartopy.crs as ccrs

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

#Import the API key
from census import Census
from config import geoapify_key
from config import acct_id
from config import api_key

# File to Load (Remember to Change These)
#  = Path("../Resources/schools_complete.csv")
#  = Path("../Resources/students_complete.csv")

# Read School and Student Data File and store into Pandas DataFrames
#  = pd.read_csv(school_data_to_load)
#  = pd.read_csv(student_data_to_load)

# Combine the data into a single dataset.  

In [2]:
# Load the state data into a DataFrame
state_data_df = pd.read_csv("state_data.csv")
state_data_df[['Latitude', 'Longitude']] = ""

# Display state data
state_data_df

Unnamed: 0,State,Full State Name,Estimated Jobs,Projected Jobs,Median Salary (State),Median Salary (National),Latitude,Longitude
0,AL,Alabama,220,280,85870,103500,,
1,AK,Arkansas,90,90,100040,103500,,
2,AZ,Arizona,1440,2170,102770,103500,,
3,CA,California,18400,26700,126810,103500,,
4,CO,Colorado,740,900,103200,103500,,
5,CT,Connecticut,860,1100,105700,103500,,
6,FL,Florida,2550,3900,100520,103500,,
7,GA,Georgia,2250,3030,100400,103500,,
8,HI,Hawaii,50,60,112330,103500,,
9,ID,Idaho,0,0,96490,103500,,


In [3]:
target_states = state_data_df["Full State Name"].unique()
target_states

# Info for State Dataframe
state_lat_lon = []
for state in target_states:
    # URL defined as State = & United States of America Change
    state_url = f"https://api.geoapify.com/v1/geocode/search?state={state}&format=json&apiKey={geoapify_key}&country=United%20States%20of%20America"

    response = requests.get(state_url).json()

    # Extract latitude and longitude
    lat = response["results"][0]["lat"]
    lon = response["results"][0]["lon"]

    # Print the latitude and longitude
    print('''
        State: {0}
        Latitude: {1}
        Longitude: {2}
        '''.format(state, lat, lon))
    state_lat_lon.append({
        "Full State Name": state,
        "Latitude": lat,
        "Longitude": lon
    })
    
state_lat_lon_df = pd.DataFrame(state_lat_lon)
state_lat_lon_df


        State: Alabama
        Latitude: 33.2588817
        Longitude: -86.8295337
        

        State: Arkansas
        Latitude: 35.2048883
        Longitude: -92.4479108
        

        State: Arizona
        Latitude: 34.395342
        Longitude: -111.763275
        

        State: California
        Latitude: 36.7014631
        Longitude: -118.755997
        

        State: Colorado
        Latitude: 38.7251776
        Longitude: -105.607716
        

        State: Connecticut
        Latitude: 41.6500201
        Longitude: -72.7342163
        

        State: Florida
        Latitude: 28.5421109
        Longitude: -81.3790304
        

        State: Georgia
        Latitude: 32.3293809
        Longitude: -83.1137366
        

        State: Hawaii
        Latitude: 19.593801499999998
        Longitude: -155.42837009716908
        

        State: Idaho
        Latitude: 43.6447642
        Longitude: -114.015407
        

        State: Illinois
        Latitude: 40.079

Unnamed: 0,Full State Name,Latitude,Longitude
0,Alabama,33.258882,-86.829534
1,Arkansas,35.204888,-92.447911
2,Arizona,34.395342,-111.763275
3,California,36.701463,-118.755997
4,Colorado,38.725178,-105.607716
5,Connecticut,41.65002,-72.734216
6,Florida,28.542111,-81.37903
7,Georgia,32.329381,-83.113737
8,Hawaii,19.593801,-155.42837
9,Idaho,43.644764,-114.015407


In [4]:
state_data_df["Latitude"] = state_lat_lon_df["Latitude"]
state_data_df["Longitude"] = state_lat_lon_df["Longitude"]
state_data_df

Unnamed: 0,State,Full State Name,Estimated Jobs,Projected Jobs,Median Salary (State),Median Salary (National),Latitude,Longitude
0,AL,Alabama,220,280,85870,103500,33.258882,-86.829534
1,AK,Arkansas,90,90,100040,103500,35.204888,-92.447911
2,AZ,Arizona,1440,2170,102770,103500,34.395342,-111.763275
3,CA,California,18400,26700,126810,103500,36.701463,-118.755997
4,CO,Colorado,740,900,103200,103500,38.725178,-105.607716
5,CT,Connecticut,860,1100,105700,103500,41.65002,-72.734216
6,FL,Florida,2550,3900,100520,103500,28.542111,-81.37903
7,GA,Georgia,2250,3030,100400,103500,32.329381,-83.113737
8,HI,Hawaii,50,60,112330,103500,19.593801,-155.42837
9,ID,Idaho,0,0,96490,103500,43.644764,-114.015407


In [33]:
# DataFrame 'state_coordinates' containing latitude, longitude, and Population data

# Create a base map
heatmap_plot = folium.Map(location=[state_data_df['Latitude'].mean(), state_data_df['Longitude'].mean()], zoom_start=4)

# Create a list of heat points (latitude, longitude, weight/population)
heat_data = [[row['Latitude'], row['Longitude'], row['Median Salary (State)']] for _, row in state_data_df.iterrows()]

# Plot heatmap using Folium's HeatMap function
HeatMap(heat_data, radius=20).add_to(heatmap_plot)

# Save the map as an HTML file
output_file = "heatmap_with_colorbar.html"
heatmap_plot.save(output_file)

# Create a legend using HTML and CSS
legend_html = '''
<div style="position: fixed;
             bottom: 50px; left: 50px; width: 130px; height: 140px; 
             background-color: rgba(255, 255, 255, 0.7);
             z-index: 9999; font-size: 14px; border-radius: 5px;
             border: 1px solid grey; padding: 10px;">
    <strong>Color Legend</strong><br>
    <div style="background: linear-gradient(to right, red, yellow, green, blue, indigo); 
                width: 80px; height: 100%; transform: rotate(270deg);"></div>
    <div style="position: absolute; bottom: 10px; right: -40px;">High</div>
    <div style="position: absolute; top: 10px; right: -40px;">Low</div>
</div>
'''

# Add the legend to the map HTML file
with open(output_file, 'r') as f:
    map_html = f.read()

with open(output_file, 'w') as f:
    map_html = map_html.replace('</body>', legend_html + '</body>')
    f.write(map_html)
    
# Display the map in the notebook
from IPython.display import IFrame

IFrame(output_file, width='100%', height=900)

In [None]:
<div style="position: fixed;
             bottom: 50px; left: 50px; width: 130px; height: 140px; 
             background-color: rgba(255, 255, 255, 0.7);
             z-index: 9999; font-size: 14px; border-radius: 5px;
             border: 1px solid grey; padding: 10px;">
    <strong>Color Legend</strong><br>
    <div style="background: linear-gradient(to right, red, yellow, green, blue, indigo); 
                width: 80px; height: 100%; transform: rotate(270deg);"></div>
    <small>Low</small> <small style="float: right;">High</small>
</div>

In [None]:
<div style="position: fixed;
             bottom: 50px; left: 50px; width: 120px; height: 150px; 
             background-color: rgba(255, 255, 255, 0.7);
             z-index:9999; font-size:14px; border-radius:5px;
             border:1px solid grey; padding: 10px;">
    <strong>Color Legend</strong><br>
    <div style="background: linear-gradient(to bottom, red, yellow, green); 
                width: 100%; height: 80px;"></div>
    <small>Low</small> <small style="float:right;">High</small>
</div>

In [None]:
<div style="position: fixed;
             bottom: 50px; left: 50px; width: 150px; height: 120px; 
             background-color: rgba(255, 255, 255, 0.7);
             z-index: 9999; font-size: 14px; border-radius: 5px;
             border: 1px solid grey; padding: 10px;">
    <strong>Color Legend</strong><br>
    <div style="background: linear-gradient(to right, red, yellow, green); 
                width: 80px; height: 100%; transform: rotate(270deg);"></div>
    <small>Low</small> <small style="float: right;">High</small>
</div>

In [159]:
city_state = {
    'City': ["Atlanta", "Austin", "San Francisco", "New York City", "San Diego", "Boston", "Portland",
            "Seattle", "Los Angeles", "Chicago", "Denver", "Dallas"],
    'Full State Name': ["Georgia", "Texas", "California", "NewYork", "California", "Massachusetts", 
                       "Oregon", "Washington", "California", "Illinois", "Colorado", "Texas"],
    'State': ["GA", "TX", "CA", "NY", "CA", "MA", "OR", "WA", "CA", "IL", "CO", "TX"]}
    
dataframe_1 = pd.DataFrame(city_state)
dataframe_1[['# of Analysts - City', 'Median Salary - City']] = ""

dataframe_1

Unnamed: 0,City,Full State Name,State,# of Analysts - City,Median Salary - City
0,Atlanta,Georgia,GA,,
1,Austin,Texas,TX,,
2,San Francisco,California,CA,,
3,New York City,NewYork,NY,,
4,San Diego,California,CA,,
5,Boston,Massachusetts,MA,,
6,Portland,Oregon,OR,,
7,Seattle,Washington,WA,,
8,Los Angeles,California,CA,,
9,Chicago,Illinois,IL,,


In [160]:
# api_url = f'''https://api.careeronestop.org/v1/occupation/{acct_id}/Data%20Scientists/0?training=false
# &interest=false&videos=false&tasks=false&dwas=false&wages=true
# &alternateOnetTitles=false&projectedEmployment=false&ooh=false
# &stateLMILinks=false&relatedOnetTitles=false&skills=false&knowledge=false
# &ability=false&trainingPrograms=false'''

api_test_url = f'https://api.careeronestop.org/v1/occupation/{acct_id}/Data%20Scientists/Austin%2C%20TX?training=false&interest=false&videos=false&tasks=false&dwas=false&wages=true&alternateOnetTitles=false&projectedEmployment=true&ooh=false&stateLMILinks=false&relatedOnetTitles=false&skills=false&knowledge=false&ability=false&trainingPrograms=false'

headers = {"Authorization": f"Bearer {api_key}"}
response = requests.get(api_test_url, headers=headers).json()
# response

print(json.dumps(response, indent=4, sort_keys=True))

{
    "AutoCorrection": "",
    "DidYouMean": "",
    "OccupationDetail": [
        {
            "AlternateTitles": null,
            "BrightOutlook": "Bright",
            "BrightOutlookCategory": "Rapid Growth",
            "COSVideoURL": "https://www.careeronestop.org/Videos/careeronestop-videos.aspx?videocode=15205100&op=y",
            "Green": "No",
            "Location": "TX",
            "OnetCode": "15-2051.00",
            "OnetDescription": "Develop and implement a set of techniques or analytics applications to transform raw data into meaningful information using data-oriented programming languages and visualization software. Apply data mining, data modeling, natural language processing, and machine learning to extract and analyze information from large structured and unstructured datasets. Visualize, interpret, and report data findings. May create dynamic data reports.",
            "OnetTitle": "Data Scientists",
            "Projections": {
                "EstimatedYea

In [161]:
projected_jobs = response["OccupationDetail"][0]['Projections']['Projections'][0]['ProjectedEmployment']

In [162]:
# Use the city API request but alter the city and state based on the dataframe
# This allows us to search national, state, and city data all from the same call

base_url = city_url = f"https://api.careeronestop.org/v1/occupation/{acct_id}/Data%20Scientists/"
params = "?training=false&interest=true&videos=false&tasks=false&dwas=false&wages=true&alternateOnetTitles=false&projectedEmployment=true&ooh=false&stateLMILinks=false&relatedOnetTitles=false&skills=false&knowledge=false&ability=false&trainingPrograms=false"
headers = {"Authorization": f"Bearer {api_key}"}

In [163]:
for index, row in dataframe_1.iterrows():
    # Get the city and state
    city = dataframe_1.loc[index, "City"]
    state = dataframe_1.loc[index, "State"]
    # Add the current City type to the parameters
    api_url =  f"{base_url}{city}%2C%20{state}{params}"
    
    response_1 = requests.get(api_url, headers = headers).json()
    results = response_1['OccupationDetail'][0]['Wages']['BLSAreaWagesList']
#     results_2 = response["OccupationDetail"][0]['Projections']['Projections'][0]['ProjectedEmployment']

    try:
#         should we use estimated employment OR projected employment?
        projected_jobs = response_1["OccupationDetail"][0]['Projections']['Projections'][0]['EstimatedEmployment']
        dataframe_1.loc[index, '# of Analysts - City'] = projected_jobs
        
#         help from tutor
        median_salary = [x['Median'] for x in results if x['RateType']=='Annual' ][0] 
        dataframe_1.loc[index, 'Median Salary - City'] = median_salary    

    except (KeyError, IndexError):
        dataframe_1.loc[index, 'Median Salary - City'] = "Didnt find anything"
        dataframe_1.loc[index, '# of Analysts - City'] = "Didnt find anything"

dataframe_1

Unnamed: 0,City,Full State Name,State,# of Analysts - City,Median Salary - City
0,Atlanta,Georgia,GA,2250,104060
1,Austin,Texas,TX,4110,105350
2,San Francisco,California,CA,18400,137680
3,New York City,NewYork,NY,3500,130070
4,San Diego,California,CA,18400,123570
5,Boston,Massachusetts,MA,2880,106670
6,Portland,Oregon,OR,760,108130
7,Seattle,Washington,WA,2700,140560
8,Los Angeles,California,CA,18400,103430
9,Chicago,Illinois,IL,3370,104370


In [164]:
#need to figure out how to pull specific city and state names
#maybe use itterow method
cities = ["San Francisco", "San Jose", "Austin"]
states = ["CA"]

base_url = f"https://api.careeronestop.org/v1/occupation/{acct_id}"
job = "Data%20Scientists"
params = "?training=false&interest=true&videos=false&tasks=false&dwas=false&wages=true" \
         "&alternateOnetTitles=false&projectedEmployment=true&ooh=false" \
         "&stateLMILinks=false&relatedOnetTitles=false&skills=false&knowledge=false" \
         "&ability=false&trainingPrograms=false"

api_urls = []

for city in cities:
#    for state in states:
        api_url = f"{base_url}/{job}/{city},%20{states}{params}"
        api_urls.append(api_url)

# for api_url in api_urls:
#     print(api_urls)

In [165]:
#Section 1b
#Linear regression (scatter-plot) - State population (census csv), 
# data analysts, per state, projected growth per state
all_state_pop = pd.read_csv('census_state_pop_20to22.csv', index_col='NAME')

all_state_pop

Unnamed: 0_level_0,SUMLEV,REGION,DIVISION,STATE,ESTIMATESBASE2020,POPESTIMATE2020,POPESTIMATE2021,POPESTIMATE2022,NPOPCHG_2020,NPOPCHG_2021,...,RDEATH2021,RDEATH2022,RNATURALCHG2021,RNATURALCHG2022,RINTERNATIONALMIG2021,RINTERNATIONALMIG2022,RDOMESTICMIG2021,RDOMESTICMIG2022,RNETMIG2021,RNETMIG2022
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
United States,10,0,0,0,331449520,331511512,332031554,333287557,61992,520042,...,10.363828,10.350218,0.434073,0.736729,1.133397,3.038912,0.000000,0.000000,1.133397,3.038912
Northeast Region,20,1,0,0,57609156,57448898,57259257,57040406,-160258,-189641,...,9.780142,9.868918,0.206629,0.511200,1.402708,3.752662,-4.855348,-8.061896,-3.452640,-4.309234
New England,30,1,1,0,15116206,15074473,15121745,15129548,-41733,47272,...,9.530598,9.887115,-0.310502,-0.206669,1.770752,4.655140,1.546021,-3.767839,3.316773,0.887301
Middle Atlantic,30,1,2,0,42492950,42374425,42137512,41910858,-118525,-236913,...,9.869304,9.862369,0.391400,0.769581,1.271205,3.427836,-7.142565,-9.607444,-5.871360,-6.179608
Midwest Region,20,2,0,0,68985537,68961043,68836505,68787595,-24494,-124538,...,11.059195,11.169148,-0.207043,-0.125530,0.802714,2.111084,-2.645374,-2.529339,-1.842660,-0.418255
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Washington,40,4,9,53,7705247,7724031,7740745,7785786,18784,16714,...,8.400898,9.117040,2.264113,1.737928,1.809144,4.831987,-1.990071,-0.461146,-0.180927,4.370841
West Virginia,40,3,5,54,1793755,1791420,1785526,1775156,-2335,-5894,...,15.341020,16.649619,-5.665727,-6.829029,0.406492,0.995877,1.602484,0.266241,2.008976,1.262118
Wisconsin,40,2,3,55,5893725,5896271,5880101,5892539,2546,-16170,...,10.496442,10.770227,-0.305357,-0.298659,0.541593,1.388643,-3.209987,1.300813,-2.668394,2.689456
Wyoming,40,4,8,56,576837,577605,579483,581381,768,1878,...,10.706187,11.506947,-0.063954,-0.844199,0.233344,0.589216,3.085331,3.707583,3.318676,4.296800


In [166]:
pop22 = all_state_pop[['POPESTIMATE2022']]
#pop22
state_pop22_part1 = pop22[14:22]
state_pop22_part2 = pop22[23:65]

state_pop_clean = pd.concat([state_pop22_part1, state_pop22_part2], axis=0, ignore_index=False)
#print(state_pop_clean)

#return a line indicating the number of states included
#I guess this isn't really important since we'll be extracting state population by name
print(f'There are {len(state_pop_clean)} states in this dataset.\n')

#also want to convert state names to abbreviations
state_dict = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA',
    'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS',
    'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA',
    'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM',
    'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
    'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY'
}

#create new column to hold state abbreviations
state_pop_clean['State'] = ''
index = state_pop_clean.index
for index, row in state_pop_clean.iterrows():
    state_name = index
    state_abb = state_dict.get(state_name, 'Not Found')
    state_pop_clean.loc[index, 'State'] = state_abb

print(state_pop_clean)

There are 50 states in this dataset.

                POPESTIMATE2022 State
NAME                                 
Alabama                 5074296    AL
Alaska                   733583    AK
Arizona                 7359197    AZ
Arkansas                3045637    AR
California             39029342    CA
Colorado                5839926    CO
Connecticut             3626205    CT
Delaware                1018396    DE
Florida                22244823    FL
Georgia                10912876    GA
Hawaii                  1440196    HI
Idaho                   1939033    ID
Illinois               12582032    IL
Indiana                 6833037    IN
Iowa                    3200517    IA
Kansas                  2937150    KS
Kentucky                4512310    KY
Louisiana               4590241    LA
Maine                   1385340    ME
Maryland                6164660    MD
Massachusetts           6981974    MA
Michigan               10034113    MI
Minnesota               5717184    MN
Mississippi 

In [167]:
#need to merge population estimates with analyst count and salary dataframe
analysts_with_pop = pd.merge(dataframe_1, state_pop_clean, how='left', on='State')
analysts_with_pop = analysts_with_pop.rename(columns={'POPESTIMATE2022':'2022 Population Estimates'})
analysts_with_pop

Unnamed: 0,City,Full State Name,State,# of Analysts - City,Median Salary - City,2022 Population Estimates
0,Atlanta,Georgia,GA,2250,104060,10912876
1,Austin,Texas,TX,4110,105350,30029572
2,San Francisco,California,CA,18400,137680,39029342
3,New York City,NewYork,NY,3500,130070,19677151
4,San Diego,California,CA,18400,123570,39029342
5,Boston,Massachusetts,MA,2880,106670,6981974
6,Portland,Oregon,OR,760,108130,4240137
7,Seattle,Washington,WA,2700,140560,7785786
8,Los Angeles,California,CA,18400,103430,39029342
9,Chicago,Illinois,IL,3370,104370,12582032


In [168]:
analysts_with_pop[['Latitude', 'Longitude']] = ""
analysts_with_pop

Unnamed: 0,City,Full State Name,State,# of Analysts - City,Median Salary - City,2022 Population Estimates,Latitude,Longitude
0,Atlanta,Georgia,GA,2250,104060,10912876,,
1,Austin,Texas,TX,4110,105350,30029572,,
2,San Francisco,California,CA,18400,137680,39029342,,
3,New York City,NewYork,NY,3500,130070,19677151,,
4,San Diego,California,CA,18400,123570,39029342,,
5,Boston,Massachusetts,MA,2880,106670,6981974,,
6,Portland,Oregon,OR,760,108130,4240137,,
7,Seattle,Washington,WA,2700,140560,7785786,,
8,Los Angeles,California,CA,18400,103430,39029342,,
9,Chicago,Illinois,IL,3370,104370,12582032,,


In [169]:
dataframe_1['State'] = dataframe_1['State'].astype("str")
dataframe_1.dtypes

City                    object
Full State Name         object
State                   object
# of Analysts - City    object
Median Salary - City    object
dtype: object

In [15]:
# Create a Pandas DataFrame containing state coordinates
state_coordinates = pd.DataFrame()

# Display sample data
state_coordinates

In [16]:
# DataFrame 'state_coordinates' containing latitude, longitude, and Population data

# Create a base map
heatmap_plot = folium.Map(location=[state_coordinates['latitude'].mean(), state_coordinates['longitude'].mean()], zoom_start=4)

# Create a list of heat points (latitude, longitude, weight/population)
heat_data = [[row['latitude'], row['longitude'], row['Population']] for _, row in state_coordinates.iterrows()]

# Plot heatmap using Folium's HeatMap function
HeatMap(heat_data, radius=15).add_to(heatmap_plot)

# Save the map as an HTML file
output_file = "heatmap_with_colorbar.html"
heatmap_plot.save(output_file)

# Create a legend using HTML and CSS
legend_html = '''
<div style="position: fixed;
             bottom: 50px; left: 50px; width: 120px; height: 150px; 
             background-color: rgba(255, 255, 255, 0.7);
             z-index:9999; font-size:14px; border-radius:5px;
             border:1px solid grey; padding: 10px;">
    <strong>Color Legend</strong><br>
    <div style="background: linear-gradient(to bottom, red, yellow, green); 
                width: 100%; height: 80px;"></div>
    <small>Low</small> <small style="float:right;">High</small>
</div>
'''

# Add the legend to the map HTML file
with open(output_file, 'r') as f:
    map_html = f.read()

with open(output_file, 'w') as f:
    map_html = map_html.replace('</body>', legend_html + '</body>')
    f.write(map_html)
    
# Display the map in the notebook
from IPython.display import IFrame

IFrame(output_file, width='100%', height=500)

KeyError: 'latitude'

In [None]:
# Configure the map plot_1
map_plot_1 = state_coordinates.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "EsriNatGeo",
    frame_width = 700,
    frame_height = 500,
    size = "Population",
    scale = 0.01,
    color = "City"
)

# Display the map plot
map_plot_1