Getting set up

In [3]:
#if you have not installed folium yet:
#!pip install folium

import folium
import pandas as pd


In [4]:
utah_df = pd.read_csv('1871-utah-postmaster-salaries.csv')
print(utah_df.sample(5))
utah_df.dtypes


        PO_Name      County State  PM_Salary   Latitude   Longitude
68        Manti     Sanpete    UT        100  39.267462 -111.636585
97   Providence       Cache    UT         38  41.708265 -111.817998
131  Wellsville       Cache    UT        360  41.639099 -111.933279
39      Grafton  Washington    UT         12  37.167205 -113.079943
61     Laketown        Utah    UT         12        NaN         NaN


PO_Name       object
County        object
State         object
PM_Salary      int64
Latitude     float64
Longitude    float64
dtype: object

Base Map

In [7]:
utah_map_empty = folium.Map(location=[40, -111], zoom_start=6)
utah_map_empty


Creating Reusable base map

In [11]:
def create_empty_map():
    return folium.Map(location=[40, -111], zoom_start=6)

utah_map = create_empty_map()
utah_map


Check missing data

In [14]:
# Check for columns with missing values
missing_values = utah_df.isna().sum()
print(missing_values)


PO_Name      0
County       0
State        0
PM_Salary    0
Latitude     4
Longitude    4
dtype: int64


In [16]:
# Filter out post offices that are missing a latitude value (ie. we don't have any location information about it)
utah_df_locations = utah_df[utah_df['Latitude'].notna()]
print(len(utah_df))
print(len(utah_df_locations))


136
132


Adding points on map

In [19]:
folium.Marker(location=[38.41, -112.339], popup="Adamsville Post Office").add_to(utah_map)
utah_map


Defining a marker

In [22]:
# Melanie Walsh function we will adadpt to our dataset:
# def create_map_markers(row, map_name):
#    folium.Marker(location=[row['lat'], row['lon']], popup=row['place']).add_to(map_name)

def create_map_markers(row, map_name):
    folium.Marker(location=[row['Latitude'], row['Longitude']], popup=row['PO_Name']).add_to(map_name)


In [24]:
#create a base empty map
utah_map = create_empty_map()

#generate a random row of data
sample_row = utah_df_locations.sample(1)

#use our function on the random row
create_map_markers(sample_row, utah_map)

#display the map
utah_map


  float(coord)
  if math.isnan(float(coord)):
  return [float(x) for x in coords]


Adding markers for a For Loop

In [29]:
# Method 1: Using a for loop to iterate through our dataframe and add markers sequentially
# initialize an empty map
utah_map = create_empty_map()

# iterrows() allows you to loop through a dataframe row by row and return the index position + the row
for index, row in utah_df_locations.iterrows():
    print(f"Name of post office:", row[0])

#now let's iterate through and call our function for each row
for index, row in utah_df_locations.iterrows():
    create_map_markers(row, utah_map)

utah_map


Name of post office: Adamsville
Name of post office: Alma
Name of post office: Alpine City
Name of post office: American Fork
Name of post office: Bellevue
Name of post office: Bingham Canyon
Name of post office: Brigham City
Name of post office: Bullion
Name of post office: Cedar City
Name of post office: Cedar Valley
Name of post office: Centerville
Name of post office: Central City
Name of post office: Chicken Creek
Name of post office: Clarkston
Name of post office: Clifton
Name of post office: Clover Valley
Name of post office: Coalville
Name of post office: Corinne
Name of post office: Cove Creek
Name of post office: Croydon
Name of post office: Deseret
Name of post office: Diamond
Name of post office: Draper
Name of post office: Duncans Retreat
Name of post office: Echo City
Name of post office: Eden
Name of post office: Emmaville
Name of post office: Ephraim
Name of post office: Eureka
Name of post office: Fair View
Name of post office: Fairfield
Name of post office: Farmington

  print(f"Name of post office:", row[0])


In [31]:
# Method 2: Using .apply() to add markers with our function for all rows
# initialize an empty map
utah_map = utah_map_empty

# Now apply this function to each row in our filtered DataFrame
# For each row, we'll pass:
# 1. The row itself (handled automatically by .apply())
# 2. Our map object (we need to specify this explicitly)
# 3. The "axis" value for .apply() to indicate we want to process row by row
# .apply() allows you to apply a function to each row in the dataframe
utah_df_locations.apply(
    create_map_markers, # The function to apply
    map_name=utah_map, # Additional argument to pass to the function
    axis='columns' # Process row by row instead of column by column
)

utah_map


Creating Circle Markers

In [34]:
# Melanie Walsh function we will edit:
#def create_ICE_map_markers(row, map_name):
#    folium.CircleMarker(location=[row['lat'], row['lon']], raidus=100, fill=True,
#                       popup=folium.Popup(f"{row['Name'].title()} <br> {row['City'].title()}, {row['State']}", max_width=200),
#                       tooltip=f"{row['Name'].title()} <br> {row['City'].title()}, {row['State']}"
#                       ).add_to(map_name)

def create_circle_markers(row, map_name):
    folium.CircleMarker(location=[row['Latitude'], row['Longitude']],
                       radius=10,
                       fill=True,
                       popup=folium.Popup(f"{row['PO_Name'].title()}", max_width=200),
                       tooltip=f"{row['PO_Name'].title()}"
                       ).add_to(map_name)


In [36]:
# initialize an empty map
utah_map = create_empty_map()

# call our function for each row
utah_df_locations.apply(create_circle_markers, map_name=utah_map, axis="columns")

utah_map


Customizing Marker Appearance

In [39]:
# alter map appearance
def create_circle_markers(row, map_name):
    folium.CircleMarker(location=[row['Latitude'], row['Longitude']],
                       radius=14,
                       color='blue',
                       fill=True,
                       fill_color='white',
                       fill_opacity=0.6,
                       popup=folium.Popup(f"Post Office: {row['PO_Name'].title()}", max_width=200),
                       tooltip=f"Postmaster Salary: ${row['PM_Salary']}"
                       ).add_to(map_name)

# initialize an empty map
utah_map = create_empty_map()

# call our function for each row
utah_df_locations.apply(
    create_circle_markers, # The function to apply
    map_name=utah_map, # Additional argument to pass to the function
    axis='columns' # Process row by row instead of column by column
)

utah_map


Using a Function to Size the CirclesPermalink

In [42]:
# make new function to create circle markers sized by postmaster salary
def create_sized_circle_markers(row, map_name):
    folium.CircleMarker(location=[row['Latitude'], row['Longitude']],
                       radius=row['PM_Salary'],
                       fill=True,
                       popup=folium.Popup(f"Post Office: {row['PO_Name'].title()}", max_width=200),
                       tooltip=f"Postmaster Salary: ${row['PM_Salary']}"
                       ).add_to(map_name)

# initialize an empty map
utah_map = create_empty_map()

# call our function for each row
utah_df_locations.apply(
    create_sized_circle_markers, # The function to apply
    map_name=utah_map, # Additional argument to pass to the function
    axis='columns' # Process row by row instead of column by column
)

utah_map


Adjusting Marker Sizes for Better VisibilityPermalink

In [45]:
# make new function to create circle markers sized by postmaster salary - this time adjusting the radius size in pixels to make it more legible
def create_sized_circle_markers(row, map_name):
    folium.CircleMarker(location=[row['Latitude'], row['Longitude']],
                       radius=row['PM_Salary']/100,
                       fill=True,
                       popup=folium.Popup(f"Post Office: {row['PO_Name'].title()}", max_width=200),
                       tooltip=f"Postmaster Salary: ${row['PM_Salary']}"
                       ).add_to(map_name)

# initialize an empty map
utah_map = create_empty_map()

# call our function for each row
utah_df_locations.apply(
    create_sized_circle_markers, # The function to apply
    map_name=utah_map, # Additional argument to pass to the function
    axis='columns' # Process row by row instead of column by column
)

utah_map


Categorizing Salaries Into Buckets

In [48]:
utah_df_locations.describe()


Unnamed: 0,PM_Salary,Latitude,Longitude
count,132.0,132.0,132.0
mean,101.098485,39.907743,-112.16119
std,344.22558,1.505091,0.593117
min,4.0,37.006375,-113.819415
25%,12.0,38.874099,-112.37923
50%,22.0,40.380926,-111.97383
75%,70.0,41.080917,-111.819912
max,3600.0,42.1875,-111.28185


In [50]:
def add_salary_buckets(salary):
    # Create a new column for the salary bucket
    if salary < 50:
        bucket = 'Low Salary'
    elif salary >= 50 and salary < 250:
        bucket = 'Medium Salary'
    elif salary >= 250 and salary < 1000:
        bucket = 'High Salary'
    else:
        bucket = 'Very High Salary'
    return bucket


In [52]:
#test out the function
add_salary_buckets(2000)


'Very High Salary'

In [54]:
utah_df_locations['Salary_Bucket'] = utah_df_locations['PM_Salary'].apply(add_salary_buckets)
utah_df_locations.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  utah_df_locations['Salary_Bucket'] = utah_df_locations['PM_Salary'].apply(add_salary_buckets)


Unnamed: 0,PO_Name,County,State,PM_Salary,Latitude,Longitude,Salary_Bucket
0,Adamsville,Beaver,UT,10,38.258303,-112.793835,Low Salary
1,Alma,Weber,UT,12,41.248833,-112.078275,Low Salary
2,Alpine City,Utah,UT,27,40.453283,-111.777986,Low Salary
3,American Fork,Utah,UT,130,40.375229,-111.79632,Medium Salary
4,Bellevue,Washington,UT,20,37.340815,-113.274116,Low Salary


Creating Marker Sizes Based on Salary Categories

In [57]:
# create a function to add marker sizes based on the salary bucket
def add_marker_sizes(category):
    if category == 'Low Salary':
        return 4
    elif category == 'Medium Salary':
        return 8
    elif category == 'High Salary':
        return 12
    else:
        return 16

#test out the function
add_marker_sizes('High Salary')


12

In [59]:
utah_df_locations['Marker_Size'] = utah_df_locations['Salary_Bucket'].apply(add_marker_sizes)
utah_df_locations.head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  utah_df_locations['Marker_Size'] = utah_df_locations['Salary_Bucket'].apply(add_marker_sizes)


Unnamed: 0,PO_Name,County,State,PM_Salary,Latitude,Longitude,Salary_Bucket,Marker_Size
0,Adamsville,Beaver,UT,10,38.258303,-112.793835,Low Salary,4
1,Alma,Weber,UT,12,41.248833,-112.078275,Low Salary,4
2,Alpine City,Utah,UT,27,40.453283,-111.777986,Low Salary,4
3,American Fork,Utah,UT,130,40.375229,-111.79632,Medium Salary,8
4,Bellevue,Washington,UT,20,37.340815,-113.274116,Low Salary,4
5,Bingham Canyon,Salt Lake,UT,12,40.541613,-112.147997,Low Salary,4
7,Brigham City,Box Elder,UT,400,41.510213,-112.015501,High Salary,12
8,Bullion,Piute,UT,12,38.41,-112.339,Low Salary,4
9,Cedar City,Iron,UT,200,37.676644,-113.057171,Medium Salary,8
10,Cedar Valley,Utah,UT,13,40.327171,-112.104385,Low Salary,4


Visualizing Postmaster Salaries by Categor

In [62]:
# make new function to create circle markers sized by salary category
def create_sized_circle_markers(row, map_name):
    folium.CircleMarker(location=[row['Latitude'], row['Longitude']],
                       radius=row['Marker_Size'],
                       fill=True,
                       opacity=0.6,
                       popup=folium.Popup(f"Post Office: {row['PO_Name'].title()}", max_width=200),
                       tooltip=f"Postmaster Salary: ${row['PM_Salary']}"
                       ).add_to(map_name)

# initialize an empty map
utah_map = create_empty_map()

# call our function for each row
utah_df_locations.apply(
    create_sized_circle_markers, # The function to apply
    map_name=utah_map, # Additional argument to pass to the function
    axis='columns' # Process row by row instead of column by column
)

utah_map


**New Dataset**

In [65]:
post_df = pd.read_csv('1877-official-register.csv')
print(post_df.sample(5))
post_df.dtypes


           Name State                  Department  \
476  Las Cruces    NM         Treasury Department   
76        Boise    ID  Department of the Interior   
465    Victoria    BC         Treasury Department   
454      Yakima    WA                    Judicial   
632        Waco    TX         Treasury Department   

                              Type  People   Latitude   Longitude  
476                Customs Service       1  32.319940 -106.763654  
76   General Land Office Registers       1  43.618710 -116.214607  
465                Customs Service       1  48.428421 -123.365644  
454                          Court       1  46.602071 -120.505899  
632       Internal Revenue_Gaugers       1  31.549333  -97.146670  


Name           object
State          object
Department     object
Type           object
People          int64
Latitude      float64
Longitude     float64
dtype: object

In [67]:
us_map_empty = folium.Map(location=[40, -97], zoom_start=4)
us_map_empty

In [69]:
def create_empty_map():
    return folium.Map(location=[40, -97], zoom_start=4)

us_map = create_empty_map()
us_map


In [71]:
# Check for columns with missing values
missing_values = post_df.isna().sum()
print(missing_values)

Name          0
State         0
Department    0
Type          0
People        0
Latitude      0
Longitude     0
dtype: int64


In [73]:
# Filter out post offices that are missing a latitude value (ie. we don't have any location information about it)
post_df_locations = post_df[post_df['Latitude'].notna()]
print(len(post_df))
print(len(post_df_locations))

903
903


In [75]:
# Melanie Walsh function we will adadpt to our dataset:
# def create_map_markers(row, map_name):
#    folium.Marker(location=[row['lat'], row['lon']], popup=row['place']).add_to(map_name)

def create_map_markers(row, map_name):
    folium.Marker(location=[row['Latitude'], row['Longitude']], popup=row['PO_Name']).add_to(map_name)

In [77]:
# Method 1: Using a for loop to iterate through our dataframe and add markers sequentially
# initialize an empty map
post_map = create_empty_map()

# iterrows() allows you to loop through a dataframe row by row and return the index position + the row
for index, row in post_df_locations.iterrows():
    print(f"Name of post office:", row[0])

#now let's iterate through and call our function for each row
for index, row in post_df_locations.iterrows():
    create_map_markers(row, post_map)

post_map

  print(f"Name of post office:", row[0])


Name of post office: Prescott
Name of post office: Florence
Name of post office: San Francisco
Name of post office: Marysville
Name of post office: Humboldt
Name of post office: Stockton
Name of post office: Visalia
Name of post office: Sacramento
Name of post office: Los Angeles
Name of post office: Shasta
Name of post office: Susanville
Name of post office: Independence
Name of post office: Denver
Name of post office: Fair Play
Name of post office: Central City
Name of post office: Pueblo
Name of post office: Del Norte
Name of post office: Lake City
Name of post office: Boise
Name of post office: Lewiston
Name of post office: Topeka
Name of post office: Salina
Name of post office: Independence
Name of post office: Wichita
Name of post office: Kirwin
Name of post office: Concordia
Name of post office: Larned
Name of post office: Hays City
Name of post office: Helena
Name of post office: Bozeman
Name of post office: Fargo
Name of post office: Bismark
Name of post office: Norfolk
Name o

KeyError: 'PO_Name'