In [None]:
# Generating Maps with Python

#Folium is a powerful Python library that helps you create several types of Leaflet maps. The fact that the Folium results are interactive makes this library very useful for dashboard building.

#From the official Folium documentation page:

#Folium builds on the data wrangling strengths of the Python ecosystem and the mapping strengths of the Leaflet.js library. Manipulate your data in Python, then visualize it in on a Leaflet map via Folium.

#Folium makes it easy to visualize data that's been manipulated in Python on an interactive Leaflet map. It enables both the binding of data to a map for choropleth visualizations as well as passing Vincent/Vega visualizations as markers on the map.

#The library has a number of built-in tilesets from OpenStreetMap, Mapbox, and Stamen, and supports custom tilesets with Mapbox or Cloudmade API keys. Folium supports both GeoJSON and TopoJSON overlays, as well as the binding of data to those overlays to create choropleth maps with color-brewer color schemes.

In [1]:
import numpy as np  # useful for many scientific computing in Python
import pandas as pd # primary data structure library

In [2]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Folium installed and imported!')

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

# All requested packages already installed.

Folium installed and imported!


In [3]:
# define the world map
world_map = folium.Map()

# display world map
world_map

In [4]:
# define the world map centered around Canada with a low zoom level
world_map = folium.Map(location=[56.130, -106.35], zoom_start=4)

# display world map
world_map

In [5]:
# California : 37.7749° N, 122.4194° W location: N , E is + and S and W is - 

In [6]:
California_map = folium.Map(location=[ 37.7749, -122.4194], zoom_start=6)

# display world map
California_map

In [7]:
# China map: 35.8617° N, 104.1954° E
China_map=folium.Map(location=[35.8617, 104.1954],zoom_start=5)
# Display map:
China_map
    


In [8]:
# Map of Brazil 14.2350° S, 51.9253° W
Brazil_map=folium.Map(location=[-14.2350,-51.9253], zoom_start= 4)
Brazil_map

In [9]:
# Mexico : 23.6345° N, 102.5528° W
Mexico_map= folium.Map(location=[23.6345, -102.5528], zoom_start= 4) # Try zoom level = 8?
Mexico_map


In [10]:
# Mexico : 23.6345° N, 102.5528° W
Mexico_map= folium.Map(location=[23.6345, -102.5528], zoom_start= 8) # Try zoom level = 8?
Mexico_map

In [11]:
# Map of different styles
# A. These are high-contrast B+W (black and white) maps. They are perfect for data mashups and exploring river meanders
# and coastal zones.



In [12]:
# create a Stamen Toner map of the world centered around Canada with tiles=” “
world_map = folium.Map(location=[56.130, -106.35], zoom_start=4, tiles='Stamen Toner')
# display map
world_map



In [13]:
# B. Stamen Terrain Maps
#These are maps that feature hill shading and natural vegetation colors. 
#They showcase advanced labeling and linework generalization of dual-carriageway roads.
#Let's create a Stamen Terrain map of Canada with zoom level 4.


In [14]:
# # create a Stamen Toner map of the world centered around Canada
world_map = folium.Map(location=[56.130, -106.35], zoom_start=4, tiles='Stamen Terrain')

# display map
world_map

In [15]:
# C. Mapbox Bright Maps
# These are maps that quite similar to the default style, except that the borders are not visible with
#a low zoom level. Furthermore, unlike the default style where country names are displayed in each country's native language, 
# Mapbox Bright style displays all country names in English.

# Let's create a world map with this style.

In [16]:
# create a world map with a Mapbox Bright style.
world_map = folium.Map(location=[56.130, -106.35], zoom_start=4,tiles='Mapbox Bright')

# display the map
world_map

In [17]:
# Mexico : 23.6345° N, 102.5528° W
Mexico_map= folium.Map(location=[23.6345, -102.5528], zoom_start= 6, tiles='stamen terrain')

Mexico_map

In [18]:
# Adding markers to the map 
# add Ontario to Canada with popup as a parameter 

In [19]:
# Generate map of Canada
canada_map=folium.Map(location=[56.130, -106.35], zoom_start=4)
## Add a red marker to Ontario
# Create a feature group with empty parameters :
Ontario= folium.map.FeatureGroup( )
# Style the feature group: 
Ontario.add_child(folium.CircleMarker([51.25,-85.32], # longitude and Lagitude of the circle marker 
                                                               radius= 5,  # size of the circlemarker
                                                               color='red',# color of the circle
                                                               fill=True,  # This is important otherwise won't show fill
                                                               fill_color ='blue',
                                                                fill_opacity=0.6,    # how think is the fill
                                                               #popup = "Ontario"
                                      # inside of the circle with red color   # for the CircleMarker in the folium module 
                                     ) )  # for the child add to Ontario
# add the feature group to the map
canada_map.add_child(Ontario)  



# Use popup text to label the marker and add to the map:
canada_map.add_child(folium.Marker([51.25,-85.32],
              popup = "Hey Ontario"
            ))#.add_to(canada_map)  # Parameters of the marker in folium
#folium.Marker.add_to(canada_map) # Add the marker label to the map

# Display map”
canada_map


In [20]:
# No popoup label, have popoup in the parameters of the child of map.feature group and CircleMarker

In [21]:
# Generate map of Canada
canada_map=folium.Map(location=[56.130, -106.35], zoom_start=4)
## Add a red marker to Ontario
# Create a feature group with empty parameters :
Ontario= folium.map.FeatureGroup( )
# Style the feature group: 
Ontario.add_child(folium.CircleMarker([51.25,-85.32], # longitude and Lagitude of the circle marker 
                                                               radius= 5,  # size of the circlemarker
                                                               color='red',# color of the circle
                                                               fill=True,  # This is important otherwise won't show fill
                                                               fill_color ='blue',
                                                                fill_opacity=0.6,    # how think is the fill
                                                               popup = "Ontario"
                                      # inside of the circle with red color   # for the CircleMarker in the folium module 
                                     ) )  # for the child add to Ontario
# add the feature group to the map
canada_map.add_child(Ontario)  
# Display map”
canada_map

In [22]:
# Map of san Francisco with Markers of Criminal incidents 

In [23]:
df_incidents = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/Police_Department_Incidents_-_Previous_Year__2016_.csv')

print('Dataset downloaded and read into a pandas dataframe!')

Dataset downloaded and read into a pandas dataframe!


In [24]:
df_incidents.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212120
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212168
2,141059263,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,BAYVIEW,"ARREST, BOOKED",KEITH ST / SHAFTER AV,-122.388856,37.729981,"(37.7299809672996, -122.388856204292)",14105926363010
3,160013662,NON-CRIMINAL,LOST PROPERTY,Tuesday,01/05/2016 12:00:00 AM,23:50,TENDERLOIN,NONE,JONES ST / OFARRELL ST,-122.412971,37.785788,"(37.7857883766888, -122.412970537591)",16001366271000
4,160002740,NON-CRIMINAL,LOST PROPERTY,Friday,01/01/2016 12:00:00 AM,00:30,MISSION,NONE,16TH ST / MISSION ST,-122.419672,37.76505,"(37.7650501214668, -122.419671780296)",16000274071000


In [25]:
df_incidents.shape  # 150500 crimes and 13 features

(150500, 13)

In [26]:
# Reduce to 100 crimes
# get the first 100 crimes in the df_incidents dataframe
limit = 100  # you could change the limit as wantd
df_incidents = df_incidents.iloc[0:limit, :]  # Index with rows from 0 to 100 and columns from 0 to 13

In [27]:
df_incidents.shape

(100, 13)

In [28]:
# Now that we reduced the data a little bit, let's visualize where these crimes took place in the city of San Francisco. 
# We will use the default style and we will initialize the zoom level to 12.

In [29]:
# San Francisco latitude and longitude values
latitude = 37.77
longitude = -122.42

In [30]:
# create map and display it
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)  # zoom level 12 big so close to the center 

# display the map of San Francisco
sanfran_map

In [31]:
# Now let's superimpose the locations of the crimes onto the map. The way to do that in Folium is 
# to create a feature group with its own features and style and then add it to the sanfran_map.

In [32]:
# instantiate a feature group for the incidents in the dataframe
incidents = folium.map.FeatureGroup()

# loop through the 100 crimes and add each to the incidents feature group
for lat, lng, in zip(df_incidents.Y, df_incidents.X):
    incidents.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        )
    )

# add incidents to map
sanfran_map.add_child(incidents)

In [33]:
# Add popup texts to the markers

# instantiate a feature group for the incidents in the dataframe
incidents = folium.map.FeatureGroup()

# loop through the 100 crimes and add each to the incidents feature group
for lat, lng, in zip(df_incidents.Y, df_incidents.X):
    incidents.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        )
    )

# add pop-up text to each marker on the map
latitudes = list(df_incidents.Y)
longitudes = list(df_incidents.X)
labels = list(df_incidents.Category)

for lat, lng, label in zip(latitudes, longitudes, labels):
    folium.Marker([lat, lng], popup=label).add_to(sanfran_map)    
    
# add incidents to map
sanfran_map.add_child(incidents)

In [34]:
# If you find the map to be so congested will all these markers, there are two remedies to this problem.
# The simpler solution is to remove these location markers and just add the text to the circle markers themselves as follows:

In [35]:
# create map and display it
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# loop through the 100 crimes and add each to the map
for lat, lng, label in zip(df_incidents.Y, df_incidents.X, df_incidents.Category):
    folium.features.CircleMarker(
        [lat, lng],
        radius=5, # define how big you want the circle markers to be
        color='yellow',
        fill=True,
        popup=label,
        fill_color='blue',
        fill_opacity=0.6
    ).add_to(sanfran_map)

# show map
sanfran_map

In [36]:
# the other proper remedy is to group the markers into different clusters.
#Each cluster is then represented by the number of crimes in each neighborhood. These clusters can be thought of as pockets of San Francisco which you can then analyze separately.

# To implement this, we start off by instantiating a MarkerCluster object and adding all the data points
# in the dataframe to this object.

In [37]:
from folium import plugins

# let's start again with a clean copy of the map of San Francisco
sanfran_map = folium.Map(location = [latitude, longitude], zoom_start = 12)

# instantiate a mark cluster object for the incidents in the dataframe
incidents = plugins.MarkerCluster().add_to(sanfran_map)

# loop through the dataframe and add each data point to the mark cluster
for lat, lng, label, in zip(df_incidents.Y, df_incidents.X, df_incidents.Category):
    folium.Marker(
        location=[lat, lng],
        icon=None,
        popup=label,
    ).add_to(incidents)

# display map
sanfran_map

In [38]:
# Notice how when you zoom out all the way, all markers are grouped into one cluster, the global cluster, 
# of 100 markers or crimes, which is the total number of crimes in our dataframe. Once you start zooming in,
#the global cluster will start breaking up into smaller clusters. 
#Zooming in all the way will result in individual markers.

In [39]:
# A Choropleth map is a thematic map in which areas are shaded or patterned in proportion to 
#the measurement of the statistical variable being displayed on the map, such as population density or per-capita income. 
#The choropleth map provides an easy way to visualize how a measurement varies across a geographic area or it 
#shows the level of variability within a region. Below is a Choropleth map of the US depicting the population by square mile per state.


In [40]:
df_can = pd.read_excel('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/Canada.xlsx',
                     sheet_name='Canada by Citizenship',
                     skiprows=range(20),
                     skipfooter=2)

print('Data downloaded and read into a dataframe!')

Data downloaded and read into a dataframe!


In [41]:
df_can.head()

Unnamed: 0,Type,Coverage,OdName,AREA,AreaName,REG,RegName,DEV,DevName,1980,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,Immigrants,Foreigners,Afghanistan,935,Asia,5501,Southern Asia,902,Developing regions,16,...,2978,3436,3009,2652,2111,1746,1758,2203,2635,2004
1,Immigrants,Foreigners,Albania,908,Europe,925,Southern Europe,901,Developed regions,1,...,1450,1223,856,702,560,716,561,539,620,603
2,Immigrants,Foreigners,Algeria,903,Africa,912,Northern Africa,902,Developing regions,80,...,3616,3626,4807,3623,4005,5393,4752,4325,3774,4331
3,Immigrants,Foreigners,American Samoa,909,Oceania,957,Polynesia,902,Developing regions,0,...,0,0,1,0,0,0,0,0,0,0
4,Immigrants,Foreigners,Andorra,908,Europe,925,Southern Europe,901,Developed regions,0,...,0,0,1,1,0,0,0,0,1,1


In [42]:
# print the dimensions of the dataframe
print(df_can.shape)

(195, 43)


In [43]:
# clean up the dataset to remove unnecessary columns (eg. REG) 
df_can.drop(['AREA','REG','DEV','Type','Coverage'], axis=1, inplace=True)

# let's rename the columns so that they make sense
df_can.rename(columns={'OdName':'Country', 'AreaName':'Continent','RegName':'Region'}, inplace=True)

# for sake of consistency, let's also make all column labels of type string
df_can.columns = list(map(str, df_can.columns))

# add total column
df_can['Total'] = df_can.sum(axis=1)

# years that we will be using in this lesson - useful for plotting later on
years = list(map(str, range(1980, 2014)))
print ('data dimensions:', df_can.shape)

data dimensions: (195, 39)


In [44]:
df_can.head()

Unnamed: 0,Country,Continent,Region,DevName,1980,1981,1982,1983,1984,1985,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,Total
0,Afghanistan,Asia,Southern Asia,Developing regions,16,39,39,47,71,340,...,3436,3009,2652,2111,1746,1758,2203,2635,2004,58639
1,Albania,Europe,Southern Europe,Developed regions,1,0,0,0,0,0,...,1223,856,702,560,716,561,539,620,603,15699
2,Algeria,Africa,Northern Africa,Developing regions,80,67,71,69,63,44,...,3626,4807,3623,4005,5393,4752,4325,3774,4331,69439
3,American Samoa,Oceania,Polynesia,Developing regions,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,6
4,Andorra,Europe,Southern Europe,Developed regions,0,0,0,0,0,0,...,0,1,1,0,0,0,0,1,1,15


In [45]:
# download countries geojson file
#import wget
#!wget --quiet https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/world_countries.json -O world_countries.json
    
#print('GeoJSON file downloaded!')

ModuleNotFoundError: No module named 'wget'

In [68]:
import requests
r = requests.get('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/world_countries.json')


In [69]:
# Now that we have the GeoJSON file, let's create a world map, centered around [0, 0] latitude and longitude values,
# with an intial zoom level of 2, and using Mapbox Bright style.

In [70]:
world_geo = r # geojson file

# create a plain world map
world_map = folium.Map(location=[0, 0], zoom_start=2, tiles='Mapbox Bright')

In [71]:
# generate choropleth map using the total immigration of each country to Canada from 1980 to 2013
world_map.choropleth(
    geo_data=world_geo,
    data=df_can,
    columns=['Country', 'Total'],
    key_on='feature.properties.name',
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Immigration to Canada'
)

# display map
world_map

ValueError: Unhandled object <Response [404]>.

In [None]:
# As per our Choropleth map legend, the darker the color of a country and 
# the closer the color to red, the higher the number of immigrants from that country. Accordingly, 
# the highest immigration over the course of 33 years (from 1980 to 2013) was from China, India, and the Philippines, 
# followed by Poland, Pakistan, and interestingly, the US.

# Notice how the legend is displaying a negative boundary or threshold. Let's fix that by defining 
# our own thresholds and starting with 0 instead of -6,918!

In [57]:
world_geo = r'world_countries.json'

# create a numpy array of length 6 and has linear spacing from the minium total immigration to the maximum total immigration
threshold_scale = np.linspace(df_can['Total'].min(),
                              df_can['Total'].max(),
                              6, dtype=int)
threshold_scale = threshold_scale.tolist() # change the numpy array to a list
threshold_scale[-1] = threshold_scale[-1] + 1 # make sure that the last value of the list is greater than the maximum immigration

# let Folium determine the scale.
world_map = folium.Map(location=[0, 0], zoom_start=2, tiles='Mapbox Bright')
world_map.choropleth(
    geo_data=world_geo,
    data=df_can,
    columns=['Country', 'Total'],
    key_on='feature.properties.name',
    threshold_scale=threshold_scale,
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Immigration to Canada',
    reset=True
)
world_map

FileNotFoundError: [Errno 2] No such file or directory: 'world_countries.json'