In [3]:
# # Introduction to Folium
# # Folium is a powerful Python library that helps you create several types of Leaflet maps. The fact that the 
# # Folium results are interactive makes this library very useful for dashboard building.


# Folium builds on the data wrangling strengths of the Python ecosystem and the mapping strengths of the 
# Leaflet.js library. Manipulate your data in Python, then visualize it on a Leaflet map via Folium.

# Folium makes it easy to visualize data that's been manipulated in Python on an interactive Leaflet map. 
# It enables both the binding of data to a map for choropleth visualizations as well as passing Vincent/Vega 
# visualizations as markers on the map.

# The library has a number of built-in tilesets from OpenStreetMap, Mapbox, and Stamen, and supports custom 
# tilesets with Mapbox or Cloudmade API keys. Folium supports both GeoJSON and TopoJSON overlays, as well as 
# the binding of data to those overlays to create choropleth maps with color-brewer color schemes.





In [4]:
# pip install folium

In [5]:
import folium

In [6]:
# Generating the world map is straightforward in Folium. You simply create a Folium Map object, and then 
# you display it. What is attractive about Folium maps is that they are interactive, so you can zoom into 
# any region of interest despite the initial zoom level.

In [7]:
# Create a Folium map using only default settings (no location, zoom level, or tiles are passed).
world_map = folium.Map()

In [8]:
# Display the world map: a bare expression on the last line of a cell is rendered as the cell output.
world_map

In [9]:
# Re-create the map, this time centred on Canada and starting at a low zoom level.
# Changing zoom_start shows how the zoom level affects the rendered map.
world_map = folium.Map(
    location=[56.130, -106.35],  # [latitude, longitude]
    zoom_start=4,
)

In [10]:
# Display the Canada-centred map as the cell output.
world_map

In [11]:
# Same Canada-centred map again, now starting at a higher zoom level (8 instead of 4).
world_map = folium.Map(
    location=[56.130, -106.35],  # [latitude, longitude]
    zoom_start=8,
)

# Render the map as the cell output.
world_map

In [12]:
# Goal: a map of Mexico with a zoom level of 4.
# Mexico's geolocation coordinates, as (latitude, longitude).
mexico_latitude, mexico_longitude = 23.6345, -102.5528


In [13]:
 # Build the map centred on Mexico, starting at zoom level 4.
mexico_map = folium.Map(
    location=[mexico_latitude, mexico_longitude],
    zoom_start=4,
)

In [14]:
# Display the Mexico map as the cell output.
mexico_map

In [15]:
# # with folium we can generate different map styles
# A. Stamen Toner Maps
# These are high-contrast B+W (black and white) maps. They are perfect for data mashups and exploring 
# river meanders and coastal zones.

In [16]:
# Stamen Toner map centred on Canada, starting at zoom level 4.
# NOTE(review): newer folium releases may no longer bundle the Stamen tilesets
# (their hosting moved to Stadia Maps) — confirm that 'Stamen Toner' still
# resolves with the installed folium version.
world_map = folium.Map(
    location=[56.130, -106.35],  # [latitude, longitude]
    zoom_start=4,
    tiles='Stamen Toner',
)

# Render the map as the cell output.
world_map

In [17]:
# B. Stamen Terrain Maps
# These are maps that feature hill shading and natural vegetation colors. They showcase advanced labeling 
# and linework generalization of dual-carriageway roads.

In [18]:
# Stamen Terrain map centred on Canada, starting at zoom level 4.
# NOTE(review): newer folium releases may no longer bundle the Stamen tilesets
# (their hosting moved to Stadia Maps) — confirm that 'Stamen Terrain' still
# resolves with the installed folium version.
world_map = folium.Map(
    location=[56.130, -106.35],  # [latitude, longitude]
    zoom_start=4,
    tiles='Stamen Terrain',
)

# Render the map as the cell output.
world_map

In [19]:
# Goal: a map of Mexico showing its hill shading and natural vegetation, at zoom level 6.
# Mexico's geolocation coordinates, as (latitude, longitude).
mexico_latitude, mexico_longitude = 23.6345, -102.5528

In [20]:
# Stamen Terrain map centred on Mexico, starting at zoom level 6.
# NOTE(review): newer folium releases may no longer bundle the Stamen tilesets
# (their hosting moved to Stadia Maps) — confirm that 'Stamen Terrain' still
# resolves with the installed folium version.
mexico_map = folium.Map(
    location=[mexico_latitude, mexico_longitude],
    zoom_start=6,
    tiles='Stamen Terrain',
)

In [21]:
 # Display the Mexico terrain map as the cell output.
mexico_map

In [22]:
# Maps with Markers
# Read the police incidents CSV (the file name indicates 2016 data) into a DataFrame.
import pandas as pd
df = pd.read_csv('Police_Department_Incidents_-_Previous_Year__2016_.csv')
# Preview the first rows as the cell output.
df.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212120
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212168
2,141059263,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,BAYVIEW,"ARREST, BOOKED",KEITH ST / SHAFTER AV,-122.388856,37.729981,"(37.7299809672996, -122.388856204292)",14105926363010
3,160013662,NON-CRIMINAL,LOST PROPERTY,Tuesday,01/05/2016 12:00:00 AM,23:50,TENDERLOIN,NONE,JONES ST / OFARRELL ST,-122.412971,37.785788,"(37.7857883766888, -122.412970537591)",16001366271000
4,160002740,NON-CRIMINAL,LOST PROPERTY,Friday,01/01/2016 12:00:00 AM,00:30,MISSION,NONE,16TH ST / MISSION ST,-122.419672,37.76505,"(37.7650501214668, -122.419671780296)",16000274071000


In [23]:
# Size of the full data set as a (rows, columns) tuple.
df.shape

(150500, 13)

In [24]:
# Keep only the first `limit` crimes (all columns) in the df_incidents dataframe.
limit = 100
df_incidents = df.head(limit)

In [25]:
# Sanity check: the (rows, columns) tuple should now report only 100 rows.
df_incidents.shape

(100, 13)

In [26]:
#visualizing where these crimes took place in the city of San Francisco. 
# We will use the default style, and we will initialize the zoom level to 12.

In [27]:
# Coordinates used to centre the San Francisco map, as (latitude, longitude).
latitude, longitude = 37.77, -122.42

In [28]:
# Build the San Francisco map (default tiles), starting at zoom level 12.
sanfran_map = folium.Map(
    location=[latitude, longitude],
    zoom_start=12,
)

In [29]:
# Display the San Francisco map as the cell output.
sanfran_map

In [30]:
# superimposing the locations of the crimes onto the map. The way to do that in Folium is to create a feature 
# group with its own features and style and then add it to the sanfran_map.


In [31]:
# Empty feature group that will collect one marker per incident in the dataframe.
incidents = folium.FeatureGroup()

In [32]:
# Loop through the 100 crimes and add a circle marker for each to the incidents feature group.
# `folium.CircleMarker` is the public top-level name of the class; reaching it through
# `folium.features` only works while that module happens to import it.
for lat, lng in zip(df_incidents.Y, df_incidents.X):
    incidents.add_child(
        folium.CircleMarker(
            [lat, lng],  # column Y is passed as latitude, column X as longitude
            radius=5,  # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6,
        )
    )

In [33]:
# Attach the incidents feature group to the San Francisco map.
# As the last expression of the cell, the call's return value is shown as the cell output.
sanfran_map.add_child(incidents)

In [34]:
# adding some pop-up text that gets displayed when you click on a marker. making each marker display the 
# category of the crime in its pop-up.

In [35]:
# Start from a fresh map. The previous sanfran_map already has the first incidents
# feature group attached, so adding a second, identical group to it would draw every
# circle marker twice (and once more on each re-run of these cells).
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# Instantiate a new, empty feature group for the incidents in the dataframe.
incidents = folium.FeatureGroup()

In [36]:
# Loop through the 100 crimes and add a circle marker for each to the incidents feature group.
# `folium.CircleMarker` is the public top-level name of the class; reaching it through
# `folium.features` only works while that module happens to import it.
for lat, lng in zip(df_incidents.Y, df_incidents.X):
    incidents.add_child(
        folium.CircleMarker(
            [lat, lng],  # column Y is passed as latitude, column X as longitude
            radius=5,  # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6,
        )
    )

In [37]:
# Pull the coordinates and the crime categories out of the dataframe as plain lists.
latitudes, longitudes, labels = (
    list(df_incidents[column]) for column in ('Y', 'X', 'Category')
)

# Put a standard marker on the map for every incident; its pop-up carries the crime category.
for lat, lng, label in zip(latitudes, longitudes, labels):
    folium.Marker(location=[lat, lng], popup=label).add_to(sanfran_map)

In [38]:
# Attach the circle-marker feature group to the map that already holds the pop-up markers.
# As the last expression of the cell, the call's return value is shown as the cell output.
sanfran_map.add_child(incidents)

In [39]:
# adding the text to the circle markers themselves

In [40]:
# Re-create the San Francisco map so it starts without any markers.
sanfran_map = folium.Map(
    location=[latitude, longitude],
    zoom_start=12,
)

In [41]:
# Loop through the 100 crimes and add each one straight to the map as a circle marker
# whose pop-up shows the crime category.
# `folium.CircleMarker` is the public top-level name of the class; reaching it through
# `folium.features` only works while that module happens to import it.
for lat, lng, label in zip(df_incidents.Y, df_incidents.X, df_incidents.Category):
    folium.CircleMarker(
        [lat, lng],  # column Y is passed as latitude, column X as longitude
        radius=5,  # define how big you want the circle markers to be
        color='yellow',
        fill=True,
        popup=label,
        fill_color='blue',
        fill_opacity=0.6,
    ).add_to(sanfran_map)

In [42]:
# Display the map with the labelled circle markers as the cell output.
sanfran_map

In [43]:
# Another remedy is to group the markers into different clusters. Each cluster is then represented 
# by the number of crimes in each neighborhood. These clusters can be thought of as pockets of San Francisco 
# which you can then analyze separately.

# To implement this, we start off by instantiating a MarkerCluster object and adding all the data points in 
# the dataframe to this object.

In [44]:
# grouping the markers into different clusters

from folium import plugins

In [45]:
# Begin again from a clean San Francisco map with no markers on it.
sanfran_map = folium.Map(
    location=[latitude, longitude],
    zoom_start=12,
)

In [46]:
# Create a marker cluster for the incidents in the dataframe, then attach it to the map.
incidents = plugins.MarkerCluster()
incidents.add_to(sanfran_map)

In [47]:
# Add every incident to the marker cluster as a marker whose pop-up shows the crime category.
for lat, lng, label in zip(df_incidents.Y, df_incidents.X, df_incidents.Category):
    folium.Marker(location=[lat, lng], icon=None, popup=label).add_to(incidents)

In [48]:
# Display the map with the clustered markers as the cell output.
sanfran_map

In [49]:
# Choropleth Maps
# A Choropleth map is a thematic map in which areas are shaded or patterned in proportion to the measurement 
# of the statistical variable being displayed on the map, such as population density or per-capita income. 
# The choropleth map provides an easy way to visualize how a measurement varies across a geographic area, or it 
# shows the level of variability within a region. Below is a Choropleth map of the US depicting the population by 
# square mile per state.




In [50]:
# Load the 'Canada by Citizenship' sheet of the workbook into a DataFrame,
# skipping the first 20 rows and the last 2 rows of the sheet.
df_can = pd.read_excel(
    'Canada.xlsx',
    sheet_name='Canada by Citizenship',
    skiprows=range(20),
    skipfooter=2,
    engine='openpyxl',
)

# Preview the first rows as the cell output.
df_can.head()

Unnamed: 0,Type,Coverage,OdName,AREA,AreaName,REG,RegName,DEV,DevName,1980,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,Immigrants,Foreigners,Afghanistan,935,Asia,5501,Southern Asia,902,Developing regions,16,...,2978,3436,3009,2652,2111,1746,1758,2203,2635,2004
1,Immigrants,Foreigners,Albania,908,Europe,925,Southern Europe,901,Developed regions,1,...,1450,1223,856,702,560,716,561,539,620,603
2,Immigrants,Foreigners,Algeria,903,Africa,912,Northern Africa,902,Developing regions,80,...,3616,3626,4807,3623,4005,5393,4752,4325,3774,4331
3,Immigrants,Foreigners,American Samoa,909,Oceania,957,Polynesia,902,Developing regions,0,...,0,0,1,0,0,0,0,0,0,0
4,Immigrants,Foreigners,Andorra,908,Europe,925,Southern Europe,901,Developed regions,0,...,0,0,1,1,0,0,0,0,1,1


In [51]:
# Print the dimensions of the dataframe as a (rows, columns) tuple.
print(df_can.shape)

(195, 43)


In [52]:
# Clean up data. We make some modifications to the original dataset to make it easier
# to create our visualizations.
# Remove the unnecessary columns (e.g. REG). The result is rebound instead of dropped
# in place, and errors='ignore' skips columns that are already gone, so this cell can be
# re-run without raising KeyError.
df_can = df_can.drop(columns=['AREA', 'REG', 'DEV', 'Type', 'Coverage'], errors='ignore')