# Geo Visualisation


In [108]:
import pandas as pd
import numpy as np
import plotly.express as px
import folium as fl
from folium.plugins import MarkerCluster

In [109]:
pd.set_option('display.max_columns', None)

In [110]:
# load the excel file 
df = pd.read_excel('Canada.xlsx' , sheet_name='Canada by Citizenship', skiprows=20,skipfooter=2 )
df.head()

Unnamed: 0,Type,Coverage,OdName,AREA,AreaName,REG,RegName,DEV,DevName,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,Immigrants,Foreigners,Afghanistan,935,Asia,5501,Southern Asia,902,Developing regions,16,39,39,47,71,340,496,741,828,1076,1028,1378,1170,713,858,1537,2212,2555,1999,2395,3326,4067,3697,3479,2978,3436,3009,2652,2111,1746,1758,2203,2635,2004
1,Immigrants,Foreigners,Albania,908,Europe,925,Southern Europe,901,Developed regions,1,0,0,0,0,0,1,2,2,3,3,21,56,96,71,63,113,307,574,1264,1816,1602,1021,853,1450,1223,856,702,560,716,561,539,620,603
2,Immigrants,Foreigners,Algeria,903,Africa,912,Northern Africa,902,Developing regions,80,67,71,69,63,44,69,132,242,434,491,872,795,717,595,1106,2054,1842,2292,2389,2867,3418,3406,3072,3616,3626,4807,3623,4005,5393,4752,4325,3774,4331
3,Immigrants,Foreigners,American Samoa,909,Oceania,957,Polynesia,902,Developing regions,0,1,0,0,0,0,0,1,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
4,Immigrants,Foreigners,Andorra,908,Europe,925,Southern Europe,901,Developed regions,0,0,0,0,0,0,2,0,0,0,3,0,1,0,0,0,0,0,2,0,0,1,0,2,0,0,1,1,0,0,0,0,1,1


In [111]:
# clean the dataset
cols_to_drop = ['Type','Coverage','AREA','REG','DEV']
df.drop(columns=cols_to_drop, inplace=True) # remove unnecessary columns
df.rename(mapper={
      'OdName': 'Country',
      'AreaName': 'Continent',
      'DevName': 'Status',
      'Regname': 'Region',
}, axis=1, inplace=True)

# set the country as index 
df.set_index('Country',inplace=True)  # since the Country Columns have unique value that why its made as an index Columns 
years = list(range(1980,2014))

# add the total column
df['Total'] = df[years].sum(axis=1)
# Add the totasl columns 
df.head()

Unnamed: 0_level_0,Continent,RegName,Status,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,Total
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
Afghanistan,Asia,Southern Asia,Developing regions,16,39,39,47,71,340,496,741,828,1076,1028,1378,1170,713,858,1537,2212,2555,1999,2395,3326,4067,3697,3479,2978,3436,3009,2652,2111,1746,1758,2203,2635,2004,58639
Albania,Europe,Southern Europe,Developed regions,1,0,0,0,0,0,1,2,2,3,3,21,56,96,71,63,113,307,574,1264,1816,1602,1021,853,1450,1223,856,702,560,716,561,539,620,603,15699
Algeria,Africa,Northern Africa,Developing regions,80,67,71,69,63,44,69,132,242,434,491,872,795,717,595,1106,2054,1842,2292,2389,2867,3418,3406,3072,3616,3626,4807,3623,4005,5393,4752,4325,3774,4331,69439
American Samoa,Oceania,Polynesia,Developing regions,0,1,0,0,0,0,0,1,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,6
Andorra,Europe,Southern Europe,Developed regions,0,0,0,0,0,0,2,0,0,0,3,0,1,0,0,0,0,0,2,0,0,1,0,2,0,0,1,1,0,0,0,0,1,1,15


# ChoroPleth Map


In [112]:
px.choropleth(
      data_frame=df,
      locationmode= 'country names',
      locations=df.index, # here index is Country Form our Data Frame
      projection='natural earth',
      color='Total',
      hover_name=df.Continent,
      height=600,
      color_continuous_scale='temps'
)

In [113]:
# px.choropleth?


# Map Markers and Clusters

In [114]:
incdf = pd.read_csv('incidents_small.csv')
incdf.head()


Columns (1,2,3,4,5,6,7,8,11) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272.0,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005800000000.0
1,120058272.0,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005800000000.0
2,141059263.0,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,BAYVIEW,"ARREST, BOOKED",KEITH ST / SHAFTER AV,-122.388856,37.729981,"(37.7299809672996, -122.388856204292)",14105900000000.0
3,160013662.0,NON-CRIMINAL,LOST PROPERTY,Tuesday,01-05-2016 00:00,23:50,TENDERLOIN,NONE,JONES ST / OFARRELL ST,-122.412971,37.785788,"(37.7857883766888, -122.412970537591)",16001400000000.0
4,160002740.0,NON-CRIMINAL,LOST PROPERTY,Friday,01-01-2016 00:00,00:30,MISSION,NONE,16TH ST / MISSION ST,-122.419672,37.76505,"(37.7650501214668, -122.419671780296)",16000300000000.0


In [115]:
fl.Map(tiles='Stamen Terrain', zoom_start=2)

In [116]:
fl.Map(tiles='Stamen Toner', zoom_start=2)

In [117]:
fl.Map(tiles='Stamen Watercolor')

In [118]:
fl.Map(tiles='CartoDB Positron', zoom_start=2)

In [119]:
incdf.dropna(inplace=True)

In [120]:
print(incdf.columns.to_list())

['IncidntNum', 'Category', 'Descript', 'DayOfWeek', 'Date', 'Time', 'PdDistrict', 'Resolution', 'Address', 'X', 'Y', 'Location', 'PdId']


In [121]:
map = fl.Map(location=(37.7749,-122.4194),zoom_start=12)
s = incdf.head(100).copy()
for lat, lng, incident in zip(s.Y,s.X,s.Category):
      fl.Marker([lat,lng],popup=incident).add_to(map)
map


In [122]:
map = fl.Map(location=(37.7749,-122.4194),zoom_start=12)
s = incdf.head(100).copy()
for lat, lng, incident in zip(s.Y,s.X,s.Category):
      fl.CircleMarker([lat,lng],radius=5,
      color= 'red', fill=True ,
      fill_color = 'black',fill_opacity=0.6,
      popup=incident).add_to(map)
map


In [123]:
map = fl.Map(location=(37.7749,-122.4194),zoom_start=12)

marker_cluster = MarkerCluster().add_child(map)

for lat, lng, incident in zip(incdf.Y,incdf.X,incdf.Category):
      fl.Marker([lat,lng],popup=incident).add_to(marker_cluster)
map


AttributeError: 'MarkerCluster' object has no attribute '_repr_html_'

<folium.folium.Map at 0x1f31a555340>