## Task 2: Kepler GL

In [1]:
import pandas as pd
import geopandas as gpd
from keplergl import KeplerGl

In [2]:
#Create a basemap 
m = KeplerGl(height=600, width=800)

#show the map
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(height=600)

In [3]:
# Create a Dataframe
df = pd.DataFrame(
    {
        "City": ["Shanghai", "Sao paulo", "Cairo", "London", "Toronto", "Sydney"],
        "Country": ["China", "Brazil", "Egypt", "England", "Canada", "Australia"],
        "Latitude": [31.045556, -23.473293, 30.05, 51.514125, 43.666667, -33.861481],
        "Longitude": [121.399722, -46.665803, 31.25, -0.093689, -79.416667, 151.205475],
        "Population": [14608512, 10021437, 7734602, 7421228, 4612187, 4394585],
    }
)


df

Unnamed: 0,City,Country,Latitude,Longitude,Population
0,Shanghai,China,31.045556,121.399722,14608512
1,Sao paulo,Brazil,-23.473293,-46.665803,10021437
2,Cairo,Egypt,30.05,31.25,7734602
3,London,England,51.514125,-0.093689,7421228
4,Toronto,Canada,43.666667,-79.416667,4612187
5,Sydney,Australia,-33.861481,151.205475,4394585


In [4]:
m.add_data(data=df, name='cities') # Add cities to the map

In [5]:
m

KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': 'baaehm', 'type': '…

## Task 3: Customizing Point Map

In [6]:
import pandas as pd
import geopandas as gpd
from keplergl import KeplerGl


In [7]:
# Create A dataframe

df = pd.DataFrame(
    {
        "City": ["Shanghai", "Sao paulo", "Cairo", "London", "Toronto", "Sydney"],
        "Country": ["China", "Brazil", "Egypt", "England", "Canada", "Australia"],
        "Latitude": [31.045556, -23.473293, 30.05, 51.514125, 43.666667, -33.861481],
        "Longitude": [121.399722, -46.665803, 31.25, -0.093689, -79.416667, 151.205475],
        "Population": [14608512, 10021437, 7734602, 7421228, 4612187, 4394585],
    }
)


# Create a Kepler Map
m = KeplerGl(height=600, width=800)

# Add Dataframe to the map
m.add_data(data=df, name='cities')

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


In [8]:
m

KeplerGl(data={'cities': {'index': [0, 1, 2, 3, 4, 5], 'columns': ['City', 'Country', 'Latitude', 'Longitude',…

## Task 4: Adding Geographic Data

In [9]:
import pandas as pd
import geopandas as gpd
from keplergl import KeplerGl

In [10]:
parking_df = pd.read_csv("data/parking_violations_2015.csv")
parking_df.head() # display the first 5 rows

Unnamed: 0,anon_ticket_number,issue_datetime,anon_plate_id,violation_desc,fine,issuing_agency,lat,lon
0,4674379,01/01/2015 00:00,934383,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471
1,4707189,01/01/2015 00:00,1065037,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471
2,4584526,01/01/2015 00:01,1262953,SIDEWALK CC,76,POLICE,39.954927,-75.140262
3,4669046,01/01/2015 00:01,47082,SIDEWALK CC,76,POLICE,39.954927,-75.140262
4,4588341,01/01/2015 00:02,1509569,SIDEWALK CC,76,POLICE,39.954927,-75.140262


In [11]:
parking_df.shape # display the number of rows and columns

(119910, 8)

In [12]:
parking_df.dropna(subset=["lat", "lon"], how='all', inplace=True) # drop rows with NaN values in the lat and lon columns

In [13]:
parking_df.shape

(107710, 8)

In [14]:
# create a GeoDataFrame from the dataframe and the lat and lon columns using the points_from_xy function
gdf = gpd.GeoDataFrame(parking_df, geometry=gpd.points_from_xy(parking_df.lon, parking_df.lat), crs="EPSG:4326") 
gdf.head()

Unnamed: 0,anon_ticket_number,issue_datetime,anon_plate_id,violation_desc,fine,issuing_agency,lat,lon,geometry
0,4674379,01/01/2015 00:00,934383,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
1,4707189,01/01/2015 00:00,1065037,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
2,4584526,01/01/2015 00:01,1262953,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
3,4669046,01/01/2015 00:01,47082,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
4,4588341,01/01/2015 00:02,1509569,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)


In [16]:
gdf.to_file("data/output/parking_2015.geojson",  driver='GeoJSON') # save the GeoDataFrame to a geojson file



In [17]:
m = KeplerGl(height=900) 
m.add_data(data=gdf, name='parking_points') # add the GeoDataFrame to the map
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'parking_points': {'index': [0, 1, 2, 3, 4, 5, 7, 8, 10, 12, 13, 14, 15, 17, 20, 21, 22, 23, 24…

## Task 5: 3D Hexagon 

In [20]:
import geopandas as gpd
from keplergl import KeplerGl

In [19]:
gdf = gpd.read_file("data/output/parking_2015.geojson")
gdf.head()

Unnamed: 0,anon_ticket_number,issue_datetime,anon_plate_id,violation_desc,fine,issuing_agency,lat,lon,geometry
0,4674379,01/01/2015 00:00,934383,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
1,4707189,01/01/2015 00:00,1065037,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
2,4584526,01/01/2015 00:01,1262953,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
3,4669046,01/01/2015 00:01,47082,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
4,4588341,01/01/2015 00:02,1509569,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)


In [21]:
m = KeplerGl(height=900)
m.add_data(data=gdf, name='3dHexagon')
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'3dHexagon': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2…

## Task 6: Animation

In [22]:
import geopandas as gpd
import pandas as pd
from keplergl import KeplerGl

In [23]:
df = gpd.read_file("data/output/parking_2015.geojson", parse_dates=True) # read the geojson file
gdf.head()

Unnamed: 0,anon_ticket_number,issue_datetime,anon_plate_id,violation_desc,fine,issuing_agency,lat,lon,geometry
0,4674379,01/01/2015 00:00,934383,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
1,4707189,01/01/2015 00:00,1065037,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
2,4584526,01/01/2015 00:01,1262953,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
3,4669046,01/01/2015 00:01,47082,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
4,4588341,01/01/2015 00:02,1509569,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)


In [24]:
gdf["issue_datetime"] = pd.to_datetime(gdf["issue_datetime"], dayfirst=True) # convert the issue_datetime column to a datetime object


In [25]:
gdf.sample(10) # display the first 10 rows
#difference between sample and head is that sample returns a dataframe and head returns a series

Unnamed: 0,anon_ticket_number,issue_datetime,anon_plate_id,violation_desc,fine,issuing_agency,lat,lon,geometry
11090,4759774,2015-01-05 17:15:00,787573,METER EXPIRED CC,36,PPA,39.944543,-75.172694,POINT (-75.17269 39.94454)
12927,4704527,2015-01-06 12:23:00,702840,METER EXPIRED,26,PPA,39.926228,-75.16775,POINT (-75.16775 39.92623)
90026,4703443,2015-01-28 00:00:00,1289611,METER EXPIRED CC,36,PPA,39.947719,-75.155945,POINT (-75.15594 39.94772)
54362,4523016,2015-01-17 09:44:00,1448828,METER EXPIRED CC,36,PPA,39.962379,-75.172224,POINT (-75.17222 39.96238)
76785,4561323,2015-01-23 09:12:00,1553449,PARKING PROHBITED CC,51,PPA,39.954207,-75.161522,POINT (-75.16152 39.95421)
83393,4654041,2015-01-24 17:43:00,500292,PARKING PROHBITED,41,POLICE,39.978578,-75.225907,POINT (-75.22591 39.97858)
32265,4531553,2015-01-12 10:59:00,1699706,STOPPING PROHIBITED,51,PPA,39.961157,-75.143334,POINT (-75.14333 39.96116)
8751,4671187,2015-01-05 12:00:00,1252553,METER EXPIRED,26,PPA,39.961575,-75.240842,POINT (-75.24084 39.96157)
67379,4787297,2015-01-21 07:19:00,996376,METER EXPIRED,26,PPA,39.960616,-75.141956,POINT (-75.14196 39.96062)
29302,4555002,2015-01-10 17:35:00,204838,HP RESERVED SPACE,301,PPA,39.947416,-75.166411,POINT (-75.16641 39.94742)


In [26]:
m = KeplerGl(height=900)
m.add_data(data=gdf, name='animation')
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'animation': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2…

## Task 7: Choropleth Map

In [2]:
import geopandas as gpd
from keplergl import KeplerGl

In [4]:
neighborhoods = gpd.read_file("data/neighbrhoods.geojson") # read the geojson file
neighborhoods.head()

Unnamed: 0,NAME,LISTNAME,MAPNAME,Shape_Leng,Shape_Area,NUMPOINTS,geometry
0,BRIDESBURG,Bridesburg,Bridesburg,27814.546521,44586260.0,149.0,"MULTIPOLYGON (((2719789.837 256235.538, 271981..."
1,BUSTLETON,Bustleton,Bustleton,48868.458365,114050400.0,228.0,"MULTIPOLYGON (((2733378.171 289259.945, 273281..."
2,CEDARBROOK,Cedarbrook,Cedarbrook,20021.415802,24871740.0,129.0,"MULTIPOLYGON (((2685267.950 279747.336, 268527..."
3,CHESTNUT_HILL,Chestnut Hill,Chestnut Hill,56394.297195,79664980.0,7783.0,"MULTIPOLYGON (((2678490.151 284400.400, 267851..."
4,EAST_FALLS,East Falls,East Falls,27400.776417,40576890.0,3715.0,"MULTIPOLYGON (((2686769.727 263625.367, 268692..."


In [5]:
neighborhoods.crs # display the coordinate reference system

<Projected CRS: EPSG:2272>
Name: NAD83 / Pennsylvania South (ftUS)
Axis Info [cartesian]:
- X[east]: Easting (US survey foot)
- Y[north]: Northing (US survey foot)
Area of Use:
- name: United States (USA) - Pennsylvania - counties of Adams; Allegheny; Armstrong; Beaver; Bedford; Berks; Blair; Bucks; Butler; Cambria; Chester; Cumberland; Dauphin; Delaware; Fayette; Franklin; Fulton; Greene; Huntingdon; Indiana; Juniata; Lancaster; Lawrence; Lebanon; Lehigh; Mifflin; Montgomery; Northampton; Perry; Philadelphia; Schuylkill; Snyder; Somerset; Washington; Westmoreland; York.
- bounds: (-80.53, 39.71, -74.72, 41.18)
Coordinate Operation:
- name: SPCS83 Pennsylvania South zone (US Survey feet)
- method: Lambert Conic Conformal (2SP)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

In [6]:
neighborhoods = neighborhoods.to_crs("EPSG:4326") #``

In [7]:
m = KeplerGl(height=900)
m.add_data(data=neighborhoods, name='choropleth_map') 
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'choropleth_map': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, …

## Task 8: Spatial Join Preprocessing

In [8]:
import pandas as pd
import geopandas as gpd
from keplergl import KeplerGl

In [9]:
gdf = gpd.read_file("data/output/parking_2015.geojson")
gdf.head()

Unnamed: 0,anon_ticket_number,issue_datetime,anon_plate_id,violation_desc,fine,issuing_agency,lat,lon,geometry
0,4674379,01/01/2015 00:00,934383,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
1,4707189,01/01/2015 00:00,1065037,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579)
2,4584526,01/01/2015 00:01,1262953,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
3,4669046,01/01/2015 00:01,47082,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)
4,4588341,01/01/2015 00:02,1509569,SIDEWALK CC,76,POLICE,39.954927,-75.140262,POINT (-75.14026 39.95493)


In [10]:
neighborhoods = gpd.read_file("data/neighbrhoods.geojson")
neighborhoods.head()

Unnamed: 0,NAME,LISTNAME,MAPNAME,Shape_Leng,Shape_Area,NUMPOINTS,geometry
0,BRIDESBURG,Bridesburg,Bridesburg,27814.546521,44586260.0,149.0,"MULTIPOLYGON (((2719789.837 256235.538, 271981..."
1,BUSTLETON,Bustleton,Bustleton,48868.458365,114050400.0,228.0,"MULTIPOLYGON (((2733378.171 289259.945, 273281..."
2,CEDARBROOK,Cedarbrook,Cedarbrook,20021.415802,24871740.0,129.0,"MULTIPOLYGON (((2685267.950 279747.336, 268527..."
3,CHESTNUT_HILL,Chestnut Hill,Chestnut Hill,56394.297195,79664980.0,7783.0,"MULTIPOLYGON (((2678490.151 284400.400, 267851..."
4,EAST_FALLS,East Falls,East Falls,27400.776417,40576890.0,3715.0,"MULTIPOLYGON (((2686769.727 263625.367, 268692..."


In [11]:
gdf.crs == neighborhoods.crs

False

In [12]:
neighborhoods = neighborhoods.to_crs("EPSG:4326")

In [13]:
gdf.crs == neighborhoods.crs

True

In [14]:
location_join = gpd.sjoin(gdf, neighborhoods, op="within")

In [15]:
location_join.head()

Unnamed: 0,anon_ticket_number,issue_datetime,anon_plate_id,violation_desc,fine,issuing_agency,lat,lon,geometry,index_right,NAME,LISTNAME,MAPNAME,Shape_Leng,Shape_Area,NUMPOINTS
0,4674379,01/01/2015 00:00,934383,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579),81,NORTH_CENTRAL,North Central,North Central,19020.10092,19847850.0,13938.0
1,4707189,01/01/2015 00:00,1065037,BLOCKNG MASS TRANSIT,101,SEPTA,39.975789,-75.163471,POINT (-75.16347 39.97579),81,NORTH_CENTRAL,North Central,North Central,19020.10092,19847850.0,13938.0
70,4711899,01/01/2015 02:31,462325,BLOCKING DRIVEWAY,51,POLICE,39.977186,-75.163167,POINT (-75.16317 39.97719),81,NORTH_CENTRAL,North Central,North Central,19020.10092,19847850.0,13938.0
80,4821765,01/01/2015 03:50,784437,FIRE HYDRANT,76,POLICE,39.982581,-75.164014,POINT (-75.16401 39.98258),81,NORTH_CENTRAL,North Central,North Central,19020.10092,19847850.0,13938.0
101,4554244,01/01/2015 08:15,634127,SIDEWALK,51,PENN,39.983382,-75.164248,POINT (-75.16425 39.98338),81,NORTH_CENTRAL,North Central,North Central,19020.10092,19847850.0,13938.0


In [16]:
# fill in the missing values in the neighborhood column with the neighborhood name from the neighborhoods GeoDataFrame.
def count_numpoints(joined, polygons, merge_on): # create a function to count the number of points in each neighborhood
    grouped = joined.groupby(merge_on).size() # group the data by the neighborhood column
    df = grouped.to_frame().reset_index() # convert the grouped data to a dataframe
    df.columns = [merge_on, "Count"] # rename the columns
    return polygons.merge(df, on=merge_on, how="outer") # merge the polygons dataframe with the dataframe containing the counts

numpoints_per_neigh = count_numpoints(location_join, neighborhoods, "NAME") # call the function to count the number of points in each neighborhood
numpoints_per_neigh.head()

Unnamed: 0,NAME,LISTNAME,MAPNAME,Shape_Leng,Shape_Area,NUMPOINTS,geometry,Count
0,BRIDESBURG,Bridesburg,Bridesburg,27814.546521,44586260.0,149.0,"MULTIPOLYGON (((-75.06773 40.00540, -75.06765 ...",22.0
1,BUSTLETON,Bustleton,Bustleton,48868.458365,114050400.0,228.0,"MULTIPOLYGON (((-75.01560 40.09487, -75.01768 ...",59.0
2,CEDARBROOK,Cedarbrook,Cedarbrook,20021.415802,24871740.0,129.0,"MULTIPOLYGON (((-75.18848 40.07273, -75.18846 ...",10.0
3,CHESTNUT_HILL,Chestnut Hill,Chestnut Hill,56394.297195,79664980.0,7783.0,"MULTIPOLYGON (((-75.21221 40.08604, -75.21210 ...",716.0
4,EAST_FALLS,East Falls,East Falls,27400.776417,40576890.0,3715.0,"MULTIPOLYGON (((-75.18478 40.02837, -75.18426 ...",553.0


In [17]:
m = KeplerGl(height=900)
m.add_data(data=numpoints_per_neigh, name='choropleth_map')
m

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(data={'choropleth_map': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, …