# Plotting Geospatial Data on a Map

<b> Import the dependencies and load the world_cities_pop.csv dataset </b>

In [1]:
import pandas as pd
import numpy as np
import geoplotlib

In [2]:
# Region is forced to a string dtype instead of letting pandas infer one.
# The builtin `str` is used because the `np.str` alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where it raises AttributeError.
df = pd.read_csv('Datasets/world_cities_pop.csv', dtype={'Region': str})
df.head()

Unnamed: 0,Country,City,AccentCity,Region,Population,Latitude,Longitude
0,ad,aixas,Aixàs,6,,42.483333,1.466667
1,ad,aixirivali,Aixirivali,6,,42.466667,1.5
2,ad,aixirivall,Aixirivall,6,,42.466667,1.5
3,ad,aixirvall,Aixirvall,6,,42.466667,1.5
4,ad,aixovall,Aixovall,6,,42.466667,1.483333


<b> List all the datatypes that are present in it and verify that they are correct </b>

In [3]:
# Region is reported as `object` because it was read with an explicit string dtype.
df.dtypes

Country        object
City           object
AccentCity     object
Region         object
Population    float64
Latitude      float64
Longitude     float64
dtype: object

In [4]:
# shape is (number of rows, number of columns) of the complete dataset
print(df.shape)

(3173958, 7)


In [5]:
# Expose the coordinates under the `lat` / `lon` names as well (the original
# columns are kept); the geoplotlib layers used below read these column names.
df['lat'], df['lon'] = df['Latitude'], df['Longitude']

<b> Plot the data points on a dot density plot </b>

In [6]:
# One dot per row of the unfiltered dataset, so every city is drawn.
geoplotlib.dot(df)
geoplotlib.show()

<b> Use the agg method of pandas to get the average number of cities per country </b>

In [7]:
# Number of rows (cities) for each country code.
df.groupby('Country').size()

Country
ad       92
ae      446
af    88749
ag      183
ai       42
      ...  
yt      122
za    12693
zm    13027
zr    23012
zw     1341
Length: 234, dtype: int64

In [8]:
# size() counts the rows (cities) per country; agg('mean') then averages those counts.
df.groupby(['Country']).size().agg('mean')

13563.923076923076

<b> Obtain the number of cities per country (the first 20 entries) and extract the cities that have a population greater than zero </b>

In [9]:
# Keep only rows with a positive population; rows whose Population is NaN
# fail the comparison and are dropped as well.
df_with_pop = df.loc[df['Population'].gt(0)]
df_with_pop.head()

Unnamed: 0,Country,City,AccentCity,Region,Population,Latitude,Longitude,lat,lon
6,ad,andorra la vella,Andorra la Vella,7,20430.0,42.5,1.516667,42.5,1.516667
20,ad,canillo,Canillo,2,3292.0,42.566667,1.6,42.566667,1.6
32,ad,encamp,Encamp,3,11224.0,42.533333,1.583333,42.533333,1.583333
49,ad,la massana,La Massana,4,7211.0,42.55,1.516667,42.55,1.516667
53,ad,les escaldes,Les Escaldes,8,15854.0,42.5,1.533333,42.5,1.533333


In [10]:
# (rows, columns) of the subset that has a population value
print(df_with_pop.shape)

(47980, 9)


In [11]:
# Compare the row count of the full dataset with the populated subset.
print(f'Total of cities: {len(df)}')
print(f'Cities with population: {len(df_with_pop)}')

Total of cities: 3173958
Cities with population: 47980


<b> Plot the remaining data on a dot plot </b>

In [12]:
# Same dot layer as before, now restricted to the cities that have a population value.
geoplotlib.dot(df_with_pop)
geoplotlib.show()

<b> Filter your remaining data for cities with a population greater than 100,000 </b>

In [13]:
# Narrow the populated subset to cities with more than 100,000 inhabitants.
# The name df_with_pop is rebound, so from here on it refers to this smaller subset.
df_with_pop = df_with_pop.loc[df_with_pop['Population'].gt(100000)]

In [14]:
# (rows, columns) after the > 100,000 population filter
print(df_with_pop.shape)

(3527, 9)


In [15]:
# Report how many cities passed the > 100,000 population filter.
print(f'There are {len(df_with_pop)} cities with more than 100,000 people')

There are 3527 cities with more than 100,000 people


In [16]:
# Dot layer for the cities with more than 100,000 inhabitants only.
geoplotlib.dot(df_with_pop)
geoplotlib.show()

<b> To get a better understanding of the density of our data points on the map, use a Voronoi tessellation layer </b>

In [17]:
# Voronoi layer over the cities with more than 100,000 inhabitants.
# NOTE(review): per the geoplotlib API, cmap names the colour map, max_area scales how
# cell area maps to colour and alpha is the fill opacity — confirm against the installed version.
geoplotlib.voronoi(df_with_pop, cmap='hot_r', max_area=1e3, alpha=255)
geoplotlib.show()

<b> Filter down the data even further to only cities in Germany and Great Britain </b>

In [18]:
# Large cities whose country code is Germany ('de') or Great Britain ('gb').
df_europe = df_with_pop.loc[df_with_pop['Country'].isin(['de', 'gb'])]

In [19]:
# Report how many large cities remain after the Germany / Great Britain filter.
print(f'There are {len(df_europe)} cities in Germany or GB with more than 100,000 people')

There are 150 cities in Germany or GB with more than 100,000 people


<b> Use a Delaunay triangulation layer to find the most densely populated areas </b>

In [20]:
# Delaunay triangulation of the large German and British cities.
# NOTE(review): per the geoplotlib API, cmap colours the edges by their length — confirm.
geoplotlib.delaunay(df_europe, cmap='hot_r')
geoplotlib.show()

# Visualizing City Density by the First Letter Using an Interactive Custom Layer

<b> Import the dependencies </b>

In [21]:
import numpy as np
import pandas as pd
import geoplotlib

<b> Load the world_cities_pop.csv </b>

In [22]:
# Region is forced to a string dtype instead of letting pandas infer one.
# The builtin `str` is used because the `np.str` alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where it raises AttributeError.
dataset = pd.read_csv('Datasets/world_cities_pop.csv', dtype={'Region': str})
dataset.head()

Unnamed: 0,Country,City,AccentCity,Region,Population,Latitude,Longitude
0,ad,aixas,Aixàs,6,,42.483333,1.466667
1,ad,aixirivali,Aixirivali,6,,42.466667,1.5
2,ad,aixirivall,Aixirivall,6,,42.466667,1.5
3,ad,aixirvall,Aixirvall,6,,42.466667,1.5
4,ad,aixovall,Aixovall,6,,42.466667,1.483333


<b> Map the Latitude and Longitude columns to lat and lon </b>

In [23]:
# Add `lat` / `lon` aliases of the coordinate columns; the originals are kept.
dataset['lat'], dataset['lon'] = dataset['Latitude'], dataset['Longitude']

<b> Filter the dataset to only contain European cities </b>

In [24]:
# Two-letter, lower-case country codes of the European countries, excluding Russia.
europe_country_codes = [
    'al', 'ad', 'at', 'by', 'be', 'ba', 'bg', 'hr', 'cy', 'cz', 'dk', 'ee',
    'fo', 'fi', 'fr', 'de', 'gi', 'gr', 'hu', 'is', 'ie', 'im', 'it', 'xk',
    'lv', 'li', 'lt', 'lu', 'mk', 'mt', 'md', 'mc', 'me', 'nl', 'no', 'pl',
    'pt', 'ro', 'sm', 'rs', 'sk', 'si', 'es', 'se', 'ch', 'ua', 'gb', 'va',
]

<b> Compare the length of all data with the filtered data of Europe </b>

In [25]:
# Rows whose country code is one of the European codes listed above.
europe_dataset = dataset.loc[dataset['Country'].isin(europe_country_codes)]

In [26]:
# Row counts before and after restricting the data to Europe.
print('Whole World Data', dataset.shape[0])
print('Europe Data', europe_dataset.shape[0])

Whole World Data 3173958
Europe Data 682348


<b> Filter down the European dataset to get a dataset that only contains cities that start with the letter Z </b>

In [27]:
# European cities whose accented name begins with a capital 'Z' (case-sensitive match).
cities_starting_z = europe_dataset.loc[europe_dataset['AccentCity'].str.startswith('Z')]

In [28]:
# Number of European cities whose name starts with 'Z'.
print('Cities starting with Z:', cities_starting_z.shape[0])

Cities starting with Z: 13218


<b> Create a dot density plot with a tooltip that shows the country code and the name of the city separated by a - </b>

In [29]:
from geoplotlib.utils import DataAccessObject

geoplotlib_data = DataAccessObject(cities_starting_z)
# Tooltip text is "<COUNTRY CODE> - <City Name>". `.title()` is applied to the city only:
# calling it on the whole formatted string would title-case the upper-cased country
# code too (e.g. "DE" -> "De").
geoplotlib.dot(
    geoplotlib_data,
    f_tooltip=lambda d: '{} - {}'.format(d['Country'].upper(), d['City'].title()),
)
geoplotlib.show()

<b> Create a Voronoi plot with the same dataset that only contains cities that start with Z </b>

In [30]:
# Voronoi layer over the European cities starting with 'Z'.
# NOTE(review): per the geoplotlib API, max_area scales how cell area maps to colour
# and alpha is the fill opacity — confirm against the installed version.
geoplotlib.voronoi(cities_starting_z, cmap='Reds_r', max_area=1e5, alpha=50)
geoplotlib.show()

<b> Create a custom layer that plots all the cities in the Europe dataset that start with the provided letter. Make it interactive so that by using the left and right arrow keys, we can switch between the letters </b>

In [31]:
import pyglet
import geoplotlib
from geoplotlib.layers import BaseLayer
from geoplotlib.core import BatchPainter
from geoplotlib.utils import BoundingBox

In [32]:
# Upper-case letters A-Z in alphabetical order; FilterLayer indexes into this list.
# The tenth entry must be 'J': a duplicated 'H' there hides every city starting with J.
start_letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
                 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

In [33]:
class FilterLayer(BaseLayer):
    """Interactive layer that plots only the cities starting with one letter.

    The letter is selected by indexing the module-level ``start_letters`` list.
    The RIGHT and LEFT arrow keys move that index forwards and backwards,
    wrapping around at both ends of the list.
    """

    def __init__(self, dataset, bbox=BoundingBox.WORLD):
        """Store the data to filter and the viewport to report.

        :param dataset: pandas DataFrame with ``AccentCity``, ``lat`` and ``lon``
            columns (the ``.str`` accessor and boolean-mask indexing are used on it)
        :param bbox: bounding box returned by :meth:`bbox`
        """
        self.data = dataset
        # index into start_letters of the letter currently displayed
        self.start_letter = 0
        self.view = bbox

    def invalidate(self, proj):
        """Rebuild the point batch for the currently selected letter."""
        letter = start_letters[self.start_letter]
        # na=False counts rows with a missing city name as non-matching; without it a
        # missing name yields NaN in the mask, which pandas rejects as a boolean index.
        matches_letter = self.data['AccentCity'].str.startswith(letter, na=False)
        start_letter_data = self.data[matches_letter]

        self.painter = BatchPainter()
        x, y = proj.lonlat_to_screen(start_letter_data['lon'], start_letter_data['lat'])
        self.painter.points(x, y, 2)

    def draw(self, proj, mouse_x, mouse_y, ui_manager):
        """Draw the prepared points and display which letter is active."""
        self.painter.batch_draw()
        ui_manager.info('Displaying cities starting with {}'.format(start_letters[self.start_letter]))

    def on_key_release(self, key, modifiers):
        """Move to the next (RIGHT) or previous (LEFT) letter.

        :return: True when the selected letter changed, False for any other key
        """
        if key == pyglet.window.key.RIGHT:
            self.start_letter = (self.start_letter + 1) % len(start_letters)
            return True
        elif key == pyglet.window.key.LEFT:
            # Python's % keeps the result non-negative, so index 0 wraps to the last letter
            self.start_letter = (self.start_letter - 1) % len(start_letters)
            return True

        return False

    # bounding box that gets used when layer is created
    def bbox(self):
        """Return the bounding box passed to the constructor."""
        return self.view