# Geospatial Features
In this notebook we will learn about geographic data and how to work with it.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

%matplotlib inline
# Notebook-wide plot defaults: 10x10 figures and fonts scaled by 1.2.
sns.set(font_scale=1.2, rc={"figure.figsize": (10, 10)})

### 1) Working with Latitude and Longitude

We will use the geopy library: https://pypi.org/project/geopy/

**1) Measuring Distance**

In [2]:
from geopy.distance import great_circle

# Each point is a (latitude, longitude) pair.
my_home = 30.109919, 31.308797
my_cafe = 30.120982, 31.322026

# Great-circle distance between the two points, expressed in kilometres.
great_circle(my_home, my_cafe).kilometers

1.7698508016026915

***So let's use it with our data***

In [3]:
# Load the delivery orders and preview the first five rows.
df = pd.read_csv("../dastasets/sendy_logistics.csv")
df.head(5)

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Arrival at Destination - Time,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival
0,Order_No_4211,User_Id_633,Bike,3,Business,9,5,9:35:46 AM,9,5,...,10:39:55 AM,4,20.4,,-1.317755,36.83037,-1.300406,36.829741,Rider_Id_432,745
1,Order_No_25375,User_Id_2285,Bike,3,Personal,12,5,11:16:16 AM,12,5,...,12:17:22 PM,16,26.4,,-1.351453,36.899315,-1.295004,36.814358,Rider_Id_856,1993
2,Order_No_1899,User_Id_265,Bike,3,Business,30,2,12:39:25 PM,30,2,...,1:00:38 PM,3,,,-1.308284,36.843419,-1.300921,36.828195,Rider_Id_155,455
3,Order_No_9336,User_Id_1402,Bike,3,Business,15,5,9:25:34 AM,15,5,...,10:05:27 AM,9,19.2,,-1.281301,36.832396,-1.257147,36.795063,Rider_Id_855,1341
4,Order_No_27883,User_Id_1737,Bike,1,Personal,13,1,9:55:18 AM,13,1,...,10:25:37 AM,9,15.4,,-1.266597,36.792118,-1.295041,36.809817,Rider_Id_770,1214


In [4]:
def get_dist(x):
    """Return the great-circle distance, in kilometres, for one order row.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row providing 'Pickup Lat', 'Pickup Long', 'Destination Lat' and
        'Destination Long'.

    Returns
    -------
    The ``kilometers`` value of ``great_circle(pickup, destination)``.
    """
    pickup = x['Pickup Lat'], x['Pickup Long']
    dropoff = x['Destination Lat'], x['Destination Long']
    trip = great_circle(pickup, dropoff)
    return trip.kilometers

# Run get_dist on every row and keep the result as a new column.
df['Dist_Pick_Dest'] = df.apply(get_dist, axis='columns')
df.head(5)

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival,Dist_Pick_Dest
0,Order_No_4211,User_Id_633,Bike,3,Business,9,5,9:35:46 AM,9,5,...,4,20.4,,-1.317755,36.83037,-1.300406,36.829741,Rider_Id_432,745,1.930336
1,Order_No_25375,User_Id_2285,Bike,3,Personal,12,5,11:16:16 AM,12,5,...,16,26.4,,-1.351453,36.899315,-1.295004,36.814358,Rider_Id_856,1993,11.339865
2,Order_No_1899,User_Id_265,Bike,3,Business,30,2,12:39:25 PM,30,2,...,3,,,-1.308284,36.843419,-1.300921,36.828195,Rider_Id_155,455,1.880081
3,Order_No_9336,User_Id_1402,Bike,3,Business,15,5,9:25:34 AM,15,5,...,9,19.2,,-1.281301,36.832396,-1.257147,36.795063,Rider_Id_855,1341,4.943465
4,Order_No_27883,User_Id_1737,Bike,1,Personal,13,1,9:55:18 AM,13,1,...,9,15.4,,-1.266597,36.792118,-1.295041,36.809817,Rider_Id_770,1214,3.724834


### 2) Geocoding features: get lat/long from an address (geocoding), or an address from lat/long (reverse geocoding).

We will use the geopy library: https://pypi.org/project/geopy/

In [5]:
from geopy.geocoders import Nominatim

# Geocoder client for the Nominatim (OpenStreetMap) service, shared by the
# cells below; user_agent is the name this notebook identifies itself with.
geolocator = Nominatim(user_agent="Data Science course")

In [6]:
# Forward geocoding: look up a free-text address.
location = geolocator.geocode("Cairo Festival, Cairo, Egypt")
# .raw exposes the service's response for the match as a dict.
location.raw

{'place_id': 182256806,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 391188133,
 'boundingbox': ['29.9788956', '29.9793241', '31.3509681', '31.3514603'],
 'lat': '29.979109899999997',
 'lon': '31.351214207251118',
 'display_name': 'Malvern College Egypt, B2-B3, الطريق الدائري, المشتل, القاهرة, محافظة القاهرة, 11771, مصر',
 'class': 'building',
 'type': 'garage',
 'importance': 0.32419316294807804}

In [7]:
# Latitude and longitude of the match, as the strings found in the raw response.
tuple(location.raw[axis] for axis in ('lat', 'lon'))

('29.979109899999997', '31.351214207251118')

In [8]:
# Reverse geocoding: look up the address for a "lat, long" string.
location = geolocator.reverse("29.981650417493046, 31.428900499194086")
# For reverse lookups the raw response also carries a structured 'address' dict.
location.raw

{'place_id': 169546911,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 338687680,
 'lat': '29.981712255603348',
 'lon': '31.428962900166752',
 'display_name': 'التجمع الثالث, مدينة القاهرة الجديدة, محافظة القاهرة, 11835, مصر',
 'address': {'neighbourhood': 'التجمع الثالث',
  'city': 'مدينة القاهرة الجديدة',
  'state': 'محافظة القاهرة',
  'postcode': '11835',
  'country': 'مصر',
  'country_code': 'eg'},
 'boundingbox': ['29.9785046', '29.9825297', '31.4280647', '31.4292666']}

In [9]:
# Country component of the reverse-geocoded address.
location.raw['address']['country']

'مصر'

In [10]:
# City component of the reverse-geocoded address.
location.raw['address']['city']

'مدينة القاهرة الجديدة'

***So let's use it with our data***

In [11]:
# Reverse geocoding below calls a remote service once per row, so work on a
# small sample. A fixed random_state makes the same 20 rows come back on every
# Restart & Run All; re-run the following cells to refresh their outputs.
df = pd.read_csv('../dastasets/sendy_logistics.csv').sample(20, random_state=42)
df.head()

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Arrival at Destination - Time,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival
7766,Order_No_26997,User_Id_3201,Bike,3,Business,12,2,2:27:41 PM,12,2,...,2:53:50 PM,2,26.9,,-1.255189,36.782203,-1.258414,36.8048,Rider_Id_647,342
10038,Order_No_21267,User_Id_2701,Bike,3,Business,24,5,11:31:27 AM,24,5,...,12:11:01 PM,4,21.3,,-1.276574,36.851365,-1.290894,36.822971,Rider_Id_338,976
430,Order_No_13783,User_Id_2330,Bike,1,Business,6,6,4:30:11 PM,6,6,...,5:23:31 PM,12,25.3,,-1.292,36.904002,-1.228432,36.875706,Rider_Id_397,673
1237,Order_No_2372,User_Id_1500,Bike,3,Business,28,4,12:56:43 PM,28,4,...,1:35:26 PM,8,27.3,,-1.300406,36.829741,-1.263818,36.793006,Rider_Id_953,17
553,Order_No_4854,User_Id_2946,Bike,2,Personal,18,3,12:39:16 PM,18,3,...,1:13:24 PM,8,19.8,,-1.274966,36.765878,-1.298346,36.814,Rider_Id_634,1051


In [12]:
def get_city(x):
    """Reverse-geocode a row's pickup point and return its city-level name.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row providing 'Pickup Lat' and 'Pickup Long'.

    Returns
    -------
    str or None
        The 'city' entry of the geocoded address; when there is no 'city'
        entry, the 'town' or 'village' entry instead. None when the geocoder
        returns no result or the address has none of those entries.
    """
    location = geolocator.reverse(f"{x['Pickup Lat']}, {x['Pickup Long']}")
    if location is None:
        # reverse() yields None when nothing is found for the coordinates.
        return None

    address = location.raw.get('address', {})
    # Smaller settlements are reported under 'town' or 'village' rather than
    # 'city'; indexing 'city' directly would raise KeyError for them.
    for key in ('city', 'town', 'village'):
        if key in address:
            return address[key]
    return None

from geopy.extra.rate_limiter import RateLimiter

# The public Nominatim service expects at most one request per second, so
# space out the per-row lookups instead of firing them back to back.
# swallow_exceptions=False keeps geocoder failures visible after the retries.
get_city_throttled = RateLimiter(
    get_city, min_delay_seconds=1, swallow_exceptions=False
)

# NOTE: the column is called 'Country' but get_city returns a city name; the
# name is kept because a later cell reads df['Country'].
df['Country'] = df.apply(get_city_throttled, axis=1)
df.head()

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival,Country
7766,Order_No_26997,User_Id_3201,Bike,3,Business,12,2,2:27:41 PM,12,2,...,2,26.9,,-1.255189,36.782203,-1.258414,36.8048,Rider_Id_647,342,Nairobi
10038,Order_No_21267,User_Id_2701,Bike,3,Business,24,5,11:31:27 AM,24,5,...,4,21.3,,-1.276574,36.851365,-1.290894,36.822971,Rider_Id_338,976,Nairobi
430,Order_No_13783,User_Id_2330,Bike,1,Business,6,6,4:30:11 PM,6,6,...,12,25.3,,-1.292,36.904002,-1.228432,36.875706,Rider_Id_397,673,Nairobi
1237,Order_No_2372,User_Id_1500,Bike,3,Business,28,4,12:56:43 PM,28,4,...,8,27.3,,-1.300406,36.829741,-1.263818,36.793006,Rider_Id_953,17,Nairobi
553,Order_No_4854,User_Id_2946,Bike,2,Personal,18,3,12:39:16 PM,18,3,...,8,19.8,,-1.274966,36.765878,-1.298346,36.814,Rider_Id_634,1051,Nairobi


In [13]:
# How often each reverse-geocoded value occurs. Despite its name, the
# 'Country' column holds the city name returned by get_city.
df['Country'].value_counts()

Nairobi                      19
Central Business District     1
Name: Country, dtype: int64

# Great Work!