# Geospatial Features
In this notebook we will learn about geographic data and how to work with it.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Seaborn defaults for the whole notebook: 10x10 figures and fonts scaled by 1.2.
sns.set(rc={'figure.figsize':(10, 10)}, font_scale=1.2)

### 1) Work with Lat and Long

We will use the `geopy` library: https://pypi.org/project/geopy/

**1) Measuring Distance**

In [4]:
from geopy.distance import great_circle

# Each point is a (latitude, longitude) pair.
my_home = (30.109919, 31.308797)
my_cafe = (30.120982, 31.322026)

# Great-circle distance between the two points, shown in kilometres
# (the same object also exposes the value in miles via `.miles`).
home_to_cafe = great_circle(my_home, my_cafe)
home_to_cafe.kilometers

1.7698508016026915

***So let's use it with our data***

In [3]:
# Load the Sendy logistics orders; each row carries pickup and destination coordinates.
# NOTE(review): the directory is spelled 'dastasets' - confirm it matches the folder on disk.
df = pd.read_csv(filepath_or_buffer='../dastasets/sendy_logistics.csv')
df.head(n=5)

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Arrival at Destination - Time,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival
0,Order_No_4211,User_Id_633,Bike,3,Business,9,5,9:35:46 AM,9,5,...,10:39:55 AM,4,20.4,,-1.317755,36.83037,-1.300406,36.829741,Rider_Id_432,745
1,Order_No_25375,User_Id_2285,Bike,3,Personal,12,5,11:16:16 AM,12,5,...,12:17:22 PM,16,26.4,,-1.351453,36.899315,-1.295004,36.814358,Rider_Id_856,1993
2,Order_No_1899,User_Id_265,Bike,3,Business,30,2,12:39:25 PM,30,2,...,1:00:38 PM,3,,,-1.308284,36.843419,-1.300921,36.828195,Rider_Id_155,455
3,Order_No_9336,User_Id_1402,Bike,3,Business,15,5,9:25:34 AM,15,5,...,10:05:27 AM,9,19.2,,-1.281301,36.832396,-1.257147,36.795063,Rider_Id_855,1341
4,Order_No_27883,User_Id_1737,Bike,1,Personal,13,1,9:55:18 AM,13,1,...,10:25:37 AM,9,15.4,,-1.266597,36.792118,-1.295041,36.809817,Rider_Id_770,1214


In [4]:
def get_dist(x):
    """Return the great-circle distance, in kilometres, for one order row.

    `x` is a row (e.g. a pandas Series) providing the 'Pickup Lat',
    'Pickup Long', 'Destination Lat' and 'Destination Long' fields.
    """
    pickup = (x['Pickup Lat'], x['Pickup Long'])
    destination = (x['Destination Lat'], x['Destination Long'])
    trip = great_circle(pickup, destination)
    return trip.kilometers

# Row-wise: compute the pickup -> destination distance for every order.
df['Dist_Pick_Dest'] = df.apply(get_dist, axis='columns')
df.head(n=5)

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival,Dist_Pick_Dest
0,Order_No_4211,User_Id_633,Bike,3,Business,9,5,9:35:46 AM,9,5,...,4,20.4,,-1.317755,36.83037,-1.300406,36.829741,Rider_Id_432,745,1.930336
1,Order_No_25375,User_Id_2285,Bike,3,Personal,12,5,11:16:16 AM,12,5,...,16,26.4,,-1.351453,36.899315,-1.295004,36.814358,Rider_Id_856,1993,11.339865
2,Order_No_1899,User_Id_265,Bike,3,Business,30,2,12:39:25 PM,30,2,...,3,,,-1.308284,36.843419,-1.300921,36.828195,Rider_Id_155,455,1.880081
3,Order_No_9336,User_Id_1402,Bike,3,Business,15,5,9:25:34 AM,15,5,...,9,19.2,,-1.281301,36.832396,-1.257147,36.795063,Rider_Id_855,1341,4.943465
4,Order_No_27883,User_Id_1737,Bike,1,Personal,13,1,9:55:18 AM,13,1,...,9,15.4,,-1.266597,36.792118,-1.295041,36.809817,Rider_Id_770,1214,3.724834


### 2) Geocoding: converting an address to (lat, long), and reverse geocoding (lat, long) back to an address

We will use the `geopy` library: https://pypi.org/project/geopy/

In [6]:
from geopy.geocoders import Nominatim

# Geocoder client reused by every lookup below; responses are OpenStreetMap data.
# user_agent is the name this notebook presents to the geocoding service.
geolocator = Nominatim(user_agent="Data Science course")

In [6]:
# Forward geocoding: free-text address -> a matching place.
location = geolocator.geocode("Cairo Festival, Cairo, Egypt")
# .raw exposes the service's response for that match as a plain dict.
location.raw

{'place_id': 182256806,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 391188133,
 'boundingbox': ['29.9788956', '29.9793241', '31.3509681', '31.3514603'],
 'lat': '29.979109899999997',
 'lon': '31.351214207251118',
 'display_name': 'Malvern College Egypt, B2-B3, الطريق الدائري, المشتل, القاهرة, محافظة القاهرة, 11771, مصر',
 'class': 'building',
 'type': 'garage',
 'importance': 0.32419316294807804}

In [7]:
# The raw response stores the coordinates as strings under 'lat' and 'lon'.
location.raw['lat'], location.raw['lon']

('29.979109899999997', '31.351214207251118')

In [8]:
# Reverse geocoding: a "lat, long" string -> the address found at that point.
location = geolocator.reverse("29.981650417493046, 31.428900499194086")
# For reverse lookups the raw response includes a structured 'address' dict.
location.raw

{'place_id': 169546911,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 338687680,
 'lat': '29.981712255603348',
 'lon': '31.428962900166752',
 'display_name': 'التجمع الثالث, مدينة القاهرة الجديدة, محافظة القاهرة, 11835, مصر',
 'address': {'neighbourhood': 'التجمع الثالث',
  'city': 'مدينة القاهرة الجديدة',
  'state': 'محافظة القاهرة',
  'postcode': '11835',
  'country': 'مصر',
  'country_code': 'eg'},
 'boundingbox': ['29.9785046', '29.9825297', '31.4280647', '31.4292666']}

In [9]:
# Country name from the structured address (returned in the local language here).
location.raw['address']['country']

'مصر'

In [10]:
# City name from the same structured address.
location.raw['address']['city']

'مدينة القاهرة الجديدة'

***So let's use it with our data***

In [11]:
# Reverse geocoding needs one network request per row, so work on a small
# sample of the orders instead of the full file.
# A fixed random_state keeps the sample - and every result derived from it -
# identical across "Restart Kernel -> Run All".
df = pd.read_csv('../dastasets/sendy_logistics.csv').sample(200, random_state=42)
df.head()

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Arrival at Destination - Time,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival
19799,Order_No_3576,User_Id_478,Bike,3,Business,29,3,11:50:59 AM,29,3,...,12:48:59 PM,7,18.3,,-1.292419,36.802439,-1.257147,36.795063,Rider_Id_217,349
12458,Order_No_11757,User_Id_3599,Bike,3,Business,10,3,11:42:10 AM,10,3,...,12:54:41 PM,18,28.0,,-1.3303,36.870793,-1.232337,36.864817,Rider_Id_855,2790
8960,Order_No_12795,User_Id_3689,Bike,3,Business,20,4,9:28:23 AM,20,4,...,9:49:05 AM,1,18.5,0.3,-1.258414,36.8048,-1.252796,36.800313,Rider_Id_56,249
9307,Order_No_1193,User_Id_391,Bike,3,Business,24,1,1:03:59 PM,24,1,...,2:00:36 PM,13,22.7,,-1.286745,36.825902,-1.205495,36.898595,Rider_Id_726,1734
6296,Order_No_10072,User_Id_22,Bike,2,Personal,8,3,9:08:51 AM,8,3,...,10:09:36 AM,7,17.4,,-1.311233,36.814632,-1.276489,36.818078,Rider_Id_700,1784


In [12]:
def get_city(x):
    """Reverse-geocode a row's pickup point and return its city name.

    Parameters
    ----------
    x : mapping or pandas.Series
        Row providing the 'Pickup Lat' and 'Pickup Long' fields.

    Returns
    -------
    str or float
        The 'city' entry of the reverse-geocoded address, or ``np.nan`` when
        the lookup fails, finds nothing, or the address has no 'city' field.
    """
    try:
        location = geolocator.reverse(f"{x['Pickup Lat']}, {x['Pickup Long']}")
    except Exception:
        # Best effort: a failed lookup (network error, bad coordinates, missing
        # column) becomes a missing value. Catching Exception rather than using
        # a bare `except` lets KeyboardInterrupt/SystemExit propagate, so a
        # long run of lookups can still be interrupted.
        return np.nan

    # The geocoder may return no match at all.
    if location is None:
        return np.nan

    # Not every address carries a 'city' field.
    address = location.raw.get('address') or {}
    return address.get('city', np.nan)

# Row-wise: one reverse-geocoding lookup per sampled order.
df['City'] = df.apply(get_city, axis='columns')
df.head(n=5)

Unnamed: 0,Order No,User Id,Vehicle Type,Platform Type,Personal or Business,Placement - Day of Month,Placement - Weekday (Mo = 1),Placement - Time,Confirmation - Day of Month,Confirmation - Weekday (Mo = 1),...,Distance (KM),Temperature,Precipitation in millimeters,Pickup Lat,Pickup Long,Destination Lat,Destination Long,Rider Id,Time from Pickup to Arrival,City
19799,Order_No_3576,User_Id_478,Bike,3,Business,29,3,11:50:59 AM,29,3,...,7,18.3,,-1.292419,36.802439,-1.257147,36.795063,Rider_Id_217,349,Nairobi
12458,Order_No_11757,User_Id_3599,Bike,3,Business,10,3,11:42:10 AM,10,3,...,18,28.0,,-1.3303,36.870793,-1.232337,36.864817,Rider_Id_855,2790,Central Business District
8960,Order_No_12795,User_Id_3689,Bike,3,Business,20,4,9:28:23 AM,20,4,...,1,18.5,0.3,-1.258414,36.8048,-1.252796,36.800313,Rider_Id_56,249,Nairobi
9307,Order_No_1193,User_Id_391,Bike,3,Business,24,1,1:03:59 PM,24,1,...,13,22.7,,-1.286745,36.825902,-1.205495,36.898595,Rider_Id_726,1734,Central Business District
6296,Order_No_10072,User_Id_22,Bike,2,Personal,8,3,9:08:51 AM,8,3,...,7,17.4,,-1.311233,36.814632,-1.276489,36.818078,Rider_Id_700,1784,Nairobi


In [13]:
# Number of sampled orders per city; rows whose lookup produced NaN are not counted.
df['City'].value_counts()

Nairobi                      191
Central Business District      8
Name: City, dtype: int64

# Great Work!