In [1]:
import os
from getpass import getpass

import pandas as pd
from hana_ml import dataframe as dfh

In [2]:
# SAP HANA Cloud SQL endpoint, written as "<host>:<port>"; it is split into its
# two parts when the connection is opened.
hana_cloud_endpoint = (
    "8e1a286a-21d7-404d-8d7a-8c77d2a77050.hana.trial-eu10.hanacloud.ondemand.com:443"
)

In [3]:
# Split "<host>:<port>" on the last colon only, so the unpacking always yields
# exactly two parts.
hana_cloud_host, hana_cloud_port = hana_cloud_endpoint.rsplit(":", 1)

# Credentials must not live in the notebook: the user name comes from the
# HANA_CLOUD_USER environment variable (default "HANAML"), the password from
# HANA_CLOUD_PASSWORD, with an interactive prompt as a fallback. The password
# is passed straight to the connection and never stored in a notebook variable.
cchc = dfh.ConnectionContext(
    address=hana_cloud_host,
    port=hana_cloud_port,
    user=os.environ.get("HANA_CLOUD_USER", "HANAML"),
    password=(
        os.environ.get("HANA_CLOUD_PASSWORD")
        or getpass("SAP HANA Cloud password: ")
    ),
    encrypt=True,
)

In [4]:
# Sanity check on the connection opened above: the cell displays the value
# returned by isconnected() (True in the recorded run).
cchc.connection.isconnected()

True

## Find the nearest airport

In [5]:
# Hand-picked starting position (latitude, longitude); the next cell replaces
# it with a position looked up from the machine's public IP address.
mylat = 51.1
mylon = 16.9

In [6]:
import json

import requests

# Look up the approximate position of this machine from its public IP address.
# The service is queried once and both values are read from that one response
# (the previous comprehension sent one HTTP request per key). A timeout keeps
# an unresponsive service from hanging the notebook, HTTP errors are raised
# instead of being parsed as JSON, and the session is closed by the context
# manager even when the request fails.
with requests.Session() as s:
    response = s.get("https://ifconfig.co/json", timeout=10)
    response.raise_for_status()
    geo_info = json.loads(response.text)

mylat, mylon = [geo_info.get(key) for key in ['latitude', 'longitude']]

In [7]:
# Show the position that will be used in the queries below.
print(f"{mylat} {mylon}")

51.0791 16.9599


In [8]:
# Build (but do not yet execute) a query for the single airport closest to
# (mylon, mylat): every row of PORTS gets a point made from its LON/LAT columns
# and the distance from my position to that point; rows are sorted by distance
# and only the first one is kept.
#
# The coordinates are spliced into the SQL text and may come from an external
# web service, so they are coerced to float first: anything non-numeric fails
# here instead of reaching the database as part of the statement.
dfh_nearport = (
    cchc
    .table("PORTS")
    .select(
        "CODE",
        "DESC",
        ('New ST_Point("LON", "LAT").ST_SRID(4326)', "PORT_LOC"),
        (
            'New ST_POINT({lon}, {lat}).ST_SRID(4326).ST_Distance(New ST_Point("LON", "LAT").ST_SRID(4326))'
            .format(lon=float(mylon), lat=float(mylat)),
            "DIST_FROMME",
        ),
    )
    .sort("DIST_FROMME")
    .head(1)
)

In [9]:
# Display the SQL text generated for the dataframe defined above.
dfh_nearport.select_statement

'SELECT TOP 1 * FROM (SELECT * FROM (SELECT "CODE", "DESC", New ST_Point("LON", "LAT").ST_SRID(4326) AS "PORT_LOC", New ST_POINT(16.9599, 51.0791).ST_SRID(4326).ST_Distance(New ST_Point("LON", "LAT").ST_SRID(4326)) AS "DIST_FROMME" FROM (SELECT * FROM "PORTS") AS "DT_0") AS "DT_2" ORDER BY "DIST_FROMME" ASC) dt'

In [10]:
# Execute the query and pull the result into the notebook. In the recorded
# output PORT_LOC comes back as a raw list of byte values, because no geometry
# column is declared for this dataframe.
dfp_nearport = dfh_nearport.collect()
dfp_nearport

Unnamed: 0,CODE,DESC,PORT_LOC,DIST_FROMME
0,WRO,Copernicus Wroclaw Airport,"[1, 1, 0, 0, 0, 0, 0, 0, 192, 195, 226, 48, 64...",5817.630776


In [11]:
# The same nearest-airport lookup written as plain SQL. PORT_LOC is declared as
# a geometry column (SRID 4326), so the recorded output shows it as POINT text.
#
# The position is injected through the {lon}/{lat} placeholders; previously the
# statement contained literal coordinates and the .format() call had nothing to
# fill in, so the query ignored mylon/mylat. The values are coerced to float so
# that only numbers can end up in the SQL text.
dfp_nearport = (
    cchc
    .sql(
        '''SELECT TOP 1
            CODE, DESC,
            New ST_Point(LON, LAT).ST_SRID(4326) as "PORT_LOC",
            New ST_POINT({lon}, {lat}).ST_SRID(4326).ST_Distance(New ST_Point(LON, LAT).ST_SRID(4326)) as "DIST_FROMME"
            FROM "PORTS"
            ORDER BY "DIST_FROMME" ASC
            '''.format(lon=float(mylon), lat=float(mylat)),
        geo_cols=["PORT_LOC"],
        srid=4326,
    )
    .collect()
)

display(dfp_nearport)

Unnamed: 0,CODE,DESC,PORT_LOC,DIST_FROMME
0,WRO,Copernicus Wroclaw Airport,POINT (16.88579940795898 51.10269927978516),5817.630776


In [12]:
# Re-run the SQL generated for dfh_nearport, this time declaring PORT_LOC as a
# geometry column with SRID 4326.
dfp_nearport = cchc.sql(
    dfh_nearport.select_statement,
    geo_cols={"PORT_LOC": 4326},
).collect()
display(dfp_nearport)

Unnamed: 0,CODE,DESC,PORT_LOC,DIST_FROMME
0,WRO,Copernicus Wroclaw Airport,POINT (16.88579940795898 51.10269927978516),5817.630776


## Load input data with a geospatial column included

#### Create a pandas DataFrame (DFP) with airports only

In [13]:
# Node list of the "air-routes" sample graph, read straight from GitHub.
dfp_nodes = pd.read_csv('https://github.com/krlawrence/graph/raw/master/sample-data/air-routes-latest-nodes.csv')

# Keep only the rows labelled as airports and drop the columns that are not
# needed further on.
dfp_ports = (
    dfp_nodes[dfp_nodes['~label'].isin(['airport'])]
    .drop(['~label', 'type:string', 'author:string', 'date:string'], axis=1)
    .convert_dtypes()
)

# Normalise the column names: remove the "~" prefix, cut the ":<type>" suffix
# and upper-case the rest. The suffix pattern is a regular expression, so
# regex=True is passed explicitly; recent pandas versions treat the pattern as
# a literal string by default, which would leave the suffixes in place.
dfp_ports.columns = (
    dfp_ports.columns
    .str.replace('~', '', regex=False)
    .str.replace(':.*', '', regex=True)
    .str.upper()
)

#### Persist in SAP HANA Cloud

In [14]:
# Upload the airports frame to the PORTS table. The ("LON", "LAT") pair in
# geo_cols is what yields the extra POINT_LON_LAT_GEO column listed in the
# next cell's output.
dfh_ports = dfh.create_dataframe_from_pandas(
    cchc,
    dfp_ports,
    "PORTS",
    force=True,
    srid=4326,
    geo_cols=[("LON", "LAT")],
)

100%|██████████| 1/1 [00:00<00:00,  1.24it/s]


In [15]:
# List the columns of the uploaded table; the recorded output ends with the
# generated POINT_LON_LAT_GEO column.
dfh_ports.columns

['ID',
 'CODE',
 'ICAO',
 'DESC',
 'REGION',
 'RUNWAYS',
 'LONGEST',
 'ELEV',
 'COUNTRY',
 'CITY',
 'LAT',
 'LON',
 'POINT_LON_LAT_GEO']

#### Use the new spatial column

In [16]:
# Nearest-airport query again, now reading the stored POINT_LON_LAT_GEO column
# instead of building a point from LON/LAT for every row.
#
# The coordinates are spliced into the SQL text and may come from an external
# web service, so they are coerced to float first: anything non-numeric fails
# here instead of reaching the database as part of the statement.
dfh_nearport = (
    cchc
    .table("PORTS")
    .select(
        "CODE",
        "DESC",
        ("POINT_LON_LAT_GEO", "PORT_LOC"),
        (
            'New ST_POINT({}, {}).ST_SRID(4326).ST_Distance("POINT_LON_LAT_GEO")'
            .format(float(mylon), float(mylat)),
            "DIST_FROMME",
        ),
    )
    .sort("DIST_FROMME")
    .head(1)
)

In [17]:
# Execute the generated SQL with PORT_LOC declared as a geometry column
# (SRID 4326) and show the single resulting row.
dfp_nearport = cchc.sql(
    dfh_nearport.select_statement,
    geo_cols={"PORT_LOC": 4326},
).collect()
display(dfp_nearport)

Unnamed: 0,CODE,DESC,PORT_LOC,DIST_FROMME
0,WRO,Copernicus Wroclaw Airport,POINT (16.885799408 51.10269927979999),5817.630774


## Load data from Esri shapefiles

#### Data is courtesy of https://thematicmapping.org/downloads/world_borders.php

In [18]:
# Create the local download folder; -p makes this a no-op when it already exists.
!mkdir -p ./Shapes
# Download the world-borders shapefile archive into that folder (-O sets the
# target file name).
!wget https://thematicmapping.org/downloads/TM_WORLD_BORDERS-0.3.zip -O ./Shapes/TM_WORLD_BORDERS-0.3.zip

--2020-11-17 21:49:56--  https://thematicmapping.org/downloads/TM_WORLD_BORDERS-0.3.zip
Resolving thematicmapping.org (thematicmapping.org)... 67.20.120.230
Connecting to thematicmapping.org (thematicmapping.org)|67.20.120.230|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3464707 (3.3M) [application/zip]
Saving to: ‘./Shapes/TM_WORLD_BORDERS-0.3.zip’


2020-11-17 21:50:01 (1006 KB/s) - ‘./Shapes/TM_WORLD_BORDERS-0.3.zip’ saved [3464707/3464707]



In [19]:
# Load the downloaded shapefile archive into the TM_WORLD_BORDERS table, with
# SHAPE as the geometry column and SRID 4326.
dfh_countries = dfh.create_dataframe_from_shapefile(
    connection_context=cchc,
    table_name="TM_WORLD_BORDERS",
    shp_file='./Shapes/TM_WORLD_BORDERS-0.3.zip',
    srid=4326,
    geo_cols=["SHAPE"],
)

In [20]:
# Spot check: fetch the code, name and border geometry of one country.
(
    dfh_countries
    .select("ISO2", "NAME", "SHAPE")
    .filter("NAME='Poland'")
    .collect()
)

Unnamed: 0,ISO2,NAME,SHAPE
0,PL,Poland,"POLYGON ((18.851246 49.517357, 18.968609 49.48..."


In [21]:
# Count the airports whose point geometry is covered by the border shape of
# Poland: PORTS (alias P) is joined to the filtered countries table (alias C)
# on the spatial predicate, and the joined rows are counted.
(
    dfh_ports.alias("P")
    .join(
        dfh_countries.filter("NAME='Poland'").alias("C"),
        select=["P.*"],
        condition='"P"."POINT_LON_LAT_GEO".ST_CoveredBy("C"."SHAPE")=1',
    )
    .count()
)

13