# Exercise 1.3 - Polling for real-time vehicle positions
data is pulled from [https://gtfs.adelaidemetro.com.au/v1/realtime/vehicle_positions](https://gtfs.adelaidemetro.com.au/v1/realtime/vehicle_positions)


In [1]:
# import required libraries
from google.transit import gtfs_realtime_pb2
import pandas as pd
import requests
from protobuf_to_dict import protobuf_to_dict
import hana_ml
from hana_ml.dataframe import ConnectionContext
from hana_ml.dataframe import create_dataframe_from_pandas

In [2]:
# Connect to SAP HANA Cloud
host = '[YourHostName]'
port = 443
user = '[YourUser]'
passwd = '[YourUserPassword]'
cc= ConnectionContext(address=host, port=port, user=user, password=passwd, encrypt='true' ,sslValidateCertificate='false')

schema="TECHED_USER_000"
print('HANA version:', cc.hana_version())
print('hana-ml version:', hana_ml.__version__)
print('pandas version:', pd.__version__)

HANA version: 4.00.000.00.1663064250 (fa/CE2022.30)
hana-ml version: 2.14.22092300
pandas version: 1.2.5


In [3]:
# function that get vehicle positions and returns data as pandas dataframe
def get_locations_as_df() -> pd.DataFrame:
    feed = gtfs_realtime_pb2.FeedMessage()
    response = requests.get('https://gtfs.adelaidemetro.com.au/v1/realtime/vehicle_positions')
    feed.ParseFromString(response.content)
    dict = protobuf_to_dict(feed)
    df = pd.json_normalize(dict['entity'])
    df['vehicle.timestamp'] = pd.to_datetime(df['vehicle.timestamp'], unit='s')
    return df

In [4]:
# inspect the data
df = get_locations_as_df()
df.head(3)

Unnamed: 0,id,vehicle.trip.trip_id,vehicle.trip.start_date,vehicle.trip.schedule_relationship,vehicle.trip.route_id,vehicle.trip.direction_id,vehicle.position.latitude,vehicle.position.longitude,vehicle.position.bearing,vehicle.position.speed,vehicle.timestamp,vehicle.vehicle.id,vehicle.vehicle.label
0,V550114101,550114,20221014,0,GLNELG,0,-34.927074,138.599747,4.32,6.8,2022-10-14 12:41:15,101,101
1,V6522671026,652267,20221014,0,H22,0,-34.924629,138.59375,87.0,0.0,2022-10-14 12:41:07,1026,1026
2,V7260941027,726094,20221014,0,281,0,-34.920921,138.611359,261.079987,9.2,2022-10-14 12:41:10,1027,1027


In [5]:
# function that calls get_locations_as_df() and stores the data in HANA tables
def store_locations():
    df = get_locations_as_df()
    hdf_locations_history = create_dataframe_from_pandas(
        connection_context=cc,
        pandas_df=df, 
        geo_cols=[("vehicle.position.longitude", "vehicle.position.latitude")], srid=4326,
        schema=schema,
        table_name='LOC_RT_HISTORY', 
        force=False, append=True
    )
    hdf_locations = create_dataframe_from_pandas(
        connection_context=cc,
        pandas_df=df, 
        geo_cols=[("vehicle.position.longitude", "vehicle.position.latitude")], srid=4326,
        schema=schema,
        table_name='LOC_RT', 
        primary_key=['id','trip_id','route_id'],
        force=True
    )


In [8]:
# try
store_locations()

100%|██████████| 1/1 [00:00<00:00, 13.01it/s]
100%|██████████| 1/1 [00:00<00:00, 10.77it/s]


In [6]:
# periodically import vehicle position data
import time
while True:
  store_locations()
  time.sleep(15)


100%|██████████| 1/1 [00:00<00:00, 12.03it/s]
100%|██████████| 1/1 [00:00<00:00, 13.41it/s]
100%|██████████| 1/1 [00:00<00:00, 19.23it/s]
100%|██████████| 1/1 [00:00<00:00, 13.89it/s]
100%|██████████| 1/1 [00:00<00:00, 16.67it/s]
100%|██████████| 1/1 [00:00<00:00, 15.38it/s]
100%|██████████| 1/1 [00:00<00:00, 15.87it/s]
100%|██████████| 1/1 [00:00<00:00,  9.89it/s]
100%|██████████| 1/1 [00:00<00:00, 12.97it/s]
100%|██████████| 1/1 [00:00<00:00, 15.69it/s]
100%|██████████| 1/1 [00:00<00:00, 16.13it/s]
100%|██████████| 1/1 [00:00<00:00, 14.69it/s]
100%|██████████| 1/1 [00:00<00:00, 16.12it/s]
100%|██████████| 1/1 [00:00<00:00, 14.10it/s]
100%|██████████| 1/1 [00:00<00:00, 18.84it/s]
100%|██████████| 1/1 [00:00<00:00, 15.99it/s]
100%|██████████| 1/1 [00:00<00:00, 16.73it/s]
100%|██████████| 1/1 [00:00<00:00, 11.29it/s]
100%|██████████| 1/1 [00:00<00:00, 18.51it/s]
100%|██████████| 1/1 [00:00<00:00, 11.91it/s]
100%|██████████| 1/1 [00:00<00:00,  7.75it/s]
100%|██████████| 1/1 [00:00<00:00,

KeyboardInterrupt: 

In [15]:

feed = gtfs_realtime_pb2.FeedMessage()
#response = requests.get('https://bct.tmix.se/gtfs-realtime/vehicleupdates.pb?operatorIds=12')
#response = requests.get('http://www.myridebarrie.ca/gtfs/GTFS_VehiclePositions.pb')
#response = requests.get('http://comox.mapstrat.com/current/gtfrealtime_VehiclePositions.bin')
#response = requests.get('https://ridemvgo.org/gtfs-rt/vehiclepositions')
response = requests.get('https://gtfs.adelaidemetro.com.au/v1/realtime/vehicle_positions')
feed.ParseFromString(response.content)



53644

In [80]:
#dct = dict['entity'][0]['vehicle']['position']
#for ind in feed.entity
#    print(ind)
#data = {entity.id: [entity.id, entity.vehicle.trip.trip_id, entity.vehicle.trip.direction_id, entity.vehicle.position.longitude, entity.vehicle.position.latitude, entity.vehicle.timestamp, entity.vehicle.vehicle.id]}
#df = pd.DataFrame.from_dict(data, orient='index', columns=['id', 'trip_id', 'direciton_id', 'lon', 'lat', 'timestamp', 'vehicle_id'])
#df

In [None]:
for entity in feed.entity:
  #if entity.HasField('trip_update'):
    print (entity.id, entity.vehicle.trip.trip_id, entity.vehicle.trip.direction_id, entity.vehicle.position.longitude, entity.vehicle.position.latitude, entity.vehicle.timestamp, entity.vehicle.vehicle.id)

In [25]:
#feed.entity[0].vehicle.position
#for ent in feed.entity:
#    print(ent.id, ent)
dict = protobuf_to_dict(feed)
#for key in dict:
#    print(key, '-', dict[key])
    #dict['entity']
df = pd.json_normalize(dict['entity'])
df['vehicle.timestamp'] = pd.to_datetime(df['vehicle.timestamp'], unit='s')
df


Unnamed: 0,id,vehicle.trip.trip_id,vehicle.trip.start_date,vehicle.trip.schedule_relationship,vehicle.trip.route_id,vehicle.trip.direction_id,vehicle.position.latitude,vehicle.position.longitude,vehicle.position.bearing,vehicle.position.speed,vehicle.timestamp,vehicle.vehicle.id,vehicle.vehicle.label
0,V6051371002,605137,20220930,0,401,0,-34.762089,138.642471,184.910004,6.2,2022-09-30 08:43:04,1002,1002
1,V6462491005,646249,20220930,0,147,1,-34.927505,138.612076,176.350006,7.2,2022-09-30 08:43:09,1005,1005
2,V758368101,758368,20220930,0,GLNELG,1,-34.978870,138.531250,250.460007,8.4,2022-09-30 08:43:17,101,101
3,V6887081011,688708,20220930,0,G10,1,-34.849319,138.591446,177.289993,0.0,2022-09-30 08:43:13,1011,1011
4,V6882021013,688202,20220930,0,190B,1,-34.914875,138.598831,169.550003,5.0,2022-09-30 08:43:14,1013,1013
...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,V605680845,605680,20220930,0,451,1,-34.715363,138.671417,3.430000,6.3,2022-09-30 08:43:07,845,845
561,V750716851,750716,20220930,0,224,1,-34.885277,138.602707,356.660004,16.4,2022-09-30 08:43:13,851,851
562,V688817959,688817,20220930,0,G10A,1,-34.989666,138.592712,174.440002,0.0,2022-09-30 08:43:16,959,959
563,V688981961,688981,20220930,0,G22X,1,-34.926598,138.599884,181.380005,0.0,2022-09-30 08:43:08,961,961


In [26]:
# upsert?
from hana_ml.dataframe import create_dataframe_from_pandas
hdf_trips = create_dataframe_from_pandas(
    connection_context=cc,
    pandas_df=df, 
    schema='TECHED_USER_000',
    table_name='LOC_RT_UPSERT_TEST', 
    force=True
    )


100%|██████████| 1/1 [00:00<00:00,  8.84it/s]


In [28]:
cc.upsert_streams_data(schema = 'TECHED_USER_000', 
    table_name="LOC_RT_UPSERT_TEST", 
    key="id", 
    data={"id":'V6051371002', "vehicle.trip.trip_id":'1'}
)

ProgrammingError: (0, ':vehicle is not set')

In [17]:
from hana_ml.dataframe import create_dataframe_from_pandas
hdf_trips = create_dataframe_from_pandas(
    connection_context=cc,
    pandas_df=df, 
    geo_cols=[("vehicle.position.longitude", "vehicle.position.latitude")], srid=4326,
    schema='TECHED_USER_000',
    table_name='LOC_RT_UPSERT', 
    #force=False, append=True
    upsert=True
    )

100%|██████████| 1/1 [00:00<00:00,  7.41it/s]


In [48]:
#dict['entity'][0]['vehicle']['position']

{'latitude': -35.17271041870117,
 'longitude': 138.4895782470703,
 'bearing': 357.70001220703125,
 'speed': 0.0}

In [26]:
coll = create_collection_from_elements(
    connection_context = cc,
    schema = 'TECHED_USER_000',
    collection_name = 'BUSRTPB',
    elements = dict['entity'],
    drop_exist_coll=True
    )