# Annotating tourism activities

Load the necessary libraries.

In [1]:
import json
import os
from concurrent.futures import ThreadPoolExecutor

import datablox_od
import geopandas as gpd
import numpy as np
import pandas as pd

pd.set_option("display.max_rows", 5)

%load_ext autoreload
%autoreload 2

Folder names

In [2]:
# Root folders for the input data and the generated outputs.
# The paths are relative, so they resolve against the current working directory.
SAMPLE_DATA_DIRECTORY = os.path.join("..", "sample_data")
SAMPLE_OUTPUT_DIRECTORY = os.path.join("..", "sample_output")

# Tourism-specific subfolders of the output folder
STAYPOINTS_DIRECTORY = os.path.join(SAMPLE_OUTPUT_DIRECTORY, "tourism", "staypoints")
TOURISM_TRIPS_DIRECTORY = os.path.join(
    SAMPLE_OUTPUT_DIRECTORY, "tourism", "tourism_trips"
)
ANNOTATED_TOURISM_TRIPS_DIRECTORY = os.path.join(
    SAMPLE_OUTPUT_DIRECTORY, "tourism", "annotated_tourism_trips"
)
# Create the folder for the annotated trips; exist_ok avoids an error when re-running the cell
os.makedirs(ANNOTATED_TOURISM_TRIPS_DIRECTORY, exist_ok=True)

Constants

In [3]:
# CRS passed as `projected_crs` to get_nearby_places()
# NOTE(review): EPSG:32647 should be WGS 84 / UTM zone 47N (units in meters) — confirm it suits the study area
PROJECTED_CRS = 32647
# CRS assigned to the point geometries built from longitude/latitude columns
WGS84_EPSG = 4326

# We define "nearby" as being within a 2-km radius
NEARBY_IN_METERS = 2000

Throughout this tutorial, we will be using DataBlox-OD's `datablox_od.preprocessing.get_nearby_places()` to associate the staypoints in each tourism trip with nearby main tourist attractions, communities, and tourism industries.

<hr>

## I. Main tourist attractions

We refer to the list of tourist attractions from the [website of the Tourism Authority of Thailand](https://www.tourismthailand.org/Destinations).

In [4]:
# Mapping from tourist attraction type to category, looked up by classify_tourist_attraction().
# The encoding is stated explicitly so that the file is decoded identically on every platform;
# without it, open() falls back to the locale's preferred encoding.
with open(
    os.path.join(SAMPLE_DATA_DIRECTORY, "tourism", "tourist_attraction_categories.json"),
    encoding="utf-8",
) as f:
    typology = json.load(f)


def classify_tourist_attraction(tourist_attraction_types, category_by_type=None):
    """
    Classify a tourist attraction into categories (e.g., entertainment, nature, cultural,
    services) based on its type(s).

    Parameters
    ----------
    tourist_attraction_types : str
        One or more attraction types separated by "/". Whitespace around each type is ignored.
        Non-string values (e.g., None or NaN for a missing type) are treated as having no type.
    category_by_type : dict, optional
        Mapping from attraction type to category. Defaults to the module-level `typology`.

    Returns
    -------
    set
        The categories of all recognized types. Types that are not in the mapping are
        ignored, so the set may be empty.
    """
    if category_by_type is None:
        category_by_type = typology

    # A missing type cannot be split; report "no category" instead of raising AttributeError
    if not isinstance(tourist_attraction_types, str):
        return set()

    attraction_types = (
        attraction_type.strip()
        for attraction_type in tourist_attraction_types.split("/")
    )
    return {
        category_by_type[attraction_type]
        for attraction_type in attraction_types
        if attraction_type in category_by_type
    }


def process_main_tourist_attractions():
    """
    Load the tourist attractions and prepare them for annotating tourism trips.

    The attractions are read from `thailand_tourist_attractions.parquet`, their
    longitude/latitude columns are converted into point geometries (CRS: WGS84_EPSG),
    attractions of type "Other" or "Suburban Living" are removed, and a `category` column
    is derived from the `type` column with classify_tourist_attraction().

    Returns
    -------
    geopandas.GeoDataFrame
        The remaining attractions, including the added `category` column. Rows keep the
        index they had before filtering, so the index may contain gaps.
    """
    attractions = pd.read_parquet(
        os.path.join(
            SAMPLE_DATA_DIRECTORY, "tourism", "thailand_tourist_attractions.parquet"
        )
    )

    attractions = (
        gpd.GeoDataFrame(
            attractions,
            geometry=gpd.points_from_xy(
                attractions["longitude"],
                attractions["latitude"],
            ),
            crs=WGS84_EPSG,
        )
        .drop(["longitude", "latitude"], axis=1)
        .reset_index()
    )

    # For the purposes of this tutorial, we exclude tourist attractions with type "Other"
    # We also exclude those with type "Suburban Living" since we are handling communities separately
    is_excluded = attractions["type"].isin(["Other", "Suburban Living"])

    # Work on an explicit copy: adding a column to a filtered slice is a chained assignment,
    # which raises SettingWithCopyWarning and may not behave as intended
    attractions = attractions[~is_excluded].copy()
    attractions["category"] = attractions["type"].apply(classify_tourist_attraction)

    return attractions


def annotate_tourism_trips_with_main_tourist_attractions(
    tourism_trips_file, main_tourist_attractions
):
    """
    Annotate the staypoints of one tourism trips file with nearby main tourist attractions.

    The file is read from TOURISM_TRIPS_DIRECTORY, passed to
    `datablox_od.preprocessing.get_nearby_places()` together with the attractions
    (distance threshold: NEARBY_IN_METERS), and the result is written under the same file
    name to ANNOTATED_TOURISM_TRIPS_DIRECTORY.

    Parameters
    ----------
    tourism_trips_file : str
        File name of the tourism trips. Names containing "statistics" are skipped.
    main_tourist_attractions : geopandas.GeoDataFrame
        Attractions with the columns `attraction`, `geometry`, `type`, and `category`.

    Returns
    -------
    The annotated tourism trips, or None if the file was skipped.
    """
    # Statistics files do not contain trips
    if "statistics" in tourism_trips_file:
        return None

    column_names = {
        "attraction": "nearby_main_tourist_attractions",
        "type": "nearby_main_tourist_attraction_types",
        "category": "nearby_main_tourist_attraction_categories",
    }
    nearby_attractions = main_tourist_attractions[
        ["attraction", "geometry", "type", "category"]
    ].rename(columns=column_names)

    input_path = os.path.join(TOURISM_TRIPS_DIRECTORY, tourism_trips_file)
    output_path = os.path.join(ANNOTATED_TOURISM_TRIPS_DIRECTORY, tourism_trips_file)

    annotated_trip = datablox_od.preprocessing.get_nearby_places(
        gpd.read_parquet(input_path),
        nearby_attractions,
        distance_threshold=NEARBY_IN_METERS,
        projected_crs=PROJECTED_CRS,
    )
    annotated_trip.to_parquet(output_path)

    return annotated_trip

In [5]:
# Load and preprocess the attractions once so they can be reused for every tourism trips file
main_tourist_attractions = process_main_tourist_attractions()
# Display the result as the cell output
main_tourist_attractions

Unnamed: 0,attraction,url,address,type,geometry,category
0,The Grand Palace,https://www.tourismthailand.org/Attraction/the...,Phra Borom Maha Ratchawang Phra Nakhon Bangkok...,Historical Sites & Monuments/Royal Palace/Temple,POINT (100.49129 13.74983),{Cultural}
1,Mon Cham,https://www.tourismthailand.org/Attraction/mon...,Mae Raem Mae Rim Chiang Mai 50180,Mountain (Doi),POINT (98.8228 18.93591),{Nature}
...,...,...,...,...,...,...
3457,Phu Sang National Park,https://www.tourismthailand.org/Attraction/phu...,Moo 10 Thung Kluai Phu Sang Phayao 56110,National Parks & Marine Reserves/Green Travel,POINT (100.41191 19.61597),{Nature}
3460,Wat Wang Khanai Thayikaram Hot Well,https://www.tourismthailand.org/Attraction/wat...,Wang Khanai Tha Muang Kanchanaburi 71110,Hot Spring,POINT (99.64955 13.95123),{Nature}


Just for illustrative purposes, we annotate the tourism trips of the device with ID `C8AF1957-7F70-421E-AB7D-817C5F9BA71E`.

In [6]:
sample = annotate_tourism_trips_with_main_tourist_attractions(
    "C8AF1957-7F70-421E-AB7D-817C5F9BA71E.parquet",
    main_tourist_attractions,
)
# Show only the staypoints with at least one nearby attraction. Staypoints without any hold
# a single missing value; pd.isna() recognizes None and every NaN, whereas comparing against
# [np.nan] only matches the np.nan object itself.
sample[
    sample["nearby_main_tourist_attractions"].apply(
        lambda places: not (len(places) == 1 and pd.isna(places[0]))
    )
]

Unnamed: 0_level_0,time_arrive,time_depart,num_pings_in_staypoint,num_minutes_stayed,geometry,device_id,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN,is_staypoint_in_district_of_residence,distance_from_origin_in_km,nearby_main_tourist_attractions,nearby_main_tourist_attraction_types,nearby_main_tourist_attraction_categories
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
5,2023-08-08 17:57:47,2023-08-10 06:57:52,219,2220.083333,POINT (100.32378 13.81158),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,True,0.000000,"[The Salaya, Kanchanapisek Non-Formal Educatio...","[Shopping/Museums/Art & Craft Centres/River, C...","[{Entertainment, Services, Nature, Cultural}, ..."
5,2023-08-10 07:02:37,2023-08-10 08:30:55,16,88.300000,POINT (100.58301 13.72792),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a48489fffff,Bangkok,Bangkok#Vadhana,Bangkok#Vadhana#Khlong Tan Nuea,False,29.522016,"[Stanley Mini Venture, Science Center For Educ...","[Art Galleries , Educational museums, Sports V...","[{Cultural}, {Cultural}, {Nature, Entertainmen..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,2024-01-22 10:25:04,2024-01-22 15:52:16,33,327.200000,POINT (100.32374 13.81153),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,True,0.000000,"[The Salaya, Kanchanapisek Non-Formal Educatio...","[Shopping/Museums/Art & Craft Centres/River, C...","[{Entertainment, Services, Nature, Cultural}, ..."
71,2024-01-22 21:40:43,2024-01-23 09:15:34,34,694.850000,POINT (100.32376 13.81156),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,True,0.003369,"[The Salaya, Kanchanapisek Non-Formal Educatio...","[Shopping/Museums/Art & Craft Centres/River, C...","[{Entertainment, Services, Nature, Cultural}, ..."


For the actual tutorial, we are going to annotate the tourism trips taken by all the devices in our dataset. To speed this up, we employ multithreading using Python's built-in [`ThreadPoolExecutor`](https://docs.python.org/3/library/concurrent.futures.html).

In [7]:
# Adjust max_workers depending on your machine's available resources
with ThreadPoolExecutor(max_workers=2048) as executor:
    futures = {
        executor.submit(
            annotate_tourism_trips_with_main_tourist_attractions,
            file,
            main_tourist_attractions,
        ): file
        for file in os.listdir(TOURISM_TRIPS_DIRECTORY)
    }

    # An exception raised in a worker is stored in its future and is lost unless we ask
    # for it, so report every file that could not be annotated
    for future, file in futures.items():
        error = future.exception()
        if error is not None:
            print(f"Failed to annotate {file}: {error!r}")

<hr>

## II. Communities

We refer to the list of communities from the Tourism Authority of Thailand.

Since we do not have the exact coordinates of the communities, we associate the staypoints with the communities in their respective subdistricts.

In [8]:
def process_communities():
    """
    Load the communities and key them by administrative area.

    Reads `thailand_communities.csv`, renames the `Subdistrict`, `District`, `Province`, and
    `Community` columns to `ADM3_EN`, `ADM2_EN`, `ADM1_EN`, and `community`, and turns the
    district and subdistrict names into "#"-separated paths:
    `ADM2_EN` becomes "<province>#<district>" and
    `ADM3_EN` becomes "<province>#<district>#<subdistrict>".

    Returns
    -------
    pandas.DataFrame
        The communities with the renamed and prefixed columns.
    """
    column_names = {
        "Subdistrict": "ADM3_EN",
        "District": "ADM2_EN",
        "Province": "ADM1_EN",
        "Community": "community",
    }
    csv_path = os.path.join(SAMPLE_DATA_DIRECTORY, "tourism", "thailand_communities.csv")
    communities = pd.read_csv(csv_path).rename(columns=column_names)

    # The district must be prefixed first, since the subdistrict path builds on it
    district_path = communities["ADM1_EN"] + "#" + communities["ADM2_EN"]
    communities["ADM2_EN"] = district_path
    communities["ADM3_EN"] = district_path + "#" + communities["ADM3_EN"]

    return communities


def annotate_tourism_trips_with_communities(
    tourism_trips_file,
    communities,
):
    """
    Annotate the staypoints of one tourism trips file with the communities in their subdistrict.

    The file is read from ANNOTATED_TOURISM_TRIPS_DIRECTORY, matched with the communities on
    `ADM3_EN`, and written back to the same path with the added column
    `communities_in_subdistrict`. The function can be re-run on a file that it has already
    annotated; the previous annotation is replaced.

    Parameters
    ----------
    tourism_trips_file : str
        File name of the tourism trips. Names containing "statistics" are skipped.
    communities : pandas.DataFrame
        Communities with the columns `community` and `ADM3_EN`.

    Returns
    -------
    The tourism trips indexed by `trip_id`, where `communities_in_subdistrict` holds the list
    of communities for each staypoint ([nan] if there are none), or None if the file was skipped.
    """
    # Skip statistics files
    if "statistics" in tourism_trips_file:
        return

    trip_path = os.path.join(ANNOTATED_TOURISM_TRIPS_DIRECTORY, tourism_trips_file)

    # reset_index() moves trip_id into a column and leaves a unique positional index
    # for each staypoint
    tourism_trip_orig = gpd.read_parquet(trip_path).reset_index()

    # Remove the annotation from a previous run; otherwise the column would exist twice
    # and the join below would fail on the overlapping column name
    tourism_trip_orig = tourism_trip_orig.drop(
        columns="communities_in_subdistrict", errors="ignore"
    )

    # merge() discards the index of the left frame, so the staypoint index is carried
    # along as an explicit column in order to group the matches per staypoint
    staypoint_subdistricts = (
        tourism_trip_orig[["ADM3_EN"]].rename_axis("staypoint_id").reset_index()
    )
    communities_per_staypoint = (
        staypoint_subdistricts.merge(
            communities[["community", "ADM3_EN"]], how="left", on="ADM3_EN"
        )
        .groupby("staypoint_id")["community"]
        .agg(list)
        .rename("communities_in_subdistrict")
    )

    tourism_trip = tourism_trip_orig.join(communities_per_staypoint).set_index(
        "trip_id"
    )
    tourism_trip.to_parquet(trip_path)

    return tourism_trip

In [9]:
# Load and preprocess the communities once so they can be reused for every tourism trips file
communities = process_communities()
# Display the result as the cell output
communities

Unnamed: 0,Region,community,ADM3_EN,ADM2_EN,ADM1_EN,Reference
0,Northern,Ontai Community,Chiang Mai#San Kamphaeng#On Tai,Chiang Mai#San Kamphaeng,Chiang Mai,https://www.businesseventsthailand.com/en/supp...
1,Northern,Ban Rai Kong Khing community,Chiang Mai#Hang Dong#Nong Khwai,Chiang Mai#Hang Dong,Chiang Mai,https://7greens.tourismthailand.org/en/2020/08...
...,...,...,...,...,...,...
41,Southern,Bang Rong Community,Phuket#Thalang#Pa Khlok,Phuket#Thalang,Phuket,https://visitandamanlocal.com/provinces/destin...
42,Southern,Ban Wang Hon Community,Nakhon Si Thammarat#Cha-Uat#Wang Ang,Nakhon Si Thammarat#Cha-Uat,Nakhon Si Thammarat,https://www.tourismthailand.org/Attraction/ban...


Just for illustrative purposes, we annotate the tourism trips of the device with ID `C8AF1957-7F70-421E-AB7D-817C5F9BA71E`.

In [10]:
sample = annotate_tourism_trips_with_communities(
    "C8AF1957-7F70-421E-AB7D-817C5F9BA71E.parquet",
    communities,
)
# Show only the staypoints whose subdistrict has at least one community. Staypoints without
# any hold a single missing value; pd.isna() recognizes None and every NaN, whereas comparing
# against [np.nan] only matches the np.nan object itself.
sample[
    sample["communities_in_subdistrict"].apply(
        lambda names: not (len(names) == 1 and pd.isna(names[0]))
    )
]

Unnamed: 0_level_0,time_arrive,time_depart,num_pings_in_staypoint,num_minutes_stayed,geometry,device_id,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN,is_staypoint_in_district_of_residence,distance_from_origin_in_km,nearby_main_tourist_attractions,nearby_main_tourist_attraction_types,nearby_main_tourist_attraction_categories,communities_in_subdistrict
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
28,2023-10-06 08:30:42,2023-10-06 09:07:07,22,36.416667,POINT (99.99068 13.4223),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,88658b673dfffff,Samut Songkhram,Samut Songkhram#Mueang Samut Songkhram,Samut Songkhram#Mueang Samut Songkhram#Ban Prok,False,56.157152,[Wat Ban Laem (Wat Phet Samut Worawihan)],[Temple],[[Cultural]],[Ban Rim Klong Homestay]
29,2023-10-08 11:00:29,2023-10-08 11:42:49,10,42.333333,POINT (99.6978 12.92731),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,88658b3491fffff,Phetchaburi,Phetchaburi#Kaeng Krachan,Phetchaburi#Kaeng Krachan#Kaeng Krachan,False,119.025993,[None],[None],[None],[Ban Thamsua Community]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29,2023-10-09 09:11:17,2023-10-09 10:08:00,53,56.716667,POINT (99.64734 12.90825),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,886416c95bfffff,Phetchaburi,Phetchaburi#Kaeng Krachan,Phetchaburi#Kaeng Krachan#Kaeng Krachan,False,123.918340,[None],[None],[None],[Ban Thamsua Community]
30,2023-10-10 06:18:10,2023-10-10 06:49:47,4,31.616667,POINT (99.65283 12.89677),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,886416c865fffff,Phetchaburi,Phetchaburi#Kaeng Krachan,Phetchaburi#Kaeng Krachan#Kaeng Krachan,False,124.598334,[None],[None],[None],[Ban Thamsua Community]


For the actual tutorial, we are going to annotate the tourism trips taken by all the devices in our dataset. To speed this up, we again employ multithreading using Python's built-in [`ThreadPoolExecutor`](https://docs.python.org/3/library/concurrent.futures.html).

In [11]:
# Adjust max_workers depending on your machine's available resources
with ThreadPoolExecutor(max_workers=2048) as executor:
    futures = {
        executor.submit(
            annotate_tourism_trips_with_communities,
            file,
            communities,
        ): file
        for file in os.listdir(ANNOTATED_TOURISM_TRIPS_DIRECTORY)
    }

    # An exception raised in a worker is stored in its future and is lost unless we ask
    # for it, so report every file that could not be annotated
    for future, file in futures.items():
        error = future.exception()
        if error is not None:
            print(f"Failed to annotate {file}: {error!r}")

<hr>

## III. Tourism industries

We identify nearby tourism-related infrastructure and establishments, based on a subset of the tourism industries listed in Annex 3 of the United Nations' [_International Recommendations for Tourism Statistics 2008_](https://unstats.un.org/unsd/publication/seriesm/seriesm_83rev1e.pdf).

To this end, we refer to maps from the [Humanitarian OpenStreetMap Team](https://data.humdata.org/organization/hot). For the purposes of this tutorial, we forgo preprocessing the maps and performing data quality checks; we use the maps as is.

In [12]:
def process_openstreetmap(file):
    """
    Read an OpenStreetMap extract from the `tourism/openstreetmap` sample data folder.

    Parameters
    ----------
    file : str
        File name of the extract.

    Returns
    -------
    geopandas.GeoDataFrame
        The features in the file, with the `osm_id` column cast to the `category` dtype.
    """
    osm_path = os.path.join(SAMPLE_DATA_DIRECTORY, "tourism", "openstreetmap", file)
    features = gpd.read_file(osm_path)
    features["osm_id"] = features["osm_id"].astype("category")
    return features

### Transportation

First, we identify nearby sea ports based on this [map](https://data.humdata.org/dataset/hotosm_tha_sea_ports).

In [13]:
def annotate_tourism_trips_with_seaports(tourism_trips_file, seaports):
    """
    Annotate the staypoints of one tourism trips file with nearby sea ports.

    The file is read from ANNOTATED_TOURISM_TRIPS_DIRECTORY, passed to
    `datablox_od.preprocessing.get_nearby_places()` together with the sea ports
    (distance threshold: NEARBY_IN_METERS), and the result is written back to the same path.

    Parameters
    ----------
    tourism_trips_file : str
        File name of the tourism trips. Names containing "statistics" are skipped.
    seaports : geopandas.GeoDataFrame
        Sea ports with the columns `osm_id`, `name`, `name:en`, `name:th`, `geometry`,
        and `amenity`.

    Returns
    -------
    The annotated tourism trips, or None if the file was skipped.
    """
    # Statistics files do not contain trips
    if "statistics" in tourism_trips_file:
        return None

    column_names = {
        "osm_id": "nearby_seaport_osm_ids",
        "name": "nearby_seaport_names",
        "name:en": "nearby_seaport_names_en",
        "name:th": "nearby_seaport_names_th",
        "amenity": "nearby_seaport_types",
    }
    nearby_seaports = seaports[
        ["osm_id", "name", "name:en", "name:th", "geometry", "amenity"]
    ].rename(columns=column_names)

    # The annotated file is updated in place
    trip_path = os.path.join(ANNOTATED_TOURISM_TRIPS_DIRECTORY, tourism_trips_file)

    annotated_trip = datablox_od.preprocessing.get_nearby_places(
        gpd.read_parquet(trip_path),
        nearby_seaports,
        distance_threshold=NEARBY_IN_METERS,
        projected_crs=PROJECTED_CRS,
    )
    annotated_trip.to_parquet(trip_path)

    return annotated_trip

In [14]:
# Load the sea port polygons once so they can be reused for every tourism trips file
seaports = process_openstreetmap("hotosm_tha_sea_ports_polygons_geojson.geojson")
# Display the result as the cell output
seaports

Unnamed: 0,name,name:en,amenity,building,port,operator:type,addr:full,addr:city,source,name:th,osm_id,osm_type,geometry
0,Chiang Sean,,ferry_terminal,yes,,,,,,,1010325033,ways_poly,"POLYGON ((100.09004 20.2673, 100.0901 20.2672,..."
1,Din Deng Noi FERRY,,ferry_terminal,yes,,,,,,,686512115,ways_poly,"POLYGON ((98.74882 8.122, 98.74883 8.1218, 98...."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,Chedi Pier,,ferry_terminal,yes,,,,,,,29976999,ways_poly,"POLYGON ((100.58677 13.59921, 100.5868 13.5992..."
10,ທ່າເຮືອສາມຫຼ່ຽມຄຳ 金三角码头,Golden Triangle Port,ferry_terminal,yes,,,,,,,1350362672,ways_poly,"POLYGON ((100.08605 20.35466, 100.08605 20.354..."


In [15]:
sample = annotate_tourism_trips_with_seaports(
    "24920418-326B-4091-A94B-C3C726B3771F.parquet",
    seaports,
)
# Show only the staypoints with a nearby sea port, i.e., whose first OSM ID is not missing.
# pd.notna() also accepts None and non-numeric IDs, on which np.isnan() raises a TypeError.
sample[sample["nearby_seaport_osm_ids"].apply(lambda osm_ids: pd.notna(osm_ids[0]))]

Unnamed: 0_level_0,time_arrive,time_depart,num_pings_in_staypoint,num_minutes_stayed,geometry,device_id,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN,...,distance_from_origin_in_km,nearby_main_tourist_attractions,nearby_main_tourist_attraction_types,nearby_main_tourist_attraction_categories,communities_in_subdistrict,nearby_seaport_osm_ids,nearby_seaport_names,nearby_seaport_names_en,nearby_seaport_names_th,nearby_seaport_types
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
6,2023-12-06 13:25:18,2023-12-06 16:53:45,42,208.45,POINT (100.59712 13.59933),24920418-326B-4091-A94B-C3C726B3771F,88658b4823fffff,Samut Prakan,Samut Prakan#Mueang Samut Prakan,Samut Prakan#Mueang Samut Prakan#Pak Nam,...,0.0,"[Phi Suea Samut Fortress, Wat Phichai Songkhra...","[Museums, Historical Sites & Monuments/Temple,...","[[Cultural], [Cultural], [Cultural], [Cultural...",[None],[29976999],[Chedi Pier],[None],[None],[ferry_terminal]
6,2023-12-07 14:37:32,2023-12-07 17:12:53,15,155.35,POINT (100.59677 13.60096),24920418-326B-4091-A94B-C3C726B3771F,88658b49c9fffff,Samut Prakan,Samut Prakan#Mueang Samut Prakan,Samut Prakan#Mueang Samut Prakan#Pak Nam,...,0.184793,"[Phi Suea Samut Fortress, Wat Phichai Songkhra...","[Museums, Historical Sites & Monuments/Temple,...","[[Cultural], [Cultural], [Cultural], [Cultural...",[None],[29976999],[Chedi Pier],[None],[None],[ferry_terminal]
29,2024-01-28 15:35:17,2024-01-28 16:10:31,5,35.233333,POINT (100.59705 13.60097),24920418-326B-4091-A94B-C3C726B3771F,88658b49c9fffff,Samut Prakan,Samut Prakan#Mueang Samut Prakan,Samut Prakan#Mueang Samut Prakan#Pak Nam,...,2.514819,"[Phi Suea Samut Fortress, Wat Phichai Songkhra...","[Museums, Historical Sites & Monuments/Temple,...","[[Cultural], [Cultural], [Cultural], [Cultural...",[None],[29976999],[Chedi Pier],[None],[None],[ferry_terminal]


In [16]:
# Adjust max_workers depending on your machine's available resources
with ThreadPoolExecutor(max_workers=2048) as executor:
    futures = {
        executor.submit(
            annotate_tourism_trips_with_seaports,
            file,
            seaports,
        ): file
        for file in os.listdir(ANNOTATED_TOURISM_TRIPS_DIRECTORY)
    }

    # An exception raised in a worker is stored in its future and is lost unless we ask
    # for it, so report every file that could not be annotated
    for future, file in futures.items():
        error = future.exception()
        if error is not None:
            print(f"Failed to annotate {file}: {error!r}")

Next, we identify nearby airports based on this [map](https://data.humdata.org/dataset/hotosm_tha_airports).

In [17]:
# Load the airport polygons once so they can be reused for every tourism trips file
airports = process_openstreetmap("hotosm_tha_airports_polygons_geojson.geojson")
# Display the result as the cell output
airports

Unnamed: 0,name,name:en,aeroway,building,emergency,emergency:helipad,operator:type,capacity:persons,addr:full,addr:city,source,name:th,osm_id,osm_type,geometry
0,,,terminal,yes,,,,,,,,,310109679,ways_poly,"POLYGON ((99.10216 18.67672, 99.10216 18.67665..."
1,,,apron,,,,,,,,,,564448340,ways_poly,"POLYGON ((99.94036 8.53438, 99.94109 8.5343, 9..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
668,ลานบินเกาะช้าง,Airfield Ko Chang,aerodrome,,,,,,,,,ลานบินเกาะช้าง,7061956,relations,"POLYGON ((102.29464 12.05044, 102.29517 12.050..."
669,ท่าอากาศยานขอนแก่น,Khon Kaen Airport,aerodrome,,,,,,,,,ท่าอากาศยานขอนแก่น,1442532,relations,"POLYGON ((102.77448 16.45496, 102.77525 16.454..."


In [18]:
def annotate_tourism_trips_with_airports(tourism_trips_file, airports):
    """
    Annotate the staypoints of one tourism trips file with nearby airports.

    The file is read from ANNOTATED_TOURISM_TRIPS_DIRECTORY, passed to
    `datablox_od.preprocessing.get_nearby_places()` together with the airports
    (distance threshold: NEARBY_IN_METERS), and the result is written back to the same path.

    Parameters
    ----------
    tourism_trips_file : str
        File name of the tourism trips. Names containing "statistics" are skipped.
    airports : geopandas.GeoDataFrame
        Airport features with the columns `osm_id`, `name`, `name:en`, `name:th`,
        `geometry`, and `aeroway`.

    Returns
    -------
    The annotated tourism trips, or None if the file was skipped.
    """
    # Statistics files do not contain trips
    if "statistics" in tourism_trips_file:
        return None

    column_names = {
        "osm_id": "nearby_airport_osm_ids",
        "name": "nearby_airport_names",
        "name:en": "nearby_airport_names_en",
        "name:th": "nearby_airport_names_th",
        "aeroway": "nearby_airport_types",
    }
    nearby_airports = airports[
        ["osm_id", "name", "name:en", "name:th", "geometry", "aeroway"]
    ].rename(columns=column_names)

    # The annotated file is updated in place
    trip_path = os.path.join(ANNOTATED_TOURISM_TRIPS_DIRECTORY, tourism_trips_file)

    annotated_trip = datablox_od.preprocessing.get_nearby_places(
        gpd.read_parquet(trip_path),
        nearby_airports,
        distance_threshold=NEARBY_IN_METERS,
        projected_crs=PROJECTED_CRS,
    )
    annotated_trip.to_parquet(trip_path)

    return annotated_trip

In [19]:
sample = annotate_tourism_trips_with_airports(
    "02D5C053-F289-42BF-82CE-BB585958ABB3.parquet",
    airports,
)
# Show only the staypoints with a nearby airport, i.e., whose first OSM ID is not missing.
# pd.notna() also accepts None and non-numeric IDs, on which np.isnan() raises a TypeError.
sample[sample["nearby_airport_osm_ids"].apply(lambda osm_ids: pd.notna(osm_ids[0]))]

Unnamed: 0_level_0,time_arrive,time_depart,num_pings_in_staypoint,num_minutes_stayed,geometry,device_id,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN,...,nearby_seaport_osm_ids,nearby_seaport_names,nearby_seaport_names_en,nearby_seaport_names_th,nearby_seaport_types,nearby_airport_osm_ids,nearby_airport_names,nearby_airport_names_en,nearby_airport_names_th,nearby_airport_types
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,2023-10-12 04:57:50,2023-10-12 06:40:59,35,103.15,POINT (100.7549 13.6909),02D5C053-F289-42BF-82CE-BB585958ABB3,8864a495adfffff,Samut Prakan,Samut Prakan#Bang Phli,Samut Prakan#Bang Phli#Nong Prue,...,[None],[None],[None],[None],[None],"[388844064, 1253121854, 763466015, 90990291, 1...","[None, None, อาคารเทียบเครื่องบินรองหลังที่ 1,...","[None, None, Satellite Terminal 1, None, None,...","[None, None, อาคารเทียบเครื่องบินรองหลังที่ 1,...","[helipad, apron, terminal, apron, runway, apro..."
1,2023-10-12 08:31:40,2023-10-12 09:02:17,14,30.616667,POINT (100.3941 6.93655),02D5C053-F289-42BF-82CE-BB585958ABB3,8865183951fffff,Songkhla,Songkhla#Hat Yai,Songkhla#Hat Yai#Thung Tam Sao,...,[None],[None],[None],[None],[None],"[1041194858, 206157754, 1041192243, 814830511,...","[ท่าอากาศยานนานาชาติหาดใหญ่, None, None, HATYA...","[Hat Yai International Airport, None, None, No...","[ท่าอากาศยานนานาชาติหาดใหญ่, None, None, None,...","[aerodrome, apron, apron, navigationaid, apron..."
1,2023-10-13 18:05:25,2023-10-13 18:46:56,55,41.516667,POINT (100.5908 7.2017),02D5C053-F289-42BF-82CE-BB585958ABB3,8865187ac3fffff,Songkhla,Songkhla#Mueang Songkhla,Songkhla#Mueang Songkhla#Bo Yang,...,[None],[None],[None],[None],[None],"[988784908, 86055088, 530317963]","[ท่าอากาศยานสงขลา, None, None]","[Songkhla Airport, None, None]","[ท่าอากาศยานสงขลา, None, None]","[aerodrome, runway, apron]"


In [20]:
# Adjust max_workers depending on your machine's available resources
with ThreadPoolExecutor(max_workers=2048) as executor:
    futures = {
        executor.submit(
            annotate_tourism_trips_with_airports,
            file,
            airports,
        ): file
        for file in os.listdir(ANNOTATED_TOURISM_TRIPS_DIRECTORY)
    }

    # An exception raised in a worker is stored in its future and is lost unless we ask
    # for it, so report every file that could not be annotated
    for future, file in futures.items():
        error = future.exception()
        if error is not None:
            print(f"Failed to annotate {file}: {error!r}")

Lastly, we identify nearby railways based on this [map](https://data.humdata.org/dataset/hotosm_tha_railways).

In [21]:
def annotate_tourism_trips_with_railways(tourism_trips_file, railways):
    """
    Annotate the staypoints of one tourism trips file with nearby railways.

    The file is read from ANNOTATED_TOURISM_TRIPS_DIRECTORY, passed to
    `datablox_od.preprocessing.get_nearby_places()` together with the railways
    (distance threshold: NEARBY_IN_METERS), and the result is written back to the same path.

    Parameters
    ----------
    tourism_trips_file : str
        File name of the tourism trips. Names containing "statistics" are skipped.
    railways : geopandas.GeoDataFrame
        Railway features with the columns `osm_id`, `name`, `name:en`, `name:th`,
        `geometry`, and `railway`.

    Returns
    -------
    The annotated tourism trips, or None if the file was skipped.
    """
    # Statistics files do not contain trips
    if "statistics" in tourism_trips_file:
        return None

    column_names = {
        "osm_id": "nearby_railway_osm_ids",
        "name": "nearby_railway_names",
        "name:en": "nearby_railway_names_en",
        "name:th": "nearby_railway_names_th",
        "railway": "nearby_railway_types",
    }
    nearby_railways = railways[
        ["osm_id", "name", "name:en", "name:th", "geometry", "railway"]
    ].rename(columns=column_names)

    # The annotated file is updated in place
    trip_path = os.path.join(ANNOTATED_TOURISM_TRIPS_DIRECTORY, tourism_trips_file)

    annotated_trip = datablox_od.preprocessing.get_nearby_places(
        gpd.read_parquet(trip_path),
        nearby_railways,
        distance_threshold=NEARBY_IN_METERS,
        projected_crs=PROJECTED_CRS,
    )
    annotated_trip.to_parquet(trip_path)

    return annotated_trip

In [22]:
# Load the railway lines once so they can be reused for every tourism trips file
railways = process_openstreetmap("hotosm_tha_railways_lines_geojson.geojson")
# Display the result as the cell output
railways

Unnamed: 0,name,name:en,railway,ele,operator:type,layer,addr:full,addr:city,source,name:th,osm_id,osm_type,geometry
0,,,rail,,,,,,,,952687468,ways_line,"LINESTRING (99.95995 8.43732, 99.9599 8.43803,..."
1,,,rail,,,,,,,,952687470,ways_line,"LINESTRING (99.95997 8.43717, 99.95988 8.43758..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3209,,,rail,,,,,,,,1210513275,ways_line,"LINESTRING (100.60227 13.98916, 100.60198 13.9..."
3210,ทางรถไฟสายเหนือ/ตะวันออกเฉียงเหนือ,,rail,,,,,,GPS,,1210513282,ways_line,"LINESTRING (100.60203 13.99192, 100.60209 13.9..."


In [23]:
sample = annotate_tourism_trips_with_railways(
    "C8AF1957-7F70-421E-AB7D-817C5F9BA71E.parquet",
    railways,
)
# Show only the staypoints with a nearby railway, i.e., whose first OSM ID is not missing.
# pd.notna() also accepts None and non-numeric IDs, on which np.isnan() raises a TypeError.
sample[sample["nearby_railway_osm_ids"].apply(lambda osm_ids: pd.notna(osm_ids[0]))]

Unnamed: 0_level_0,time_arrive,time_depart,num_pings_in_staypoint,num_minutes_stayed,geometry,device_id,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN,...,nearby_airport_osm_ids,nearby_airport_names,nearby_airport_names_en,nearby_airport_names_th,nearby_airport_types,nearby_railway_osm_ids,nearby_railway_names,nearby_railway_names_en,nearby_railway_names_th,nearby_railway_types
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,2023-08-08 17:57:47,2023-08-10 06:57:52,219,2220.083333,POINT (100.32378 13.81158),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,...,[nan],[None],[None],[None],[None],"[891436025, 869051522, 891436021, 891436023, 8...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[Southern Line, Southern Line, Southern Line, ...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[rail, rail, rail, rail, rail, rail, rail, rai..."
5,2023-08-10 07:02:37,2023-08-10 08:30:55,16,88.300000,POINT (100.58301 13.72792),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a48489fffff,Bangkok,Bangkok#Vadhana,Bangkok#Vadhana#Khlong Tan Nuea,...,"[326785809.0, 324275286.0]","[None, None]","[None, None]","[None, None]","[helipad, helipad]","[156966523, 210643824, 210645284, 210643840, 2...","[สายท่าเรือ, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[สายท่าเรือ, None, None, None, None, None, Non...","[rail, rail, rail, rail, rail, rail, rail, rai..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,2024-01-22 16:15:58,2024-01-22 16:46:47,18,30.816667,POINT (100.48246 13.78123),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4bad1fffff,Bangkok,Bangkok#Bang Phlat,Bangkok#Bang Phlat#Bang Bamru,...,[nan],[None],[None],[None],[None],"[971385290, 199808433, 971385293, 199808436, 1...","[None, สายใต้เดิม, None, สายใต้เดิม, สายใต้เดิ...","[None, Former Southern Line, None, Former Sout...","[None, สายใต้เดิม, None, สายใต้เดิม, สายใต้เดิ...","[rail, rail, rail, rail, rail, rail, rail, rai..."
71,2024-01-22 21:40:43,2024-01-23 09:15:34,34,694.850000,POINT (100.32376 13.81156),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,...,[nan],[None],[None],[None],[None],"[891436025, 869051522, 891436021, 891436023, 8...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[Southern Line, Southern Line, Southern Line, ...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[rail, rail, rail, rail, rail, rail, rail, rai..."


In [24]:
# Adjust max_workers depending on your machine's available resources
with ThreadPoolExecutor(max_workers=2048) as executor:
    futures = {
        executor.submit(
            annotate_tourism_trips_with_railways,
            file,
            railways,
        ): file
        for file in os.listdir(ANNOTATED_TOURISM_TRIPS_DIRECTORY)
    }

    # An exception raised in a worker is stored in its future and is lost unless we ask
    # for it, so report every file that could not be annotated
    for future, file in futures.items():
        error = future.exception()
        if error is not None:
            print(f"Failed to annotate {file}: {error!r}")

### Accommodation

We filter the [points of interest](https://data.humdata.org/dataset/hotosm_tha_points_of_interest) to include only those with tags with key `tourism` and value `hotel`, `motel`, `guest_house`, `hostel`, `alpine_hut`, `chalet`, `camp_site`, `caravan_site`, `apartment`, or `bed_and_breakfast`. Note that this is a non-exhaustive list of possible establishments that provide accommodation.

In [25]:
# Values of the `tourism` column that we treat as accommodation when filtering the
# points of interest (non-exhaustive)
ACCOMMODATION = [
    "hotel",
    "motel",
    "guest_house",
    "hostel",
    "alpine_hut",
    "chalet",
    "camp_site",
    "caravan_site",
    "apartment",
    "bed_and_breakfast",
]


def annotate_tourism_trips_with_accommodation(tourism_trips_file, points_of_interest):
    """
    Annotate the staypoints of one tourism trips file with nearby accommodation.

    Only the points of interest whose `tourism` value is listed in ACCOMMODATION are used.
    The file is read from ANNOTATED_TOURISM_TRIPS_DIRECTORY, passed to
    `datablox_od.preprocessing.get_nearby_places()` together with these points of interest
    (distance threshold: NEARBY_IN_METERS), and the result is written back to the same path.

    Parameters
    ----------
    tourism_trips_file : str
        File name of the tourism trips. Names containing "statistics" are skipped.
    points_of_interest : geopandas.GeoDataFrame
        Points of interest with the columns `osm_id`, `name`, `name:en`, `name:th`,
        `geometry`, and `tourism`.

    Returns
    -------
    The annotated tourism trips, or None if the file was skipped.
    """
    # Statistics files do not contain trips
    if "statistics" in tourism_trips_file:
        return None

    # Keep only the points of interest that provide accommodation
    is_accommodation = points_of_interest["tourism"].isin(ACCOMMODATION)
    column_names = {
        "osm_id": "nearby_accommodation_osm_ids",
        "name": "nearby_accommodation_names",
        "name:en": "nearby_accommodation_names_en",
        "name:th": "nearby_accommodation_names_th",
        "tourism": "nearby_accommodation_types",
    }
    nearby_accommodation = points_of_interest[is_accommodation][
        ["osm_id", "name", "name:en", "name:th", "geometry", "tourism"]
    ].rename(columns=column_names)

    # The annotated file is updated in place
    trip_path = os.path.join(ANNOTATED_TOURISM_TRIPS_DIRECTORY, tourism_trips_file)

    annotated_trip = datablox_od.preprocessing.get_nearby_places(
        gpd.read_parquet(trip_path),
        nearby_accommodation,
        distance_threshold=NEARBY_IN_METERS,
        projected_crs=PROJECTED_CRS,
    )
    annotated_trip.to_parquet(trip_path)

    return annotated_trip

In [26]:
# Load the points of interest once; the annotation functions filter them by tag value
points_of_interest = process_openstreetmap(
    "hotosm_tha_points_of_interest_polygons_geojson.geojson"
)
# Display the result as the cell output
points_of_interest

Unnamed: 0,name,name:en,amenity,man_made,shop,tourism,opening_hours,beds,rooms,addr:full,addr:housenumber,addr:street,addr:city,source,name:th,osm_id,osm_type,geometry
0,Fuel Station,,fuel,,,,,,,,,,,Bing,,1028295046,ways_poly,"POLYGON ((99.7086 14.48278, 99.70864 14.48268,..."
1,,Fuel Station,fuel,,,,,,,,,,,Bing,,1028295071,ways_poly,"POLYGON ((99.71662 14.63935, 99.71677 14.63928..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62487,พิพิธภัณฑ์สถานแห่งชาติราชบุรี,,,,,museum,We-Su 09:00-16:00,,,,,,,,,12906115,relations,"POLYGON ((99.81736 13.54094, 99.8179 13.54082,..."
62488,มณีจันท์ รีสอร์ท,Maneechan Resort,,,,hotel,,,,,,,,,มณีจันท์ รีสอร์ท,7887772,relations,"POLYGON ((102.13189 12.60664, 102.1319 12.6060..."


In [27]:
sample = annotate_tourism_trips_with_accommodation(
    "C8AF1957-7F70-421E-AB7D-817C5F9BA71E.parquet",
    points_of_interest,
)
# Show only the staypoints with nearby accommodation, i.e., whose first OSM ID is not missing.
# pd.notna() also accepts None and non-numeric IDs, on which np.isnan() raises a TypeError.
sample[
    sample["nearby_accommodation_osm_ids"].apply(lambda osm_ids: pd.notna(osm_ids[0]))
]

Unnamed: 0_level_0,time_arrive,time_depart,num_pings_in_staypoint,num_minutes_stayed,geometry,device_id,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN,...,nearby_railway_osm_ids,nearby_railway_names,nearby_railway_names_en,nearby_railway_names_th,nearby_railway_types,nearby_accommodation_osm_ids,nearby_accommodation_names,nearby_accommodation_names_en,nearby_accommodation_names_th,nearby_accommodation_types
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,2023-08-08 17:57:47,2023-08-10 06:57:52,219,2220.083333,POINT (100.32378 13.81158),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,...,"[891436025.0, 869051522.0, 891436021.0, 891436...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[Southern Line, Southern Line, Southern Line, ...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[rail, rail, rail, rail, rail, rail, rail, rai...",[1049501436],[MWIT guest house (หอ 14)],[None],[None],[hostel]
5,2023-08-10 07:02:37,2023-08-10 08:30:55,16,88.300000,POINT (100.58301 13.72792),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a48489fffff,Bangkok,Bangkok#Vadhana,Bangkok#Vadhana#Khlong Tan Nuea,...,"[156966523.0, 210643824.0, 210645284.0, 210643...","[สายท่าเรือ, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[สายท่าเรือ, None, None, None, None, None, Non...","[rail, rail, rail, rail, rail, rail, rail, rai...","[318748790, 325030906, 318728683, 318728681, 1...","[Hope Land Executive Residence, Beat Hotel, Li...","[None, None, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[hotel, hotel, hotel, hotel, hotel, hotel, hot..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,2024-01-22 18:00:28,2024-01-22 20:16:21,13,135.883333,POINT (100.65025 13.62046),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,88658b490dfffff,Samut Prakan,Samut Prakan#Bang Phli,Samut Prakan#Bang Phli#Bang Kaeo,...,[nan],[None],[None],[None],[None],[221578618],[Yamato Hotel],[None],[None],[hotel]
71,2024-01-22 21:40:43,2024-01-23 09:15:34,34,694.850000,POINT (100.32376 13.81156),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,...,"[891436025.0, 869051522.0, 891436021.0, 891436...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[Southern Line, Southern Line, Southern Line, ...","[สายใต้, สายใต้, สายใต้, สายใต้, สายใต้, สายใต...","[rail, rail, rail, rail, rail, rail, rail, rai...",[1049501436],[MWIT guest house (หอ 14)],[None],[None],[hostel]


In [28]:
# Adjust max_workers depending on your machine's available resources
with ThreadPoolExecutor(max_workers=2048) as executor:
    futures = {
        executor.submit(
            annotate_tourism_trips_with_accommodation,
            file,
            points_of_interest,
        ): file
        for file in os.listdir(ANNOTATED_TOURISM_TRIPS_DIRECTORY)
    }

    # An exception raised in a worker is stored in its future and is lost unless we ask
    # for it, so report every file that could not be annotated
    for future, file in futures.items():
        error = future.exception()
        if error is not None:
            print(f"Failed to annotate {file}: {error!r}")

### Food and beverages

We filter the [points of interest](https://data.humdata.org/dataset/hotosm_tha_points_of_interest) to include only those with tags with key `amenity` and value `restaurant`, `fast_food`, `cafe`, `bar`, `pub`, `canteen`, `food_court`, `ice_cream`, or `biergarten`. Note that this is a non-exhaustive list of possible establishments that provide food and beverages.

In [29]:
# Values of the `amenity` column that we treat as food and beverage establishments when
# filtering the points of interest (non-exhaustive)
FOOD_AND_BEVERAGES = [
    "restaurant",
    "fast_food",
    "cafe",
    "bar",
    "pub",
    "canteen",
    "food_court",
    "ice_cream",
    "biergarten",
]


def annotate_tourism_trips_with_food_and_beverages(
    tourism_trips_file,
    points_of_interest,
):
    """
    Annotate one tourism trip file with nearby food and beverage establishments.

    The file is read from ANNOTATED_TOURISM_TRIPS_DIRECTORY, passed together with
    the filtered points of interest to `datablox_od.preprocessing.get_nearby_places()`,
    and the result is written back to the same path, overwriting the file.

    Parameters
    ----------
    tourism_trips_file : str
        Name of a parquet file inside ANNOTATED_TOURISM_TRIPS_DIRECTORY.
    points_of_interest : GeoDataFrame
        Points of interest with at least the columns `osm_id`, `name`, `name:en`,
        `name:th`, `geometry`, and `amenity`.

    Returns
    -------
    The annotated tourism trip, or None if the file name contains "statistics".
    """
    # Statistics files are not tourism trips, so there is nothing to annotate
    if "statistics" in tourism_trips_file:
        return None

    # Keep only the points of interest whose amenity is food- or beverage-related
    is_food_or_beverage = points_of_interest["amenity"].isin(FOOD_AND_BEVERAGES)
    matching_places = points_of_interest[is_food_or_beverage]

    # Prefix the columns so that they do not clash with other annotations
    kept_columns = ["osm_id", "name", "name:en", "name:th", "geometry", "amenity"]
    renamed_columns = {
        "osm_id": "nearby_food_and_beverages_osm_ids",
        "name": "nearby_food_and_beverages_names",
        "name:en": "nearby_food_and_beverages_names_en",
        "name:th": "nearby_food_and_beverages_names_th",
        "amenity": "nearby_food_and_beverages_types",
    }
    matching_places = matching_places[kept_columns].rename(columns=renamed_columns)

    # The same path is used for both reading and writing
    trip_path = os.path.join(ANNOTATED_TOURISM_TRIPS_DIRECTORY, tourism_trips_file)
    annotated_trip = gpd.read_parquet(trip_path)

    annotated_trip = datablox_od.preprocessing.get_nearby_places(
        annotated_trip,
        matching_places,
        distance_threshold=NEARBY_IN_METERS,
        projected_crs=PROJECTED_CRS,
    )

    annotated_trip.to_parquet(trip_path)

    return annotated_trip

In [30]:
# Annotate a single device's tourism trips as a worked example
sample = annotate_tourism_trips_with_food_and_beverages(
    "C8AF1957-7F70-421E-AB7D-817C5F9BA71E.parquet",
    points_of_interest,
)

# Show only the staypoints whose first nearby OSM id is not NaN
has_nearby_food_and_beverages = sample["nearby_food_and_beverages_osm_ids"].apply(
    lambda osm_ids: not np.isnan(osm_ids[0])
)
sample[has_nearby_food_and_beverages]

Unnamed: 0_level_0,time_arrive,time_depart,num_pings_in_staypoint,num_minutes_stayed,geometry,device_id,h3_cell,ADM1_EN,ADM2_EN,ADM3_EN,...,nearby_accommodation_osm_ids,nearby_accommodation_names,nearby_accommodation_names_en,nearby_accommodation_names_th,nearby_accommodation_types,nearby_food_and_beverages_osm_ids,nearby_food_and_beverages_names,nearby_food_and_beverages_names_en,nearby_food_and_beverages_names_th,nearby_food_and_beverages_types
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
5,2023-08-08 17:57:47,2023-08-10 06:57:52,219,2220.083333,POINT (100.32378 13.81158),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,...,[1049501436.0],[MWIT guest house (หอ 14)],[None],[None],[hostel],"[1144432327, 1144432326, 1144432329, 114443233...","[None, None, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[restaurant, restaurant, restaurant, restauran..."
5,2023-08-10 07:02:37,2023-08-10 08:30:55,16,88.300000,POINT (100.58301 13.72792),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a48489fffff,Bangkok,Bangkok#Vadhana,Bangkok#Vadhana#Khlong Tan Nuea,...,"[318748790.0, 325030906.0, 318728683.0, 318728...","[Hope Land Executive Residence, Beat Hotel, Li...","[None, None, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[hotel, hotel, hotel, hotel, hotel, hotel, hot...","[824392602, 317966604, 1391250730, 575459596, ...","[T-REX BBQ ทีเร็กซ์ย่างมันส์, W Market, None, ...","[None, None, None, None, An An Lao, Savoey Sea...","[None, None, None, None, อันอันเหลา, เสวย, Non...","[restaurant, biergarten, cafe, cafe, restauran..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,2024-01-22 16:15:58,2024-01-22 16:46:47,18,30.816667,POINT (100.48246 13.78123),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4bad1fffff,Bangkok,Bangkok#Bang Phlat,Bangkok#Bang Phlat#Bang Bamru,...,"[1343838845.0, 1273993729.0, 376303742.0, 3425...","[โรงแรมธำรงค์อิน, Sunset Residence Pinklao, Ro...","[Thamrongin Hotel, None, Rose Town Hotel, None...","[None, None, โรสทาวน์ โฮเต็ล, None, None, None...","[hotel, hotel, hotel, hotel, hotel, hotel, hotel]","[630829443, 630831418, 630890934, 630984546, 3...","[ต้นตำรับไทย, รอยัล ปาร์ค, พรชัย, อินทนิล, Non...","[None, Royal Park, None, Inthanin, None, None,...","[ต้นตำรับไทย, รอยัล ปาร์ค, พรชัย, อินทนิล, Non...","[restaurant, restaurant, restaurant, cafe, res..."
71,2024-01-22 21:40:43,2024-01-23 09:15:34,34,694.850000,POINT (100.32376 13.81156),C8AF1957-7F70-421E-AB7D-817C5F9BA71E,8864a4a013fffff,Nonthaburi,Nonthaburi#Bang Kruai,Nonthaburi#Bang Kruai#Sala Klang,...,[1049501436.0],[MWIT guest house (หอ 14)],[None],[None],[hostel],"[1144432328, 1144432327, 1144432326, 114443232...","[None, None, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[None, None, None, None, None, None, None, Non...","[restaurant, restaurant, restaurant, restauran..."


In [31]:
# Adjust max_workers depending on your machine's available resources
with ThreadPoolExecutor(max_workers=2048) as executor:
    # Keep each future keyed by its file name so failures can be traced back
    food_and_beverages_futures = {
        file: executor.submit(
            annotate_tourism_trips_with_food_and_beverages,
            file,
            points_of_interest,
        )
        for file in os.listdir(ANNOTATED_TOURISM_TRIPS_DIRECTORY)
    }

# Leaving the `with` block waits for every task to finish. An exception raised
# inside a worker thread is only stored on its future, so we check each future
# explicitly; otherwise files that failed to be annotated would go unnoticed.
food_and_beverages_failures = {
    file: future.exception()
    for file, future in food_and_beverages_futures.items()
    if future.exception() is not None
}
if food_and_beverages_failures:
    raise RuntimeError(
        f"Failed to annotate {len(food_and_beverages_failures)} file(s) "
        f"with food and beverages: {food_and_beverages_failures}"
    )