In [8]:
import json
import pandas as pd
import os
import tqdm

In [9]:
# Top-level fields of an outing JSON document that the loading cell copies
# unchanged into the flattened record (a field missing from the document is
# stored as None). The order here is the order of the first output columns.
outing_direct_keys = [
    "document_id", "quality",
    "access_condition", "avalanche_signs", "condition_rating",
    "date_end", "date_start",
    "elevation_access", "elevation_down_snow", "elevation_max",
    "elevation_min", "elevation_up_snow",
    "frequentation", "glacier_rating",
    "height_diff_down", "height_diff_up",
    "hut_status", "length_total", "lift_status",
    "partial_trip", "participant_count", "public_transport",
    "hiking_rating", "snow_quality", "snow_quantity",
    "global_rating", "height_diff_difficulties",
    "engagement_rating", "ski_rating", "labande_global_rating",
]
# Entries read from the document's "cooked" section by the loading cell; each
# one is stored in the flattened record under the name "cooked_<key>" (None
# when the entry is absent).
cooked_keys = [
    "lang", "title", "description", "summary",
    "access_comment", "avalanches",
    "conditions", "conditions_levels",
    "hut_comment", "participants",
    "route_description", "timing", "weather",
]

In [10]:
# Directory that holds one JSON document per outing.
OUTINGS_DIR = "./data/outings"


def _flatten_outing(json_outing):
    """Flatten one parsed outing document into a single-level dict (one table row).

    Args:
        json_outing: Parsed outing JSON. The "activities", "geometry",
            "associations", "areas" and "cooked" entries are required; the
            fields listed in ``outing_direct_keys`` are optional.

    Returns:
        dict holding, in this insertion order: the direct fields, one
        ``activity_<name>`` flag per activity, ``geom``, the associated route
        ids and forum usernames, one entry per area type, the ``cooked_*``
        texts and the outing ``link``.

    Raises:
        KeyError: if one of the required entries is missing.
    """
    # General infos: a field missing from the document is stored as None
    outing = {key: json_outing.get(key) for key in outing_direct_keys}
    # Activities: only the activities present in the document get a True flag
    for activity in json_outing["activities"]:
        outing[f"activity_{activity}"] = True
    # Position
    outing["geom"] = json_outing["geometry"]["geom"]
    # Associated routes and users
    associations = json_outing["associations"]
    outing["associated_route_ids"] = [
        route["document_id"] for route in associations["routes"]
    ]
    outing["associated_forum_usernames"] = [
        user["forum_username"] for user in associations["users"]
    ]
    # Areas: keyed by area type, using the title of the first locale.
    # NOTE(review): when several areas share the same area_type, the last one
    # overwrites the previous ones.
    for area in json_outing["areas"]:
        outing[area["area_type"]] = area["locales"][0]["title"]
    # Text
    cooked = json_outing["cooked"]
    for cooked_key in cooked_keys:
        outing[f"cooked_{cooked_key}"] = cooked.get(cooked_key)
    # Add outing link
    outing["link"] = f"https://www.camptocamp.org/outings/{outing['document_id']}"
    return outing


outings = []
# os.listdir returns names in arbitrary order; sorting keeps the row order
# identical from one run (and one machine) to the next.
for outing_file in tqdm.tqdm(sorted(os.listdir(OUTINGS_DIR))):
    outing_path = os.path.join(OUTINGS_DIR, outing_file)
    # Skip sub-directories that may sit next to the outing files
    if not os.path.isfile(outing_path):
        continue
    # Read bytes so that json detects the UTF-8/16/32 encoding itself instead
    # of depending on the platform's default text encoding.
    with open(outing_path, "rb") as f:
        json_outing = json.load(f)
    outings.append(_flatten_outing(json_outing))

100%|██████████| 6089/6089 [00:03<00:00, 1740.06it/s]


In [11]:
# One row per flattened outing; a key that is absent from a record becomes a
# missing value in the corresponding column.
df = pd.DataFrame(data=outings)

In [12]:
# Show the table: the last expression of a cell is rendered as its output.
df

Unnamed: 0,document_id,quality,access_condition,avalanche_signs,condition_rating,date_end,date_start,elevation_access,elevation_down_snow,elevation_max,...,link,activity_hiking,activity_skitouring,activity_mountain_climbing,activity_ice_climbing,activity_paragliding,activity_snowshoeing,activity_rock_climbing,activity_mountain_biking,activity_via_ferrata
0,978489,fine,,[no],good,2018-03-10,2018-03-10,,,2810.0,...,https://www.camptocamp.org/outings/978489,,,,,,,,,
1,641687,medium,cleared,,good,2015-06-12,2015-06-12,1135.0,,2339.0,...,https://www.camptocamp.org/outings/641687,True,,,,,,,,
2,879144,medium,,[no],good,2017-04-28,2017-04-28,,1735.0,2831.0,...,https://www.camptocamp.org/outings/879144,,True,,,,,,,
3,185409,medium,,,,2008-04-14,2008-04-14,1970.0,,2702.0,...,https://www.camptocamp.org/outings/185409,True,,,,,,,,
4,1531141,fine,,[no],good,2023-05-01,2023-05-01,1730.0,2170.0,2529.0,...,https://www.camptocamp.org/outings/1531141,,True,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6084,1603048,fine,,[no],average,2023-12-29,2023-12-29,1428.0,,2712.0,...,https://www.camptocamp.org/outings/1603048,,,,,,,,,
6085,1070679,great,,,good,2019-01-13,2019-01-13,1000.0,,1989.0,...,https://www.camptocamp.org/outings/1070679,True,,,,,,,,
6086,1472715,fine,,,excellent,2022-10-05,2022-10-05,1800.0,,1941.0,...,https://www.camptocamp.org/outings/1472715,True,,,,,,True,,
6087,797869,medium,,,,2016-09-02,2016-09-02,1300.0,,2939.0,...,https://www.camptocamp.org/outings/797869,True,,,,,,,,


In [13]:
# Make sure the output directory exists. makedirs(..., exist_ok=True) also
# creates a missing "./data" parent and does not fail when the directory is
# already there, which removes the race in a check-then-create sequence.
os.makedirs(os.path.join("./data", "loader"), exist_ok=True)

In [14]:
# Write the flattened outings to CSV, leaving out the DataFrame index column.
outings_csv_path = "./data/loader/outings.csv"
df.to_csv(path_or_buf=outings_csv_path, index=False)