# Hiking difficulty auto-estimation

The goal is to build an AI that can estimate how hard a hike is from its position or from an image we provide (and, of course, from its path).

### Step 1 : Import modules

In [4]:
import pandas as pd
from bs4 import BeautifulSoup
import json

### Step 2: Fetch all the hiking data from a local .csv file

The CSV file can be downloaded from Kaggle: https://www.kaggle.com/datasets/roccoli/gpx-hike-tracks/data

In [5]:
# Load the whole hike table from the local CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer="gpx-tracks-from-hikr.org.csv")

### Step 3: Transform the data into usable data

We decided to organize our data in a dictionary. The data we loaded already behaves much like a dictionary, but not exactly, so converting it into a real dictionary was the quickest and most efficient approach.

In [6]:
# Build {column name: {row key: value}} for every column we keep.
# The column order below is the order the keys are inserted in.
dict_data = {
    column: data[column].to_dict()
    for column in (
        "_id",
        "length_3d",
        "user",
        "start_time",
        "max_elevation",
        "bounds",
        "uphill",
        "moving_time",
        "end_time",
        "max_speed",
        "gpx",
        "difficulty",
        "min_elevation",
        "url",
        "downhill",
        "name",
        "length_2d",
    )
}

### Step 4: Create a class to manipulate every Hike

To manipulate each hike from our dictionary easily, we create a class that holds every attribute of the dictionary and offers several methods for the hike. For example, we define a method that computes the average coordinates of the hike, so that we can then look these coordinates up on a map and easily get the position.

In [7]:
class Hike():
    """One hike of the dataset, addressed by its row number.

    The constructor copies the value of every column for the selected row
    out of the module-level ``dict_data`` mapping
    (``dict_data[column][number]``). The methods derive coordinates,
    elevations and timings from the raw ``bounds`` and ``gpx`` strings.

    NOTE: parsing is done with plain ``str.split`` and therefore relies on
    the exact text layout described in each method; these are not general
    GPX/XML parsers. A GPX string in which the words "ele", "time" or
    "trkpt" occur outside their tags will be mis-parsed.
    """

    def __init__(self, number):
        """Load the hike stored under key ``number`` in each column.

        Args:
            number: row key used to index every column of ``dict_data``.

        Raises:
            KeyError: if ``number`` is missing from a column dictionary.
        """
        self.number        = number
        self._id           = dict_data["_id"][number]
        self.length_3d     = dict_data["length_3d"][number]
        self.user          = dict_data["user"][number]
        self.start_time    = dict_data["start_time"][number]
        self.max_elevation = dict_data["max_elevation"][number]
        self.bounds        = dict_data["bounds"][number]
        self.uphill        = dict_data["uphill"][number]
        self.moving_time   = dict_data["moving_time"][number]
        self.end_time      = dict_data["end_time"][number]
        self.max_speed     = dict_data["max_speed"][number]
        self.gpx           = dict_data["gpx"][number]
        self.difficulty    = dict_data["difficulty"][number]
        self.min_elevation = dict_data["min_elevation"][number]
        self.url           = dict_data["url"][number]
        self.downhill      = dict_data["downhill"][number]
        self.name          = dict_data["name"][number]
        self.length_2d     = dict_data["length_2d"][number]

    def get_coordinates(self):
        """Return the centre of the hike's bounding box.

        ``self.bounds`` must contain two bracketed ``[a, b]`` pairs: the
        minimum corner first, the maximum corner second. Within each pair
        ``a`` is read as the longitude and ``b`` as the latitude.

        Returns:
            dict: ``{"latitude": ..., "longitude": ...}``, each value being
            the mean of the corresponding minimum and maximum.
        """
        pieces = self.bounds.split("]")
        min_part, max_part = pieces[0], pieces[1]

        # Everything after the "[" of each piece is the "a, b" pair itself.
        min_long, min_lat = min_part.split("[")[1].split(",")
        max_long, max_lat = max_part.split("[")[1].split(",")

        return {
            "latitude": (float(min_lat) + float(max_lat)) / 2,
            "longitude": (float(min_long) + float(max_long)) / 2,
        }

    def get_elevations(self):
        """Return the elevation values found in ``self.gpx`` as floats.

        Returns:
            list[float]: one value per ``<ele>...</ele>`` pair, in order.
        """
        # Splitting on "ele" leaves the text between an opening and a
        # closing tag (">value</") at the odd indices; [1:-2] strips the
        # leading ">" and the trailing "</".
        chunks = self.gpx.split("ele")
        return [float(chunk[1:-2]) for chunk in chunks[1::2]]

    def get_times(self):
        """Return the time of day of each timestamp in ``self.gpx``.

        Returns:
            list[str]: the characters between the 11-character date prefix
            and the trailing "Z" of every ``<time>`` value, e.g.
            ``"11:07:54"`` for ``2018-05-11T11:07:54Z``. The date is
            discarded, and the first ``<time>`` value is skipped.
        """
        # Odd chunks look like ">YYYY-MM-DDTHH:MM:SSZ</": drop the 12
        # leading characters (">", date, "T") and the trailing "Z</".
        chunks = self.gpx.split("time")
        times = [chunk[12:-3] for chunk in chunks[1::2]]
        # NOTE(review): the first timestamp is dropped - presumably it is the
        # file-level metadata time rather than a track point; confirm
        # against the data.
        return times[1:]

    def get_relative_times(self):
        """Return the time elapsed between consecutive timestamps.

        Returns:
            list[tuple[int, int, int]]: one ``(hours, minutes, seconds)``
            tuple per entry of ``get_times()``; the first one is
            ``(0, 0, 0)``. Empty when there are no timestamps.
        """
        times = [self._compute_time(stamp) for stamp in self.get_times()]
        if not times:
            return []
        deltas = [
            self._substract_times(current, previous)
            for previous, current in zip(times, times[1:])
        ]
        return [(0, 0, 0)] + deltas

    def get_cumulative_times(self):
        """Return the time elapsed since the first timestamp.

        Returns:
            list[tuple[int, int, int]]: one ``(hours, minutes, seconds)``
            tuple per entry of ``get_times()``. Empty when there are no
            timestamps.
        """
        # Parse the GPX string once and reuse the result.
        times = [self._compute_time(stamp) for stamp in self.get_times()]
        if not times:
            return []
        start = times[0]
        return [self._substract_times(moment, start) for moment in times]

    def get_path_coordinates(self):
        """Return the coordinates of every track point in ``self.gpx``.

        Returns:
            list[tuple[float, float]]: the first and second quoted attribute
            values of each ``trkpt`` element, in order. With the
            ``lat="..." lon="..."`` layout this is (latitude, longitude) -
            the attribute order is assumed, not checked.
        """
        points = []
        # Odd chunks hold the attributes and content of one track point;
        # the even ones are the text between "</trkpt>" and the next point.
        for chunk in self.gpx.split("trkpt")[1::2]:
            quoted = chunk.split('"')
            points.append((float(quoted[1]), float(quoted[3])))
        return points

    def _compute_time(self, string):
        """Convert an ``"HH:MM:SS"`` string into an ``(h, m, s)`` int tuple."""
        parts = string.split(":")
        return int(parts[0]), int(parts[1]), int(parts[2])

    def _substract_times(self, time1, time2):
        """Return ``time1 - time2`` as an ``(hours, minutes, seconds)`` tuple.

        Both arguments are ``(h, m, s)`` times of day. Only the time of day
        is known, so a ``time1`` that is earlier on the clock than ``time2``
        is taken to be on the following day (the track crossed midnight)
        and the result wraps around 24 hours instead of going negative.
        Durations of 24 hours or more therefore cannot be represented.
        """
        elapsed = (
            (time1[0] - time2[0]) * 3600
            + (time1[1] - time2[1]) * 60
            + (time1[2] - time2[2])
        )
        elapsed %= 24 * 3600  # wrap a negative difference over midnight
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)
        return (hours, minutes, seconds)

### Step 5: Exporting the data

We realized that Jupyter is not well suited to web scraping because it runs asynchronously by default. Playwright, which will allow us to fetch data from a website, is not meant to be used in a notebook, so we export our data to a .json file and use it from a separate .py file. This also spares us from repeating the data loading and conversion steps, which take a significant amount of time.

In [8]:
# Persist the column dictionaries so that another script can reload them
# without parsing the CSV again.
# NOTE: JSON object keys are always strings, so the integer row keys of each
# column are written as "0", "1", ... and must be looked up as strings after
# reloading the file.
with open("hikes.json", mode="w") as json_out:
    json.dump(dict_data, fp=json_out)

In [20]:
# Write the raw GPX string of hike number 2 to a standalone .gpx file.
output_file = "output.gpx"
hike = Hike(2)

with open(output_file, mode="w", encoding="utf-8") as gpx_out:
    gpx_out.write(hike.gpx)
