In [1]:
import os

import gpxpy
import requests
from bs4 import BeautifulSoup

In [2]:
def get_gpx_track(track_number=None, timeout=30):
    """Download GPX tracks from the la flamme rouge website and parse them

    Parameters
    ------------
    track_number: list of track numbers to fetch (int); None or an empty list fetches nothing
    timeout: seconds to wait for the server before a request is abandoned

    Returns
    ------------
    A list with one parsed document (BeautifulSoup object) per track number, in the order given

    Raises
    ------------
    requests.HTTPError if the server answers with an error status (4xx/5xx)
    """
    if track_number is None:
        track_number = []
    # The headers are identical for every request, so build them once.
    headers = {'User-Agent': 'Mozilla/5'}
    soup = []
    for track in track_number:
        url = 'http://la-flamme-rouge.eu/maps/viewtrack/gpx/'+str(track)
        # Without an explicit timeout requests can block indefinitely on a stalled server.
        r = requests.get(url, allow_redirects=True, headers=headers, timeout=timeout)
        # Stop here on an error response instead of parsing an error page as if it were GPX.
        r.raise_for_status()
        soup.append(BeautifulSoup(r.text, 'html.parser'))
    return soup

def _parse_elevation(text):
    """Convert the text of an <ele> element to a number

    Whole-number text stays an int; decimal text such as '123.4' becomes a float.
    """
    text = text.strip()
    try:
        return int(text)
    except ValueError:
        return float(text)


def get_elevation(track_html):
    """Extract stage name and elevation information from the parsed tracks

    Parameters
    ------------
    track_html: list of parsed tracks (objects offering find_all('ele') and find('name'))

    Returns
    ------------
    A dictionary where the key is the stage name and the value is a list [positive_elevation_gain, max_elevation].
    Tracks sharing the same name overwrite each other; the last one wins.

    Raises
    ------------
    ValueError if a track contains no <ele> elements or an elevation is not numeric
    AttributeError if a track has no <name> element
    """
    elev_stage = {}
    for track in track_html:
        # GPX elevations may be decimal, so int() alone is not enough.
        elev = [_parse_elevation(ele.text) for ele in track.find_all('ele')]
        name = track.find('name').text
        if not elev:
            raise ValueError('no <ele> elements found for track ' + repr(name))
        # Sum only the climbs: consecutive pairs where the elevation increases.
        elev_change = sum(after - before
                          for before, after in zip(elev, elev[1:])
                          if after > before)
        elev_stage[name] = [elev_change, max(elev)]
    return elev_stage

In [3]:
%%time
track_html = get_gpx_track(track_number=[34572,386318,375307])

CPU times: user 4.34 s, sys: 176 ms, total: 4.52 s
Wall time: 9.69 s


In [4]:
# Maps each stage name to [positive elevation gain, maximum elevation].
elev_change = get_elevation(track_html)

In [5]:
# Show the {stage name: [positive elevation gain, maximum elevation]} mapping built above.
print(elev_change)

{'Lucca > Genova': [2107, 619], 'BinckBank Tour 2020 stage 3': [118, 116], 'Binckbanck Tour 2020 Stage 5': [2018, 159]}


In [10]:
def get_gpx_file(track_number=None, timeout=30):
    """Download GPX tracks from the la flamme rouge website and save them to disk

    Each track is written to "gpx/__track_number__.gpx" relative to the current
    working directory; the "gpx" directory is created when it does not exist.

    Parameters
    ------------
    track_number: list of track numbers to download (int); None or an empty list downloads nothing
    timeout: seconds to wait for the server before a request is abandoned

    Returns
    ------------
    void

    Raises
    ------------
    requests.HTTPError if the server answers with an error status (4xx/5xx)
    """
    if track_number is None:
        track_number = []
    # The headers are identical for every request, so build them once.
    headers = {'User-Agent': 'Mozilla/5'}
    for track in track_number:
        url = 'http://la-flamme-rouge.eu/maps/viewtrack/gpx/'+str(track)
        # Without an explicit timeout requests can block indefinitely on a stalled server.
        r = requests.get(url, allow_redirects=True, headers=headers, timeout=timeout)
        # Do not store an error page on disk under a .gpx name.
        r.raise_for_status()
        # Created lazily so that a call with nothing to download leaves the disk untouched.
        os.makedirs('gpx', exist_ok=True)
        # The context manager closes the handle even if the write fails.
        with open('gpx/'+str(track)+'.gpx', 'wb') as gpx_file:
            gpx_file.write(r.content)


def get_data_from_gpx(track_number=None):
    """Extract stage name, elevation gain, max elevation and distance from saved gpx files

    Reads "gpx/__track_number__.gpx" for every requested track number.
    Elevations are taken from the first segment of the first track only,
    while the distance is the 2D length of the whole file.

    Parameters
    ------------
    track_number: list of track numbers whose gpx files should be read (int);
        None or an empty list reads nothing

    Returns
    ------------
    A dictionary where the key is the stage name and the value is a list [positive_elevation_gain, max_elevation, distance].
    The distance is the 2D length divided by 1000 and rounded to 2 decimals.
    Files sharing the same stage name overwrite each other; the last one wins.

    Raises
    ------------
    ValueError if the first segment of a file holds no point with an elevation
    """
    if track_number is None:
        track_number = []
    data_race = {}
    for track in track_number:
        file_name = 'gpx/'+str(track)+'.gpx'
        # The context manager closes the file as soon as parsing is finished.
        with open(file_name, 'r') as gpx_file:
            gpx = gpxpy.parse(gpx_file)
        first_track = gpx.tracks[0]
        name = first_track.name
        # Points without an elevation carry None, which cannot be compared or summed.
        elev = [point.elevation
                for point in first_track.segments[0].points
                if point.elevation is not None]
        if not elev:
            raise ValueError('no elevation data found in ' + file_name)
        # Sum only the climbs: consecutive pairs where the elevation increases.
        elev_change = sum(after - before
                          for before, after in zip(elev, elev[1:])
                          if after > before)
        max_elev = max(elev)
        distance = round(gpx.length_2d()/1000, 2)
        data_race[name] = [elev_change, max_elev, distance]

    return data_race

In [11]:
%%time
get_gpx_file(track_number=[34572,386318,375307])

CPU times: user 153 ms, sys: 27.3 ms, total: 180 ms
Wall time: 5.25 s


In [12]:
# Reads the gpx files saved by get_gpx_file; the returned dict is shown as the cell output.
get_data_from_gpx(track_number=[34572, 386318, 375307])

{'Lucca > Genova': [2107.0, 619.0, 181.66],
 'BinckBank Tour 2020 stage 3': [118.0, 116.0, 8.12],
 'Binckbanck Tour 2020 Stage 5': [2018.0, 159.0, 187.64]}