# Download Events

Download event sessions and event rankings from GP3S using a combination of web scraping and the API.

Copyright 2022 Michael George (AKA Logiqx).

This file is part of GP3S Query and is distributed under the terms of the GNU General Public License.

GP3S Query is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

GP3S Query is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with GP3S Query. If not, see https://www.gnu.org/licenses/.

## API URLs

gps-speedsurfing.com and its sister sites (foil, wing, kite and ice) are used for the current event lists (web scraping of HTML).

The GP3S API is hosted on Azure (azure-api.net) and returns JSON data.

In [1]:
# Path and query string of the events page, identical on every GP3S web site
_EVENTS_PAGE = 'default.aspx?mnu=events'

# (API name, web site host) pairs - the order here is the download order
_SITE_HOSTS = (
    ('gps-speed', 'www.gps-speedsurfing.com'),
    ('gps-foil', 'www.gps-foilsurfing.com'),
    ('gps-wing', 'www.gps-wingfoiling.com'),
    ('gps-kite', 'www.gps-kitesurfing.com'),
    ('gps-ice', 'www.gps-icesailing.com'),
)

# API name -> URL of the HTML events page that is scraped for the event list
SITES = {
    siteName: 'https://{}/{}'.format(siteHost, _EVENTS_PAGE)
    for siteName, siteHost in _SITE_HOSTS
}

# Speed categories requested from the event ranking service, one call each
SPEED_TYPES = [
    'speed_100', 'speed_10sec', 'speed_24hour',
    'speed_250', 'speed_2sec', 'speed_500',
    'speed_alpha_racing', 'speed_avg',
    'speed_halfhour', 'speed_hour', 'speed_mile',
]

# Host name that the HTTPS connection for GP3S API calls is opened against
API_URL = 'apimgp3s.azure-api.net'

## Import Common Modules

In [2]:
import os
import sys

from datetime import datetime

import http.client
import urllib.request
import urllib.parse

import json

from bs4 import BeautifulSoup

## Retrieve Event List

In [3]:
def getEvents(eventsUrl):
    """Get dictionary of events via simple web scraping

    Downloads the HTML events page and reads every anchor found inside the
    table whose id is "eventsTable".

    Parameters:
        eventsUrl: URL of the events page to scrape.

    Returns:
        Dictionary keyed by event ID (the first "val" query parameter of the
        anchor's href, as a string). Each value is a dictionary holding
        'eventname', 'start_date' and 'end_date', where both dates are
        formatted as 'YYYY-MM-DDT00:00:00'.

    Raises:
        KeyError: if an anchor's href has no "val" query parameter.
        ValueError: if a date cell does not start with a DD-MM-YYYY date.
    """

    def isoDate(td):
        """Convert the leading DD-MM-YYYY of a table cell to an ISO timestamp"""
        # Only the text before the first space is parsed; anything after it
        # in the cell is ignored
        day = datetime.strptime(td.text.split(' ')[0], '%d-%m-%Y')
        return day.strftime('%Y-%m-%dT00:00:00')

    events = {}

    domain = urllib.parse.urlparse(eventsUrl).netloc
    print('Downloading event list from {}'.format(domain))

    req = urllib.request.Request(eventsUrl, headers={'User-Agent': 'Mozilla'})

    # Parse inside the context manager so that the HTTP response is always
    # closed, even when reading or parsing fails
    with urllib.request.urlopen(req, timeout=15) as response:
        soup = BeautifulSoup(response, "lxml")

    # NOTE(review): a page without the events table leaves `table` as None and
    # the next line then fails with AttributeError - confirm that is acceptable
    table = soup.find("table", {"id": "eventsTable"})
    anchors = table.find_all("a")

    for anchor in anchors:
        event = {}
        event['eventname'] = anchor.text

        parsedUrl = urllib.parse.urlparse(anchor.get("href"))
        eventId = urllib.parse.parse_qs(parsedUrl.query)["val"][0]

        # The two cells following the name cell hold the start and end dates
        nameTd = anchor.parent
        startDateTd = nameTd.find_next('td')
        event['start_date'] = isoDate(startDateTd)
        endDateTd = startDateTd.find_next('td')
        event['end_date'] = isoDate(endDateTd)

        events[eventId] = event

    return events

## Retrieve Events from API

In [4]:
def getApiData(url, filename, sortKey=None):
    """Get data from the GP3S API

    Sends a GET request to API_URL, decodes the JSON response, optionally
    sorts it and writes the result to a file as indented JSON.

    The subscription key is the first line of the file 'keys/gp3s' under the
    module-level `projdir`, which must be set before this function is called.

    Parameters:
        url: request path passed to the HTTPS connection.
        filename: path of the JSON file to write; missing parent directories
            are created.
        sortKey: optional item key; when given, the decoded data is sorted by
            that key of each item.

    Returns:
        The decoded (and possibly sorted) data, or an empty list when the
        response body is empty.
    """

    keyPath = os.path.join(projdir, 'keys', 'gp3s')
    with open(keyPath) as f:
        key = f.readline().strip()

    headers = {
        'Ocp-Apim-Subscription-Key': key
    }

    conn = http.client.HTTPSConnection(API_URL)
    try:
        conn.request("GET", url, headers=headers)
        response = conn.getresponse()
        rawData = response.read()
    finally:
        # Release the connection even when the request or the read fails
        conn.close()

    # NOTE(review): the HTTP status is not checked, so whatever JSON body the
    # server sends back is treated as data - confirm this is intended
    data = json.loads(rawData) if rawData else []
    if sortKey:
        data = sorted(data, key=lambda x: x[sortKey])
    output = json.dumps(data, indent=2)

    # A bare file name has no directory part, in which case there is nothing
    # to create; exist_ok avoids the check-then-create race
    dirname = os.path.dirname(filename)
    if dirname:
        os.makedirs(dirname, exist_ok=True)

    with open(filename, "w") as f:
        f.write(output)

    return data

In [5]:
def getEventSessions(apiName, eventId, eventName, service='eventsessions'):
    """Download the sessions of one event via the GP3S API

    The request path is built from apiName, service and eventId. The result
    is sorted by 'session_id' and written to
    cache/<apiName>/<service>/<eventId>.json under the module-level `projdir`.
    eventName is only used for the progress message.

    Returns whatever getApiData returns for the request.
    """

    print('Downloading {} for {}'.format(service, eventName))

    cacheFile = os.path.join(
        projdir, 'cache', apiName, service, str(eventId) + '.json')
    endpoint = "/{}/{}/eventid/{}".format(apiName, service, eventId)

    return getApiData(endpoint, cacheFile, sortKey='session_id')

In [6]:
def getEventRankings(apiName, eventId, eventName, speedType, service='eventranking'):
    """Download the rankings of one event and speed type via the GP3S API

    The request path is built from apiName, service, eventId and speedType.
    The result is written unsorted to
    cache/<apiName>/<service>/<eventId>/<speedType>.json under the
    module-level `projdir`. eventName is only used for the progress message.

    Returns whatever getApiData returns for the request.
    """

    print('Downloading {} for {} - {}'.format(service, eventName, speedType))

    cacheFile = os.path.join(
        projdir, 'cache', apiName, service, str(eventId), str(speedType) + '.json')
    endpoint = "/{}/{}/eventid/{}/speed_type/{}".format(
        apiName, service, eventId, speedType)

    return getApiData(endpoint, cacheFile)

## Download Events

In [7]:
if __name__ == '__main__':
    # Project directory is the parent of sys.path[0]; getApiData and the
    # getEvent* functions read this module-level name for keys and cache paths
    projdir = os.path.realpath(os.path.join(sys.path[0], '..'))

    for apiName, eventsUrl in SITES.items():
        # Scrape the event list of this site and cache it as events.json
        events = getEvents(eventsUrl)
        output = json.dumps(events, indent=2)

        filename = os.path.join(projdir, 'cache', apiName, 'events.json')
        dirname = os.path.dirname(filename)
        # exist_ok avoids the check-then-create race of a separate exists test
        os.makedirs(dirname, exist_ok=True)

        with open(filename, "w") as f:
            f.write(output)

        print()

        for eventId, event in events.items():
            eventName = event['eventname']

            # The downloads are cached to disk by the functions themselves, so
            # their return values are not needed here
            getEventSessions(apiName, eventId, eventName)

            for speedType in SPEED_TYPES:
                getEventRankings(apiName, eventId, eventName, speedType)

            print()

    print('All done!')

Downloading event list from www.gps-speedsurfing.com

Downloading eventsessions for 2023 2023 Mauritius Speed Challenge
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_100
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_10sec
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_24hour
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_250
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_2sec
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_500
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_alpha_racing
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_avg
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_halfhour
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_hour
Downloading eventranking for 2023 2023 Mauritius Speed Challenge - speed_mile

Downloadin


Downloading eventsessions for 2022 Schildmeer Speed Challenge 2022-2023
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_100
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_10sec
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_24hour
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_250
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_2sec
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_500
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_alpha_racing
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_avg
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_halfhour
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - speed_hour
Downloading eventranking for 2022 Schildmeer Speed Challenge 2022-2023 - spe

Downloading eventranking for 2021Red Bull Bora Challenge - speed_250
Downloading eventranking for 2021Red Bull Bora Challenge - speed_2sec
Downloading eventranking for 2021Red Bull Bora Challenge - speed_500
Downloading eventranking for 2021Red Bull Bora Challenge - speed_alpha_racing
Downloading eventranking for 2021Red Bull Bora Challenge - speed_avg
Downloading eventranking for 2021Red Bull Bora Challenge - speed_halfhour
Downloading eventranking for 2021Red Bull Bora Challenge - speed_hour
Downloading eventranking for 2021Red Bull Bora Challenge - speed_mile

Downloading eventsessions for 2021 FFF SPEED CHALLENGE 
Downloading eventranking for 2021 FFF SPEED CHALLENGE  - speed_100
Downloading eventranking for 2021 FFF SPEED CHALLENGE  - speed_10sec
Downloading eventranking for 2021 FFF SPEED CHALLENGE  - speed_24hour
Downloading eventranking for 2021 FFF SPEED CHALLENGE  - speed_250
Downloading eventranking for 2021 FFF SPEED CHALLENGE  - speed_2sec
Downloading eventranking for 2021