# Defi Wind - Results Download

Copyright 2024 Michael George (AKA Logiqx).

This file is part of [defi-results](https://github.com/Logiqx/defi-results) and is distributed under the terms of the GNU General Public License.

defi-results is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

defi-results is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with defi-results. If not, see <https://www.gnu.org/licenses/>.

In [1]:
import os
import sys
import datetime
import json
import csv

import urllib.request

import unicodedata
import re

## Constants

In [2]:
CONFIG_DIR = 'config'
DATA_DIR = 'data'
JSON_DIR = 'json'
CSV_DIR = 'csv'

COUNTRY_NAME = 'Name'
ALPHA_2_CODE = 'Alpha-2 Code'

EVENTS_CONFIG = 'events.json'
COUNTRIES_CSV = 'countries.csv'
SAILNOS_CSV = 'sailnos.csv'

EVENT_NAME = 'name'
EVENT_YEAR = 'year'
EVENT_FIELDS = 'fields'
EVENT_RACES = 'races'
EVENT_PERSONS = 'persons'

RACE_NAME = 'name'
RACE_URL = 'url'

RESULTS_LIST = 'list'
RESULTS_FIELDS = 'Fields'
RESULTS_DATA = 'data'

FIELD_LABEL = 'Label'
FIELD_RACE_NO = 'RaceNo'
FIELD_PLACE = 'Place'
FIELD_TALLY = 'Tally'
FIELD_NAME = 'HelmName'
FIELD_SAIL = 'SailNo'
FIELD_NAT = 'Nat'
FIELD_SEX = 'HelmSex'
FIELD_CLASS = 'Class'
FIELD_AGE_GROUP = 'HelmAgeGroup'
FIELD_ELAPSED = 'Elapsed'
FIELD_POINTS = 'Points'
FIELD_CODE = 'Code'

CRAFT_OPEN_FR = ['Libre']
CRAFT_OPEN_EN = 'Open'

CRAFT_FIN_FR = ['Aileron']
CRAFT_FIN_EN = 'Fin'

FULL_REFRESH = True

In [3]:
def loadEvents():
    '''Read the events configuration (JSON) from the project's config folder'''

    configPath = os.path.join(projdir, CONFIG_DIR, EVENTS_CONFIG)

    # Parse straight from the open file handle
    with open(configPath, encoding='utf-8') as handle:
        return json.load(handle)


def loadCountries():
    '''Read the countries CSV and index every row by its alpha-2 code'''

    csvPath = os.path.join(projdir, CONFIG_DIR, COUNTRIES_CSV)

    # Each value is the complete CSV row (a dict keyed by column heading)
    with open(csvPath, 'r', encoding='utf-8') as handle:
        return {row[ALPHA_2_CODE]: row for row in csv.DictReader(handle)}


def loadSailNos():
    '''Read the sail numbers CSV and map upper-cased helm name to sail number'''

    csvPath = os.path.join(projdir, CONFIG_DIR, SAILNOS_CSV)

    # Names are upper-cased so that lookups can ignore case
    with open(csvPath, 'r', encoding='utf-8') as handle:
        return {row[FIELD_NAME].upper(): row[FIELD_SAIL] for row in csv.DictReader(handle)}

## Data Cleansing

In [4]:
def cleanseValue(fieldName, value):
    '''Cleanse a raw results value according to the field it belongs to.

    fieldName -- field being processed (compared against the FIELD_* constants);
                 values for any other field are returned untouched
    value     -- raw string taken from the results data

    Returns the cleansed string. For FIELD_NAT the code extracted from the
    image name is looked up in the module-level countryCodes table, so a
    KeyError is raised when that code is not present in the table.
    '''

    # Strip trailing non-digit characters from place
    if fieldName == FIELD_PLACE:
        value = re.sub(r"[^0-9]*$", "", value)

    # Strip leading non-digit characters from tally number
    elif fieldName == FIELD_TALLY:
        value = re.sub(r"^[^0-9]*", "", value)

    # Convert name to Latin-1, ignoring unsupported characters
    elif fieldName == FIELD_NAME:
        value = value.encode('latin-1', 'ignore').decode('latin-1')

    # Extract nationality from image name
    elif fieldName == FIELD_NAT:
        value = value.split('/')[-1]
        value = os.path.splitext(value)[0]
        value = re.sub(r"_.*", "", value).upper()
        value = countryCodes[value][COUNTRY_NAME]

    # Remove the trailing " (...)" placing from sex, class and age group
    elif fieldName in (FIELD_SEX, FIELD_CLASS, FIELD_AGE_GROUP):
        # Raw string: "\(" is not a valid escape in an ordinary string literal
        value = re.sub(r" \(.*\)$", "", value)

        # Translate the French class names to English (fin / open)
        if fieldName == FIELD_CLASS:
            if value in CRAFT_OPEN_FR:
                value = CRAFT_OPEN_EN
            elif value in CRAFT_FIN_FR:
                value = CRAFT_FIN_EN

    # Replace 0,7 with 0.7
    elif fieldName == FIELD_POINTS:
        value = value.replace(',', '.')

    return value

In [5]:
def patchValue(event, tally, fieldName, value):
    '''Patch data value - e.g. industry places.

    Looks up event[EVENT_PERSONS][tally][fieldName] and returns it when it
    exists; otherwise the supplied value is returned unchanged.

    event     -- event configuration
    tally     -- tally used as the key into the event's persons table
    fieldName -- name of the field being processed
    value     -- value to fall back on when no patch is configured
    '''

    try:
        return event[EVENT_PERSONS][tally][fieldName]
    except (LookupError, TypeError):
        # No persons table, no entry for this tally, or no override for this
        # field - keep the original value. Only lookup failures are treated as
        # "no patch"; anything else (e.g. KeyboardInterrupt) now propagates.
        return value

## Results Parser

In [6]:
def getRaceFields(event, jsonData):
    '''Work out which results column holds each configured event field'''

    # A blank label is treated as the nationality column ('Nat.')
    columnLabels = [column[FIELD_LABEL] or 'Nat.' for column in jsonData[RESULTS_LIST][RESULTS_FIELDS]]

    columnByField = {}
    for fieldName, candidates in event[EVENT_FIELDS].items():
        # First alias found in the labels wins; None when no alias matches.
        # The +1 skips the extra leading column (entrant number) in the data rows.
        columnByField[fieldName] = next(
            (columnLabels.index(candidate) + 1 for candidate in candidates if candidate in columnLabels),
            None,
        )

    return columnByField


def getRaceData(event, raceNo, jsonData, fieldMappings):
    '''Build the output records for one race, grouped by country'''

    # These deliberately survive from one result row to the next
    lastPlace = None
    lastTally = None
    lastCountry = None
    recordsByCountry = {}

    for row in jsonData[RESULTS_DATA]:
        statusCode = ''
        record = [raceNo]

        for fieldName, column in fieldMappings.items():

            # Unmapped fields start out as an empty string
            rawValue = '' if column is None else row[column]

            # Tidy up data value
            value = cleanseValue(fieldName, rawValue)

            if fieldName == FIELD_PLACE:
                # Joint results only list the place once, so reuse the previous one
                value = value or lastPlace
                lastPlace = value
            elif fieldName == FIELD_TALLY:
                lastTally = value

            # Apply any configured patch before the country is remembered
            value = patchValue(event, lastTally, fieldName, value)

            if fieldName == FIELD_NAT:
                lastCountry = value
            elif fieldName == FIELD_ELAPSED and ':' not in value:
                # Not a time, so treat it as a code such as DNS, DNF, DSQ
                statusCode, value = value, ''
            elif fieldName == FIELD_POINTS and '.' not in value:
                # Not a points value, so treat it as a code such as DNS, DNF, DSQ
                statusCode, value = value, ''
            elif fieldName == FIELD_CODE:
                value = statusCode

            record.append(value)

        recordsByCountry.setdefault(lastCountry, []).append(record)

    return recordsByCountry

## Results Writer

In [7]:
def slugify(value):
    """
    Adapted from https://github.com/django/django/blob/master/django/utils/text.py
    Turn any value into a lowercase ASCII slug: accents are decomposed and
    non-ASCII characters dropped, characters other than alphanumerics,
    underscores, hyphens and whitespace are removed, runs of whitespace
    and/or hyphens collapse to a single hyphen, and leading / trailing
    hyphens and underscores are stripped.
    """
    decomposed = unicodedata.normalize('NFKD', str(value))
    asciiLower = decomposed.encode('ascii', 'ignore').decode('ascii').lower()
    wordsOnly = re.sub(r'[^\w\s-]', '', asciiLower)
    hyphenated = re.sub(r'[-\s]+', '-', wordsOnly)
    return hyphenated.strip('-_')


def getEventSlug(event):
    '''Return the slugified event name, used for event file and folder names'''

    eventName = event[EVENT_NAME]
    return slugify(eventName)


def getRaceSlug(race):
    '''Return the slugified race name, used for race filenames'''

    raceName = race[RACE_NAME]
    return slugify(raceName)


def writeJsonData(event, race, results):
    '''Write results data to JSON file.

    The file is written to <projdir>/data/json/<event slug>/<race slug>.json,
    creating the event folder if required.

    event   -- event configuration (supplies the folder name)
    race    -- race configuration (supplies the file name)
    results -- JSON-serialisable results to store
    '''

    # Serialise first so that nothing is created on disk if this fails
    jsonTxt = json.dumps(results, indent=2)

    # exist_ok avoids the check-then-create race of a separate exists() test
    jsonPath = os.path.join(projdir, DATA_DIR, JSON_DIR, getEventSlug(event))
    os.makedirs(jsonPath, exist_ok=True)

    # Explicit encoding, consistent with the other file handling in this module
    fn = os.path.join(jsonPath, getRaceSlug(race) + '.json')
    with open(fn, 'w', encoding='utf-8') as f:
        f.write(jsonTxt)


def _writeCsvFile(fn, header, rows):
    '''Write a header row followed by the data rows to a Latin-1 CSV file'''

    # newline='' leaves line endings to the csv module, as its documentation
    # requires; without it Windows output gets an extra '\r' on every row
    with open(fn, 'w', encoding='latin-1', newline='') as f:
        csvWriter = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
        csvWriter.writerow(header)
        csvWriter.writerows(rows)


def writeCsvData(event, header, eventData):
    '''Write data to CSV files.

    One file per country goes to <projdir>/data/csv/<event slug>/<country slug>.csv
    and the combined results go to <projdir>/data/csv/<event slug>.csv.

    event     -- event configuration (supplies the event slug)
    header    -- list of column headings written as the first row of every file
    eventData -- dict mapping country to its list of result records
    '''

    eventSlug = getEventSlug(event)
    csvPath = os.path.join(projdir, DATA_DIR, CSV_DIR)
    eventPath = os.path.join(csvPath, eventSlug)

    # Write individual country results (no folder is created when there are none)
    if eventData:
        os.makedirs(eventPath, exist_ok=True)

    for country, records in eventData.items():
        fn = os.path.join(eventPath, slugify(country) + '.csv')
        _writeCsvFile(fn, header, records)

    # Write full results - every country's records, in dictionary order
    worldData = [record for records in eventData.values() for record in records]

    os.makedirs(csvPath, exist_ok=True)
    _writeCsvFile(os.path.join(csvPath, eventSlug + '.csv'), header, worldData)

## Main Processing

In [8]:
def processEvent(event):
    '''Download every race of an event, then save the raw JSON and combined CSVs'''

    header = [FIELD_RACE_NO, *event[EVENT_FIELDS]]
    recordsByCountry = {}

    # Races are numbered from 1 in the order they appear in the config
    for raceNo, race in enumerate(event[EVENT_RACES], start=1):
        print(f'Downloading {event[EVENT_NAME]} - {race[RACE_NAME]}...')

        with urllib.request.urlopen(race[RACE_URL]) as response:
            jsonData = json.loads(response.read().decode('utf-8'))

        fieldMappings = getRaceFields(event, jsonData)
        raceData = getRaceData(event, raceNo, jsonData, fieldMappings)

        # Fold this race's records into the running per-country totals
        for country, records in raceData.items():
            recordsByCountry.setdefault(country, []).extend(records)

        # Keep a copy of the downloaded results, exactly as received
        writeJsonData(event, race, jsonData)

    writeCsvData(event, header, recordsByCountry)

In [9]:
# Script entry point: load the configuration, then process the selected events.
# Everything assigned here is a module-level global; the functions above read
# projdir and countryCodes directly rather than taking them as parameters.
if __name__ == '__main__':

    # Project root is the parent of the first entry on sys.path
    projdir = os.path.realpath(os.path.join(sys.path[0], '..'))

    # projdir must be set before these run, since each loader builds its path from it
    countryCodes = loadCountries()
    events = loadEvents()
    sailNos = loadSailNos()

    year = datetime.date.today().year

    # With FULL_REFRESH set every event is processed, otherwise only this year's
    for event in events:
        if event[EVENT_YEAR] == year or FULL_REFRESH:
            processEvent(event)

    print('All done!')

Downloading Défi Wind 2022 - Race 1...
Downloading Défi Wind 2022 - Race 2...
Downloading Défi Wind 2022 - Race 3...
Downloading Défi Wind 2022 - Race 4...
Downloading Défi Wind 2022 - Race 5...
Downloading Défi Wind 2023 - Race 1...
Downloading Défi Wind 2023 - Race 2...
Downloading Défi Wind 2023 - Race 3...
Downloading Défi Wind 2023 - Race 4...
All done!
