# Series Module

Copyright 2024 Michael George (AKA Logiqx).

This file is part of [sse-results](https://github.com/Logiqx/sse-results) and is distributed under the terms of the GNU General Public License.

sse-results is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

sse-results is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with sse-results. If not, see <https://www.gnu.org/licenses/>.

## Initialisation

Import the required modules, then use a basic approach to determine the project directory (the parent of the script's directory)

In [1]:
import os
import sys
import glob

from datetime import datetime
import time

import json
import csv

import unicodedata
import re

from bs4 import BeautifulSoup

from operator import itemgetter

#from common import Printable, projdir
#from event import Event
#from constants import *

In [2]:
class Printable:
    '''Base class offering a debug-friendly text form and levelled console logging'''

    def __init__(self, verbosity=1):
        # Errors are always printed, warnings need verbosity >= 1, info needs verbosity >= 2
        self.verbosity = verbosity

    def __repr__(self):
        '''Show the class followed by the instance attributes'''
        return f"{self.__class__}: {self.__dict__}"

    # str() and repr() produce exactly the same text
    __str__ = __repr__

    def logInfo(self, msg):
        '''Print an informational message (verbosity 2 or higher)'''
        if self.verbosity >= 2:
            print('INFO:', msg)

    def logWarning(self, msg):
        '''Print a warning message (verbosity 1 or higher)'''
        if self.verbosity >= 1:
            print('WARNING:', msg)

    def logError(self, msg):
        '''Print an error message, regardless of verbosity'''
        print('ERROR:', msg)
        
# Project directory: the resolved parent of the first sys.path entry
# (presumably the directory of the running script - confirm when run from a notebook)
projdir = os.path.realpath(os.path.join(sys.path[0], os.pardir))

In [3]:
# Directory names, joined onto the project / event paths by the loaders in this file
EVENTS_DIR = 'events'
CONFIG_DIR = 'config'
SAILWAVE_DIR = 'sailwave'
DOCS_DIR = 'docs'

# JSON configuration file names (application-wide and per-event)
APP_CONFIG = 'app.json'
EVENT_CONFIG = 'event.json'

# CSV data file names
COUNTRIES_CSV = 'countries.csv'
ENTRANTS_CSV = 'entrants.csv'

# Country name used when an entrant's 'Country' field is blank
DEFAULT_COUNTRY = 'United Kingdom'

## Django

Copy / paste of Django's `slugify` function (ASCII-only variant)

In [4]:
def slugify(value):
    """
    ASCII-only slug generator, adapted from Django's slugify
    (https://github.com/django/django/blob/master/django/utils/text.py).

    The value is converted to a string, decomposed (NFKD) and reduced to ASCII,
    then lowercased. Characters that aren't alphanumerics, underscores,
    whitespace or hyphens are removed, and each run of whitespace / hyphens
    becomes a single hyphen. Leading and trailing hyphens and underscores
    are stripped from the result.
    """
    decomposed = unicodedata.normalize('NFKD', str(value))
    asciiOnly = decomposed.encode('ascii', 'ignore').decode('ascii')
    cleaned = re.sub(r'[^\w\s-]', '', asciiOnly.lower())
    hyphenated = re.sub(r'[-\s]+', '-', cleaned)
    return hyphenated.strip('-_')

## Process Years

Process all available years

In [5]:
def getRiderId(row):
    '''Build a rider ID (slug) from whichever of name, sail number and tally the row contains'''

    # Only the fields that are present contribute, always in this fixed order
    idFields = ('name', 'sail-no', 'tally')
    presentValues = [row[field] for field in idFields if field in row]

    return slugify('-'.join(presentValues))

In [6]:
class Event():
    '''A single event (one year): its directory, configuration and entrant nation flags'''

    def __init__(self, path, appConfig, countries, verbosity=1):
        '''
        path      - event directory; the final path component must be the year (e.g. ".../2024")
        appConfig - application configuration dictionary
        countries - country records keyed by country name
        verbosity - logging level used when this event reports problems
        '''

        self.path = path
        self.year = int(os.path.basename(path))

        # Event does not inherit from Printable, so the level is kept for ad-hoc loggers
        self.verbosity = verbosity

        self.appConfig = appConfig
        self.countries = countries

        self.entrantFlags = {}
        self.craftTypes = {}

        self.motions = {}
        self.sessions = {}

        self.initialised = False


    def loadConfig(self):
        '''
        Read event config from JSON into self.eventConfig

        Raises json.JSONDecodeError (after logging the filename) if the file is not valid JSON.
        '''

        filename = os.path.join(self.path, CONFIG_DIR, EVENT_CONFIG)
        with open(filename, 'r', encoding='utf-8') as f:
            jsonTxt = f.read()

        try:
            self.eventConfig = json.loads(jsonTxt)
        except json.JSONDecodeError:
            # Use a Printable logger; this class has no logError method of its own
            logger = Printable(verbosity=self.verbosity)
            logger.logError('Could not parse {}'.format(filename))
            raise


    def loadEntrantFlags(self):
        '''
        Read entrant flags from CSV

        Each entrant is keyed by a slug of their first name, family name, sail number and tally
        (whichever columns exist) and mapped to a ready-made HTML <td> showing the nation flag.
        Returns self.entrantFlags, which is updated in place.
        Raises KeyError if the CSV has no 'Country' column or the country name is unknown.
        '''

        idFields = ['First Name', 'Family Name', 'Sail No', 'Tally']

        filename = os.path.join(self.path, CONFIG_DIR, ENTRANTS_CSV)
        with open(filename, 'r', encoding='utf-8') as f:
            csvReader = csv.DictReader(f)
            for values in csvReader:
                riderDetails = [values[field] for field in idFields if field in values]
                riderId = slugify('-'.join(riderDetails))

                # A blank country falls back to the default
                countryName = values['Country'].strip() or DEFAULT_COUNTRY
                country = self.countries[countryName]
                nation = '<td><i class="flag flag-{}"></i>&nbsp;{}</td>'.format(country['Alpha-2 Code'], country['WSW Code'])

                self.entrantFlags[riderId] = nation

        return self.entrantFlags

In [7]:
def getNativeTitles(soup):
    '''Return the text of every <h3> element found in the native HTML soup'''

    return [heading.text for heading in soup.find_all('h3')]

    
def getNativeColClasses(table):
    '''Return the first CSS class of every <col> element in a native table'''

    return [col['class'][0] for col in table.find_all('col')]

    
def getNativeRows(table, colClasses):
    '''
    Parse the <tbody> rows of a native table into dictionaries keyed by rider ID

    colClasses gives the column name for each <td> position.
    Note that the rank / points / total cells of the soup are overwritten with 'TBC'.
    '''

    # These get a placeholder, making it obvious if subsequent processing is unsuccessful
    placeholderCols = ('rank', 'points', 'total')

    rowsByRider = {}

    for tr in table.find('tbody').find_all('tr'):
        cells = {}

        for position, td in enumerate(tr.find_all('td')):
            colClass = colClasses[position]

            if colClass in placeholderCols:
                td.string = 'TBC'

            # Nation keeps the whole cell markup (image and text), everything else is plain text
            cells[colClass] = str(td) if colClass == 'nation' else td.text

        rowsByRider[getRiderId(cells)] = cells

    return rowsByRider

    
def getNativeTables(results):
    '''Parse every <table> in the native soup, returning the parsed rows keyed by table title'''

    soup = results['soup']

    # The n-th <h3> title is taken to belong to the n-th <table>
    headings = getNativeTitles(soup)

    tablesByTitle = {}
    for position, htmlTable in enumerate(soup.find_all('table')):
        parsedRows = getNativeRows(htmlTable, getNativeColClasses(htmlTable))
        tablesByTitle[headings[position]] = parsedRows

    return tablesByTitle

    
def loadNativeResults(event, seriesId):
    '''
    Load native results into dictionary

    Reads <projdir>/<DOCS_DIR>/<EVENTS_DIR>/<year>/<seriesId in lowercase>.html and returns
    a dictionary holding the 'filename', the parsed 'soup' and the extracted 'tables'.
    '''

    results = {}

    # Use the shared DOCS_DIR constant rather than repeating the directory name here
    results['filename'] = os.path.join(projdir, DOCS_DIR, EVENTS_DIR, str(event.year), seriesId.lower() + '.html')
    with open(results['filename'], encoding='utf-8') as f:
        html = f.read()

    # The file is no longer needed once its text has been read
    results['soup'] = BeautifulSoup(html, 'html.parser')
    results['tables'] = getNativeTables(results)

    return results

In [8]:
def getSailwaveTitles(soup):
    '''
    Get Sailwave titles from HTML soup

    Returns the text of every <h3 class="summarytitle"> element,
    with a single trailing " Fleet" removed where present.
    '''

    suffix = ' Fleet'
    titles = []

    for summaryTitle in soup.find_all('h3', {'class': 'summarytitle'}):
        title = summaryTitle.text
        if title.endswith(suffix):
            # Slice off the suffix only; str.replace would also strip " Fleet" from mid-title
            title = title[:-len(suffix)]
        titles.append(title)

    return titles

    
def getSailwaveColClasses(table):
    '''Return the first CSS class of every <col>, with Sailwave names mapped to the native names'''

    # Sailwave class -> native class; anything not listed is passed through unchanged
    nativeNames = {
        'helmname': 'name',
        'helmagegroup': 'age',
        'sailno': 'sail-no',
    }

    colClasses = []
    for col in table.find_all('col'):
        sailwaveName = col['class'][0]
        colClasses.append(nativeNames.get(sailwaveName, sailwaveName))

    return colClasses

    
def getSailwaveRaceNames(table, colClasses):
    '''Return the header text of every <th> whose column position has the class "race"'''

    headers = table.find_all('th')

    return [th.text for position, th in enumerate(headers) if colClasses[position] == 'race']

    
def getSailwaveRows(table, colClasses):
    '''
    Parse the "summaryrow" rows of a Sailwave table into dictionaries keyed by rider ID

    Each row holds the plain text of the non-race columns, the list of race cells ('races')
    and a shortened, " - " separated form of those race cells ('points').
    '''

    rowsByRider = {}

    body = table.find('tbody')

    for tr in body.find_all('tr', {'class': 'summaryrow'}):
        details = {}
        raceCells = []

        # Race columns are collected into a list, all other columns are stored by name
        for position, td in enumerate(tr.find_all('td')):
            if colClasses[position] == 'race':
                raceCells.append(td.text)
            else:
                details[colClasses[position]] = td.text

        details['races'] = raceCells

        # Points is a shortened form of the results:
        # a cell containing a three-letter code is reduced to that code (bracketed if the cell was)
        shortScores = []
        for score in raceCells:
            found = re.findall('[A-Z][A-Z][A-Z]', score)
            if found:
                short = found[0]
                if '(' in score:
                    short = '(' + short + ')'
            else:
                short = score
            shortScores.append(short)

        details['points'] = ' - '.join(shortScores)

        rowsByRider[getRiderId(details)] = details

    return rowsByRider

    
def getSailwaveTables(results):
    '''Parse every "summarytable" in the Sailwave soup, returning race names and rows keyed by title'''

    soup = results['soup']

    # The n-th summary title is taken to belong to the n-th summary table
    headings = getSailwaveTitles(soup)

    tablesByTitle = {}
    for position, htmlTable in enumerate(soup.find_all('table', {'class': 'summarytable'})):
        colClasses = getSailwaveColClasses(htmlTable)
        raceNames = getSailwaveRaceNames(htmlTable, colClasses)
        parsedRows = getSailwaveRows(htmlTable, colClasses)

        tablesByTitle[headings[position]] = {'races': raceNames, 'rows': parsedRows}

    return tablesByTitle

    
def loadSailwaveResults(event, seriesId):
    '''
    Read <event path>/<SAILWAVE_DIR>/<seriesId>.html (latin-1) and parse it

    Returns a dictionary holding the 'filename', the parsed 'soup' and the extracted 'tables'.
    '''

    htmlPath = os.path.join(event.path, SAILWAVE_DIR, seriesId + '.html')
    results = {'filename': htmlPath}

    with open(htmlPath, encoding='latin-1') as f:
        results['soup'] = BeautifulSoup(f.read(), 'html.parser')
        results['tables'] = getSailwaveTables(results)

    return results

In [9]:
def copySpeeds(nativeResults, sailwaveResults, entrantFlags):
    '''
    Add 'speed-kts' and 'nation' to every Sailwave row, then return sailwaveResults

    Rows whose points contain a digit take their speed from the native table, others get '0.0'.
    The nation comes from the native table when the rider is listed there,
    otherwise from entrantFlags. A missing table or rider is logged and the KeyError re-raised.
    '''

    for title, sailwaveTable in sailwaveResults['tables'].items():
        try:
            nativeTable = nativeResults['tables'][title]
        except KeyError:
            Printable().logError('Table "{}" not found in native results'.format(title))
            raise

        for rider, row in sailwaveTable['rows'].items():
            try:
                # Any digit in the points means the rider has at least one numeric score
                hasScore = re.search('[0-9]', row['points']) is not None
                row['speed-kts'] = nativeTable[rider]['speed-kts'] if hasScore else '0.0'

                # TODO - fix missing entries such as cedric-bordes-fra-91
                row['nation'] = nativeTable[rider]['nation'] if rider in nativeTable else entrantFlags[rider]

            except KeyError:
                Printable().logError('Rider "{}" not found in native results'.format(rider))
                raise

    return sailwaveResults

In [10]:
def sortResults(results):
    '''
    Prepare results for sorting according to rules for Prince of Speed 2023

    Every row in every table gains a 'sort' list (nett score, negated best speed, race scores
    from best to worst, race scores from last to first, name) and each table gains a 'sorted'
    list of its rows ordered by that key. The title of each table is printed as it is processed.
    Returns the same results dictionary, updated in place.
    Raises IndexError if a race cell does not contain a decimal score.
    '''

    # Use pre-compiled regex to extract points from race scores
    #   N.B. raw string - '\d' in a normal string literal is an invalid escape sequence
    pattern = re.compile(r'\d+\.\d+')

    for title, table in results['tables'].items():
        rows = list(table['rows'].values())
        for row in rows:
            # Convert race scores which include codes such as DNC to numerics
            points = [float(pattern.findall(race)[0]) for race in row['races']]

            # A board’s series score is the total of its heat scores after discarding its worst scores
            row['sort'] = [float(row['nett'])]

            # If there is a series score tie between two or more boards,
            # it shall be broken in favour of the board(s) with the fastest run during the competition
            #   N.B. multiplying by -1 avoids the need for reversed sorting for this one element
            row['sort'] += [float(row['speed-kts']) * -1]

            # If a tie remains between two or more boards, each board’s heat scores,
            # including excluded scores, shall be listed in order of best to worst,
            # and at the first point(s) where there is a difference
            # the tie shall be broken in favour of the board(s) with the best score(s).
            # These scores shall be used even if some of them are excluded scores.
            row['sort'] += sorted(points)

            # If a tie still remains between two or more boards,
            # they shall be ranked in order of their scores in the last heat.
            # Any remaining ties shall be broken by using the tied boards’ scores
            # in the next-to-last heat and so on until all ties are broken.
            # These scores shall be used even if some of them are excluded scores.
            row['sort'] += reversed(points)

            # Sort DNC / DNS competitors by name
            row['sort'] += [row['name']]

        # Python's sort algorithm is more than happy to use the pre-prepared "sort" lists
        #   https://docs.python.org/3/howto/sorting.html
        #   https://docs.python.org/3/reference/expressions.html#value-comparisons
        rows.sort(key=itemgetter('sort'))

        table['sorted'] = rows

        print(f'    {title}')

    return results

In [11]:
def indexTables(soup):
    '''Return one entry per <table>: its title, its soup element and its list of column classes'''

    # The n-th <h3> title is taken to belong to the n-th <table>
    headings = getNativeTitles(soup)

    index = []
    for position, htmlTable in enumerate(soup.find_all('table')):
        colClasses = [col['class'][0] for col in htmlTable.find_all('col')]
        index.append({'title': headings[position], 'soup': htmlTable, 'cols': colClasses})

    return index

    
def getDummyRow(soup, table):
    '''Build an empty <tr> holding one <td> per <th> of the table, separated by newlines'''

    # The header cells dictate how many data cells are required
    columnCount = len(table.find_all('th'))

    row = soup.new_tag('tr')

    # Every cell is preceded by a newline...
    for _ in range(columnCount):
        row.append(soup.new_string('\n'))
        row.append(soup.new_tag('td'))

    # ...and a final newline follows the last cell
    row.append(soup.new_string('\n'))

    return row

    
def copyResults(sailwaveResults, nativeResults):
    '''
    Copy sorted sailwave results to native results soup

    For every <table> in the native soup, the table of the same title is looked up in
    sailwaveResults['tables'] and its 'sorted' rows are written into the cells, after
    removing surplus rows or appending dummy rows so that the row counts match.
    The native soup is modified in place; nothing is returned.
    Raises KeyError if a title or column is missing from the sailwave results.
    '''
    
    soup = nativeResults['soup']
    tableIdx = indexTables(soup)

    tables = soup.find_all('table')
    for i, table in enumerate(tables):
        title = tableIdx[i]['title']
        cols = tableIdx[i]['cols']
    
        sailwaveTable = sailwaveResults['tables'][title]
        sortedResults = sailwaveTable['sorted']
        
        # Ensure the number of rows is correct
        #   N.B. the first <tr> is skipped - presumably the header row (TODO confirm)
        trs = table.find_all('tr')[1:]

        # Decompose rows that are not required
        if len(trs) > len(sortedResults):
            for tr in trs[len(sortedResults):]:
                tr.decompose()

        # Append rows that are missing
        if len(trs) < len(sortedResults):
            tbody = table.find('tbody')
            numDummyRows = len(sortedResults) - len(trs)
            # N.B. this loop re-uses the name "i" of the outer loop;
            # title and cols have already been read and the outer loop re-assigns it
            for i in range(numDummyRows):
                dummyRow = getDummyRow(soup, table)
                tbody.append(dummyRow)
                lf = soup.new_string('\n')
                tbody.append(lf)

        # Replace all of the results
        #   N.B. the rows are queried again because they may have been removed / appended above
        rank = 1
        for j, tr in enumerate(table.find_all('tr')[1:]):
            for k, td in enumerate(tr.find_all('td')):
                if cols[k] == 'rank':
                    td.string = str(rank)
                    # The rank only advances past a row whose last column holds a value above zero,
                    # so rows after a zero value show the same rank
                    #   N.B. the last column is presumably the speed - TODO confirm
                    if float(sortedResults[j][cols[-1]]) > 0.0:
                        rank += 1
                elif cols[k] == 'nation':
                    # Nation is stored as the markup of a whole cell, so the <td> itself is replaced
                    td.replace_with(BeautifulSoup(sortedResults[j][cols[k]], "html.parser"))
                elif cols[k] == 'speed-kts' and float(sortedResults[j][cols[k]]) == 0.0:
                    # A speed of zero is shown as not available
                    td.string = 'n/a'
                else:
                    td.string = sortedResults[j][cols[k]]

In [12]:
def processEvent(event):
    '''Refresh the native results of every series in the event config that is flagged "Refresh"'''

    event.loadConfig()

    # Events without any series have nothing to process
    if 'Series' not in event.eventConfig:
        return

    print(f'Processing {event.path}...')

    # Blend sailwave and native results
    for seriesId, seriesInfo in event.eventConfig['Series'].items():
        if not ('Refresh' in seriesInfo and seriesInfo['Refresh']):
            continue

        print(f'  {seriesInfo["Name"]}')

        entrantFlags = event.loadEntrantFlags()

        sailwaveResults = loadSailwaveResults(event, seriesId)
        nativeResults = loadNativeResults(event, seriesId)

        copySpeeds(nativeResults, sailwaveResults, entrantFlags)
        sortResults(sailwaveResults)
        copyResults(sailwaveResults, nativeResults)

        # Overwrite native results - indentation is lost and attribute orders change, but it's quick and easy!
        with open(nativeResults['filename'], 'w', encoding='utf-8') as file:
            file.write(str(nativeResults['soup']))

        print()


def processEvents(countries):
    '''
    Process every event directory whose name starts with a four digit year (1000 to 2999)

    The last directory (in sorted order) is treated as the latest event and is controlled by
    the 'Latest' section of the app config; all others are controlled by the 'History' section.
    An event is only processed when the 'Refresh' setting of its section is truthy.

    N.B. appConfig is read from the module-level variable set by the main script.

    countries - country records keyed by country name, passed on to each Event
    '''

    eventsDir = os.path.join(projdir, EVENTS_DIR)
    eventPaths = sorted(glob.glob(os.path.join(eventsDir, '[1-2][0-9][0-9][0-9]*')))

    # Without this check an empty events directory would fail with an IndexError below
    if not eventPaths:
        logger = Printable()
        logger.logWarning('No events found in {}'.format(eventsDir))
        return

    latestEvent = os.path.basename(eventPaths[-1])
    for eventPath in eventPaths:
        currentEvent = os.path.basename(eventPath)

        # Pick the config section that applies to this event
        section = 'Latest' if currentEvent == latestEvent else 'History'

        if appConfig[section]['Refresh']:
            verbosity = appConfig[section]['Verbosity']

            event = Event(eventPath, appConfig, countries, verbosity=verbosity)
            processEvent(event)

In [13]:
def loadCountries():
    '''Read the countries CSV from the project config directory, keyed by the "Name" column'''

    csvPath = os.path.join(projdir, CONFIG_DIR, COUNTRIES_CSV)

    with open(csvPath, 'r', encoding='utf-8') as f:
        # Each record is the full CSV row; a repeated name replaces the earlier record
        return {record['Name']: record for record in csv.DictReader(f)}

In [14]:
if __name__ == '__main__':
    # Script entry point: load the app config and countries, process the events, report the time taken
    pc1 = time.perf_counter()

    # Read main config
    #   N.B. appConfig must remain a module-level name because processEvents reads it as a global
    filename = os.path.join(projdir, CONFIG_DIR, APP_CONFIG)
    with open(filename, 'r', encoding='utf-8') as f:
        jsonTxt = f.read()

    try:
        appConfig = json.loads(jsonTxt)
    except json.JSONDecodeError:
        # Only report parse failures; a bare except would also claim this for Ctrl-C
        logger = Printable()
        logger.logError('Could not parse {}'.format(filename))
        raise

    countries = loadCountries()

    # Process the required year(s)
    processEvents(countries)

    pc2 = time.perf_counter()
    print("Reports completed in %0.2f seconds" % (pc2 - pc1))

Processing /home/jovyan/work/wsw-results/events/2008...
Processing /home/jovyan/work/wsw-results/events/2009...
Processing /home/jovyan/work/wsw-results/events/2010...
Processing /home/jovyan/work/wsw-results/events/2011...
Processing /home/jovyan/work/wsw-results/events/2012...
Processing /home/jovyan/work/wsw-results/events/2013...
Processing /home/jovyan/work/wsw-results/events/2014...
Processing /home/jovyan/work/wsw-results/events/2015...
Processing /home/jovyan/work/wsw-results/events/2016...
Processing /home/jovyan/work/wsw-results/events/2017...
Processing /home/jovyan/work/wsw-results/events/2018...
Processing /home/jovyan/work/wsw-results/events/2019...
Processing /home/jovyan/work/wsw-results/events/2020...
Processing /home/jovyan/work/wsw-results/events/2021...
Processing /home/jovyan/work/wsw-results/events/2022...
Processing /home/jovyan/work/wsw-results/events/2023...
Processing /home/jovyan/work/wsw-results/events/2024...
  UKWA Speed Championship
    Sailboard
    Wing

## All Done!