# Series Module

Copyright 2024 Michael George (AKA Logiqx).

This file is part of [sse-results](https://github.com/Logiqx/sse-results) and is distributed under the terms of the GNU General Public License.

sse-results is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

sse-results is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with sse-results. If not, see <https://www.gnu.org/licenses/>.

## Initialisation

Basic approach to determine the project directory

In [1]:
import os
import sys
import glob

from datetime import datetime
import time

import json

import unicodedata
import re

from bs4 import BeautifulSoup

from operator import itemgetter

#from common import Printable, projdir
#from event import Event
#from constants import *

In [2]:
class Printable:
    '''Base class offering a debug representation and verbosity-gated console logging'''

    def __init__(self, verbosity=1):
        # Warnings are printed at verbosity 1 and above, info at 2 and above
        self.verbosity = verbosity

    def __repr__(self):
        '''Describe the object as its class followed by its instance attributes'''
        return f'{self.__class__}: {self.__dict__}'

    # The string form is the same as the debug representation
    __str__ = __repr__

    def _log(self, prefix, msg, minVerbosity):
        '''Print msg after prefix when verbosity is at least minVerbosity'''
        if self.verbosity >= minVerbosity:
            print(prefix, msg)

    def logInfo(self, msg):
        '''Print an informational message (verbosity 2 or more)'''
        self._log('INFO:', msg, 2)

    def logWarning(self, msg):
        '''Print a warning message (verbosity 1 or more)'''
        self._log('WARNING:', msg, 1)

    def logError(self, msg):
        '''Print an error message, regardless of verbosity'''
        print('ERROR:', msg)
        
# Project directory: the resolved parent of the first module search path entry (sys.path[0])
projdir = os.path.realpath(os.path.join(sys.path[0], '..'))

In [3]:
# Directory names used when building paths
EVENTS_DIR = 'events'
CONFIG_DIR = 'config'
SAILWAVE_DIR = 'sailwave'
DOCS_DIR = 'docs'

# JSON config file names (found inside a CONFIG_DIR directory)
APP_CONFIG = 'app.json'
EVENT_CONFIG = 'event.json'

## Django

Copy of Django's `slugify` function (ASCII-only variant)

In [4]:
def slugify(value):
    """
    Taken from https://github.com/django/django/blob/master/django/utils/text.py
    The value is converted to a string and reduced to ASCII (characters with no
    ASCII decomposition are dropped), then lowercased. Characters that aren't
    alphanumerics, underscores, whitespace or hyphens are removed. Runs of
    whitespace and / or hyphens collapse to a single hyphen, and leading /
    trailing hyphens and underscores are stripped.
    """
    # Decompose accented characters so that the ASCII encoding keeps the base letter
    asciiText = (
        unicodedata.normalize('NFKD', str(value))
        .encode('ascii', 'ignore')
        .decode('ascii')
    )
    cleaned = re.sub(r'[^\w\s-]', '', asciiText.lower())
    hyphenated = re.sub(r'[-\s]+', '-', cleaned)
    return hyphenated.strip('-_')

## Process Years

Process all available years

In [5]:
class Event():
    '''Cut-down, standalone event for a single year, identified by its directory'''

    def __init__(self, path, appConfig, existingNames=None, verbosity=1):
        '''Create an event for the directory at path

        path          -- event directory; its base name must be the year (e.g. ".../2023")
        appConfig     -- application config
        existingNames -- optional dict shared between events; a new dict is created when omitted
        verbosity     -- logging verbosity, as used by Printable

        Raises ValueError if the base name of path is not an integer.
        '''

        self.path = path
        self.year = int(os.path.basename(path))

        self.appConfig = appConfig

        # Kept so that error reporting can honour the verbosity requested by the caller
        self.verbosity = verbosity

        # A mutable default argument would be shared by every Event created without this argument
        self.existingNames = {} if existingNames is None else existingNames
        self.craftTypes = {}

        self.motions = {}
        self.sessions = {}

        self.initialised = False


    def loadConfig(self):
        '''Read event config from JSON into self.eventConfig and self.comment

        Raises json.JSONDecodeError (after logging the file name) if the file cannot be parsed.
        '''

        filename = os.path.join(self.path, CONFIG_DIR, EVENT_CONFIG)
        with open(filename, 'r', encoding='utf-8') as f:
            jsonTxt = f.read()

        try:
            self.eventConfig = json.loads(jsonTxt)
        except json.JSONDecodeError:
            # This class has no logging methods of its own, so report via a Printable
            logger = Printable(verbosity=self.verbosity)
            logger.logError('Could not parse {}'.format(filename))
            raise

        # The comment is optional
        self.comment = self.eventConfig['Comment'] if 'Comment' in self.eventConfig else None

In [6]:
def getRiderId(row):
    '''Build a rider ID for a table row - works for both Sailwave and native rows

    The ID is the slug of the name, sail-no and tally values (whichever are present in the row),
    joined by hyphens in that order.
    '''

    idFields = ('name', 'sail-no', 'tally')
    presentValues = [row[field] for field in idFields if field in row]

    return slugify('-'.join(presentValues))

In [7]:
def getNativeTitles(soup):
    '''Return the text of every h3 heading in the native HTML soup, in document order'''

    return [heading.text for heading in soup.find_all('h3')]

    
def getNativeColClasses(table):
    '''Return the first CSS class of every col element in a native table, in document order'''

    return [colTag['class'][0] for colTag in table.find_all('col')]

    
def getNativeRows(table, colClasses):
    '''Return the body rows of a native table as a dict keyed by rider ID

    Each row is a dict of column class to cell text, except for the nation column which holds
    the cell element itself. The rank, points and total cells are overwritten with "TBC" in
    the soup before they are read.
    '''

    seriesColumns = ('rank', 'points', 'total')
    rowsByRider = {}

    for tr in table.find('tbody').find_all('tr'):
        cells = {}

        for idx, td in enumerate(tr.find_all('td')):
            colClass = colClasses[idx]

            # Series values are all reset to TBC (this modifies the soup itself)
            if colClass in seriesColumns:
                td.string = 'TBC'

            # The nation cell is stored whole because it contains an image as well as text
            cells[colClass] = td if colClass == 'nation' else td.text

        rowsByRider[getRiderId(cells)] = cells

    return rowsByRider

    
def getNativeTables(results):
    '''Return the native tables found in results['soup'] as a dict keyed by title

    The n-th table is paired with the n-th h3 heading, and each value is the dict of rows
    produced by getNativeRows.
    '''

    soup = results['soup']
    headings = getNativeTitles(soup)

    tablesByTitle = {}
    for position, tableTag in enumerate(soup.find_all('table')):
        columns = getNativeColClasses(tableTag)
        tablesByTitle[headings[position]] = getNativeRows(tableTag, columns)

    return tablesByTitle

    
def loadNativeResults(event, seriesId):
    '''Load native results into dictionary

    event    -- event whose year selects the directory beneath the docs events folder
    seriesId -- series identifier; the lowercased ID is the HTML file name

    Returns a dict with 'filename' (path of the HTML file), 'soup' (parsed HTML)
    and 'tables' (see getNativeTables).
    '''

    results = {}

    results['filename'] = os.path.join(projdir, DOCS_DIR, EVENTS_DIR, str(event.year), seriesId.lower() + '.html')

    # Read as UTF-8 to match the encoding used when this file is written back by processEvent
    with open(results['filename'], 'r', encoding='utf-8') as f:
        html = f.read()

    results['soup'] = BeautifulSoup(html, 'html.parser')
    results['tables'] = getNativeTables(results)

    return results

In [8]:
def getSailwaveTitles(soup):
    '''Get Sailwave titles from HTML soup

    Returns the text of every h3 element with class "summarytitle", in document order,
    with a trailing " Fleet" removed.
    '''

    suffix = ' Fleet'
    titles = []

    for summaryTitle in soup.find_all('h3', {'class': 'summarytitle'}):
        title = summaryTitle.text

        # Only the trailing suffix is removed - " Fleet" elsewhere in the title is kept
        if title.endswith(suffix):
            title = title[:-len(suffix)]

        titles.append(title)

    return titles

    
def getSailwaveColClasses(table):
    '''Return the first CSS class of every col element in a Sailwave table

    Sailwave's "helmname" and "sailno" classes are translated to "name" and "sail-no";
    all other classes are returned unchanged.
    '''

    renames = {'helmname': 'name', 'sailno': 'sail-no'}

    sailwaveClasses = [colTag['class'][0] for colTag in table.find_all('col')]

    return [renames.get(sailwaveClass, sailwaveClass) for sailwaveClass in sailwaveClasses]

    
def getSailwaveRaceNames(table, colClasses):
    '''Return the header text of every race column in a Sailwave table

    The n-th th element of the table is matched against the n-th entry of colClasses.
    '''

    headers = table.find_all('th')

    return [header.text for position, header in enumerate(headers) if colClasses[position] == 'race']

    
def getSailwaveRows(table, colClasses):
    '''Return the "summaryrow" rows of a Sailwave table as a list of dicts

    Each dict maps column class to cell text for the non-race columns, plus a "races" entry
    holding the text of the race cells in column order.
    '''

    parsedRows = []

    body = table.find('tbody')
    for tr in body.find_all('tr', {'class': 'summaryrow'}):
        record = {}
        raceScores = []

        for position, td in enumerate(tr.find_all('td')):
            colClass = colClasses[position]

            if colClass != 'race':
                record[colClass] = td.text
            else:
                raceScores.append(td.text)

        record['races'] = raceScores
        parsedRows.append(record)

    return parsedRows

    
def getSailwaveTables(results):
    '''Return the Sailwave summary tables found in results['soup'] as a dict keyed by title

    The n-th "summarytable" is paired with the n-th Sailwave title. Each value is a dict
    holding the race names ("races") and the parsed rows ("rows").
    '''

    soup = results['soup']
    headings = getSailwaveTitles(soup)

    tablesByTitle = {}
    for position, tableTag in enumerate(soup.find_all('table', {'class': 'summarytable'})):
        columns = getSailwaveColClasses(tableTag)
        tablesByTitle[headings[position]] = {
            'races': getSailwaveRaceNames(tableTag, columns),
            'rows': getSailwaveRows(tableTag, columns),
        }

    return tablesByTitle

    
def loadSailwaveResults(event, seriesId):
    '''Read and parse the Sailwave HTML for a series

    The file is <event.path>/<SAILWAVE_DIR>/<seriesId>.html and is opened with the platform's
    default text encoding. Returns a dict with 'filename', 'soup' and 'tables'
    (see getSailwaveTables).
    '''

    htmlPath = os.path.join(event.path, SAILWAVE_DIR, seriesId + '.html')
    results = {'filename': htmlPath}

    with open(htmlPath) as htmlFile:
        results['soup'] = BeautifulSoup(htmlFile.read(), 'html.parser')
        results['tables'] = getSailwaveTables(results)

    return results

In [9]:
def blendResults(sailwaveResults, nativeResults):
    '''Copy speed-kts from the native results into the matching Sailwave rows

    Tables are matched by title and rows by rider ID. The Sailwave rows are updated in place
    and sailwaveResults is returned. A KeyError is logged and re-raised when a table is missing
    from the native results, or when the rider (or its speed-kts value) cannot be looked up.
    '''

    for tableTitle, swTable in sailwaveResults['tables'].items():
        try:
            nativeRows = nativeResults['tables'][tableTitle]
        except KeyError:
            Printable().logError('Table "{}" not found in native results'.format(tableTitle))
            raise

        for swRow in swTable['rows']:
            riderId = getRiderId(swRow)

            try:
                swRow['speed-kts'] = nativeRows[riderId]['speed-kts']
            except KeyError:
                Printable().logError('Rider "{}" not found in native results'.format(riderId))
                raise

    return sailwaveResults

In [10]:
def sortResults(results):
    '''Sort the rows of every table according to the rules for Prince of Speed 2023

    results -- dict with a 'tables' entry mapping title to a table; each table has 'rows',
               and each row needs 'nett', 'speed-kts' and 'races' (list of race score strings)

    Every row gains a 'sort' list and the rows of each table are sorted in place.
    Returns results. Raises IndexError if a race score contains no decimal number.
    '''

    # Use pre-compiled regex to extract points from race scores
    #   N.B. raw string - "\d" and "\." are not valid escape sequences in a normal string
    pattern = re.compile(r'\d+\.\d+')

    for title, table in results['tables'].items():
        for row in table['rows']:
            # Convert race scores which include codes such as DNC to numerics
            points = [float(pattern.findall(race)[0]) for race in row['races']]

            # A board's series score is the total of its heat scores after discarding its worst scores
            row['sort'] = [float(row['nett'])]

            # If there is a series score tie between two or more boards,
            # it shall be broken in favour of the board(s) with the fastest run during the competition
            #   N.B. multiplying by -1 avoids the need for reversed sorting for this one element
            row['sort'] += [float(row['speed-kts']) * -1]

            # If a tie remains between two or more boards, each board's heat scores,
            # including excluded scores, shall be listed in order of best to worst,
            # and at the first point(s) where there is a difference
            # the tie shall be broken in favour of the board(s) with the best score(s).
            # These scores shall be used even if some of them are excluded scores.
            row['sort'] += sorted(points)

            # If a tie still remains between two or more boards,
            # they shall be ranked in order of their scores in the last heat.
            # Any remaining ties shall be broken by using the tied boards' scores
            # in the next-to-last heat and so on until all ties are broken.
            # These scores shall be used even if some of them are excluded scores.
            row['sort'] += reversed(points)

        # Python's sort algorithm is more than happy to use the pre-prepared "sort" lists
        #   https://docs.python.org/3/howto/sorting.html
        #   https://docs.python.org/3/reference/expressions.html#value-comparisons
        table['rows'].sort(key=itemgetter('sort'))

        # TODO - remove DEBUG
        print(f'    {title}')

    return results

In [11]:
# Single dict handed to every Event created by processEvents
existingNames = {}

def processEvent(event):
    '''Process event series

    Loads the event config and, for every entry under "Series", blends the Sailwave results
    with the native results, sorts them and writes the native HTML back to disk.
    Events without a "Series" entry are left untouched.
    '''

    event.loadConfig()

    if 'Series' not in event.eventConfig:
        return

    print(f'Processing {event.path}...')

    for seriesId, seriesInfo in event.eventConfig['Series'].items():
        print(f'  {seriesInfo["Name"]}')

        # Blend sailwave and native results
        sailwaveResults = loadSailwaveResults(event, seriesId)
        nativeResults = loadNativeResults(event, seriesId)

        results = blendResults(sailwaveResults, nativeResults)
        results = sortResults(results)

        # Overwrite native results - indentation is lost and attribute orders change, but it's quick and easy!
        #   N.B. done inside the loop so that every series is saved, not just the last one
        with open(nativeResults['filename'], 'w', encoding='utf-8') as file:
            file.write(str(nativeResults['soup']))

    print()


def processEvents():
    '''Process every event directory that is due a refresh

    Event directories are the four-digit names (first digit 1 or 2) beneath the events folder.
    The last one in sorted order is treated as the latest event and uses the "Latest" section
    of the app config; all others use the "History" section. An event is only processed when
    the "Refresh" setting of its section is truthy.
    '''

    eventsDir = os.path.join(projdir, EVENTS_DIR)
    eventPaths = sorted(glob.glob(os.path.join(eventsDir, '[1-2][0-9][0-9][0-9]')))

    # Nothing to do (and no "latest" year to determine) when there are no event directories
    if not eventPaths:
        logger = Printable()
        logger.logWarning('No event directories found in {}'.format(eventsDir))
        return

    currYear = os.path.basename(eventPaths[-1])
    for eventPath in eventPaths:
        eventYear = os.path.basename(eventPath)

        # Settings come from a different config section for the latest event
        settings = appConfig['Latest'] if eventYear == currYear else appConfig['History']
        if not settings['Refresh']:
            continue

        event = Event(eventPath, appConfig, existingNames=existingNames, verbosity=settings['Verbosity'])
        processEvent(event)

In [12]:
if __name__ == '__main__':
    pc1 = time.perf_counter()

    # Read main config
    filename = os.path.join(projdir, CONFIG_DIR, APP_CONFIG)
    with open(filename, 'r', encoding='utf-8') as f:
        jsonTxt = f.read()

    # Only a parse failure is reported as such - anything else propagates untouched
    try:
        appConfig = json.loads(jsonTxt)
    except json.JSONDecodeError:
        logger = Printable()
        logger.logError('Could not parse {}'.format(filename))
        raise

    # Process the required year(s)
    processEvents()

    pc2 = time.perf_counter()
    print("Reports completed in %0.2f seconds" % (pc2 - pc1))

Processing /home/jovyan/work/wst-results/events/2023...
  ISWC Speed Championship
    Windsurf Men
    Windsurf Women
    Youth Men
    Youth Women
    Windfoil Open

Reports completed in 0.07 seconds


## All Done!