# Course Module

## Initialisation

Import the standard library and project modules used below; the project directory (`projdir`) is provided by the `common` module

In [1]:
import os
import csv
import re

from datetime import datetime

import json
import unittest

from common import testExit, projdir

from name import Name
from entrant import Entrant
from period import Period
from speedrun import SpeedRun

from constants import *

## Course Class

Class to manage courses - start / end times

In [2]:
MIN_RUN_INTERVAL_SECS = 5

class Course(Period):
    def __init__(self, session, courseId, startTime, endTime, distance=500, verbosity=1):
        '''Create a course belonging to the given session

        session   - parent object; its date and weekend attributes are copied
        courseId  - course identifier; the first character selects the name in
                    appConfig['Courses'] and the remainder is appended to that
                    name unless it is '?'
        startTime - start of the period in which runs are accepted
        endTime   - end of the period in which runs are accepted
        distance  - value stored alongside every run loaded for this course
        verbosity - passed through to the parent class
        '''

        super().__init__(parent=session, verbosity=verbosity)

        # Session details are duplicated on the course for convenience
        self.date, self.weekend = session.date, session.weekend

        self.courseId = courseId

        # Course name is the configured base name plus an optional suffix
        courseType, courseSuffix = courseId[:1], courseId[1:]
        baseName = self.appConfig['Courses'][courseType]
        self.courseName = baseName if courseSuffix == '?' else baseName + ' ' + courseSuffix

        self.startTime, self.endTime = startTime, endTime
        self.distance = distance

        # Extreme initial values which are narrowed as timed runs are processed
        self.minStartTime, self.maxStartTime = '23:59:59', '00:00:00'


    def storeRun(self, entrantId, speedRun):
        '''Store run in memory'''

        if entrantId in self.runs:
            self.runs[entrantId].append(speedRun)
        else:
            self.runs[entrantId] = [speedRun]
            
        self.numRuns += 1


    def storeValidRun(self, entrantId, speedRun, startTime):
        '''Store run data from CSV files'''

        if startTime:
            if startTime < self.minStartTime:
                self.minStartTime = startTime
            if startTime > self.maxStartTime:
                self.maxStartTime = startTime

            if startTime >= self.startTime and startTime <= self.endTime:
                self.storeRun(entrantId, speedRun)
        else:
            self.storeRun(entrantId, speedRun)


    def getNextEntrantId(self):
        '''Get the next entrant ID'''
        
        if len(self.entrants) > 0:
            entrantId = max(self.entrants) + 1
        else:
            entrantId = 1

        return entrantId


    def getEntrantBySailNo(self, sailNo, name):
        '''Get entrant from the sail number'''

        if sailNo not in self.sailNos:

            # Start by attempting a quick lookup of the name itself
            # TODO - match by craft type as well as name
            if name and name in self.names:
                entrant = self.names[name][0]
                self.sailNos[sailNo] = entrant
                self.logWarning('Auto-matched sail number {} to {} ({})'.format(
                    sailNo, entrant.getName(), entrant.getCraftType()))
               
            else:
                # Next try looking for entrant names that count as a fuzzy match
                # TODO - match by craft type as well as name
                entrants = []
                if name:
                    nameObj = Name(name)
                    for entrantId in self.entrants:
                        entrant = self.entrants[entrantId]
                        if self.fuzzyMatch.matchNameObjects(entrant.name, nameObj):
                            entrants.append(entrant)

                # Only accept a unique match - multiple matches will be ignored
                if len(entrants) == 1:
                    entrant = entrants[0]
                    self.names[name] = [entrant]
                    self.sailNos[sailNo] = entrant
                    self.logWarning('Auto-matched sail number {} to {} ({})'.format(
                        sailNo, entrant.getName(), entrant.getCraftType()))

                else:
                    entrantId = self.getNextEntrantId()

                    if name:
                        entrant = Entrant(
                            self.eventConfig, ["ID", "Sail Number", "Name"], [entrantId, sailNo, name],
                            verbosity=self.verbosity)
                        self.logWarning('Unrecognised sail number {} ({}) on {}'.format(sailNo, entrant.getName(), self.date))
                    else:
                        entrant = Entrant(
                            self.eventConfig, ["ID", "Sail Number"], [entrantId, sailNo], verbosity=self.verbosity)
                        self.logWarning('Unrecognised sail number {} on {}'.format(sailNo, self.date))

                    self.entrants[entrantId] = entrant
                    self.names[name] = [entrant]
                    self.sailNos[sailNo] = entrant

        else:
            entrant = self.sailNos[sailNo]

            # TODO - check craft type as well as name
            if name:
                if name != entrant.getName():
                    nameObj = Name(name)
                    if self.fuzzyMatch.matchNameObjects(nameObj, entrant.name) is False:
                        self.logWarning('Name mismatch for sail {} - {} vs {}'.format(sailNo, name, entrant.getName()))

                if name not in self.names:
                    self.names[name] = [entrant]

        return entrant


    def getEntrantByGt31(self, gt31Id, gt31Serial):
        '''Get entrant from the GT-31 ID and serial'''

        gt31Id = gt31Id.upper()

        if gt31Id not in self.gt31Ids:
            # Try to determine name elements from GT-31 ID
            reMatch = re.match('([A-Z]+)[1-9][0-9]*([A-Z]+)', gt31Id)
            if reMatch:
                lastName, firstName = reMatch.groups()
            else:
                lastName = firstName = 'XXXXX'

            # Try looking for entrant names that match the GT-31 ID
            entrants = []
            regex = re.compile('[^A-Z]')

            for entrantId in self.entrants:
                entrant = self.entrants[entrantId]
                entrantFirstName = entrant.getValue('First Name')
                entrantLastName = entrant.getValue('Family Name')

                if entrantFirstName and entrantLastName:
                    # Remove non-alphas from the name; spaces, hyphens, apostrophes, etc.
                    # Replicates SSERPENT logic which takes the first 5 letters prior to filtering
                    modifiedFirstName = regex.sub('', entrantFirstName[:5].upper())
                    modifiedLastName = regex.sub('', entrantLastName[:5].upper())

                    if (modifiedFirstName == firstName and modifiedLastName == lastName or
                            modifiedFirstName == lastName and modifiedLastName == firstName):
                        entrants.append(entrant)

            # Only accept a unique match - multiple matches will be ignored
            if len(entrants) == 1:
                entrant = entrants[0]
                name = entrant.getName()

                self.names[name] = [entrant]
                self.gt31Ids[gt31Id] = entrant

                entrant.gt31SerialNumbers.add(gt31Serial)

                self.logWarning('Auto-matched GT-31 ID {} to {} ({})'.format(
                    gt31Id, entrant.getName(), entrant.getCraftType()))

            else:
                self.logWarning('Unrecognised GT-31 ID {} on {}'.format(gt31Id, self.date))

                entrantId = self.getNextEntrantId()

                entrant = Entrant(
                    self.eventConfig, ["ID", "GT31 ID", "GT31 SN"], [entrantId, gt31Id, gt31Serial], verbosity=self.verbosity)
                name = entrant.getName()

                self.entrants[entrantId] = entrant
                self.names[entrant] = [entrant]
                self.gt31Ids[gt31Id] = entrant

                entrant.gt31SerialNumbers.add(gt31Serial)

        else:
            entrant = self.gt31Ids[gt31Id]

            # Only report unrecognised GT-31 serials if the entrant had any GT-31 serials registered
            if entrant.gt31SerialNumbers and gt31Serial not in entrant.gt31SerialNumbers:
                self.logWarning('Unrecognised GT-31 SN for {} ({}) on {} - {} vs {}'.format(
                        entrant.getValue('Name'), entrant.getCraftType(), self.date,
                        gt31Serial, entrant.gt31SerialNumbers))

                entrant.gt31SerialNumbers.add(gt31Serial)

        return entrant


    def loadRunData(self, csvPath):
        '''Read run data from CSV files'''

        prevSailNo = None

        with open(csvPath, 'r') as f:
            csvReader = csv.reader(f)
            headers = colNames = next(csvReader)
            headersPlus = headers + [T_COURSE, T_DISTANCE, T_WEEKEND]

            if T_SAIL_NUMBER not in headers:
                raise ValueError('Field "{}" missing in "{}"'.format(T_SAIL_NUMBER, os.path.basename(csvPath)))

            sailNoIndex = headers.index(T_SAIL_NUMBER)

            # Some early years do not have the start times of runs so use whatever is available!
            if 'Start Time' in headers:
                startTimeIndex = headers.index('Start Time')
            elif 'Time' in headers:
                startTimeIndex = headers.index('Time')
            elif 'Finish Time' in headers:
                startTimeIndex = headers.index('Finish Time')
            else:
                startTimeIndex = -1

            # Name may be used for secondary lookup
            if 'Name' in headers:
                nameIndex = headers.index('Name')
            else:
                nameIndex = -1

            if 'FirstName' in headers:
                firstNameIndex = headers.index('FirstName')
            elif 'First Name' in headers:
                firstNameIndex = headers.index('First Name')
            else:
                firstNameIndex = -1

            if 'LastName' in headers:
                lastNameIndex = headers.index('LastName')
            elif 'Last Name' in headers:
                lastNameIndex = headers.index('Last Name')
            else:
                lastNameIndex = -1

            for values in csvReader:
                if len(values) != len(headers):
                    raise ValueError('Incorrect number of fields in "{}" - {}'.format(os.path.basename(csvPath), values))

                # Sail number always upper case
                sailNo = values[sailNoIndex]
                if startTimeIndex >= 0:
                    startTime = values[startTimeIndex]
                else:
                    startTime = None

                # Determine name for secondary match
                if nameIndex >= 0:
                    name = values[nameIndex].strip()                  
                elif firstNameIndex >= 0 and lastNameIndex >= 0:
                    firstName = values[firstNameIndex].strip()
                    lastName = values[lastNameIndex].strip()
                    if firstName and lastName:
                        name = firstName + ' ' + lastName
                    elif firstName:
                        name = firstName
                    elif lastName:
                        name = lastName
                    else:
                        name = None
                else:
                    name = None

                # Quick hack for 2009
                if startTime == 'GPS':
                    startTime = '12:00:00'

                # Lookup is required less often if data is sorted by sail number
                if sailNo != prevSailNo:
                    entrant = self.getEntrantBySailNo(sailNo, name)
                    entrantId = entrant.getValue('ID')
                    prevSailNo = sailNo

                # Ensure course name is stored in the result itself for the benefit of reporting
                valuesPlus =  values + [self.courseName, self.distance, self.weekend]

                # Run details are stored in a dedicated object
                speedRun = SpeedRun(self, entrant, headersPlus, valuesPlus, verbosity=self.verbosity)

                # Store the run if the start time is valid
                self.storeValidRun(entrantId, speedRun, startTime)


    def loadGpsData(self, csvPath):
        '''Read GPS data from a CSV file which has no header line

        csvPath - path of the CSV file to load

        The GT-31 ID and serial number are taken from the first two
        underscore-separated elements of the filename in each record.

        Raises ValueError if a row has the wrong number of fields or if the
        GT-31 details cannot be extracted from the filename.
        '''

        csvName = os.path.basename(csvPath)

        prevGt31Id = None
        headers = [T_RUN, T_FILENAME, T_START_TIME, T_DURATION, T_SPEED, T_COG]
        headersPlus = headers + [T_COURSE, T_DISTANCE, T_WEEKEND]

        # newline='' is the documented way of opening files for the csv module
        with open(csvPath, 'r', newline='') as f:
            csvReader = csv.reader(f)
            for values in csvReader:
                if len(values) != len(headers):
                    raise ValueError('Incorrect number of fields in "{}" - {}'.format(csvName, values))

                # GPSResults does not output a header line
                _run, filename, startTime, _duration, _speed, _cog = values

                # Split up record and format the GT-31 details
                try:
                    gt31Id, gt31Serial, _fileDate, _fileTime = os.path.splitext(filename)[0].split('_')[:4]
                except ValueError:
                    # Fewer than four elements in the filename - log some context then re-raise
                    self.logError('Problem parsing GT-31 details in run data - "{}"'.format(filename))
                    raise
                gt31Id = gt31Id.upper()

                # Lookup is required less often if data is sorted by GT-31 ID
                if gt31Id != prevGt31Id:
                    entrant = self.getEntrantByGt31(gt31Id, gt31Serial)
                    entrantId = entrant.getValue('ID')
                    prevGt31Id = gt31Id

                # Ensure course name is stored in the result itself for the benefit of reporting
                valuesPlus = values + [self.courseName, self.distance, self.weekend]

                # Run details are stored in a dedicated object
                speedRun = SpeedRun(self, entrant, headersPlus, valuesPlus, verbosity=self.verbosity)

                # Store the run if the start time is valid
                self.storeValidRun(entrantId, speedRun, startTime)


    def loadResults(self, csvPath):
        '''Read results data from CSV files

        csvPath - path of the results CSV file to load

        Results files are currently read using loadRunData, so the same file
        layout is expected and the same errors are raised.
        '''
        
        # Temporary hack which works for years prior to 2010
        self.loadRunData(csvPath)
        

    def getUniqueRuns(self, runs):
        '''Collapse runs which were recorded within a few seconds of each other

        runs - runs for one person; the caller in this class passes them
               sorted by time

        A timed run less than MIN_RUN_INTERVAL_SECS after the previous timed
        run is treated as a duplicate and only the fastest of the group is
        kept. Runs without a time are always kept. Returns a new list.
        '''

        survivors = []

        pending = None      # Best run of the group currently being assembled
        lastSecs = None     # Seconds since midnight of the most recent timed run

        for candidate in runs:
            stamp = candidate.time

            if not stamp:
                # Untimed runs cannot be duplicates - they simply start a new group
                if pending:
                    survivors.append(pending)
                pending = candidate
                continue

            # Times are HH:MM:SS
            nowSecs = int(stamp[0:2]) * 3600 + int(stamp[3:5]) * 60 + int(stamp[6:8])

            if nowSecs and lastSecs and nowSecs - lastSecs < MIN_RUN_INTERVAL_SECS:
                # Retain the fastest run when there are temporal "duplicates" from multiple GPS devices
                if candidate.speed > pending.speed:
                    pending = candidate
            else:
                if pending:
                    survivors.append(pending)
                pending = candidate

            lastSecs = nowSecs

        # Flush the final group
        if pending:
            survivors.append(pending)

        return survivors


    def processDuplicateRuns(self):
        '''Process duplicate runs in case of multiple GPS files for the same person'''

        for entrantId in self.runs:
            runs = sorted(self.runs[entrantId], key=lambda x: x.time or '00:00:00')

            prevSinceMidnight = None
            dupes = 0
            timeDiffs = 0

            for run in runs:
                if run.time:
                    secsSinceMidnight = (int(run.time[0:2]) * 60 + int(run.time[3:5])) * 60 + int(run.time[6:8])
                    if secsSinceMidnight and prevSinceMidnight and \
                            secsSinceMidnight < prevSinceMidnight + MIN_RUN_INTERVAL_SECS:

                        # Update basic stats on duplicates
                        dupes += 1  
                        timeDiffs += secsSinceMidnight - prevSinceMidnight

                    prevSinceMidnight = secsSinceMidnight

            if dupes:
                entrant = self.entrants[entrantId]

                percentDupes = 100 * dupes // len(runs)
                averageDiff = timeDiffs // dupes

                if averageDiff > 1:
                    suffix = ' (times >1s apart)'
                else:
                    suffix = ''

                self.logInfo('{} duplicate run{} removed for {} ({}) on {} - {}% dupes{}'.format(
                    dupes, 's' if dupes > 1 else '',
                    entrant.getName(), entrant.getCraftType(), self.date, percentDupes, suffix))

                self.runs[entrantId] = self.getUniqueRuns(runs)


    def finaliseRuns(self):
        '''Final processing after loading the runs'''

        self.logInfo('{} runs by {} participants down course {} on {} - {} to {}'.format(
            self.numRuns, len(self.runs), self.courseId, self.date, self.minStartTime, self.maxStartTime))

        if self.minStartTime < self.startTime:
            self.logWarning('Runs found before course {} opened on {} - earliest was {}'.format(
                self.courseId, self.date, self.minStartTime))

        if self.maxStartTime > self.endTime:
            self.logWarning('Runs found after course {} closed on {} - latest was {}'.format(
                self.courseId, self.date, self.maxStartTime))

        # De-duplication only makes sense for GPS-based events - i.e. 2010 onwards
        if self.year >= 2010:
            self.processDuplicateRuns()

        self.sortRuns()


    def loadRuns(self, csvPath):
        '''Read speeds from CSV files'''

        csvName = os.path.basename(csvPath)
        prefix = csvName.split('_')[0].upper()

        if prefix == 'RUNDATA':
            self.loadRunData(csvPath)
        elif prefix == 'GPSDATA':
            self.loadGpsData(csvPath)
        elif prefix == 'RESULTS':
            self.loadResults(csvPath)
        else:
            raise ValueError('Unexpected prefix "{}" for {}'.format(prefix, csvName))

        self.finaliseRuns()

## Unit Tests

A handful of very basic tests, including dummy event and session classes

In [3]:
class DummyEvent(Period):
    '''Minimal event object for use as the parent of a DummySession'''

    def __init__(self, path, verbosity=1):
        '''Initialise event object

        path      - event directory; its final element must be the year
        verbosity - passed through to the parent class

        Relies on the global appConfig having been loaded beforehand.
        '''

        super().__init__(verbosity=verbosity)

        self.path = path

        # Event directories are named after the year
        self.year = int(os.path.basename(path))

        self.appConfig = appConfig

        self.entrants = {}


    def loadConfig(self):
        '''Read event config from the JSON file in the event directory'''

        configPath = os.path.join(self.path, CONFIG_DIR, EVENT_CONFIG)
        with open(configPath, 'r', encoding='utf-8') as configFile:
            self.eventConfig = json.load(configFile)


class DummySession(Period):
    '''Minimal session object for use as the parent of Course objects in the tests'''

    def __init__(self, sessionDate):
        '''Initialise session object

        sessionDate - date of the session as YYYYMMDD; the year selects the
                      event directory
        '''

        # Ugly hack for the benefit of the DummySession class
        eventPath = os.path.join(projdir, EVENTS_DIR, sessionDate[:4])
        event = DummyEvent(eventPath)
        event.loadConfig()

        super().__init__(parent=event)

        self.date = sessionDate

        # weekday() returns 5 for Saturday and 6 for Sunday
        isWeekend = datetime.strptime(self.date, "%Y%m%d").weekday() >= 5
        self.weekend = 'Y' if isWeekend else 'N'

        self.entrants[0] = Entrant(event.eventConfig)

        self.courses = {}


    def copyRuns(self):
        '''Merge the runs of every course into the session, updating the session run count'''

        for course in self.courses.values():
            for entrantId, courseRuns in course.runs.items():
                if entrantId in self.runs:
                    self.runs[entrantId] += courseRuns
                else:
                    # Copy so that the session list is independent of the course list
                    self.runs[entrantId] = courseRuns.copy()

                self.numRuns += len(courseRuns)

In [4]:
class TestRunData20001003(unittest.TestCase):
    '''Tests of the Course class using the RUNDATA files for 20001003'''

    def _loadCourse(self, session, courseId):
        '''Load the RUNDATA file of one course and attach the course to the session

        A new DummySession for 20001003 is created if session is None.
        Returns the loaded course.
        '''

        if session is None:
            session = DummySession('20001003')
        sessionDate = session.date

        csvPath = os.path.join(projdir, EVENTS_DIR, sessionDate[:4], RUNDATA_DIR, sessionDate,
                        'RUNDATA_{}_{}.csv'.format(sessionDate, courseId))

        # Verbosity is zero to suppress 'WARNING: Unrecognised sail number'
        course = Course(session, courseId, '09:00:00', '18:00:00', verbosity=0)
        course.loadRuns(csvPath)

        session.courses[courseId] = course

        return course


    def _assertRunsSorted(self, course):
        '''Check the runs of every person are in descending order of speed'''

        for personId in course.runs:
            maxSpeed = 99.999
            for run in course.runs[personId]:
                self.assertLessEqual(run.data[T_SPEED], maxSpeed)
                maxSpeed = run.data[T_SPEED]


    def testRunData20001003_S1(self, session=None):
        '''Test RUNDATA using 20001003 S1 data'''

        course = self._loadCourse(session, 'S1')

        self.assertEqual(course.numRuns, 246)
        self.assertEqual(len(course.runs), 26)

        self._assertRunsSorted(course)


    def testRunData20001003_H1(self, session=None):
        '''Test RUNDATA using 20001003 H1 data'''

        course = self._loadCourse(session, 'H1')

        self.assertEqual(course.numRuns, 124)
        self.assertEqual(len(course.runs), 14)

        self._assertRunsSorted(course)


    def testRunData20001003(self):
        '''Test RUNDATA using 20001003 data for both courses in a single session'''

        sessionDate = '20001003'
        session = DummySession(sessionDate)

        self.testRunData20001003_S1(session=session)
        self.testRunData20001003_H1(session=session)

        session.copyRuns()

        self.assertEqual(session.numRuns, 370)
        self.assertEqual(len(session.runs), 30)

In [5]:
class TestResults20001003(unittest.TestCase):
    '''Tests of the Course class using the RESULTS files for 20001003'''

    def _loadCourse(self, session, courseId):
        '''Load the RESULTS file of one course and attach the course to the session

        A new DummySession for 20001003 is created if session is None.
        Returns the loaded course.
        '''

        if session is None:
            session = DummySession('20001003')
        sessionDate = session.date

        csvPath = os.path.join(projdir, EVENTS_DIR, sessionDate[:4], RESULTS_DIR, sessionDate,
                        'RESULTS_{}_{}.csv'.format(sessionDate, courseId))

        # Verbosity is zero to suppress 'WARNING: Unrecognised sail number'
        course = Course(session, courseId, '09:00:00', '18:00:00', verbosity=0)
        course.loadRuns(csvPath)

        session.courses[courseId] = course

        return course


    def testResults20001003_S1(self, session=None):
        '''Test RESULTS using 20001003 S1 data'''

        course = self._loadCourse(session, 'S1')

        # Results are expected to contain a single run per person
        self.assertEqual(course.numRuns, len(course.runs))


    def testResults20001003_H1(self, session=None):
        '''Test RESULTS using 20001003 H1 data'''

        # Must load the H1 file - this test previously re-loaded the S1 file
        course = self._loadCourse(session, 'H1')

        # Results are expected to contain a single run per person
        self.assertEqual(course.numRuns, len(course.runs))

In [6]:
class TestGpsData20191011(unittest.TestCase):
    '''Tests of the Course class using the GPSDATA files for 20191011'''

    def _loadCourse(self, session, courseId):
        '''Load the GPSDATA file of one course and attach the course to the session

        A new DummySession for 20191011 is created if session is None.
        Returns the loaded course.
        '''

        if session is None:
            session = DummySession('20191011')
        sessionDate = session.date

        csvPath = os.path.join(projdir, EVENTS_DIR, sessionDate[:4], GPSDATA_DIR, sessionDate,
                        'GPSDATA_{}_{}.csv'.format(sessionDate, courseId))

        # Verbosity is zero to suppress 'WARNING: Unrecognised GT-31 ID'
        course = Course(session, courseId, '09:00:00', '17:00:00', verbosity=0)
        course.loadRuns(csvPath)

        session.courses[courseId] = course

        return course


    def _assertRunsSorted(self, course):
        '''Check the runs of every person are in descending order of speed'''

        for personId in course.runs:
            maxSpeed = 99.999
            for run in course.runs[personId]:
                self.assertLessEqual(run.data[T_SPEED], maxSpeed)
                maxSpeed = run.data[T_SPEED]


    def testGpsData20191011_S1(self, session=None):
        '''Test GPSDATA using 20191011 S1 data'''

        course = self._loadCourse(session, 'S1')

        self.assertEqual(course.numRuns, 3)
        self.assertEqual(len(course.runs), 3)

        self._assertRunsSorted(course)


    def testGpsData20191011_S2(self, session=None):
        '''Test GPSDATA using 20191011 S2 data'''

        course = self._loadCourse(session, 'S2')

        # Course run count is taken before duplicate runs are removed
        self.assertEqual(course.numRuns, 512)
        self.assertEqual(len(course.runs), 36)

        self._assertRunsSorted(course)


    def testGpsData20191011(self):
        '''Test GPSDATA using 20191011 data for both courses in a single session'''

        sessionDate = '20191011'
        session = DummySession(sessionDate)

        self.testGpsData20191011_S1(session=session)
        self.testGpsData20191011_S2(session=session)

        session.copyRuns()

        # 9 duplicate runs should be removed for WALLI35ADRIA
        self.assertEqual(session.numRuns, 515 - 9)
        self.assertEqual(len(session.runs), 36)

## Run Unit Tests

Note: Only run unit tests when running this script directly, not during an import

In [7]:
if __name__ == '__main__':
    # The app config is held in a global variable which DummyEvent reads
    appConfigPath = os.path.join(projdir, CONFIG_DIR, APP_CONFIG)
    with open(appConfigPath, 'r', encoding='utf-8') as configFile:
        appConfig = json.load(configFile)

    # Dummy argv stops unittest from interpreting the real command line arguments
    unittest.main(argv=['first-arg-is-ignored'], exit=testExit)

........
----------------------------------------------------------------------
Ran 8 tests in 0.151s

OK


## All Done!