# Course Module

## Initialisation

Import the standard library and project modules used below. The project directory (`projdir`) is provided by the `common` module.

In [1]:
import os
import csv

import json
import unittest

from common import Printable, projdir

from name import Name
from fuzzy import FuzzyMatch
from entrant import Entrant
from speedrun import SpeedRun

from constants import *

## Course Class

Class to manage courses - start / end times

In [2]:
class Course(Printable):
    '''Manage a single course within a session - its time window and its runs

    Runs are loaded from CSV files (either sail number based run data or
    GT-31 based GPS data), matched to entrants, and stored per entrant if
    their start time falls inside the course window.
    '''

    def __init__(self, session, courseId, startTime, endTime, appConfig, verbosity=1):
        '''Initialise course object

        Parameters:
            session   - parent session. Its entrants / names / sailNos / gt31Ids
                        dictionaries are referenced (not copied), so any lookups
                        added by this course are visible through the session too
            courseId  - course identifier. The first character is looked up in
                        appConfig['Courses'] and the remainder is appended to
                        form the course name
            startTime - opening time of the course
            endTime   - closing time of the course
            appConfig - application configuration, must contain 'Courses'
            verbosity - passed on to Printable

        Times are compared against the start time strings read from the CSV
        files using plain < / > comparisons, so they are presumably expected
        to be zero-padded 'HH:MM:SS' strings (like the sentinels used below).
        '''

        super().__init__(verbosity=verbosity)

        self.appConfig = appConfig
        
        self.session = session
        self.entrants = session.entrants
        self.names = session.names
        self.sailNos = session.sailNos
        self.gt31Ids = session.gt31Ids
        self.date = session.date
        
        self.courseId = courseId
        self.courseName = '{} {}'.format(appConfig['Courses'][courseId[:1]], courseId[1:])

        self.startTime = startTime
        self.endTime = endTime

        # Sentinels - replaced by the earliest / latest start time actually seen
        self.minStartTime = '23:59:59'
        self.maxStartTime = '00:00:00'

        self.fuzzyMatch = FuzzyMatch()
        
        # Runs stored for this course, keyed by entrant ID
        self.runs = {}

        self.numRuns = 0


    def _nextEntrantId(self):
        '''Return an unused entrant ID - one more than the highest existing ID (1 if there are none)'''

        return max(self.entrants, default=0) + 1


    def getEntrantSailNo(self, sailNo, name):
        '''Get entrant from the sail number

        Unknown sail numbers are resolved in this order:
            1. exact lookup of the name
            2. unique fuzzy match of the name against all entrants
            3. creation of a new entrant

        Parameters:
            sailNo - sail number as it appears in the run data
            name   - name from the run data, may be None or empty

        Returns the matching (or newly created) entrant. The shared sailNos /
        names / entrants dictionaries are updated so later lookups are direct.
        '''

        if sailNo in self.sailNos:
            entrant = self.sailNos[sailNo]

            # Only check (and register) a name the first time it is seen.
            # Missing names are not registered - there is nothing to look up.
            if name and name not in self.names:
                if name != entrant.getName():
                    nameObj = Name(name)
                    if self.fuzzyMatch.matchNameObjects(nameObj, entrant.name) is False:
                        self.logWarning('Name mismatch for sail {} - {} vs {}'.format(sailNo, name, entrant.getName()))

                self.names[name] = [entrant]

            return entrant

        # Start by attempting a quick lookup of the name itself
        if name and name in self.names:
            entrant = self.names[name][0]
            self.sailNos[sailNo] = entrant
            self.logWarning('Auto-matched sail number {} to {} ({})'.format(
                sailNo, entrant.getName(), entrant.getValue('Craft Type')))
            return entrant

        # Next try looking for entrant names that count as a fuzzy match
        matches = []
        if name:
            nameObj = Name(name)
            for candidate in self.entrants.values():
                # Skip placeholder entries (e.g. the dummy test session uses {0: None})
                if candidate is None:
                    continue
                if self.fuzzyMatch.matchNameObjects(candidate.name, nameObj):
                    matches.append(candidate)

        # Only accept a unique match - multiple matches will be ignored
        if len(matches) == 1:
            entrant = matches[0]
            self.names[name] = [entrant]
            self.sailNos[sailNo] = entrant
            self.logWarning('Auto-matched sail number {} to {} ({})'.format(
                sailNo, entrant.getName(), entrant.getValue('Craft Type')))
            return entrant

        # No usable match so create a new entrant
        entrantId = self._nextEntrantId()

        if name:
            entrant = Entrant(["ID", "Sail Number", "Name"], [entrantId, sailNo, name], verbosity=self.verbosity)
            self.logWarning('Unrecognised sail number {} ({}) on {}'.format(sailNo, entrant.getName(), self.date))
            self.names[name] = [entrant]
        else:
            entrant = Entrant(["ID", "Sail Number"], [entrantId, sailNo], verbosity=self.verbosity)
            self.logWarning('Unrecognised sail number {} on {}'.format(sailNo, self.date))

        self.entrants[entrantId] = entrant
        self.sailNos[sailNo] = entrant

        return entrant


    def getEntrantGt31(self, gt31Id, gt31Serial):
        '''Get entrant from the GT-31 ID and serial

        Parameters:
            gt31Id     - GT-31 identifier (the caller in this class upper-cases it)
            gt31Serial - GT-31 serial number taken from the same filename

        Returns the matching entrant, creating (and registering) a new entrant
        if the GT-31 ID has not been seen before.
        '''

        if gt31Id not in self.gt31Ids:
            self.logWarning('Unrecognised GT-31 ID on {} - {}'.format(self.date, gt31Id))

            entrantId = self._nextEntrantId()
            entrant = Entrant(["ID", "GT31 ID", "GT31 SN"], [entrantId, gt31Id, gt31Serial], verbosity=self.verbosity)
            name = entrant.getName()

            self.entrants[entrantId] = entrant
            self.names[name] = [entrant]
            self.gt31Ids[gt31Id] = entrant

            return entrant

        # TODO - check for unique serial (just in case GPS name has been changed)

        entrant = self.gt31Ids[gt31Id]

        # Only report unrecognised GT-31 serials if the entrant had any GT-31 serials registered
        if entrant.gt31SerialNumbers and gt31Serial not in entrant.gt31SerialNumbers:
            self.logWarning('Unrecognised GT-31 SN for {} ({}) on {} - {} vs {}'.format(
                    entrant.getValue('Name'), entrant.getValue('Craft Type'), self.date,
                    gt31Serial, entrant.gt31SerialNumbers))

            # Remember the serial so it is only reported once
            entrant.gt31SerialNumbers.add(gt31Serial)

        return entrant


    def storeRun(self, entrantId, speedRun):
        '''Store run in memory - both in this course and in the parent session'''

        self.runs.setdefault(entrantId, []).append(speedRun)
        self.numRuns += 1

        self.session.storeRun(entrantId, speedRun)


    def _recordRun(self, entrantId, speedRun, startTime):
        '''Track the earliest / latest start time and store the run if it is inside the course window'''

        # Every run counts towards min / max, including those outside the
        # window - finaliseRuns uses them to report early / late runs
        if startTime < self.minStartTime:
            self.minStartTime = startTime
        if startTime > self.maxStartTime:
            self.maxStartTime = startTime

        if self.startTime <= startTime <= self.endTime:
            self.storeRun(entrantId, speedRun)


    @staticmethod
    def _findColumn(headers, candidates):
        '''Return the index of the first candidate present in headers, or -1 if none are present'''

        for candidate in candidates:
            if candidate in headers:
                return headers.index(candidate)

        return -1


    def loadRunData(self, csvPath):
        '''Read run data from CSV files

        The first line of the file must be a header line containing the sail
        number field and one of 'Start Time' / 'Time' / 'Finish Time'. Name
        fields are optional and only used as a secondary lookup for entrants.

        Raises ValueError if the header line or a required field is missing,
        or if a row has a different number of fields to the header line.
        '''

        prevSailNo = None
        csvName = os.path.basename(csvPath)

        # newline='' lets the csv module handle line endings itself (see csv docs)
        with open(csvPath, 'r', newline='') as f:
            csvReader = csv.reader(f)

            # An empty file would otherwise leak StopIteration to the caller
            headers = next(csvReader, None)
            if headers is None:
                raise ValueError('No header row in "{}"'.format(csvName))

            headersPlus = [T_COURSE] + headers

            if T_SAIL_NUMBER not in headers:
                raise ValueError('Field "{}" missing in "{}"'.format(T_SAIL_NUMBER, csvName))

            sailNoIndex = headers.index(T_SAIL_NUMBER)

            # Some early years do not have the start times of runs so use whatever is available!
            startTimeIndex = self._findColumn(headers, ('Start Time', 'Time', 'Finish Time'))
            if startTimeIndex < 0:
                raise ValueError('No time field in "{}"'.format(csvName))

            # Name may be used for secondary lookup
            nameIndex = self._findColumn(headers, ('Name',))
            firstNameIndex = self._findColumn(headers, ('FirstName', 'First Name'))
            lastNameIndex = self._findColumn(headers, ('LastName', 'Last Name'))

            for values in csvReader:
                if len(values) != len(headers):
                    raise ValueError('Incorrect number of fields in "{}" - {}'.format(csvName, values))

                # NOTE(review): an earlier comment here said sail numbers are always
                # upper case, but no conversion is applied - confirm whether the
                # value should be upper-cased (as is done for GT-31 IDs)
                sailNo = values[sailNoIndex]
                startTime = values[startTimeIndex]

                # Determine name for secondary match
                if nameIndex >= 0:
                    name = values[nameIndex].strip()
                elif firstNameIndex >= 0 and lastNameIndex >= 0:
                    # Join whichever parts are present, None if both are blank
                    nameParts = (values[firstNameIndex].strip(), values[lastNameIndex].strip())
                    name = ' '.join(part for part in nameParts if part) or None
                else:
                    name = None

                # Quick hack for 2009
                if startTime == 'GPS':
                    startTime = '12:00:00'

                # Lookup is required less often if data is sorted by sail number
                if sailNo != prevSailNo:
                    entrant = self.getEntrantSailNo(sailNo, name)
                    entrantId = entrant.getValue('ID')

                valuesPlus = [self.courseName] + values
                speedRun = SpeedRun(self, entrant, headersPlus, valuesPlus, verbosity=self.verbosity)

                self._recordRun(entrantId, speedRun, startTime)

                prevSailNo = sailNo

        self.finaliseRuns()


    def loadGpsData(self, csvPath):
        '''Read GPS data from CSV files

        The file has no header line. Each row holds run, filename, start time,
        duration, speed and COG. The filename (without extension) must contain
        at least four '_' separated parts, the first two being the GT-31 ID
        and the GT-31 serial.

        Raises ValueError if a row has the wrong number of fields or the
        filename cannot be split into the GT-31 details.
        '''

        prevGt31Id = None
        headers = [T_RUN, T_FILENAME, T_START_TIME, T_DURATION, T_SPEED, T_COG]
        headersPlus = [T_COURSE] + headers
        csvName = os.path.basename(csvPath)

        # newline='' lets the csv module handle line endings itself (see csv docs)
        with open(csvPath, 'r', newline='') as f:
            csvReader = csv.reader(f)
            for values in csvReader:
                if len(values) != len(headers):
                    raise ValueError('Incorrect number of fields in "{}" - {}'.format(csvName, values))

                # GPSResults does not output a header line
                _run, filename, startTime, _duration, _speed, _cog = values

                # Split up record and format the GT-31 details
                try:
                    gt31Id, gt31Serial, _fileDate, _fileTime = os.path.splitext(filename)[0].split('_')[:4]
                except ValueError:
                    # Fewer than four parts in the filename - report it, then re-raise
                    self.logError('Problem parsing GT-31 details in run data - "{}"'.format(filename))
                    raise
                gt31Id = gt31Id.upper()

                # Lookup is required less often if data is sorted by GT-31 ID
                if gt31Id != prevGt31Id:
                    entrant = self.getEntrantGt31(gt31Id, gt31Serial)
                    entrantId = entrant.getValue('ID')

                valuesPlus = [self.courseName] + values
                speedRun = SpeedRun(self, entrant, headersPlus, valuesPlus, verbosity=self.verbosity)

                self._recordRun(entrantId, speedRun, startTime)

                prevGt31Id = gt31Id

        self.finaliseRuns()


    def finaliseRuns(self):
        '''Final processing after loading the runs - report a summary, warn about runs outside the course window, then sort'''

        self.logInfo('{} runs by {} participants down course {} on {} - {} to {}'.format(
            self.numRuns, len(self.runs), self.courseId, self.date, self.minStartTime, self.maxStartTime))
            
        if self.minStartTime < self.startTime:
            self.logWarning('Runs found before course {} opened on {} - earliest was {}'.format(
                self.courseId, self.date, self.minStartTime))

        if self.maxStartTime > self.endTime:
            self.logWarning('Runs found after course {} closed on {} - latest was {}'.format(
                self.courseId, self.date, self.maxStartTime))

        self.sortRuns()


    def sortRuns(self):
        '''Sort runs for each person, fastest to slowest'''

        for entrantRuns in self.runs.values():
            entrantRuns.sort(key=lambda run: run.data[T_SPEED], reverse=True)

## Unit Tests

A handful of very basic tests, including a dummy session class

In [3]:
class DummySession(Printable):
    '''Minimal stand-in for a session, providing only what Course reads and calls'''

    def __init__(self, sessionDate):
        '''Create an empty session for the given date'''
        
        self.date = sessionDate

        # Placeholder entry keyed 0 - Course derives new entrant IDs from
        # the highest existing key plus one
        self.entrants = {0: None}

        # Lookup tables, all empty to start with
        self.names = {}
        self.sailNos = {}
        self.gt31Ids = {}

        # Runs keyed by entrant ID, plus a running total
        self.numRuns = 0
        self.runs = {}


    def storeRun(self, entrantId, speedRun):
        '''Append the run to the list for the entrant and bump the run count'''
        
        self.runs.setdefault(entrantId, []).append(speedRun)
        self.numRuns += 1

In [4]:
class TestCourse20001003(unittest.TestCase):
    '''Test the Course class using the 20001003 run data (sail number based CSV files)'''

    def _loadCourse(self, session, courseId):
        '''Create a course for the session, load its RUNDATA CSV file and return the course'''

        sessionDate = session.date
        csvPath = os.path.join(projdir, EVENTS_DIR, sessionDate[:4], RUNDATA_DIR, sessionDate,
                        'RUNDATA_{}_{}.csv'.format(sessionDate, courseId))

        # Verbosity is zero to keep the output quiet - the dummy session starts
        # with no known sail numbers so every one of them triggers a warning
        course = Course(session, courseId, '09:00:00', '18:00:00', appConfig, verbosity=0)
        course.loadRunData(csvPath)

        return course


    def _assertRunsSorted(self, course):
        '''Check the runs of every entrant are ordered fastest to slowest'''

        for runs in course.runs.values():
            maxSpeed = 99.999
            for run in runs:
                # assertLessEqual reports both values on failure
                self.assertLessEqual(run.data[T_SPEED], maxSpeed)
                maxSpeed = run.data[T_SPEED]


    def test20001003_S1(self, session=None):
        '''Test using 20001003 S1 data

        A session may be passed in so that several courses can share it,
        otherwise a fresh dummy session is created.
        '''

        if session is None:
            session = DummySession('20001003')

        course = self._loadCourse(session, 'S1')
        
        self.assertEqual(course.numRuns, 246)
        self.assertEqual(len(course.runs), 26)

        self._assertRunsSorted(course)


    def test20001003_H1(self, session=None):
        '''Test using 20001003 H1 data

        A session may be passed in so that several courses can share it,
        otherwise a fresh dummy session is created.
        '''

        if session is None:
            session = DummySession('20001003')

        course = self._loadCourse(session, 'H1')
        
        self.assertEqual(course.numRuns, 124)
        self.assertEqual(len(course.runs), 14)

        self._assertRunsSorted(course)


    def testtest20001003(self):
        '''Test using 20001003 data - both courses loaded into one shared session'''

        session = DummySession('20001003')

        self.test20001003_S1(session=session)
        self.test20001003_H1(session=session)

        # Totals across both courses, as accumulated by the session itself
        self.assertEqual(session.numRuns, 370)
        self.assertEqual(len(session.runs), 30)

In [5]:
class TestCourse20191011(unittest.TestCase):
    '''Test the Course class using the 20191011 GPS data (GT-31 based CSV files)'''

    def _loadCourse(self, session, courseId):
        '''Create a course for the session, load its GPSDATA CSV file and return the course'''

        sessionDate = session.date
        csvPath = os.path.join(projdir, EVENTS_DIR, sessionDate[:4], GPSDATA_DIR, sessionDate,
                        'GPSDATA_{}_{}.csv'.format(sessionDate, courseId))

        # Verbosity is zero to suppress 'WARNING: Unrecognised GT-31 ID'
        course = Course(session, courseId, '09:00:00', '17:00:00', appConfig, verbosity=0)
        course.loadGpsData(csvPath)

        return course


    def _assertRunsSorted(self, course):
        '''Check the runs of every entrant are ordered fastest to slowest'''

        for runs in course.runs.values():
            maxSpeed = 99.999
            for run in runs:
                # assertLessEqual reports both values on failure
                self.assertLessEqual(run.data[T_SPEED], maxSpeed)
                maxSpeed = run.data[T_SPEED]


    def test20191011_S1(self, session=None):
        '''Test using 20191011 S1 data

        A session may be passed in so that several courses can share it,
        otherwise a fresh dummy session is created.
        '''

        if session is None:
            session = DummySession('20191011')

        course = self._loadCourse(session, 'S1')
        
        self.assertEqual(course.numRuns, 3)
        self.assertEqual(len(course.runs), 3)

        self._assertRunsSorted(course)


    def test20191011_S2(self, session=None):
        '''Test using 20191011 S2 data

        A session may be passed in so that several courses can share it,
        otherwise a fresh dummy session is created.
        '''

        if session is None:
            session = DummySession('20191011')

        course = self._loadCourse(session, 'S2')
        
        self.assertEqual(course.numRuns, 512)
        self.assertEqual(len(course.runs), 36)

        self._assertRunsSorted(course)


    def test20191011(self):
        '''Test using 20191011 data - both courses loaded into one shared session'''

        session = DummySession('20191011')

        self.test20191011_S1(session=session)
        self.test20191011_S2(session=session)

        # Totals across both courses, as accumulated by the session itself
        self.assertEqual(session.numRuns, 515)
        self.assertEqual(len(session.runs), 36)

## Run Unit Tests

Note: Only run unit tests when running this script directly, not during an import

In [6]:
if __name__ == '__main__':
    # Load the main config into the global appConfig, which the tests above read
    filename = os.path.join(projdir, CONFIG_DIR, CONFIG_JSON)
    with open(filename, 'r', encoding='utf-8') as f:
        jsonTxt = f.read()

    # Parse once the file has been closed
    appConfig = json.loads(jsonTxt)

    # argv is overridden and exit disabled so this can run inside a notebook
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

......
----------------------------------------------------------------------
Ran 6 tests in 0.079s

OK


## All Done!