# Senior Cubers Worldwide - Weekly Competition

Created by Michael George (AKA Logiqx)

**TODO**
- Resubmissions
  - Copy link / WCA ID from earlier records (i.e. resubmissions)?
  - Do not report errors in results that have been re-submitted
- Fuzzy matching
  - Match people based on WCA ID
  - Match people based on Levenshtein distance
  - Handle typos in new people
- Report new people
- Count people

Website: https://logiqx.github.io/scw-comp/

## Initialisation

Basic approach to determine the project directory

In [1]:
import os, sys

# The project directory is the parent of the first entry on the module search path
_parentOfScriptDir = os.path.join(sys.path[0], os.pardir)
projdir = os.path.realpath(_parentOfScriptDir)

## Supported Events

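Lookup tables that translate sheet names (legacy spreadsheets) and event names (Google Forms responses) into the short event codes used throughout the notebook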

In [2]:
# Lower-case sheet name in a legacy workbook -> event code (also used as the CSV file name)
sheetMap = {
    # Cubes up to 3x3x3 and megaminx
    '3x3x3': '333',
    '2x2x2': '222',
    'oh': '333oh',
    'mega': 'minx',

    # Big cubes
    '4x4x4': '444',
    '5x5x5': '555',
    '6x6x6': '666',
    '7x7x7': '777',

    # Other puzzles
    'pyra': 'pyram',
    'skewb': 'skewb',
    'sq-1': 'sq1',
    'clock': 'clock',

    # Blindfolded and fewest moves
    '3bld': '333bf',
    '4bld': '444bf',
    '5bld': '555bf',
    'fmc': '333fm',
}

# Event name as submitted through the response form -> event code (also used as the CSV file name)
responseMap = {
    # Cubes up to 3x3x3 and megaminx
    '3x3x3': '333',
    '2x2x2': '222',
    '3x3x3 One-Handed': '333oh',
    'Megaminx': 'minx',

    # Big cubes
    '4x4x4': '444',
    '5x5x5': '555',
    '6x6x6': '666',
    '7x7x7': '777',

    # Other puzzles
    'Pyraminx': 'pyram',
    'Skewb': 'skewb',
    'Square-1': 'sq1',
    'Clock': 'clock',

    # Blindfolded
    '3x3x3 Blindfolded': '333bf',
    '4x4x4 Blindfolded': '444bf',
    '5x5x5 Blindfolded': '555bf',
    '3x3x3 Multi-Blind': '333mbf',

    # Fewest moves
    '3x3x3 Fewest Moves': '333fm',
}

## Common Functions

Functions to convert results to and from seconds, interpret an age category, calculate an average, etc.

In [3]:
from Common_Functions import *

## Generic Class

Generic class to ensure that all custom classes are printable

In [4]:
class Printable:
    '''Mix-in giving every subclass a readable text form: its class followed by its attributes'''

    def __repr__(self):
        return f"{self.__class__}: {self.__dict__}"

    # str() and repr() intentionally share one implementation
    __str__ = __repr__

## Spreadsheet Classes

Class to parse result spreadsheets

In [5]:
from xlrd import open_workbook
import csv

class Spreadsheet(Printable):
    '''Base class which opens a workbook and remembers where it came from'''

    def __init__(self, filename):
        # The name of the folder containing the workbook is kept as the date
        folder, _ = os.path.split(filename)
        _, folderName = os.path.split(folder)

        self.filename = filename
        self.date = folderName

        # NOTE(review): xlrd 2.0+ no longer reads .xlsx files - confirm the installed version
        self.workbook = open_workbook(filename)

In [6]:
class LegacySheet(Spreadsheet):
    '''Workbook with one sheet per event, filled in directly by participants'''

    def _parseHeader(self, sheet, rowNo):
        '''Return one standardised column name per sheet column (None means "ignore this column")'''

        columnNames = []

        for colNo in range(sheet.ncols):
            # str() guards against a non-text header cell, which has no strip()
            value = str(sheet.cell(rowNo, colNo).value).strip()

            # Simple string matches
            if value in ('Name', 'Age', 'Result', 'Best') or value.startswith('Solve'):
                columnNames.append(value)

            # Simple substring matches
            elif 'Ao5' in value or 'Mo3' in value:
                columnNames.append('Average')
            elif 'link' in value.lower():
                columnNames.append('Link')
            elif value.startswith('Comment'):
                columnNames.append('Comment')

            # Columns to ignore
            elif value.startswith(('Pos', 'Award')) or value == '':
                columnNames.append(None)

            # Report any other columns - they still need a placeholder so that
            # columnNames stays aligned with the sheet's column numbers
            else:
                print('WARNING: Unexpected field "{}" in {} ({})'.format(value, sheet.name, self.date))
                columnNames.append(None)

        return columnNames


    def processSheet(self, sheet):
        '''Process a sheet which was filled in by participants

        The header is read from the third row and every later row is treated as a result.
        Fields are standardised, the best / average are checked against the solves, and
        the sorted rows are written to "<event>.csv" in the same folder as the workbook.

        Errors raised while interpreting a field are reported with context and re-raised.
        '''

        event = sheetMap[sheet.name.lower()]
        columnNames = []
        rows = []
        rowsIdx = []

        for rowNo in range(sheet.nrows):

            # Process header row
            if rowNo == 2:
                columnNames = self._parseHeader(sheet, rowNo)
                rows.append([columnName for columnName in columnNames if columnName])

            # Process result row - must be after header on row 3
            elif rowNo >= 3:
                name = None
                age = None
                best = None
                average = None
                row = []
                solves = []

                for colNo in range(sheet.ncols):
                    columnName = columnNames[colNo]
                    if not columnName:
                        continue

                    fieldValue = sheet.cell(rowNo, colNo).value
                    if isinstance(fieldValue, str):
                        fieldValue = fieldValue.strip()

                    # Retain name
                    if columnName == 'Name':
                        name = fieldValue

                    # Standardise age
                    elif columnName == 'Age':
                        try:
                            age = interpretAge(fieldValue)
                            fieldValue = formatAge(age)
                        except Exception:
                            print('ERROR: Age "{}" for {} in {} ({})'.format(fieldValue, name, sheet.name, self.date))
                            raise

                    # Standardise average
                    elif columnName == 'Average':
                        try:
                            # Convert average to time in seconds
                            average = numSeconds(fieldValue, truncate = False)

                            # Use the formatted average in the output
                            fieldValue = formatResult(average, event, average = True)
                        except Exception:
                            print('ERROR: Average "{}" for {} in {} ({})'.format(fieldValue, name, sheet.name, self.date))
                            raise

                    # Standardise result fields
                    elif columnName in ('Best', 'Result') \
                            or (columnName.startswith('Solve') and columnName != 'Solved'):
                        try:
                            # Convert result to time in seconds
                            result = numSeconds(fieldValue)

                            # Add to list of solves and remember best
                            if columnName == 'Best':
                                best = result
                            else:
                                solves.append(result)

                            # Use the formatted result in the output
                            fieldValue = formatResult(result, event)
                        except Exception:
                            print('ERROR: Result "{}" for {} in {} ({})'.format(fieldValue, name, sheet.name, self.date))
                            raise

                    # Standardise Facebook links
                    elif columnName == 'Link':
                        try:
                            fieldValue = formatFacebookLink(fieldValue)
                        except Exception:
                            print('ERROR: Link "{}" for {} in {} ({})'.format(fieldValue, name, sheet.name, self.date))
                            raise

                    row.append(fieldValue)

                # Check name - legacy sheets have no WCA ID column so people can only be
                # matched on their name (previously an undefined wcaId was referenced here)
                if name not in persons:
                    safeName = getSafeName(name)
                    if safeName in safeNames:
                        name = safeNames[safeName]
                    else:
                        persons[name] = {"wcaId": None, "age": age}
                        print('INFO: New person {} in {} ({})'.format(name, event, self.date))

                    row[0] = name

                # Check best
                if best:
                    diff = round(calculateBest(solves) - best, 2)
                    if diff != 0:
                        print('ERROR: Best incorrect for {} in {} ({}) - calculated {}, diff {}'.format(
                            name, event, self.date, formatResult(calculateBest(solves), event), diff))
                else:
                    best = calculateBest(solves)

                # Check average
                if average:
                    diff = round(calculateAverage(solves) - average, 2)
                    if diff < -0.01 or diff > 0.01:
                        print('ERROR: Average incorrect for {} in {} ({}) - calculated {}, diff {}'.format(
                            name, event, self.date, formatResult(calculateAverage(solves), event, average = True), diff))
                else:
                    average = calculateAverage(solves)

                # Record row
                rows.append(row)

                # Create index entry, patching negative results (DNS and DNF) to maxResult.
                # rows[0] is the header, so the result on sheet row N is stored at rows[N - 2]
                if best < 0:
                    best = maxResult
                if average < 0:
                    average = maxResult
                rowsIdx.append((rowNo - 2, best, average, 100 - age, name))

        # Sort index - blindfolded events rank on best first, other events on average first
        if event.endswith('bf'):
            rowsIdx = sorted(rowsIdx, key = lambda x: (x[1], x[2], x[3], x[4]))
        else:
            rowsIdx = sorted(rowsIdx, key = lambda x: (x[2], x[1], x[3], x[4]))

        # Sort rows, keeping the header first
        sortedRows = rows[:1]
        for rowIdx in rowsIdx:
            sortedRows.append(rows[rowIdx[0]])

        # Write CSV - newline='' stops text mode translating the line terminator a second time
        filename = os.path.join(os.path.dirname(self.filename), event + '.csv')
        with open(filename, 'w', newline = '') as outfile:
            csvWriter = csv.writer(outfile, quoting = csv.QUOTE_MINIMAL, lineterminator = os.linesep)
            csvWriter.writerows(sortedRows)


    def processSheets(self, latest):
        '''Process a spreadsheet which has been downloaded from Google Sheets

        Only sheets whose lower-case name appears in sheetMap are processed.
        The "latest" argument is accepted but not used by this class.
        '''

        for sheet in self.workbook.sheets():
            if sheet.name.lower() in sheetMap:
                self.processSheet(sheet)

In [7]:
import Levenshtein

class ResponseSheet(Spreadsheet):
    '''Workbook of Google Forms responses, holding one submission per row'''

    def processSheet(self, sheet, latest):
        '''Process a sheet which was filled in by Google Forms

        The first row is the header and every other row is one submission for one event.
        A later submission by the same person for the same event replaces the earlier one.
        Fields are standardised, the best / average are checked against the solves, and
        each event is sorted and written to "<event>.csv" in the same folder as the workbook.

        sheet  - worksheet to process
        latest - when true, the number of distinct submissions is printed at the end

        Errors raised while interpreting a field are reported with context and re-raised.
        '''

        columnNames = []
        eventRows = {}
        eventRowsIdx = {}
        eventCols = {}
        nameIdx = -1
        commentIdx = -1
        linkIdx = -1
        numSubmissions = 0

        for rowNo in range(sheet.nrows):

            # Process header row
            if rowNo == 0:
                for colNo in range(sheet.ncols):
                    # str() guards against a non-text header cell, which has no strip()
                    value = str(sheet.cell(rowNo, colNo).value).strip()

                    # Simple column names
                    if value == 'Name':
                        nameIdx = colNo
                        columnNames.append(value)
                    elif value in ('Age', 'Result', 'Attempted', 'Solved', 'Time'):
                        columnNames.append(value)
                    elif value.startswith('Solve'):
                        columnNames.append(value)

                    # Variable column names
                    elif value.startswith('WCA ID'):
                        columnNames.append('WCA ID')
                    elif value.startswith('Event'):
                        columnNames.append('Event')
                    elif value in ('Best', 'Single'):
                        columnNames.append('Best')
                    elif 'Ao5' in value or 'Mo3' in value:
                        columnNames.append('Average')
                    elif 'link' in value.lower():
                        linkIdx = colNo
                        columnNames.append('Link')
                    elif value.startswith('Comment'):
                        commentIdx = colNo
                        columnNames.append('Comment')

                    # Columns to ignore (blank headings included)
                    elif value in ('Timestamp', ''):
                        columnNames.append(None)

                    # Report any other columns - they still need a placeholder so that
                    # columnNames stays aligned with the sheet's column numbers
                    else:
                        print('WARNING: Unexpected field "{}" in {} ({})'.format(value, sheet.name, self.date))
                        columnNames.append(None)

            # Process result row
            else:
                name = None
                wcaId = None
                age = None
                event = None
                best = None
                average = None
                attempted = None
                solved = None
                formattedTime = None
                row = []
                solves = []
                cols = []

                for colNo in range(sheet.ncols):
                    columnName = columnNames[colNo]

                    fieldValue = sheet.cell(rowNo, colNo).value
                    if isinstance(fieldValue, str):
                        fieldValue = fieldValue.strip()

                    # Ignored columns keep a blank slot so that row positions equal sheet columns
                    if not columnName:
                        row.append('')
                        continue

                    # Retain name
                    if columnName == 'Name':
                        name = fieldValue

                    # Retain WCA ID
                    elif columnName == 'WCA ID':
                        wcaId = fieldValue

                    # Standardise event - the event is not written to the output row
                    elif columnName == 'Event':
                        try:
                            event = responseMap[fieldValue]
                        except KeyError:
                            print('ERROR: Event "{}" for {} ({})'.format(fieldValue, name, self.date))
                            raise
                        fieldValue = None

                    # Standardise age
                    elif columnName == 'Age':
                        try:
                            age = interpretAge(fieldValue)
                            fieldValue = formatAge(age)
                        except Exception:
                            print('ERROR: Age "{}" for {} in {} ({})'.format(fieldValue, name, event, self.date))
                            raise

                    # Standardise average
                    elif columnName == 'Average':
                        try:
                            if fieldValue:
                                # Convert average to time in seconds
                                average = numSeconds(fieldValue, truncate = False)

                                # Use the formatted average in the output
                                fieldValue = formatResult(average, event, average = True)
                        except Exception:
                            print('ERROR: Average "{}" for {} in {} ({})'.format(fieldValue, name, event, self.date))
                            raise

                    # Standardise result fields
                    elif columnName in ('Best', 'Result') \
                            or (columnName.startswith('Solve') and columnName != 'Solved'):
                        try:
                            if fieldValue:
                                # Convert result to time in seconds
                                result = numSeconds(fieldValue)

                                # Add to list of solves and remember best
                                if columnName == 'Best':
                                    best = result
                                else:
                                    solves.append(result)

                                # Use the formatted result in the output
                                fieldValue = formatResult(result, event)
                        except Exception:
                            print('ERROR: Result "{}" for {} in {} ({})'.format(fieldValue, name, event, self.date))
                            raise

                    # Standardise Facebook links
                    elif columnName == 'Link':
                        try:
                            fieldValue = formatFacebookLink(fieldValue)
                        except Exception:
                            print('ERROR: Link "{}" for {} in {} ({})'.format(fieldValue, name, event, self.date))
                            raise

                    # Standardise attempted
                    elif columnName == 'Attempted':
                        try:
                            if fieldValue:
                                fieldValue = int(fieldValue)
                                attempted = fieldValue
                        except Exception:
                            print('ERROR: Attempted "{}" for {} in {} ({})'.format(fieldValue, name, event, self.date))
                            raise

                    # Standardise solved - zero is a legitimate value so only blanks are skipped
                    elif columnName == 'Solved':
                        try:
                            if fieldValue != '':
                                fieldValue = int(fieldValue)
                                solved = fieldValue
                        except Exception:
                            print('ERROR: Solved "{}" for {} in {} ({})'.format(fieldValue, name, event, self.date))
                            raise

                    # Retain time taken
                    elif columnName == 'Time':
                        if fieldValue:
                            formattedTime = fieldValue

                    # Blank fields are written as '' and do not count as a used column.
                    # A "Solved" count of zero must survive, even though it is falsy
                    hasValue = bool(fieldValue) or (columnName == 'Solved' and fieldValue == 0)
                    if hasValue:
                        row.append(fieldValue)
                        cols.append(colNo)
                    else:
                        row.append('')

                # Check name
                if name not in persons:
                    safeName = getSafeName(name)
                    if safeName in safeNames:
                        name = safeNames[safeName]

                    # A blank WCA ID must never be used to identify somebody
                    elif wcaId and wcaId in wcaIds:
                        if Levenshtein.distance(wcaIds[wcaId], name) >= 5:
                            print('WARNING: Unexpected name {} for {} in {} ({})'.format(name, wcaId, event, self.date))
                        name = wcaIds[wcaId]
                    else:
                        persons[name] = {"wcaId": wcaId, "age": age}
                        print('INFO: New person {} in {} ({})'.format(name, event, self.date))

                    row[nameIdx] = name

                # Check WCA ID
                if wcaId and 'wcaId' in persons[name] and wcaId != persons[name]['wcaId']:
                    print('WARNING: Different WCA ID for {} in {} {} - {}'.format(name, event, self.date, wcaId))

                # Check best
                if best:
                    diff = round(calculateBest(solves) - best, 2)
                    if diff != 0:
                        print('ERROR: Best incorrect for {} in {} ({}) - calculated {}, diff {}'.format(
                            name, event, self.date, formatResult(calculateBest(solves), event), diff))
                else:
                    best = calculateBest(solves)

                # Check average
                if average:
                    diff = round(calculateAverage(solves) - average, 2)
                    if diff < -0.01 or diff > 0.01:
                        print('ERROR: Average incorrect for {} in {} ({}) - calculated {}, diff {}'.format(
                            name, event, self.date, formatResult(calculateAverage(solves), event, average = True), diff))
                else:
                    average = calculateAverage(solves)

                # Encode MBLD result so that results can be sorted
                if event == '333mbf':
                    best = encodeMultiBlind(attempted, solved, formattedTime)

                # Report duplicate submission - only the first one counts towards the total
                if event in eventRows and name in eventRows[event]:
                    print('INFO: Duplicate submission for {} in {} ({})'.format(name, event, self.date))
                else:
                    numSubmissions += 1

                # Record columns
                if event not in eventCols:
                    eventCols[event] = cols
                else:
                    for col in cols:
                        if col not in eventCols[event]:
                            eventCols[event].append(col)

                # Record row - a resubmission overwrites the earlier row
                if event not in eventRows:
                    eventRows[event] = {}
                eventRows[event][name] = row

                # Create index entry, patching negative results (DNS and DNF) to maxResult
                if best < 0:
                    best = maxResult
                if average < 0:
                    average = maxResult
                if event not in eventRowsIdx:
                    eventRowsIdx[event] = {}
                eventRowsIdx[event][name] = (rowNo, best, average, 100 - age, name)

        # Tidy up event columns - sort columns numerically then move "link" and "comment" to the end.
        # An index of -1 means the sheet has no such column, so nothing is appended for it
        trailingCols = [idx for idx in (linkIdx, commentIdx) if idx >= 0]
        for event in eventCols:
            otherCols = sorted(col for col in eventCols[event] if col not in trailingCols)
            eventCols[event] = otherCols + trailingCols

        # Save events
        for event in eventRows:

            # Sort index - blindfolded events rank on best first, other events on average first
            if event.endswith('bf'):
                rowsIdx = sorted(eventRowsIdx[event].values(), key = lambda x: (x[1], x[2], x[3], x[4]))
            else:
                rowsIdx = sorted(eventRowsIdx[event].values(), key = lambda x: (x[2], x[1], x[3], x[4]))

            # Create header (link and comment are already last)
            rows = [[columnNames[col] for col in eventCols[event]]]

            # Create results in ranked order
            for rowIdx in rowsIdx:
                name = rowIdx[4]
                rows.append([eventRows[event][name][col] for col in eventCols[event]])

            # Write CSV - newline='' stops text mode translating the line terminator a second time
            filename = os.path.join(os.path.dirname(self.filename), event + '.csv')
            with open(filename, 'w', newline = '') as outfile:
                csvWriter = csv.writer(outfile, quoting = csv.QUOTE_MINIMAL, lineterminator = os.linesep)
                csvWriter.writerows(rows)

        if latest:
            print("Number of submissions = {}".format(numSubmissions))


    def processSheets(self, latest):
        '''Process a spreadsheet which has been downloaded from Google Sheets

        Every sheet in the workbook is processed; "latest" is passed through to processSheet.
        '''

        for sheet in self.workbook.sheets():
            self.processSheet(sheet, latest)

## Main Code

Process all competitions

In [8]:
import glob
import time

# Start of the timed section - compared against a second reading once conversion has finished
pc1 = time.perf_counter()

In [9]:
import json

# Read existing persons from JSON (start with nobody if the file does not exist yet)

fn = os.path.join(projdir, 'data', 'persons.json')
if os.path.exists(fn):
    with open(fn) as f:
        jsonTxt = f.read()
        persons = json.loads(jsonTxt)
else:
    persons = {}

# Index people by the "safe" form of their name and of each of their aliases
safeNames = {}
for person in persons:
    safeName = getSafeName(person)
    safeNames[safeName] = person

    if 'aliases' in persons[person]:
        for alias in persons[person]['aliases']:
            safeName = getSafeName(alias)
            safeNames[safeName] = person

# Index people by WCA ID. Blank / missing IDs are skipped, otherwise a submission
# without a WCA ID would be matched to whichever person was stored with a blank one
wcaIds = {}
for person in persons:
    personWcaId = persons[person].get('wcaId')
    if personWcaId:
        wcaIds[personWcaId] = person

In [10]:
# Process spreadsheets

datePattern = '[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]'
xlsxPattern = '*.xlsx'

competitions = {}

filenames = glob.glob(os.path.join(projdir, 'data', datePattern, xlsxPattern))

# Drop "~$" temporary files (presumably created while a workbook is open in Office) BEFORE
# working out which file is the latest. They sort after the real workbook in the same
# folder, so leaving them in meant the latest workbook was never recognised or processed
filenames = [filename for filename in filenames if not os.path.basename(filename).startswith('~$')]
filenames.sort()

# Set refresh to True to re-process every competition rather than just the latest
refresh = False
latest = False

for filename in filenames:
    if filename == filenames[-1]:
        latest = True

    if refresh or latest:
        if 'Responses' in filename:
            spreadsheet = ResponseSheet(filename)
        else:
            spreadsheet = LegacySheet(filename)

        spreadsheet.processSheets(latest)

Number of submissions = 27


In [11]:
# End of the timed section - report the time elapsed since pc1 was taken
pc2 = time.perf_counter()
print("Conversion completed in %0.2f seconds" % (pc2 - pc1))

Conversion completed in 0.11 seconds


## All Done!