# Percentile Ranks

Created by Michael George (AKA Logiqx)

Link: https://www.speedsolving.com/forum/showthread.php?54128-How-fast-are-the-over-40-s-in-competitions

In [446]:
# Recognised events, one tuple per event:
#   [0] event id (dictionary key, also used as the CSV file name)
#   [1] display name used in warning messages
#   [2] numeric string - presumably a display / sort order (not used in this notebook)
#   [3] result format - only 'time' results are passed through formatTime
#   [4] cutoff - results at or above it are aggregated, 0 means no cutoff is defined
events = [
    ('333', '3x3x3 Cube', '10', 'time', 180),
    ('222', '2x2x2 Cube', '20', 'time', 60),
    ('444', '4x4x4 Cube', '30', 'time', 180),
    ('555', '5x5x5 Cube', '40', 'time', 240),
    ('666', '6x6x6 Cube', '50', 'time', 360),
    ('777', '7x7x7 Cube', '60', 'time', 540),
    ('333bf', '3x3x3 Blindfolded', '70', 'time', 180),
    ('333fm', '3x3x3 Fewest Moves', '80', 'number', 60),
    ('333oh', '3x3x3 One-Handed', '90', 'time', 120),
    ('333ft', '3x3x3 With Feet', '100', 'time', 240),
    ('clock', 'Clock', '110', 'time', 60),
    ('minx', 'Megaminx', '120', 'time', 240),
    ('pyram', 'Pyraminx', '130', 'time', 60),
    ('skewb', 'Skewb', '140', 'time', 60),
    ('sq1', 'Square-1', '150', 'time', 120),
    ('444bf', '4x4x4 Blindfolded', '160', 'time', 0),
    ('555bf', '5x5x5 Blindfolded', '170', 'time', 0),
    ('333mbf', '3x3x3 Multi-Blind', '180', 'multi', 0),
]

# Lookup table: event id -> the remaining fields (name, order, format, cutoff)
eventsDict = {event[0]: event[1:] for event in events}

## Seconds to HH:MM:SS

Intelligently convert seconds to hours, minutes and seconds

In [447]:
import time

def formatTime(seconds):
    """Format a whole number of seconds as M:SS or H:MM:SS.

    seconds -- non-negative whole number of seconds

    Returns a string 'H:MM:SS' for one hour or more and 'M:SS' for one
    minute or more. Values below one minute are returned unchanged (i.e.
    not converted to a string), which is what existing callers receive.
    """

    # Use >= so that exact boundaries roll over (3600 -> '1:00:00', 60 -> '1:00')
    # and divmod so that the arithmetic stays integral on Python 2 and Python 3
    if seconds >= 3600:
        hours, remainder = divmod(seconds, 3600)
        minutes, secs = divmod(remainder, 60)
        return str(hours) + ':' + str(minutes).zfill(2) + ':' + str(secs).zfill(2)
    elif seconds >= 60:
        minutes, secs = divmod(seconds, 60)
        return str(minutes) + ':' + str(secs).zfill(2)
    else:
        return seconds

## Data Prep

Activities for each file:
* Split into multiple files - one file per event
* Standardise the format - CSV with minimal quoting
* Apply cutoffs - TODO

Note: All of the output files are safe to distribute due to the application of cutoffs

In [448]:
import os, csv

# Function to write rows using the CSV writer
def writeResults(basename, event, rows):
    """Write event results from memory to CSV

    basename -- name of the sub-directory of 'data' which receives the file
    event    -- event id, used as the file name (without the '.csv' suffix)
    rows     -- iterable of rows, each row being a sequence of fields

    The file data/<basename>/<event>.csv is created or overwritten.
    The output directory is created if it does not already exist.
    """
    import sys

    directory = os.path.join('data', basename)
    if not os.path.isdir(directory):
        os.makedirs(directory)

    fn = os.path.join(directory, event + '.csv')

    # The csv module needs a binary file on Python 2 but a text file
    # opened with newline='' on Python 3
    if sys.version_info[0] >= 3:
        f = open(fn, 'w', newline='')
    else:
        f = open(fn, 'wb')

    with f:
        csvWriter = csv.writer(f, quoting = csv.QUOTE_MINIMAL)
        csvWriter.writerows(rows)
    
# Process all 3 input files
#
# Each input file data/<basename>.csv is split into one output file per
# recognised event. The first column of an input row is the event id and the
# second column is converted to int and compared against the event's cutoff;
# the remaining columns are copied to the output unchanged (presumably
# result value and count - confirm against the input files).
#
# Rows below the cutoff are written as-is; all other rows are only counted and
# are written as a single trailing [cutoff, count] row.
#
# NOTE: rows for an event must be contiguous in the input. If an event id
# reappears later, its output file is overwritten by the later group.
import sys

for basename in ['all_averages', 'senior_averages', 'known_averages']:

    fn = os.path.join('data', basename + '.csv')

    # The csv module needs a binary file on Python 2 but a text file
    # opened with newline='' on Python 3
    if sys.version_info[0] >= 3:
        f = open(fn, 'r', newline='')
    else:
        f = open(fn, 'rb')

    with f:
        # Read rows using the CSV reader
        csvReader = csv.reader(f)

        # Initialise the event / results
        event = None
        results = []
        cutoff = 0
        count = 0

        # Process each row individually
        for inputRow in csvReader:

            # Skip blank lines and rows which are not for a recognised event
            if not inputRow or inputRow[0] not in eventsDict:
                continue

            # Has the event changed?
            if event != inputRow[0]:

                # Save the previous event
                if event is not None:
                    if count > 0:
                        results.append([cutoff, count])
                    writeResults(basename, event, results)

                # Initialise the event / results
                event = inputRow[0]
                results = []
                cutoff = int(eventsDict[event][3])
                count = 0

            # Add the current row to the output buffer, or count it if it
            # is at or beyond the cutoff
            if int(inputRow[1]) < cutoff:
                results.append(inputRow[1:])
            else:
                count += 1

        # Save the final event
        if event is not None:
            if count > 0:
                results.append([cutoff, count])
            writeResults(basename, event, results)

## Read Event / Results from CSV

Read event data from CSV into memory, prior to processing

In [449]:
class EventResults:
    """Results for a single event, read from data/<basename>/<event id>.csv"""

    def __init__(self):
        """Initialise the event / results"""

        # Name of the sub-directory of 'data' that was read (set by readResults)
        self.basename = None
        # Event tuple as passed to readResults
        self.event = None
        # One entry per result value: [count, running total, percentile],
        # or [None, None, None] for padding where no row had that value
        self.results = []
        # Last result value read which is below the cutoff and has a count > 0
        self.worst = 0
        # Sum of all counts
        self.total = 0

    def readResults(self, basename, event):
        """Read event results from CSV into memory

        basename -- name of the sub-directory of 'data' holding the file
        event    -- event tuple; event[0] is the file name (without '.csv')
                    and event[4] is the cutoff used to determine 'worst'

        Each row holds a result value and a count. The results list is padded
        so that, provided the rows are in ascending order of result value
        without duplicates, the list index equals the result value.
        Blank rows are ignored.
        """
        import sys

        self.basename = basename
        self.event = event
        self.results = []
        self.worst = 0
        self.total = 0

        fn = os.path.join('data', self.basename, self.event[0] + '.csv')

        # The csv module needs a binary file on Python 2 but a text file
        # opened with newline='' on Python 3
        if sys.version_info[0] >= 3:
            f = open(fn, 'r', newline='')
        else:
            f = open(fn, 'rb')

        with f:
            # Process each row individually
            for inputRow in csv.reader(f):

                # Blank lines come back as empty rows
                if not inputRow:
                    continue

                value = int(inputRow[0])
                count = int(inputRow[1])

                # Pack out results with placeholders up to the current value
                while value > len(self.results):
                    self.results.append([None, None])

                if value < self.event[4] and count > 0:
                    self.worst = value
                self.total += count

                self.results.append([count, self.total])

        # Analyse the results
        self.analyseResults()

    def analyseResults(self):
        """Analyse the results which are in memory

        Appends a percentile (running total as a percentage of the overall
        total) to every entry of self.results, or None for entries without a
        running total. Each call appends another element, so this is intended
        to be called once per readResults.
        """

        for result in self.results:
            if result[1]:
                result.append(100.0 * result[1] / self.total)
            else:
                result.append(None)

## Analyse results

Process 3 sets of results simultaneously

In [450]:
class EventAnalysis:
    """Cross-check the WCA, senior and known result sets of a single event

    Any inconsistency that is found is reported as a warning on stdout.
    """

    def __init__(self):
        """Initialise the event / results"""

        self.event = None
        self.wcaResults = None
        self.seniorResults = None
        self.knownResults = None

    def readResults(self, event):
        """Read event results from CSV into memory and run the sanity checks

        event -- event tuple; event[4] is the cutoff

        Events without a positive cutoff are skipped, in which case the
        object is left untouched.
        """

        # Skip processing if no cutoff is defined
        if event[4] > 0:
            self.event = event

            self.wcaResults = EventResults()
            self.wcaResults.readResults('all_averages', self.event)

            self.seniorResults = EventResults()
            self.seniorResults.readResults('senior_averages', self.event)

            self.knownResults = EventResults()
            self.knownResults.readResults('known_averages', self.event)

            self.checkSanity()

    def checkSanity(self):
        """General sanity checks

        Runs all of the checks for every result value (list index) present
        in any of the three result sets. A result set which is shorter than
        the others contributes an empty placeholder entry, so senior / known
        entries beyond the end of the WCA results are checked as well.
        """

        resultSets = (self.wcaResults.results,
                      self.seniorResults.results,
                      self.knownResults.results)

        for i in range(max(len(resultSet) for resultSet in resultSets)):

            # Every result set needs a boundary check
            wcaResult, seniorResult, knownResult = [
                resultSet[i] if i < len(resultSet) else [None, None, None]
                for resultSet in resultSets]

            # The list index is the result value - only times are formatted
            if self.event[3] == 'time':
                result = formatTime(i)
            else:
                result = i

            self.checkUniqueness(result, wcaResult, seniorResult, knownResult)
            self.checkSenior(result, wcaResult, seniorResult)
            self.checkKnown(result, seniorResult, knownResult)

    def checkUniqueness(self, result, wcaResult, seniorResult, knownResult):
        """Check for personally identifiable result

        result       -- result value as shown in the warning message
        wcaResult    -- WCA entry; element [0] is the count (or None)
        seniorResult -- senior entry; element [0] is the count (or None)
        knownResult  -- known entry; element [0] is the count (or None)

        Uniqueness is the number of unknown seniors (senior - known) as a
        percentage of the WCA results which do not belong to a known senior
        (wca - known). A warning is printed when it is 50% or more.
        """

        # Missing counts (None) are treated as zero
        senior = seniorResult[0] or 0
        known = knownResult[0] or 0

        # Determine the number of unknown results
        unknown = senior - known

        # Without any WCA results there is nothing to be identified
        if wcaResult[0]:
            possible = wcaResult[0] - known
        else:
            possible = 0

        if possible > 0:
            uniqueness = 100.0 * unknown / possible

            if uniqueness >= 50:
                print('Warning: %s result of %s - uniqueness is %.2f%% (%d of %d)' %
                      (self.event[1], result, uniqueness, unknown, possible))

    def checkSenior(self, result, wcaResult, seniorResult):
        """Check for counts which don't make sense

        Prints a warning if the senior count exceeds the WCA count.
        Missing counts (None) are treated as zero.
        """

        wca = wcaResult[0] or 0
        senior = seniorResult[0] or 0

        if wca < senior:
            print('Warning: %s result of %s - senior exceeds wca (%d of %d)' %
                  (self.event[1], result, senior, wca))

    def checkKnown(self, result, seniorResult, knownResult):
        """Check for counts which don't make sense

        Prints a warning if the known count exceeds the senior count.
        Missing counts (None) are treated as zero.
        """

        senior = seniorResult[0] or 0
        known = knownResult[0] or 0

        if senior < known:
            print('Warning: %s result of %s - known exceeds senior (%d of %d)' %
                  (self.event[1], result, known, senior))

In [451]:
# Run the sanity checks for every recognised event, using a fresh
# EventAnalysis per event (readResults skips events which have no cutoff)
for event in events:
    eventAnalysis = EventAnalysis()
    eventAnalysis.readResults(event)



# TODO - check the negatives as the person may not be 40!

https://www.worldcubeassociation.org/persons/2012PETR01?event=555

5x5x5 - 3:12.61 (2018) - ('2012PETR01', '1975-12-31', 'Niki_Petrov'). SUSPECT NO DOB IN WCA

https://www.worldcubeassociation.org/persons/2004FERN01?event=333oh

3x3x3 OH - 38.12 (2008) - ('2004FERN01', NULL, 'ernesto'), SUSPECT NO DOB IN WCA

https://www.worldcubeassociation.org/persons/2007HUGH01?event=333oh

3x3x3 OH - 38.85 (2012) - ('2007HUGH01', '1962-03-16', 'Mike Hughey')

https://www.worldcubeassociation.org/persons/2007SANC01?event=sq1

Sq-1 - 3:49.19 (2009) - ('2007SANC01', NULL, 'Cabezuelo'), PROVIDED BY QQWREF - DROP HIM FROM MY LIST