# Percentile Ranks

Created by Michael George (AKA Logiqx)

Link: https://www.speedsolving.com/forum/showthread.php?54128-How-fast-are-the-over-40-s-in-competitions

In [183]:
# List of recognised events, one tuple per event.
# Field 0 is the event id and field 4 is the cutoff applied to results;
# fields 1-3 look like (display name, ordering key, result format) - TODO confirm.
eventsInfo = [
    ('333', '3x3x3 Cube', '10', 'time', 180),
    ('222', '2x2x2 Cube', '20', 'time', 60),
    ('444', '4x4x4 Cube', '30', 'time', 240),
    ('555', '5x5x5 Cube', '40', 'time', 240),
    ('666', '6x6x6 Cube', '50', 'time', 360),
    ('777', '7x7x7 Cube', '60', 'time', 480),
    ('333bf', '3x3x3 Blindfolded', '70', 'time', 180),
    ('333fm', '3x3x3 Fewest Moves', '80', 'number', 60),
    ('333oh', '3x3x3 One-Handed', '90', 'time', 120),
    ('333ft', '3x3x3 With Feet', '100', 'time', 240),
    ('clock', 'Clock', '110', 'time', 60),
    ('minx', 'Megaminx', '120', 'time', 240),
    ('pyram', 'Pyraminx', '130', 'time', 60),
    ('skewb', 'Skewb', '140', 'time', 60),
    ('sq1', 'Square-1', '150', 'time', 120),
    ('444bf', '4x4x4 Blindfolded', '160', 'time', 0),
    ('555bf', '5x5x5 Blindfolded', '170', 'time', 0),
    ('333mbf', '3x3x3 Multi-Blind', '180', 'multi', 0)
]

# Event ids, in the order listed above
events = [info[0] for info in eventsInfo]

# Event id -> remaining fields of the tuple (everything after the id)
eventsDict = {info[0]: info[1:] for info in eventsInfo}

## Data Prep

Activities for each file:
* Split into multiple files - one file per event
* Standardise the format - CSV with minimal quoting
* Apply cutoffs - TODO

Note: All of the output files are safe to distribute due to the application of cutoffs

In [196]:
import os, csv

# Function to write rows using the CSV writer
def writeResults(basename, event, rows):
    """Write event results from memory to CSV

    The rows are written to data/<basename>/<event>.csv, replacing any
    existing file. The directory is created if it does not exist yet.

    basename -- name of the sub-directory of 'data' which receives the file
    event    -- event id, used as the file name (without the '.csv' suffix)
    rows     -- iterable of rows, each row being a sequence of fields
    """

    # Create the output directory on demand instead of failing in open()
    dirname = os.path.join('data', basename)
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

    fn = os.path.join(dirname, event + '.csv')
    # Binary mode is what the Python 2 csv module expects for its files
    with open(fn, 'wb') as f:
        csvWriter = csv.writer(f, quoting = csv.QUOTE_MINIMAL)
        csvWriter.writerows(rows)
    
# Process all 3 input files
#
# Each input file data/<basename>.csv is split into one file per event.
# Column 0 of every row is the event id and column 1 is compared against
# the event's cutoff. Rows below the cutoff are copied (minus the event id);
# rows at or beyond the cutoff are only counted and are emitted as a single
# trailing [cutoff, count] row for the event.
for basename in ['all_averages', 'senior_averages', 'known_averages']:

    # Read rows using the CSV reader
    fn = os.path.join('data', basename + '.csv')
    with open(fn, 'rb') as f:
        csvReader = csv.reader(f)

        # Initialise the event / results
        event = None
        results = []
        cutoff = 0
        count = 0

        # Process each row individually
        for inputRow in csvReader:

            # Skip blank lines (the CSV reader yields [] for them) and
            # rows whose first column is not a recognised event
            if not inputRow or inputRow[0] not in eventsDict:
                continue

            # Has the event changed?
            if event != inputRow[0]:

                # Save the previous event
                if event is not None:
                    if count > 0:
                        results.append([cutoff, count])
                    writeResults(basename, event, results)

                # Initialise the event / results
                event = inputRow[0]
                results = []
                cutoff = int(eventsDict[event][3])
                count = 0

            # Add the current row to the output buffer, or count it if
            # it is at or beyond the cutoff
            if int(inputRow[1]) < cutoff:
                results.append(inputRow[1:])
            else:
                count += 1

        # Save the final event
        if event is not None:
            if count > 0:
                results.append([cutoff, count])
            writeResults(basename, event, results)

## Read Event / Results from CSV

Read event data from CSV into memory, prior to processing

In [227]:
class EventResults:
    """Results of a single event, read from data/<basename>/<event>.csv

    After readResults(), self.results is a list indexed by the value in the
    first CSV column. Each entry is [count, running total, percentile], or
    [None, None, None] for an index which has no row in the file.
    """

    def __init__(self):
        """Initialise the event / results"""

        self.basename = None
        self.event = None
        self.results = []
        self.worst = 0
        self.total = 0

    def readResults(self, basename, event):
        """Read event results from CSV into memory

        Each row of the file is expected to hold (index, count), with the
        indexes in increasing order.

        basename -- sub-directory of 'data' which holds the file
        event    -- event id, used as the file name and to look up the cutoff
        """

        self.basename = basename
        self.event = event
        self.results = []
        self.worst = 0
        self.total = 0

        # Read rows using the CSV reader
        fn = os.path.join('data', self.basename, self.event + '.csv')
        with open(fn, 'rb') as f:
            csvReader = csv.reader(f)

            # Process each row individually
            for inputRow in csvReader:

                # Skip blank lines (the CSV reader yields [] for them)
                if not inputRow:
                    continue

                index = int(inputRow[0])
                count = int(inputRow[1])

                # Pad with empty entries so that this row lands at position 'index'
                while index > len(self.results):
                    self.results.append([None, None])

                # Remember the last index below the cutoff which has any results
                if index < eventsDict[self.event][3] and count > 0:
                    self.worst = index
                self.total += count

                self.results.append([count, self.total])

        # Analyse the results
        self.analyseResults()

    def analyseResults(self):
        """Analyse the results which are in memory

        Sets the third element of every entry to the running total as a
        percentage of the overall total, or to None when the entry has no
        running total (padding) or the running total is zero.
        """

        for result in self.results:
            if result[1]:
                percentile = 100.0 * result[1] / self.total
            else:
                percentile = None

            # Replace rather than append, so that a repeated call does not
            # keep adding columns to the entry
            result[2:] = [percentile]

## Analyse results

Process 3 sets of results simultaneously

In [233]:
class EventAnalysis:
    """Side-by-side analysis of the all / senior / known results of one event"""

    def __init__(self):
        """Initialise the event / results"""

        self.event = None
        self.wcaResults = None
        self.seniorResults = None
        self.knownResults = None

    def readResults(self, event):
        """Read event results from CSV into memory and analyse them

        event -- event id; the three result sets are read from the
                 'all_averages', 'senior_averages' and 'known_averages' files
        """

        self.event = event

        self.wcaResults = EventResults()
        self.wcaResults.readResults('all_averages', self.event)

        self.seniorResults = EventResults()
        self.seniorResults.readResults('senior_averages', self.event)

        self.knownResults = EventResults()
        self.knownResults.readResults('known_averages', self.event)

        self.analyseResults()

    def _resultAt(self, eventResults, i):
        """Return entry i of eventResults.results, or an all-None entry if it is too short"""

        if len(eventResults.results) > i:
            return eventResults.results[i]
        return [None, None, None]

    def analyseResults(self):
        """Analyse the results which are in memory

        Walks the three result sets in step and prints one line for the last
        index and for each set's 'worst' index, followed by a blank line.
        Each line holds the event, the index, the three entries and the
        'uniqueness': senior count minus known count, as a percentage of the
        overall count at that index.
        """

        lastIndex = len(self.wcaResults.results) - 1

        for i, wcaResult in enumerate(self.wcaResults.results):
            seniorResult = self._resultAt(self.seniorResults, i)
            knownResult = self._resultAt(self.knownResults, i)

            # Senior results at this index which are not in the known set;
            # a missing (None) count is treated as zero
            unknown = (seniorResult[0] or 0) - (knownResult[0] or 0)

            if wcaResult[0]:
                uniqueness = 100.0 * unknown / wcaResult[0]
            else:
                # No overall results at this index, so the ratio is undefined; 999 is a marker value
                uniqueness = 999

            if i in (lastIndex, self.wcaResults.worst, self.seniorResults.worst, self.knownResults.worst):
                # A single pre-formatted argument prints identically as a
                # Python 2 print statement and as a Python 3 print() call
                print('%s %s %s %s %s %s' % (self.event, i, wcaResult, seniorResult, knownResult, uniqueness))

        # Blank line between events
        print('')

## Process Averages

Process all of the results

In [234]:
# Analyse every event which has a cutoff; events with a cutoff of 0 are skipped
for event in events:
    if eventsDict[event][3] <= 0:
        continue

    eventAnalysis = EventAnalysis()
    eventAnalysis.readResults(event)

333 155 [21, 108918, 99.59036629299783] [5, 1435, 94.5945945945946] [1, 112, 99.11504424778761] 19.0476190476
333 179 [11, 109234, 99.87930435418686] [1, 1473, 97.0995385629532] [None, None, None] 9.09090909091
333 180 [132, 109366, 100.0] [44, 1517, 100.0] [1, 113, 100.0] 32.5757575758

222 46 [41, 67119, 99.47682001422812] [5, 570, 92.23300970873787] [1, 77, 100.0] 9.75609756098
222 59 [15, 67404, 99.89921745316576] [1, 597, 96.60194174757281] [None, None, None] 6.66666666667
222 60 [68, 67472, 100.0] [21, 618, 100.0] [None, None, None] 30.8823529412

444 179 [22, 22367, 97.72369800768962] [2, 169, 90.86021505376344] [2, 63, 100.0] 0.0
444 234 [4, 22781, 99.5325061167424] [2, 179, 96.23655913978494] [None, None, None] 50.0
444 239 [2, 22799, 99.61114994757078] [None, None, None] [None, None, None] 0.0
444 240 [89, 22888, 100.0] [7, 186, 100.0] [None, None, None] 7.86516853933

555 239 [12, 10559, 98.3147113594041] [1, 68, 76.40449438202248] [1, 33, 80.48780487804878] 0.0
555 240 [181