# Percentile Ranks

Created by Michael George (AKA Logiqx)

Link: https://www.speedsolving.com/forum/showthread.php?54128-How-fast-are-the-over-40-s-in-competitions

In [120]:
# Recognised events, one tuple per event: event id, display name, a numeric
# string (presumably a sort order - TODO confirm) and a result type
# ('time', 'number' or 'multi')
eventsInfo = [
    ('333', '3x3x3 Cube', '10', 'time'),
    ('222', '2x2x2 Cube', '20', 'time'),
    ('444', '4x4x4 Cube', '30', 'time'),
    ('555', '5x5x5 Cube', '40', 'time'),
    ('666', '6x6x6 Cube', '50', 'time'),
    ('777', '7x7x7 Cube', '60', 'time'),
    ('333bf', '3x3x3 Blindfolded', '70', 'time'),
    ('333fm', '3x3x3 Fewest Moves', '80', 'number'),
    ('333oh', '3x3x3 One-Handed', '90', 'time'),
    ('333ft', '3x3x3 With Feet', '100', 'time'),
    ('clock', 'Clock', '110', 'time'),
    ('minx', 'Megaminx', '120', 'time'),
    ('pyram', 'Pyraminx', '130', 'time'),
    ('skewb', 'Skewb', '140', 'time'),
    ('sq1', 'Square-1', '150', 'time'),
    ('444bf', '4x4x4 Blindfolded', '160', 'time'),
    ('555bf', '5x5x5 Blindfolded', '170', 'time'),
    ('333mbf', '3x3x3 Multi-Blind', '180', 'multi'),
]

# Event ids only, in the same order as eventsInfo
events = [info[0] for info in eventsInfo]

# Event id -> the rest of that event's tuple (everything after the id)
eventsDict = dict((info[0], info[1:]) for info in eventsInfo)

## Data Prep

Activities for each file:
* Split into multiple files - one file per event
* Standardise the format - CSV with minimal quoting
* Apply cutoffs - TODO

Note: All of the output files are safe to distribute due to the application of cutoffs

In [121]:
import os, csv

# Function to write rows using the CSV writer
def writeResults(basename, event, rows):
    """Write event results from memory to CSV

    The rows are written to data/<basename>/<event>.csv, replacing any
    existing file of that name. The data/<basename> folder is created if
    it does not exist yet.

    basename -- name of the sub-folder of 'data' to write into
    event    -- event id, used as the file name (without the extension)
    rows     -- iterable of rows, each row being a sequence of fields
    """

    outDir = os.path.join('data', basename)

    # Opening the file fails if the folder is missing, so create it on first use
    if not os.path.isdir(outDir):
        os.makedirs(outDir)

    fn = os.path.join(outDir, event + '.csv')

    # Binary mode is what the Python 2 csv module expects for its file object
    with open(fn, 'wb') as f:
        csvWriter = csv.writer(f, quoting = csv.QUOTE_MINIMAL)
        csvWriter.writerows(rows)
    
# Process all 3 input files
#
# Each input file data/<basename>.csv has the event id in its first column.
# Consecutive rows for the same recognised event are collected and handed to
# writeResults() with the event column removed. Rows are flushed whenever the
# event id changes, so the rows for one event are expected to be contiguous.
for basename in ['all_averages', 'senior_averages', 'known_averages']:

    # Read rows using the CSV reader
    fn = os.path.join('data', basename + '.csv')
    with open(fn, 'rb') as f:
        csvReader = csv.reader(f)

        # Initialise the event / results
        event = None
        results = []

        # Process each row individually
        for inputRow in csvReader:

            # Skip blank lines (the reader yields an empty list for them, which
            # has no first column) and rows for events that are not recognised
            if not inputRow or inputRow[0] not in events:
                continue

            # Has the event changed?
            if event != inputRow[0]:

                # Save the previous event
                if event is not None:
                    writeResults(basename, event, results)

                # Initialise the event / results
                event = inputRow[0]
                results = []

            # Add the current row (without the event column) to the output buffer
            results.append(inputRow[1:])

        # Save the final event
        if event is not None:
            writeResults(basename, event, results)

## Read Event / Results from CSV

Read event data from CSV into memory, prior to processing

In [142]:
class EventResults:
    """Result counts for a single event, loaded from data/<basename>/<event>.csv

    Attributes:
    basename -- sub-folder of 'data' the results were read from (None until read)
    event    -- event id the results belong to (None until read)
    results  -- list of [count, running total, percentile] rows; positions
                that were padded during reading hold [None, None, None]
    total    -- sum of all the counts read
    """

    def __init__(self):
        """Initialise the event / results"""

        self.basename = None
        self.event = None
        self.results = []
        self.total = 0

    def readResults(self, basename, event):
        """Read event results from CSV into memory

        Each CSV row is read as (value, count), both integers. Any previously
        loaded results are discarded, and the results are analysed once the
        whole file has been read.

        basename -- sub-folder of 'data' holding the per-event files
        event    -- event id, used as the file name (without the extension)
        """

        self.basename = basename
        self.event = event
        self.results = []
        self.total = 0

        # Read rows using the CSV reader
        fn = os.path.join('data', self.basename, self.event + '.csv')
        with open(fn, 'rb') as f:
            csvReader = csv.reader(f)

            # Process each row individually
            for inputRow in csvReader:
                value = int(inputRow[0])

                # Pad with empty rows so that this row lands at index 'value'
                # (this relies on the rows arriving in ascending order of value)
                while value > len(self.results):
                    self.results.append([None, None])

                count = int(inputRow[1])
                self.total += count

                self.results.append([count, self.total])

        # Analyse the results
        self.analyseResults()

    def analyseResults(self):
        """Analyse the results which are in memory

        Sets the third column of every row to the running total expressed as
        a percentage of the overall total, or None where the running total is
        empty (None or zero).
        """

        for result in self.results:
            if result[1]:
                percentile = 100.0 * result[1] / self.total
            else:
                percentile = None

            # Overwrite from column 2 onwards rather than appending, so that
            # calling this method again does not add extra columns to each row
            result[2:] = [percentile]

        # print self.results

## Analyse results

Process 3 sets of results simultaneously

In [155]:
class EventAnalysis:
    """Compare the 'all', 'senior' and 'known' results of a single event

    Attributes:
    event         -- event id being analysed (None until read)
    wcaResults    -- results read from 'all_averages'
    seniorResults -- results read from 'senior_averages'
    knownResults  -- results read from 'known_averages'
    """

    def __init__(self):
        """Initialise the event / results"""

        self.event = None
        self.wcaResults = None
        self.seniorResults = None
        self.knownResults = None

    def readResults(self, event):
        """Read event results from CSV into memory

        Loads the three result sets for the event and then analyses them.

        event -- event id, passed on to each EventResults.readResults() call
        """

        self.event = event

        self.wcaResults = EventResults()
        self.wcaResults.readResults('all_averages', self.event)

        self.seniorResults = EventResults()
        self.seniorResults.readResults('senior_averages', self.event)

        self.knownResults = EventResults()
        self.knownResults.readResults('known_averages', self.event)

        self.analyseResults()

    def _resultAt(self, eventResults, i):
        """Return row i of eventResults.results, or an empty row if i is past the end"""

        if i < len(eventResults.results):
            return eventResults.results[i]

        return [None, None, None]

    def analyseResults(self):
        """Analyse the results which are in memory

        Walks the 'all' results by index and, wherever the senior count is at
        least 50% of the overall count at the same index, prints the event,
        the index, the three result rows and the percentage.
        """

        for i, wcaResult in enumerate(self.wcaResults.results):
            seniorResult = self._resultAt(self.seniorResults, i)
            knownResult = self._resultAt(self.knownResults, i)

            # Nothing to report where there are no senior results at this index
            if not seniorResult[0]:
                continue

            # Senior count as a percentage of the overall count at this index
            # NOTE(review): assumes the overall count is set wherever the senior
            # count is - confirm against the input data
            uniqueness = 100.0 * seniorResult[0] / wcaResult[0]

            if uniqueness >= 50:
                # Space-separated str() of each field, written so that the line
                # is valid on both Python 2 and Python 3
                fields = (self.event, i, wcaResult, seniorResult, knownResult, uniqueness)
                print(' '.join(str(field) for field in fields))

## Process Averages

Process all of the results

In [156]:
eventAnalysis = EventAnalysis()
eventAnalysis.readResults('333')


333 194 [3, 109351, 99.77372056314383] [2, 1492, 97.45264532984977] [None, None, None] 66.6666666667
333 199 [7, 109366, 99.78740681940529] [4, 1498, 97.84454604833442] [1, 113, 100.0] 57.1428571429
333 210 [4, 109417, 99.83394009069426] [2, 1503, 98.17112998040497] [None, None, None] 50.0
333 220 [2, 109457, 99.87043677405816] [1, 1508, 98.4977139124755] [None, None, None] 50.0
333 224 [3, 109461, 99.87408644239454] [2, 1510, 98.62834748530372] [None, None, None] 66.6666666667
333 230 [2, 109475, 99.88686028157191] [1, 1511, 98.69366427171784] [None, None, None] 50.0
333 236 [1, 109489, 99.89963412074928] [1, 1513, 98.82429784454605] [None, None, None] 100.0
333 237 [2, 109491, 99.90145895491747] [1, 1514, 98.88961463096015] [None, None, None] 50.0
333 239 [1, 109494, 99.90419620616976] [1, 1515, 98.95493141737427] [None, None, None] 100.0
333 252 [2, 109517, 99.92518179910401] [1, 1518, 99.15088177661659] [None, None, None] 50.0
333 257 [2, 109522, 99.9297438845245] [1, 1519, 99.2161