# Processing Wheel Files

## Loading and checking results files

Here is a function that parses a wheel result file and runs some sanity checks on its content.

In [1]:
def load_wheel_file(filepath, min_sector=1, max_sector=16, min_label=1, max_label=41):
    """
    Load a wheel result file and validate its content.

    The file is expected to hold one line per sector, formatted as:
        sector_number, label_number, label_number, label_number
        ...
    for example:
        1,,,,,,,
        2,24,,,,,,
        3,9,,,,,,
        4,26,,,,,,
        5,27,4,,,,,
        6,8,,,,,,
        7,7,,,,,,
        8,18,,,,,,
        9,13,3,,,,,
        10,23,,,,,,
        11,22,,,,,,
        12,11,19,21,33,35,,
        13,1,12,15,31,36,38,
        14,10,2,14,20,25,32,34
        15,,,,,,,
        16,,,,,,,
    where the first value of a line is the sector number and the following
    values are the labels associated with that sector. Empty fields are
    ignored and blank lines are skipped.

    Parameters:
    - filepath: path of the file to read
    - min_sector, max_sector: inclusive range of accepted sector numbers
    - min_label, max_label: inclusive range of accepted label numbers
    Each pair of bounds may be given in either order.

    Returns the content as a dictionary mapping each sector number to the
    list of its labels, in file order:
        {1: [], 2: [24], 3: [9], ... }

    Raises EnvironmentError when:
    - a sector number is not between min_sector and max_sector
    - a label number is not between min_label and max_label
    - a sector number is given twice
    - a label number is given twice
    A field that is not an integer makes int() raise ValueError.

    Prints a warning (without failing) about:
    - missing sector numbers
    - missing label numbers
    """
    # Normalise each pair of bounds in one step. Computing the minimum first
    # and then reusing it to compute the maximum would collapse a swapped
    # range (e.g. 16, 1) to a single value.
    min_sector, max_sector = sorted((int(min_sector), int(max_sector)))
    min_label, max_label = sorted((int(min_label), int(max_label)))

    file_content = dict()  # holds the result of the file parsing
    found_labels = set()   # every label seen so far, to detect duplicates

    print("processing file : %s" % filepath)

    with open(filepath) as input_file:
        # idx is the 1-based line number, used in the error messages
        for idx, line in enumerate(input_file, start=1):
            if not line.strip():
                continue  # tolerate blank lines, e.g. at the end of the file

            split = line.split(",")
            sector_number = int(split[0])

            # checking if sector_number was already registered
            if sector_number in file_content:
                raise EnvironmentError(
                    "%s (line %s) : sector_number found twice : %s" % (filepath, idx, sector_number))

            # checking if sector_number has an expected value
            if not (min_sector <= sector_number <= max_sector):
                raise EnvironmentError(
                    "%s (line %s) : sector_number not between %s and %s : %s" %
                    (filepath, idx, min_sector, max_sector, sector_number))

            # adding line as a list of labels with the key as the sector number
            sector_labels = file_content[sector_number] = list()
            for label in split[1:]:
                label = label.strip()  # remove newline and padding
                if not label:
                    continue  # empty field: no label in this column

                label_number = int(label)

                # check label_number is in the expected range
                if not (min_label <= label_number <= max_label):
                    raise EnvironmentError(
                        "%s (line %s) : label_number not between %s and %s : %s" %
                        (filepath, idx, min_label, max_label, label_number))

                # check label_number is not a duplicate (across the whole file)
                if label_number in found_labels:
                    raise EnvironmentError(
                        "%s (line %s) : label_number duplicated : %s" %
                        (filepath, idx, label_number))

                found_labels.add(label_number)
                sector_labels.append(label_number)

    # check if all labels were found
    missing_labels = [label for label in range(min_label, max_label + 1)
                      if label not in found_labels]
    if missing_labels:
        print("\x1b[31mWARNING: did not find label(s) %s\x1b[0m" % missing_labels)
    else:
        print("No missing labels")

    # check if all sectors were filled in
    missing_sectors = [sector for sector in range(min_sector, max_sector + 1)
                       if sector not in file_content]
    if missing_sectors:
        print("\x1b[31mWARNING: did not find sector(s) %s\x1b[0m" % missing_sectors)
    else:
        print("No missing sectors")

    return file_content

Now one should specify the following values :

- FOLDER_PATH : path where to find the input files
- OUTPUT_FILE : path of the file in which to accumulate the results
- SKIP_LABEL : a prefix to exclude files from being processed

In [2]:
# Identifier of the chart being processed; it selects both the input folder
# and the name of the output file below.
test_ID = "A"

# Files whose name starts with this prefix are excluded from processing.
SKIP_LABEL = "VOID"

# Folder holding the input files of the selected chart.
FOLDER_PATH = "".join((r"..\Chart ", test_ID))

# File in which the compiled results are written.
OUTPUT_FILE = "".join((r"..\wheel_test_", test_ID, "_results.csv"))

In [3]:
import os
# Build the list of regular files found directly in FOLDER_PATH
# (anything that is not a file, e.g. a sub-folder, is left out).
candidate_paths = (os.path.join(FOLDER_PATH, entry) for entry in os.listdir(FOLDER_PATH))
files_list = list(filter(os.path.isfile, candidate_paths))
print(files_list)

# Parse every file, keyed by its base name, except those whose name starts
# with SKIP_LABEL (compared case-insensitively).
results = dict()
for file_path in files_list:
    print()
    file_name = os.path.basename(file_path)
    if file_name.upper().startswith(SKIP_LABEL.upper()):
        continue
    results[file_name] = load_wheel_file(file_path)

# Show what was loaded from each file.
for file, res in results.items():
    print(file, " :\n", res)

['..\\Chart A\\Chart A s1.csv', '..\\Chart A\\Chart A s10.csv', '..\\Chart A\\Chart A s11.csv', '..\\Chart A\\Chart A s12.csv', '..\\Chart A\\Chart A s13.csv', '..\\Chart A\\Chart A s14.csv', '..\\Chart A\\Chart A s15.csv', '..\\Chart A\\Chart A s16.csv', '..\\Chart A\\Chart A s17.csv', '..\\Chart A\\Chart A s18.csv', '..\\Chart A\\Chart A s2.csv', '..\\Chart A\\Chart A s21.csv', '..\\Chart A\\Chart A s4.csv', '..\\Chart A\\Chart A s5 VOID AFTER 28.csv', '..\\Chart A\\Chart A s6.csv', '..\\Chart A\\Chart A s7.csv', '..\\Chart A\\Chart A s8.csv', '..\\Chart A\\Chart A s9.csv', '..\\Chart A\\VOID Chart A s19.csv', '..\\Chart A\\VOID Chart A s20.csv', '..\\Chart A\\VOID Chart A s3.csv', '..\\Chart A\\VOID_Chart A sRetard.csv']

processing file : ..\Chart A\Chart A s1.csv
No missing sectors

processing file : ..\Chart A\Chart A s10.csv
No missing sectors

processing file : ..\Chart A\Chart A s11.csv
No missing sectors

processing file : ..\Chart A\Chart A s12.csv
No missing sectors

proces

In [4]:
def print_to_file(output_file, input_list, separator=" , "):
    """Write the elements of input_list as one line of output_file, joined by separator."""
    line = separator.join(map(str, input_list))
    print(line, file=output_file)

# elements for natural sorting by filename
#https://stackoverflow.com/questions/5967500/how-to-correctly-sort-a-string-with-a-number-inside
import re

def atoi(text):
    """Return text converted to an int when it is made of digits only, else text unchanged."""
    if not text.isdigit():
        return text
    return int(text)

def natural_keys(text):
    '''
    Return a sort key that orders strings in human ("natural") order.

    alist.sort(key=natural_keys) sorts in human order, e.g. "s2" before "s10"
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)

    The text is split around its runs of digits; the capturing group keeps
    those runs in the result, and atoi turns each of them into an int so that
    they compare numerically instead of character by character.
    '''
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    return [atoi(c) for c in re.split(r'(\d+)', text)]

"""
alist=[
    "something1",
    "something12",
    "something17",
    "something2",
    "something25",
    "something29"]

alist.sort(key=natural_keys)
print(alist)
""" 

def results_compilation(results_dict, output_file, min_label=1, max_label=41):
    """
    Given a result dictionary, compile the results into a csv file.

    The file starts with a title line ("Result ID" followed by every label
    number from min_label to max_label). It then holds one line per entry of
    results_dict, in natural order of the keys (see natural_keys): the key,
    followed for each label by the sector in which that label was found, or
    "nan" when the label appears in no sector. If a label appears in several
    sectors, the first sector in the dictionary order is kept.

    Parameters:
    - results_dict: maps a result name to a {sector: [labels]} dictionary
    - output_file: path of the file to write (overwritten if it exists)
    - min_label, max_label: inclusive range of labels to report, in either order

    Example of expected results_dict:
    {'Chart A s1.csv': {1: [], 2: [24], 3: [9], 4: [26], 5: [27, 4], 6: [8], 7: [7], 8: [18], 9: [13, 3], 10: [23], 11: [22], 12: [11, 19, 21, 33, 35], 13: [1, 12, 15, 31, 36, 38], 14: [10, 2, 14, 20, 25, 32, 34], 15: [], 16: []}, 'Chart A s2.csv': {1: [23, 30], 2: [26, 32], 3: [17, 19, 31], 4: [18, 16, 33], 5: [14], 6: [11, 20], 7: [13], 8: [25, 28], 9: [24, 7], 10: [5, 8, 27], 11: [4, 10, 15], 12: [12], 13: [1, 3, 34, 36], 14: [2, 21, 35], 15: [6, 22], 16: [9, 29]}}
    """
    # Normalise the bounds in one step: computing the minimum first and then
    # reusing it for the maximum would collapse a swapped range to one value.
    min_label, max_label = sorted((int(min_label), int(max_label)))
    labels = range(min_label, max_label + 1)

    title_line = ["Result ID"] + list(labels)

    with open(output_file, mode='w') as out_file:
        # add title line
        print_to_file(out_file, title_line)

        # process each result file, in human order of the file names
        for result_file in sorted(results_dict, key=natural_keys):
            # index the sector of each label once, instead of scanning every
            # sector again for every label; setdefault keeps the first sector
            # in which a label appears
            sector_by_label = dict()
            for sector, associated_labels in results_dict[result_file].items():
                for label in associated_labels:
                    sector_by_label.setdefault(label, sector)

            # build the line: the sector associated to each label, "nan" when
            # the label hasn't been found
            line = [result_file]
            line.extend(sector_by_label.get(label, "nan") for label in labels)

            # write the recomposed line
            print_to_file(out_file, line)
                    
                    
    
    
    

In [5]:
# Write the compiled results of every processed file to OUTPUT_FILE.
results_compilation(results, OUTPUT_FILE)

# Record when this notebook was last run.
import time
execution_stamp = time.strftime("%Y-%m-%d %H:%M")
print("Last execution : " + execution_stamp)

Last execution : 2018-04-08 20:23
