In [1]:
# Sample input 1: a whitespace-separated table of stocks.
# The first line holds the column names, every following line is one row.
file_1 = """Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B"""

# Sample input 2: same layout (header line + rows), this time with three rows.
file_2 = """Employee Wage Hired Promotion
Linda 3000 2017 Yes
Bob 2000 2016 No
Joshua 800 2019 Yes"""

### My solution
Other approaches are possible

In [2]:
def parser(stringa):
    """
    Parse a whitespace-separated table into a dict of columns.

    The first non-blank line supplies the keys; every following non-blank
    line is one row. Each key maps to the list of values found in its
    column, in row order. All values are kept as strings.

    Parameters
    ----------
    stringa : str
        Table text: one row per line, fields separated by whitespace.

    Returns
    -------
    dict
        ``{column_name: [value_of_row_1, value_of_row_2, ...]}``.
        An empty dict when the input contains no non-blank line.

    Raises
    ------
    IndexError
        If a row has fewer fields than the header line.
    """
    # rows will be a list of lists:
    # each sub list contains the words of a single non-blank line
    rows = list()
    for line in stringa.splitlines():
        words = line.split()
        if words:  # blank lines carry no data; skipping them avoids an IndexError below
            rows.append(words)

    # nothing to parse (empty or whitespace-only input)
    if not rows:
        return dict()

    keys = rows[0]    # the first line is the header
    rows = rows[1:]   # now rows does not include the header

    result = dict()
    for index, key in enumerate(keys):
        values = list()
        for row in rows:
            values.append(row[index])
        result[key] = values

    return result

In [3]:
parser(file_1)

{'Stock': ['Apple', 'Tesla'],
 'Close': ['188.72', '278.62'],
 'Beta': ['0.2', '0.5'],
 'Cap': ['895.667B', '48.338B']}

In [4]:
parser(file_2)

{'Employee': ['Linda', 'Bob', 'Joshua'],
 'Wage': ['3000', '2000', '800'],
 'Hired': ['2017', '2016', '2019'],
 'Promotion': ['Yes', 'No', 'Yes']}

### Test
We want to verify carefully that everything works as intended

In [5]:
def feel_bored_1_test(function):
    """
    Verify that function parses file_1 and file_2 into the expected dicts.

    Parameters
    ----------
    function : callable
        Parser under test; it is called with a single string argument.

    Returns
    -------
    list of bool
        One entry per test, in order: True if the output of function
        equals the expected dict, False otherwise. A "passed" /
        "not passed" line is also printed for each test.
    """
    result_1 = {'Stock': ['Apple', 'Tesla'], 'Close': ['188.72', '278.62'], 'Beta': ['0.2', '0.5'],
           'Cap': ['895.667B', '48.338B']}

    result_2 = {'Employee': ['Linda', 'Bob', 'Joshua'], 'Wage':  ['3000', '2000', '800'], 'Hired': ['2017', '2016', '2019'],
           'Promotion': ['Yes', 'No', 'Yes']}

    results = list()
    # file_1 / file_2 are the module-level sample inputs; result_1 / result_2
    # are the expected outputs defined just above (note the underscores).
    if function(file_1) == result_1:
        print("Test 1 passed")
        results.append(True)
    else:
        print("Test 1 not passed")
        results.append(False)

    if function(file_2) == result_2:
        print("Test 2 passed")
        results.append(True)
    else:
        print("Test 2 not passed")
        results.append(False)

    return results

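We can follow the DRY (Don't Repeat Yourself) principle and improve the testing function with a for loop. Note that the cell below redefines `feel_bored_1_test`, replacing the version above.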

In [6]:
def feel_bored_1_test(function):
    """
    Check function against the expected parse of file_1 and file_2.

    Each sample input is passed to function and the output is compared
    with the expected dict. A "Test N passed" / "Test N not passed" line
    is printed per case, and the outcomes are returned as a list of
    booleans in the same order.
    """
    expected_stocks = {
        'Stock': ['Apple', 'Tesla'],
        'Close': ['188.72', '278.62'],
        'Beta': ['0.2', '0.5'],
        'Cap': ['895.667B', '48.338B'],
    }
    expected_employees = {
        'Employee': ['Linda', 'Bob', 'Joshua'],
        'Wage': ['3000', '2000', '800'],
        'Hired': ['2017', '2016', '2019'],
        'Promotion': ['Yes', 'No', 'Yes'],
    }

    # maps every input text to the dict the parser is expected to return
    expected_by_input = {file_1: expected_stocks, file_2: expected_employees}

    outcomes = []
    for number, (text, expected) in enumerate(expected_by_input.items(), start=1):
        passed = bool(function(text) == expected)
        outcomes.append(passed)
        verdict = "passed" if passed else "not passed"
        print(f"Test {number} {verdict}")

    return outcomes

In [7]:
feel_bored_1_test(parser)

Test 1 passed
Test 2 passed


[True, True]

### Improve code

In [8]:
def fast_parser(stringa):
    """
    Parse a whitespace-separated table into a dict of columns.

    The first non-blank line supplies the keys; every following non-blank
    line is one row. Each key maps to the list of values found in its
    column, in row order. All values are kept as strings.

    Parameters
    ----------
    stringa : str
        Table text: one row per line, fields separated by whitespace.

    Returns
    -------
    dict
        ``{column_name: [value_of_row_1, value_of_row_2, ...]}``.
        An empty dict when the input contains no non-blank line.

    Raises
    ------
    IndexError
        If a row has fewer fields than the header line.
    """
    # list of lists, one per non-blank line (blank lines split to [] and are dropped)
    split_lines = (line.split() for line in stringa.splitlines())
    rows = [words for words in split_lines if words]

    # empty or whitespace-only input: there is no header to read
    if not rows:
        return {}

    keys, *rows = rows    # first line is the header, the rest are data rows

    return {
        key: [row[index] for row in rows] for index, key in enumerate(keys)
    }

### Everything appears to work as intended

In [9]:
feel_bored_1_test(fast_parser)

Test 1 passed
Test 2 passed


[True, True]

### Efficiency does not matter, but it's still interesting to measure
We can see that the difference is insignificant for small inputs

In [10]:
%%timeit
parser(file_1)

6.57 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
%%timeit
fast_parser(file_1)

6.37 µs ± 217 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


### With bigger inputs, parsing efficiency becomes relevant
<br>

**Key Takeaway:** 
<br>
Do not spend time optimizing code unless you actually need the extra speed
<br>
<br>
**Premature optimization is the root of all evil**

In [12]:
# Build a larger benchmark input: 100 copies of file_1, each followed by a newline.
# The header line is repeated in every copy, so every header after the first one
# is parsed as an ordinary data row -- acceptable here, since only timing matters.
big_input = (file_1 + '\n') * 100
print(big_input)

Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 48.338B
Stock Close Beta Cap
Apple 188.72 0.2 895.667B
Tesla 278.62 0.5 

In [13]:
%%timeit
parser(big_input)

293 µs ± 17.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [14]:
%%timeit
fast_parser(big_input)

249 µs ± 15.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
