## Original solution:

Slightly edited to allow running in a notebook - `main()` added, and path handling edited to allow finding input file from within notebook

In [1]:
# %load ../../../day_08/Python/team_template/part1.py

import numpy as np
from pathlib import Path

from load_input import get_input


def get_layers(image_data, layer_size):
    layers = []
    for starting_index in range(0, len(image_data), layer_size):
        layers.append(image_data[starting_index: starting_index + layer_size])

    return layers


def meaningless_calculation(image_data, width, height):
    layer_size = width * height
    layers = get_layers(image_data, layer_size)
    zero_count = layer_size
    zero_index = 0
    for index, layer in enumerate(layers):
        if layer.count('0') < zero_count:
            zero_count = layer.count('0')
            zero_index = index
    fewest_zeros_layer = layers[zero_index]
    return fewest_zeros_layer.count('1') * fewest_zeros_layer.count('2')


def decode_image_using_numpy(image_data, width, height):
    image_data = image_data.reshape((-1, height, width))
    final_image = image_data[0].copy()
    number_of_layers = image_data.shape[0]
    # So what's going on here?  At this point, we have a 3D array: (depth,
    # height, width).  This loop is looping over each depth layer:
    for index in range(number_of_layers):
        final_image[final_image == 2] = image_data[index][final_image == 2]
        # And this is where the work happens.  Let's break it into pieces:
        #
        # Both sides: [final_image==2] - creates a 2D boolean array of
        # (height, width), with values of True where final_image==2, False
        # everywhere else.  Note this is regenerated for each layer.  I'm
        # deliberately avoiding the term "mask" here - a mask has a particular
        # meaning in numpy.
        #
        # LHS: final_image[final_image==2] - this selects only those pixels in
        # the final_image that still have value of 2 (i.e. those that are
        # still transparent).
        #
        # RHS part 1: image_data[index] - this slices the current 2D layer out
        # of the 3D image_data.  This 2D layer is the same shape as the final
        # image (since each layer is the same shape).
        #
        # RHS part2: image_data[index][final_image==2] - and this selects the
        # values in *the layer* where the *final_image* is still transparent,
        # relying on them both being the same shape.
        #
        # Possibly less opaque version:
        # current_layer = image_data[index]
        # final_image[final_image == 2] = current_layer[final_image == 2]
    return final_image


def decode_image(image_data, width, height):
    layer_size = width * height
    layers = get_layers(image_data, layer_size)
    final_image = []
    for index in range(layer_size):
        for layer in layers:
            if layer[index] != '2':
                final_image.append(layer[index])
                break

    for index in range(layer_size, 1, -width):
        final_image.insert(index, '\n')
    return ''.join(final_image).strip()


def part1():
    input_ = get_input()
    return meaningless_calculation(input_, 25, 6)


def part2():
    input_ = get_input()
    return decode_image(input_, 25, 6).replace('0', ' ')


def part2_using_numpy():
    try:
        input_file = Path(__file__).resolve().parent.parent.parent / "input.txt"
    except NameError:
        input_file = Path('.').resolve().parent.parent / "input.txt"        
    array = np.genfromtxt(input_file, delimiter=1, dtype=np.int32)
    return np.array_str(
        decode_image_using_numpy(array, 25, 6)).replace('0', ' ').replace(
        '[', ' ').replace(']', ' ')


def main():
    print(f'Part 1:\n{part1()}')
    print(f'Part 2:\n{part2()}')
    print(f'Part 2 using NumPy:\n{part2_using_numpy()}')


if __name__ == "__main__":
    main()


Part 1:
1320
Part 2:
111   11  1   11  1 111  
1  1 1  1 1   11 1  1  1 
1  1 1     1 1 11   1  1 
111  1      1  1 1  111  
1 1  1  1   1  1 1  1 1  
1  1  11    1  1  1 1  1 
Part 2 using NumPy:
  1 1 1       1 1     1       1 1     1   1 1 1     
  1     1   1     1   1       1 1   1     1     1   
  1     1   1           1   1   1 1       1     1   
  1 1 1     1             1     1   1     1 1 1     
  1   1     1     1       1     1   1     1   1     
  1     1     1 1         1     1     1   1     1    


In [2]:
%timeit part2()

628 µs ± 10.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [3]:
%timeit part2_using_numpy()

197 ms ± 3.45 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## Apples to apples comparison

Above not apples to apples comparison - numpy version includes loading data and both versions contain string manipulation on output.  Really want to profile just `decode_image` and `decode_image_using_numpy`.

Redefine `decode_image` and `part2` to get string processing out of `decode_image` (profiled) into `part2` (not profiled - and shown further down):

In [4]:
def decode_image(image_data, width, height):
    layer_size = width * height
    layers = get_layers(image_data, layer_size)
    final_image = []
    for index in range(layer_size):
        for layer in layers:
            if layer[index] != '2':
                final_image.append(layer[index])
                break
    return final_image

Replicate just enough of `part2` to let us time just the `decode_image` function:

In [5]:
input_ = get_input()
width=25
height=6

In [6]:
%timeit decode_image(input_, width, height)

425 µs ± 5.39 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


Similarly, replicate just enough of `part2_using_numpy` to let us time just the `decode_image_using_numpy` function:

In [7]:
input_file = Path('.').resolve().parent.parent / "input.txt"        
array = np.genfromtxt(input_file, delimiter=1, dtype=np.int32)

In [8]:
%timeit decode_image_using_numpy(array, 25, 6)

410 µs ± 8.76 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


So pure python and numpy methods are roughly the same time?

## Line by line detail

Need to redefine part2 to fit the new definition of `decode_image` (i.e. extra string processing needs to be here):

In [9]:
def part2():
    input_ = get_input()
    layer_size = 25*6
    final_image = decode_image(input_, 25, 6)
    for index in range(layer_size, 1, -width):
        final_image.insert(index, '\n')
    return ''.join(final_image).strip().replace('0', ' ')

In [10]:
%load_ext line_profiler
# From pip install line_profiler

Following line runs `main()` but only profiles the named functions (i.e. the decode image ones):

In [11]:
%lprun -f decode_image -f decode_image_using_numpy main()

Part 1:
1320
Part 2:
111   11  1   11  1 111  
1  1 1  1 1   11 1  1  1 
1  1 1     1 1 11   1  1 
111  1      1  1 1  111  
1 1  1  1   1  1 1  1 1  
1  1  11    1  1  1 1  1 
Part 2 using NumPy:
  1 1 1       1 1     1       1 1     1   1 1 1     
  1     1   1     1   1       1 1   1     1     1   
  1     1   1           1   1   1 1       1     1   
  1 1 1     1             1     1   1     1 1 1     
  1   1     1     1       1     1   1     1   1     
  1     1     1 1         1     1     1   1     1    


Timer unit: 1e-06 s

Total time: 0.000539 s
File: <ipython-input-1-f79eda26ec28>
Function: decode_image_using_numpy at line 30

Line #      Hits         Time  Per Hit   % Time  Line Contents
    30                                           def decode_image_using_numpy(image_data, width, height):
    31         1          7.0      7.0      1.3      image_data = image_data.reshape((-1, height, width))
    32         1         10.0     10.0      1.9      final_image = image_data[0].copy()
    33         1          2.0      2.0      0.4      number_of_layers = image_data.shape[0]
    34                                               # So what's going on here?  At this point, we have a 3D array: (depth,
    35                                               # height, width).  This loop is looping over each depth layer:
    36       101         41.0      0.4      7.6      for index in range(number_of_layers):
    37       100        479.0      4.8     88.9          final_image[final_image == 2]