In [None]:
import numpy as np
import matplotlib.pyplot as plt
from classification import nearest_neighbor_prediction
%matplotlib inline

# Introduction to `concurrent.futures`

High-level interface for **asynchronous**, **parallel** calculations.

Newer interface (introduced by [PEP 3148](https://www.python.org/dev/peps/pep-3148/)) that provides a common, higher-level layer on top of the **`multiprocessing`** and **`threading`** modules.

Advantages: there is no need to execute the code in a separate script, or to save the data to a file and read it back; the results are directly available in the interactive Python session.

### Installation

- Python 3: already installed by default 

- Python 2: `conda install futures`

### Import

In [None]:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

---
# Digit classification

Let us again load the data, and define a function to look at it.

In [None]:
# Read the training set (images and labels) and the test images from disk
train_images, train_labels, test_images = map(
    np.load,
    (
        './data/train_images.npy',
        './data/train_labels.npy',
        './data/test_images.npy',
    ),
)

# Define function to have a look at the data
def show_random_digit( images, labels=None ):
    """Show a random image out of `images`,
    with the corresponding label if available.

    Parameters
    ----------
    images : array
        Collection of images. The selected entry is reshaped to
        28 x 28 pixels before being displayed, so each entry must
        hold 28*28 values.
    labels : array, optional
        Labels indexed like `images`. When given, the label of the
        selected image is shown as the plot title.
    """
    # Pick one index at random among the available images
    i = np.random.randint(len(images))
    image = images[i].reshape((28, 28))
    plt.imshow( image, cmap='Greys' )
    if labels is not None:
        plt.title('Label: %d' %labels[i])

In addition, we need to define a function that takes only one argument, so that it can be passed to the executor's `map` method.

In [None]:
def predict( test_images ):
    """Return the labels predicted for `test_images`.

    Calls `nearest_neighbor_prediction` with the training data
    (`train_images`, `train_labels`) taken from the notebook's global
    scope, so that this function only takes a single argument.
    """
    predicted_labels = nearest_neighbor_prediction(
        test_images, train_images, train_labels )
    return predicted_labels

## Serial execution

In [None]:
%%time 
# Reference run: label the whole test set in a single call
test_labels_serial = predict(test_images)

## With processes

In [None]:
# Number of worker processes; the test set is cut into as many chunks
N_processes = 4
split_arrays = np.array_split(test_images, indices_or_sections=N_processes)

In [None]:
%%time
# Each worker process applies `predict` to one chunk of the test set
with ProcessPoolExecutor(max_workers=N_processes) as e:
    # `map` returns the results in the same order as `split_arrays`;
    # collect them into a list before leaving the `with` block
    result = list( e.map( predict, split_arrays ) )

# Merge the result from each process into a single array
# (`np.hstack` requires a sequence such as a list or tuple:
# passing a generator is deprecated and rejected by recent NumPy versions)
test_labels_proc = np.hstack( result )

In [None]:
# Visual check: a random test image with the label found by the processes
show_random_digit(test_images, labels=test_labels_proc)

## With threads

In [None]:
# Number of worker threads; the test set is cut into as many chunks
N_threads = 4
split_arrays = np.array_split(test_images, indices_or_sections=N_threads)

In [None]:
%%time
# Each worker thread applies `predict` to one chunk of the test set
with ThreadPoolExecutor(max_workers=N_threads) as e:
    # `map` returns the results in the same order as `split_arrays`;
    # collect them into a list before leaving the `with` block
    result = list( e.map( predict, split_arrays ) )

# Merge the result from each thread into a single array
# (`np.hstack` requires a sequence such as a list or tuple:
# passing a generator is deprecated and rejected by recent NumPy versions)
test_labels_threads = np.hstack( result )

In [None]:
# Visual check: a random test image with the label found by the threads
show_random_digit(test_images, labels=test_labels_threads)