In [1]:
# 
# notebook setup
#
try:
    %matplotlib qt
    import matplotlib.pyplot as plt
    #
    # Temporary hack needed to make the gridtools package visible from the notebook.
    # No longer needed when GT4Py will be installed as a regular Python package (i.e. through  setup.py)
    #
    import os
    os.chdir(os.path.abspath('..'))
except Exception:
    print ("### WARNING: plotting not available")
else:
    import numpy as np
    print ("Plotting enabled")

Plotting enabled


# ::: Gridtools4Py :::
## A Python interface for Gridtools


### A copy stencil implemented in Python


In [2]:
from gridtools.stencil import Stencil, MultiStageStencil


class CopyStencil (MultiStageStencil):
    """
    Definition of a simple copy stencil.-
    """
    @Stencil.kernel
    def kernel (self, out_data, in_data):
        """
        The entry stage of this stencil.-
        """
        #
        # iterate over the interior data points
        #
        for p in self.get_interior_points (out_data):
            out_data[p] = in_data[p]


### Use NumPy arrays as data fields

In [3]:
domain = (64, 64, 32)

source = np.random.rand (*domain)   # data field of size 'domain'
                                    # filled with random numbers
target = np.zeros (domain)          # data field of size 'domain'
                                    # filled with zeros

### Run the stencil in Python mode

In [4]:
copy         = CopyStencil ( )      # instance of the stencil defined above
copy.backend = "python"             # will run in Python only mode

%timeit -n1 -p3 -r 10 copy.run (in_data=source, out_data=target) # execute it

1 loop, best of 10: 105 ms per loop


### Run the *same* stencil in C++ mode

In [5]:
copy.set_backend("c++")       # will run using Gridtools in C++

%timeit -n1 -p3 -r 10 copy.run (in_data=source, out_data=target) # execute it

The slowest run took 39248.67 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 10: 142 µs per loop


### The code has been translated, compiled and dynamically linked into the current session

In [6]:
Stencil.compiler.lib_handle

<CDLL '/tmp/__gridtools_6kgbr7ba/libgridtools4py.0001.so', handle 2680920 at 7f0b95f3d748>

### Example: the Laplace operator

In [7]:
class Laplace (MultiStageStencil):
    def __init__ (self):
        super ( ).__init__ ( )

    @Stencil.kernel
    def kernel (self, out_data, in_data):
        """
        The user must always define a 'kernel' function.-
        """
        for p in self.get_interior_points (out_data):
            out_data[p] = -4.0 * in_data[p] - (
                          in_data[p + (1,0,0)] + in_data[p + (0,1,0)] + 
                          in_data[p + (-1,0,0)] + in_data[p + (0,-1,0)])

### Run it in Python, C++ and CUDA modes

In [8]:
lap = Laplace ( )
lap.set_halo        ( (1, 1, 1, 1) )
lap.set_k_direction ("forward")

lap.set_backend ('python')
%timeit -n 1 -p 3 -r 5 lap.run (in_data=source, out_data=target)

#lap.backend = "cuda"
#%timeit -n 1 -p 3 -r 5 lap.run (in_data=source, out_data=target)

lap.set_backend ('c++')
%timeit -n 1 -p 3 -r 5 lap.run (in_data=source, out_data=target)

1 loop, best of 5: 994 ms per loop
The slowest run took 14341.67 times longer than the fastest. This could mean that an intermediate result is being cached.
1 loop, best of 5: 264 µs per loop


### Plotting

In [9]:
from tests.test_stencils import HorizontalDiffusion

#
# initialize the input data
#
for i in range (domain[0]):
    for j in range (domain[1]):
        for k in range (domain[2]):
            source[i,j,k] = i**5 + j
wgt    = np.ones (domain)
target = np.ones (domain)

hd = HorizontalDiffusion (domain)
hd.set_halo ( (2,2,2,2) )
hd.run (out_data=target,
        in_wgt=wgt,
        in_data=source)

In [10]:
hd.plot_3d (target[2:-2,2:-2,0])
hd.plot_3d (source[:,:,0])

<mpl_toolkits.mplot3d.art3d.Line3DCollection at 0x7f0bd1471a20>

## Stage execution path and data-dependency graph

In [15]:
hd.plot_stage_execution ( )

In [17]:
hd.plot_data_dependency (show_legend=True)

## Animations

In [18]:
from tests.test_sw import SWTest

anim = SWTest ( )
anim.setUp ( )
anim.test_animation (50000)

## : Game of Life :

In [19]:
class GameOfLife (MultiStageStencil):
    def __init__ (self, domain):
        super ( ).__init__ ( )
        self.counter = np.zeros (domain)
    
    @Stencil.kernel
    def kernel (self, out_X):
        for p in self.get_interior_points (out_X):
            self.counter[p] = out_X[p + (1,0,0)]  + out_X[p + (1,1,0)]   + \
                              out_X[p + (0,1,0)]  + out_X[p + (-1,1,0)]  + \
                              out_X[p + (-1,0,0)] + out_X[p + (-1,-1,0)] + \
                              out_X[p + (0,-1,0)] + out_X[p + (1,-1,0)]
            if out_X[p] == 1.0 and self.counter[p] == 2:
                out_X[p] = 0.0
            elif self.counter[p] == 3:
                out_X[p] = 1.0
            else:
                out_X[p] = 0.0

In [21]:
X = np.array (np.random.randint (2, size=X.shape),
              dtype=np.float64)
gol = GameOfLife (X.shape)
gol.backend = 'c++'
gol.set_halo ( (1,1,1,1) )
gol.run      (out_X = X)

NameError: name 'X' is not defined

### Create a Matplotlib 2D animation

In [None]:
import matplotlib.animation as animation

fig = plt.figure ( )
im  = plt.imshow (X[:,:,0])

def anim_init ( ):
    im.set_data (X[:,:,0])
    return [im]

def anim_frame (i):
    gol.run (out_X = X)
    im.set_data (X[:,:,0])
    return [im]

In [None]:
anim = animation.FuncAnimation (fig,
                                anim_frame,
                                init_func=anim_init,
                                frames=200,
                                interval=1000,
                                blit=True)

### Performance comparison (pure C++ <--> Python with C++ backend) 

```
* Copy                 11342 FPS <--> 6710 FPS (~39% overhead)
* Laplace               4276 FPS <--> 3732 FPS (~13% overhead)
* Horizontal diffusion   633 FPS <-->  550 FPS (~13% overhead)
* Shallow water         2710 FPS <--> 2525 FPS (~7% overhead)
```

#### The constant overhead is on the Python side due to library and parameter handling

## : Status update :

### Done as of today

* several stencils, including game of life, working on CPU/GPU;
* automatic detection of read-only fields based on source analysis;
* automatic construction and display of execution and data dependency graphs by means of source analysis;
* stencil definition containing "independent" stages, which may be potentially executed in parallel;
* stage definition as a function, which avoids code replication and enables per-stage user settings;
* integrated 3D plotting;
* integrated 3D animations using OpenGL;
* tutorial IPython Notebook;
* integration with CMake and the C++ testing infrastructure (Jenkins);
* integration with containers (Docker);
* full C++11 support.

### In progress as of today

* connect lifetime of objects in Python to C++, e.g., tie finalize( ) with the garbage collector;
* implement common boundary conditions constructs;
* support for definition of local variables;
* support for a different offset-indexing syntax (i,j,k);
