# Introduction to benchmarking and profiling

In [1]:
# Sample grid used throughout the notebook: 10 rows of 5 integers each.
# The whole grid sums to 2817, which the sum2d check below reproduces.
data = [[90, 62, 33, 78, 82],
        [37, 31, 0, 72, 32],
        [7, 71, 79, 81, 100],
        [33, 50, 66, 81, 71],
        [87, 26, 54, 78, 81],
        [37, 22, 96, 79, 41],
        [88, 75, 100, 19, 88],
        [24, 72, 59, 33, 92],
        [71, 6, 59, 8, 11],
        [89, 76, 65, 12, 13]]

In [2]:
def sum1d(l):
    """Return the total of the numbers in ``l``.

    The total starts at 0 and each element is added in iteration order with
    an explicit Python-level loop, so an empty input yields 0.
    """
    total = 0
    for value in l:
        total = total + value
    return total

def sum2d(ll):
    """Return the total of every number in a list of lists.

    Each inner list is handed to ``sum1d`` and the per-row results are
    accumulated, so this makes one ``sum1d`` call per row of ``ll``.
    """
    grand_total = 0
    for row in ll:
        grand_total = grand_total + sum1d(row)
    return grand_total

In [3]:
# Check that this works
sum2d(data)

2817

In [23]:
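# Make a meaningfully large dataset.
# NOTE: this cell was re-run later (execution count 23) with a smaller multiplier.
# The length (100000000), sum and ~30 s timings recorded in the cells just below
# come from an earlier run that presumably used data * 10000000.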
big_data = data * 100000

In [5]:
len (big_data)

100000000

In [6]:
len(big_data[0])

5

In [7]:
# Now confirm that sum2d works again
sum2d(big_data)

28170000000

In [8]:
# Use the Jupyter magics %time and %timeit to benchmark our code
%time sum2d(big_data)

CPU times: user 31.5 s, sys: 6.72 ms, total: 31.5 s
Wall time: 31.5 s


28170000000

In [9]:
# %timeit repeats the measurement several times and reports the best run
%timeit sum2d(big_data)

1 loop, best of 3: 30.9 s per loop


In [10]:
# Find out where the bottlenecks are by doing some profiling...
# Easiest way in Jupyter is with %prun magic

In [11]:
%prun sum2d(big_data)

 

In [12]:
# We could examine the impact of reducing the number of function calls
# Join the two functions together

def sum2d_v2(ll):
    """Return the total of every number in a list of lists.

    Both levels are walked with nested loops inside this one function, so no
    helper function is called per row.
    """
    total = 0
    for row in ll:
        for value in row:
            total += value
    return total

In [13]:
%timeit sum2d_v2(big_data)

1 loop, best of 3: 26.4 s per loop


In [14]:
%prun sum2d_v2(big_data)

 

In [18]:
# Now we need to do some line profiling
!pip3 install line_profiler

Collecting line_profiler
[?25l  Downloading https://files.pythonhosted.org/packages/d8/cc/4237472dd5c9a1a4079a89df7ba3d2924eed2696d68b91886743c728a9df/line_profiler-3.0.2-cp36-cp36m-manylinux2010_x86_64.whl (68kB)
[K     |████▊                           | 10kB 16.9MB/s eta 0:00:01[K     |█████████▌                      | 20kB 1.8MB/s eta 0:00:01[K     |██████████████▎                 | 30kB 2.3MB/s eta 0:00:01[K     |███████████████████             | 40kB 2.6MB/s eta 0:00:01[K     |███████████████████████▉        | 51kB 2.0MB/s eta 0:00:01[K     |████████████████████████████▋   | 61kB 2.3MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 2.0MB/s 
Installing collected packages: line-profiler
Successfully installed line-profiler-3.0.2


In [19]:
# Tell the notebook it can use line_profiler
%load_ext line_profiler

In [20]:
# Line profile my new function
%lprun -f sum2d_v2(big_data)

  profile = LineProfiler(*funcs)


In [None]:
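# Whoops, user error (PEBKAC): %lprun -f needs the name of the function to
# profile *and then* the statement to run, as in the next cell.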

In [21]:
%lprun -f sum2d_v2 sum2d_v2(big_data)

*** KeyboardInterrupt exception caught in code being profiled.

In [24]:
# Firstly, use less data: re-run the big_data cell above with a smaller multiplier


In [25]:
# See whether I can improve my summation

def sum2d_v3(ll):
    """Return the total of every number in a list of lists.

    Each row is totalled with the built-in ``sum`` and the row totals are
    accumulated in a single loop over ``ll``.
    """
    total = 0
    for row in ll:
        total += sum(row)
    return total

In [26]:
%timeit sum2d_v3(big_data)

1 loop, best of 3: 181 ms per loop


In [27]:
%lprun -f sum2d_v3 sum2d_v3(big_data)

In [28]:
# Profiling exercises
def primes(n):
    """Return every prime <= n, in ascending order, using an odd-only sieve.

    Slot k of the working list stands for the odd number 2*k + 3; a slot is
    set to 0 once its number is known to be composite.  Returns [] for n < 2.
    """
    if n < 2:
        return []
    if n == 2:
        return [2]

    sieve = list(range(3, n + 1, 2))
    slot_count = (n + 1) // 2 - 1
    limit = n ** 0.5

    slot = 0
    candidate = 3
    while candidate <= limit:
        if sieve[slot]:
            # Crossing out starts at candidate**2, which lives in slot
            # (candidate**2 - 3) // 2.
            target = (candidate * candidate - 3) // 2
            sieve[target] = 0
            # Moving `candidate` slots forward adds 2*candidate to the value,
            # i.e. steps to the next odd multiple of candidate.
            while target < slot_count:
                sieve[target] = 0
                target += candidate
        slot = slot + 1
        candidate = 2 * slot + 3

    return [2] + [number for number in sieve if number]


In [36]:
%timeit primes (100)

The slowest run took 4.33 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 5.44 µs per loop


In [30]:
# 1. Use profiling to find out where the problems are
# 2. Suggest some possible fixes
# 3. Do they work?

In [32]:
%lprun -f primes primes(100)

In [33]:
import math

def primes_v2(n):
    """Return every prime <= n, in ascending order, using an odd-only sieve.

    Same algorithm as ``primes``; the only difference is that the upper bound
    for sieving candidates is computed with ``math.sqrt`` instead of ``** 0.5``.
    Slot k of the working list stands for the odd number 2*k + 3, and a slot
    is set to 0 once its number is known to be composite.
    """
    if n < 2:
        return []
    if n == 2:
        return [2]

    odds = list(range(3, n + 1, 2))
    slot_count = (n + 1) // 2 - 1
    root = math.sqrt(n)

    slot = 0
    factor = 3
    while factor <= root:
        if odds[slot]:
            # First multiple worth crossing out is factor**2, held in slot
            # (factor**2 - 3) // 2.
            position = (factor * factor - 3) // 2
            odds[position] = 0
            # Each step of `factor` slots lands on the next odd multiple.
            while position < slot_count:
                odds[position] = 0
                position += factor
        slot = slot + 1
        factor = 2 * slot + 3

    return [2] + [number for number in odds if number]

In [37]:
%timeit primes_v2(100)

100000 loops, best of 3: 5.42 µs per loop


In [38]:
%lprun -f primes_v2 primes_v2(100)

In [39]:
# Visualising profiling runs
!pip install snakeviz

Collecting snakeviz
[?25l  Downloading https://files.pythonhosted.org/packages/a2/9a/6c753d20af6f177d3cbdb05a4b2e4419db4ec021c50ba86aa0d13a784a5c/snakeviz-2.1.0-py2.py3-none-any.whl (282kB)
[K     |█▏                              | 10kB 13.4MB/s eta 0:00:01[K     |██▎                             | 20kB 1.6MB/s eta 0:00:01[K     |███▌                            | 30kB 2.1MB/s eta 0:00:01[K     |████▋                           | 40kB 2.4MB/s eta 0:00:01[K     |█████▉                          | 51kB 1.9MB/s eta 0:00:01[K     |███████                         | 61kB 2.2MB/s eta 0:00:01[K     |████████▏                       | 71kB 2.4MB/s eta 0:00:01[K     |█████████▎                      | 81kB 2.6MB/s eta 0:00:01[K     |██████████▌                     | 92kB 2.8MB/s eta 0:00:01[K     |███████████▋                    | 102kB 2.7MB/s eta 0:00:01[K     |████████████▉                   | 112kB 2.7MB/s eta 0:00:01[K     |██████████████                  | 122kB 2.7MB/s 

In [40]:
%load_ext snakeviz

In [41]:
!ls


sample_data


In [42]:
import glob

In [None]:
%snakeviz glob.glob('sample_data/*')

In [46]:
%prun -D primes.prof primes_v2(100)

 
*** Profile stats marshalled to file 'primes.prof'. 


In [47]:
!ls


primes.prof  sample_data


In [None]:
%snakeviz primes.prof

In [51]:
from IPython.display import display
%snakeviz primes_v2(100)

 
*** Profile stats marshalled to file '/tmp/tmp49avhwgq'. 
Embedding SnakeViz in this document...
