In [None]:
import numpy as np 

In [None]:
# 2000 random floats from np.random.random; shared input for the timing cells below.
data = np.random.random(size=2000)

In [None]:
%timeit np.sum(data)

# Numba 
Numba is a Python library that provides a Just-In-Time (JIT) compiler, which compiles parts of Python code into machine code.

Project webpage: https://numba.pydata.org/

Project documentation: https://numba.readthedocs.io/en/stable/user/jit.html

In [None]:
# Uncomment to install numba into the environment of the running kernel:
# %pip install numba

In [None]:
from numba import jit 

In [None]:
def custom_sum(arr):
    """Add up the elements of ``arr`` with an explicit Python loop.

    This is the pure-Python baseline that the notebook times against
    ``np.sum`` and against the jitted variant.

    Parameters
    ----------
    arr : iterable of numbers
        Values to accumulate; iterated once, in order.

    Returns
    -------
    The accumulated total. Accumulation starts from the integer ``0``, so an
    empty input yields ``0``.
    """
    total = 0
    for value in arr:
        total += value
    return total

@jit
def custom_sum2(arr):
    """Add up the elements of ``arr`` with an explicit loop, compiled by numba.

    The body is the same loop as ``custom_sum``; only the ``@jit`` decorator
    differs, so timing the two shows the effect of compilation.

    Parameters
    ----------
    arr : array of numbers
        Values to accumulate; iterated once, in order.

    Returns
    -------
    The accumulated total, starting from ``0``.
    """
    total = 0
    for value in arr:
        total += value
    return total

In [None]:
%timeit custom_sum(data)

In [None]:
%timeit custom_sum2(data)

## Exercise 1 
Write a `cum_positive_sum` function which sums the elements of an array $a$ using the rule $S_i = \begin{cases} S_{i-1} + a_i & \text{if } S_{i-1} + a_i > 0 \\ S_{i-1} & \text{otherwise} \end{cases}$, where the sum before the first element is $0$.

Compare its speed with and without `jit`. Name the jitted version `cum_positive_sum_jit`; the timing cell below calls it by that name.


In [None]:
def cum_positive_sum(arr):
    # Exercise 1 placeholder: the body is intentionally left for the reader.
    # Per the exercise text, the implementation should keep a running sum over
    # ``arr`` and accept a new element only when the updated sum stays positive.
    # NOTE(review): the exercise does not state what to return; presumably the
    # final running sum -- confirm.
    ...



In [None]:
%timeit cum_positive_sum(data)

In [None]:
%timeit cum_positive_sum_jit(data)

## Exercise 2
Compare the call `perform_calc(vectors, norms)` with and without the `@jit` decorator. Check whether the `nopython` or `parallel` options speed up the calculation.



In [None]:
# 1000 random vectors with 20 components each, one vector per row.
vectors = np.random.random(size=(1000, 20))
# One integer norm order per vector, drawn from 1..9 (the upper bound 10 is exclusive).
norms = np.random.randint(1, 10, size=1000)

In [None]:
def calc_norm(vec, norm_num):
    """Return a norm of ``vec`` selected by ``norm_num``.

    Parameters
    ----------
    vec : np.ndarray
        Vector whose norm is computed.
    norm_num : int
        Order selector. ``1`` selects the largest absolute component; any
        other value ``p`` selects the p-norm ``sum(|vec_i| ** p) ** (1 / p)``.

    Returns
    -------
    float
        The requested norm of ``vec``.
    """
    # NOTE(review): order 1 maps to the max-norm rather than the sum of
    # absolute values; kept as in the original -- confirm this is intended.
    if norm_num == 1:
        return np.max(np.abs(vec))
    # The p-th root is taken with ``**``. The original multiplied the power sum
    # by 1 / norm_num, which is not a norm, and skipped the absolute value,
    # which breaks odd orders on vectors with negative components.
    return np.sum(np.abs(vec) ** norm_num) ** (1 / norm_num)

def perform_calc(vectors, norm_nums):
    """Compute one norm per row of ``vectors``.

    Parameters
    ----------
    vectors : np.ndarray
        Indexed by row; row ``i`` is passed to ``calc_norm``.
    norm_nums : np.ndarray
        Norm order for each row; its shape and size determine the output.

    Returns
    -------
    np.ndarray
        Array with the shape of ``norm_nums`` where entry ``i`` holds
        ``calc_norm(vectors[i], norm_nums[i])``.
    """
    res = np.zeros(norm_nums.shape)
    for i in range(norm_nums.size):
        res[i] = calc_norm(vectors[i], norm_nums[i])
    # The original fell off the end of the function and returned None,
    # discarding the computed values.
    return res
        

# Cython 
`Cython` is a programming language that makes writing C/C++ extensions for Python simple. It does many boring things for us, but writing `cython` code requires more effort from the user.
To achieve the full speedup, the user needs to declare the types of all variables.

https://cython.readthedocs.io/en/latest/src/quickstart/overview.html
https://cython.readthedocs.io/en/latest/src/quickstart/build.html#using-the-jupyter-notebook

`Cython` introduces the `cimport` statement for importing `cython`-related things. NumPy types for `cython` have a `_t` suffix added, for example `float64_t`.
`cdef` is used to define things that are available only from C.
For NumPy arrays, `cython` allows declaring the dtype and number of dimensions of an `np.ndarray`: `np.ndarray[float64_t, ndim=2]`.

`Cython` can be run in annotate mode to show which lines interact with the Python interpreter (which is most often the cause of a function's slowdown).

To accept multiple types of data, `cython` offers Fused Types, which are similar to C++ templates: https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html?highlight=ctypedef%20fused

To work, `cython` needs a C compiler to be available on the machine. On Windows it can be installed with Visual Studio, which can be downloaded from https://visualstudio.microsoft.com/pl/vs/community/.
On macOS and Linux a compiler should be available by default.

In [None]:
# Uncomment to install cython into the environment of the running kernel:
# %pip install cython

%load_ext Cython

## Exercise 3
Compare the speed of `calc_diameter`, `calc_diameter2` and `calc_diameter3`. Note that `calc_diameter3` does not accept `float32` arrays. Using `ctypedef fused`, fix it so that it also accepts `float32` NumPy arrays.

In [None]:
# 4000 random points with 20 coordinates each, one point per row.
vectors1 = np.random.random(size=(4000, 20))

In [None]:
def calc_diameter(vec1):
    """Return the largest squared Euclidean distance between two rows of ``vec1``.

    Each row is compared against all rows after it using vectorised NumPy
    operations. No square root is taken, so the result is a squared distance.

    Parameters
    ----------
    vec1 : np.ndarray
        2-D array with one point per row.

    Returns
    -------
    The maximum squared distance found, or the integer ``0`` when there are
    fewer than two rows.
    """
    n_points = vec1.shape[0]
    best = 0
    for idx in range(n_points - 1):
        # Differences between the current row and every later row.
        diffs = vec1[idx + 1:] - vec1[idx]
        sq_dist = np.sum(diffs ** 2, axis=1)
        best = max(best, np.max(sq_dist))
    return best

@jit
def calc_diameter2(vec1):
    """Return the largest squared Euclidean distance between two rows of ``vec1``.

    Same computation as ``calc_diameter``, compiled with numba's ``@jit`` so
    the two can be timed against each other. No square root is taken.

    Parameters
    ----------
    vec1 : np.ndarray
        2-D array with one point per row.

    Returns
    -------
    The maximum squared distance found; ``0`` when there are fewer than two
    rows.
    """
    n_points = vec1.shape[0]
    best = 0
    for idx in range(n_points - 1):
        # Differences between the current row and every later row.
        diffs = vec1[idx + 1:] - vec1[idx]
        sq_dist = np.sum(diffs ** 2, axis=1)
        best = max(best, np.max(sq_dist))
    return best

In [None]:
%%cython --annotate 

# cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True
# cython: language_level=3

from numpy cimport float64_t, int16_t
cimport numpy as np

def calc_diameter3(np.ndarray[float64_t, ndim=2] vec):
    """Return the largest squared Euclidean distance between two rows of ``vec``.

    Every pair of rows is compared with fully typed C loops. No square root
    is taken, so the result is a squared distance.

    Parameters
    ----------
    vec : np.ndarray[float64, ndim=2]
        One point per row.

    Returns
    -------
    float
        The maximum squared distance; ``0.0`` when there are fewer than two
        rows.
    """
    # Py_ssize_t is wide enough for any array dimension. The original int16_t
    # variables cannot hold values above 32767, so larger shapes were silently
    # truncated -- and this cell disables boundscheck and wraparound.
    cdef Py_ssize_t size, dim
    cdef Py_ssize_t i, j, k
    cdef float64_t r, res = 0
    size = vec.shape[0]
    dim = vec.shape[1]
    for i in range(size):
        # Start at i + 1: a row's distance to itself is 0 and can never
        # raise the running maximum.
        for j in range(i + 1, size):
            r = 0
            for k in range(dim):
                r += (vec[i, k] - vec[j, k])**2
            res = max(res, r)
    return res



## Exercise 4*
Based on the [documentation](https://cython.readthedocs.io/en/latest/src/quickstart/build.html#building-a-cython-module-using-setuptools), create a Python package that allows importing `calc_diameter3` with `from package_name import calc_diameter3`.