In [1]:
import numpy as np
import pandas as pd
import numba
from numba import vectorize, jit
import time, random

In [4]:
# Serial execution of Kernel density estimation
def serial_kde(eval_points, samples, band):
    
    re_x = (eval_points[:, np.newaxis] - samples[np.newaxis, :]) / band[np.newaxis, :]
    gaussian = np.exp(-0.5*(re_x**2))/np.sqrt(2*np.pi)/band[np.newaxis, :]
    
    return gaussian.sum(axis=1)/len(samples)

In [5]:
# Gaussian function
@jit(nopython=True)
def gaussian(x):
    
    return np.exp(-0.5*(x**2))/np.sqrt(2*np.pi)

In [6]:
# Parallel execution of Kernel density estimation
@jit(nopython=True, parallel=True)
def parallel_kde(eval_points, samples, band):
    
    res = np.zeros_like(eval_points)
    for i in numba.prange(len(eval_points)):
        eval_x = eval_points[i]
        for s, b in zip(samples, band):
            res[i] += gaussian((eval_x-s)/b)/b
        res[i] /= len(samples)
        
    return res

In [7]:
# Function to generate the input samples 
def generate_input_samples():
    
    for dtype in [np.float64]:
        for n in [1000,5000]:
            sigma=0.5
            samples = np.random.normal(loc=0.0, scale=sigma, size=n).astype(dtype)
            band = np.full_like(samples, 1.06*n**0.2*sigma)
            for n_eval in [10,1000, 5000]:
                cat = ('samples%d' %n,np.dtype(dtype).name)
                ep = np.random.normal(loc=0.0, scale=5.0, size=n_eval).astype(dtype)
                yield dict(category=cat, x=n_eval, input_args=(ep, samples, band), input_kwargs={})

In [8]:
# Store the generated values in 'val'
val = generate_input_samples()

In [9]:
ip_args, ip_kwargs, size = tuple(),dict(),0
for item in val:
    ip_args = item['input_args']
    ip_kwargs = item['input_kwargs']
    size = item['x']

In [10]:
e_p, s, b = ip_args[0], ip_args[1], ip_args[2]

In [11]:
# Keep a timer for the execution of 'kernel density estimation' in serial mode
start_s = time.time()
serial_kde(e_p,s,b)
end_s = time.time()

In [12]:
# Print out the time taken for serial execution of the code
print("Time taken for Serial implementation : {} seconds".format(end_s-start_s))
print("Dataset Size : {} tuples".format(size))

Time taken for Serial implementation : 0.8072371482849121 seconds
Dataset Size : 5000 tuples


In [13]:
# Keep a timer for the execution of 'kernel density estimation' in parallel mode
start_p = time.time()
parallel_kde(e_p,s,b)
end_p = time.time()

In [14]:
# Print out the time taken for parallel execution of the code
print("Time taken for Parallel implementation : {} seconds".format(end_p-start_p))
print("Dataset Size : {} tuples".format(size))

Time taken for Parallel implementation : 3.7228453159332275 seconds
Dataset Size : 5000 tuples


In [15]:
# Improvement due to parallelism
print("Improvement with parallel implementation : {:.2f} times".format(1/((end_p-start_p)/(end_s-start_s))))

Improvement with parallel implementation : 0.22 times
