In [9]:
from numba import njit, prange
import pandas as pd
import numpy as np
from metalib.utils import load_hist_data

In [13]:
@njit(cache=True, parallel=True)
def gaussian(x):
    return np.exp(-0.5 * x**2) / np.sqrt(2 * np.pi)

@njit(cache=True, parallel=True)
def numba_kde(eval_points, samples, bandwidths):
    result = np.zeros_like(eval_points)

    for i, eval_x in enumerate(eval_points):
        for sample, bandwidth in zip(samples, bandwidths):
            result[i] += gaussian((eval_x - sample) / bandwidth) / bandwidth
        result[i] /= len(samples)

    return result

# Multithread

@njit(parallel=True)
def numba_kde_multithread(eval_points, samples, bandwidths):
    result = np.zeros_like(eval_points)

    # SPEEDTIP: Parallelize over evaluation points with prange()
    for i in prange(len(eval_points)):
        eval_x = eval_points[i]
        for sample, bandwidth in zip(samples, bandwidths):
            result[i] += gaussian((eval_x - sample) / bandwidth) / bandwidth
        result[i] /= len(samples)

    return result

In [10]:
ohlc = load_hist_data("EURUSD", 2020)

  return Index(sequences[0], name=names)


In [12]:
subsample = ohlc.tail(400)

In [14]:
# Generate random sample points
np.random.seed(42)
samples = np.random.randn(10000)  # 10,000 sample points
bandwidths = np.full_like(samples, 0.2)  # Fixed bandwidth for each point

# Define evaluation points (where you want to estimate the density)
eval_points = np.linspace(-3, 3, 1000)

In [15]:
density_estimates = numba_kde_multithread(eval_points, samples, bandwidths)


The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.
[1m
File "..\..\..\..\AppData\Local\Temp\ipykernel_11872\2912872259.py", line 1:[0m
[1m<source missing, REPL/exec in use?>[0m
[0m[0m
  result[i] += gaussian((eval_x - sample) / bandwidth) / bandwidth


In [16]:
@njit(parallel=True)
def test(x):
    n = x.shape[0]
    a = np.sin(x)
    b = np.cos(a * a)
    acc = 0
    for i in prange(n - 2):
        for j in prange(n - 1):
            acc += b[i] + b[j + 1]
    return acc

test(np.arange(10))

test.parallel_diagnostics(level=4)

 
 Parallel Accelerator Optimizing:  Function test, 
C:\Users\Trismegist\AppData\Local\Temp\ipykernel_11872\2862204045.py (1)  


Parallel loop listing for  Function test, C:\Users\Trismegist\AppData\Local\Temp\ipykernel_11872\2862204045.py (1) 
--------------------------------------|loop #ID
@njit(parallel=True)                  | 
def test(x):                          | 
    n = x.shape[0]                    | 
    a = np.sin(x)---------------------| #1
    b = np.cos(a * a)-----------------| #2
    acc = 0                           | 
    for i in prange(n - 2):-----------| #4
        for j in prange(n - 1):-------| #3
            acc += b[i] + b[j + 1]    | 
    return acc                        | 
--------------------------------- Fusing loops ---------------------------------
Attempting fusion of parallel loops (combines loops with similar properties)...
  Trying to fuse loops #1 and #2:
    - fusion succeeded: parallel for-loop #2 is fused into for-loop #1.
  Trying to fuse loop