## Parallel fourth-order Runge-Kutta

Given the simplest form of the initial-value problem for ODEs:
$$
y' = f(t, y), \quad y(a) = \alpha,
$$
the classical Runge-Kutta solver is defined as
$$
\begin{align*}
    y^{(n+1)} &= y^{(n)} + \frac{h}{6}(k_1 + 2k_2 + 2k_3 + k_4),\\
    k_1 &= f\left(t^{(n)}, y^{(n)}\right),\\
    k_2 &= f\left(t^{(n)} + h/2, y^{(n)} + hk_1/2\right),\\
    k_3 &= f\left(t^{(n)} + h/2, y^{(n)} + hk_2/2\right),\\
    k_4 &= f\left(t^{(n)} + h/2, y^{(n)} + hk_3\right),\\
\end{align*}
$$

In accordance with [C. Liu et al. (2011)](https://dl.acm.org/doi/abs/10.1007/978-3-642-25255-6_25), the Runge-Kutta solver can be parallelized as follows:
$$
\begin{align*}
    y_{(n+1)} =& y_n + \frac{h}{6}(k_1^{(n)} + 2k_2^{(n)} + 2k_3^{(n)} + k_4^{(n)}),\\
    k_1^{(n)} =& f\left(t^{(n)}, y^{(n)}\right),\\
    k_2^{(n)} =& f\left(t^{(n)} + h/2, y^{(n)} - 3k_1^{(n-1)}/4 + k_2^{(n-1)}/2 + 3k_4^{(n-1)}/4\right),\\
    k_3^{(n)} =& f\left(t^{(n)} + h/2, y^{(n)} - k_1^{(n-1)} + 2k_2^{(n-1)} - k_3^{(n-1)}/2 \right),\\
    k_4^{(n)} =& f\left(t^{(n)} + h/2, y^{(n)} + k_1^{(n-1)}/2 + k_4^{(n-1)}/2\right),\\
\end{align*}
$$

In [53]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from threading import Thread, Condition
from queue import Queue
from multiprocessing import Pool
import time

In [43]:
def timeit(function, *args, **kwargs):
    t0 = time.time()
    y = function(*args, **kwargs)
    tf = time.time()
    return tf - t0, y

class RK4:
    def __init__(self, h: float):
        self.h = h
    
    def _get_next_ks(self, x, y):
        k1 = self.f(x, *y)
        k2 = self.f(x + self.h/2, *(y + self.h*k1/2))
        k3 = self.f(x + self.h/2, *(y + self.h*k2/2))
        k4 = self.f(x + self.h/2, *(y + self.h*k3))
        return (k1, k2, k3, k4)

    def _k1(self, x, y, ks, next_ks):
        next_ks[0] = self.f(x, *y)

    def _k2(self, x, y, ks, next_ks):
        k1, k2, _, k4 = ks
        next_ks[1] = self.f(x + self.h/2, *(y - 3*k1/4 + k2/2 + 3*k4/4))

    def _k3(self, x, y, ks, next_ks):
        k1, k2, k3, _ = ks
        next_ks[2] = self.f(x + self.h/2, *(y - k1 + 2*k2 - k3/2))
    
    def _k4(self, x, y, ks, next_ks):
        k1, _, _, k4 = ks
        next_ks[3] = self.f(x + self.h/2, *(y + k1/2 + k4/2))
    
    def _ks_calc(self, x, y, last_ks, k):
        with self.condition:
            while True:
                k(x, y, last_ks)
                self.condition.wait()

    def _get_next_ks_threading(self, x, y, last_ks):
        threads = []
        next_ks = [None]*4
        for K in (self._k1, self._k2, self._k3, self._k4):
            threads.append(Thread(target=K, args=(x, y, last_ks, next_ks)))
            threads[-1].start()
        
        for thread in threads:
            thread.join()

        return next_ks
    
    def apply_on(self, function):
        self.f = function
    
    def pure(self, x0: float, y0: np.ndarray, N: int):
        y = [y0]
        x = [x0]
        for n in range(N):
            k1, k2, k3, k4 = self._get_next_ks(x[n], y[n])
            x.append(x[n] + self.h)
            y.append(y[n] + self.h*(k1 + 2*k2 + 2*k3 + k4)/6)
        return x, y
    
    def threading(self, x0: float, y0: np.ndarray, N: int):
        y = [y0]
        x = [x0]
        last_ks = []
        for n in range(N):
            last_ks = k1, k2, k3, k4 = self._get_next_ks(x[n], y[n]) if n == 0 else self._get_next_ks_threading(x[n], y[n], last_ks=last_ks)
            x.append(x[n] + self.h)
            y.append(y[n] + self.h*(k1 + 2*k2 + 2*k3 + k4)/6)
        return x, y

$$
y' = y - t^2 + 1\quad 0\le 1\le 2\quad y(0) = 0.5
$$

In [50]:
rk4 = RK4(h=1e-5)

@rk4.apply_on
def f(t, y):
    return y - t**2 + 1

In [51]:
t, (x, y) = timeit(rk4.pure, x0=0, y0=np.array([0.5]), N=100000)
print(f'Tempo de execução: {t} s')
pd.DataFrame({'x':x, 'y':np.stack(y)[:, 0]}).head()

Tempo de execução: 1.852623462677002 s


Unnamed: 0,x,y
0,0.0,0.5
1,1e-05,0.500015
2,2e-05,0.50003
3,3e-05,0.500045
4,4e-05,0.50006


In [52]:
t, (x, y) = timeit(rk4.threading, x0=0, y0=np.array([0.5]), N=100000)
print(f'Tempo de execução: {t} s')
pd.DataFrame({'x':x, 'y':np.stack(y)[:, 0]}).head()

Tempo de execução: 40.81749939918518 s


Unnamed: 0,x,y
0,0.0,0.5
1,1e-05,0.500015
2,2e-05,0.500038
3,3e-05,0.50007
4,4e-05,0.500116
