In [12]:
from random import randint, randrange
import numpy as np
from math import fabs
from numba import jit, prange, njit, guvectorize, float32, int32, cuda

In [13]:
?cuda.local.array

Object `cuda.local.array` not found.


In [14]:
angles = 10
nofl = 100
eventsperfl = 1000 #1000
dig16codes = 7000 #7000
elms = 10

In [15]:
ar162stress = np.random.rand(dig16codes,)
data = np.random.rand(elms,dig16codes).astype(np.float32)
ar162stress.shape

(7000,)

In [16]:
@njit
def createfldictjit():
    d_fl2code16 = dict()
    numberevents = 0
    for fl in range(nofl):
        events = randint(eventsperfl-200,eventsperfl+200)
        numberevents += events
        arfl = np.empty(shape=(events,),dtype=np.int32)
        for event in range(events):
            code16 = randrange(dig16codes)
            arfl[event] = code16
        d_fl2code16[fl] = arfl
    return d_fl2code16

In [17]:
d_fl2code16 = createfldictjit()

In [18]:
def createfl():
    fl_code16 = []
    numberevents = 0
    maxlength = 0
    for fl in range(nofl):
        events = randint(eventsperfl-300,eventsperfl+300)
        if events > maxlength : maxlength = events
        numberevents += events
        arfl = np.empty(shape=(events,),dtype=np.int32)
        for event in range(events):
            code16 = randrange(dig16codes)
            arfl[event] = code16
        fl_code16.append(arfl)
    return fl_code16, maxlength

In [19]:
fl_code16, maxlength = createfl()
maxlength

1299

In [20]:
def getfldefar(nofl,maxlength,fl_code16):
    fldefar = np.full(shape=(nofl,maxlength),fill_value=-1,dtype=np.int32)
    for i,ar in enumerate(fl_code16):
        events = ar.shape[0]
        fldefar[i,:events] = ar[:]
    return fldefar
fldefar = getfldefar(nofl,maxlength,fl_code16)

In [21]:
fldefar.shape

(100, 1299)

In [22]:
@jit(nopython=True, fastmath=False)
def reversalsnogen(series):
    result = [np.float32(x) for x in range(0)]
    series = iter(series)
    x_last, x = next(series), next(series)
    d_last = (x - x_last)
    result.append(x_last)
    for x_next in series:
        if x_next == x:
            continue
        d_next = x_next - x
        if d_last * d_next < 0:
            result.append(x)
        x_last, x = x, x_next
        d_last = d_next
    result.append(x_next)
    return result

In [23]:
@jit(nopython=True, fastmath=False)
def find_rainflow_cycles(series):
    revs = reversalsnogen(series)
    result1 = [np.float32(x) for x in range(0)]
    result2 = [np.float32(x) for x in range(0)]
    residue = [np.float32(x) for x in range(0)]
    len_residue = 0
    for reversal in revs:
        residue.append(reversal)
        len_residue += 1
        while len_residue >= 4:
            S0, S1, S2, S3 = residue[-4], residue[-3], residue[-2], residue[-1]
            dS1, dS2, dS3 = fabs(S1-S0), fabs(S2-S1), fabs(S3-S2)
            if (dS2 <= dS1) and (dS2 <= dS3):
                result1.append(S1)
                result2.append(S2)
                last = residue.pop()
                residue.pop()
                residue.pop()
                residue.append(last)
                len_residue -= 2
            else:
                break
    return result1, result2

In [24]:
@njit
def mysum(mylist):
    ll = len(mylist)
    a = 0.
    for i in range(ll):
        a += mylist[i]
    return a

In [25]:
@njit
def mysumarr(arr):
    a = 0.
    for i in range(arr.size):
        a += arr[i]
    return a

In [26]:
@jit(nopython=True, fastmath=False)
def mapcode16(arfl, ar162stress):
    height = arfl.shape[0]
    res = []
    for i in range(height):
        code16 = arfl[i]
        if code16 == -1:
            break
        stress = ar162stress[code16]
        res.append(stress)
    return res

In [27]:
@jit(nopython=True, fastmath=False)
def mapcode16a(arfl, ar162stress):
    height = arfl.shape[0]
    for i in range(height):
        code16 = arfl[i]
        if code16 == -1:
            break
        stress = ar162stress[code16]
        arfl[i] = stress
    return arfl

In [28]:
m1 = mapcode16a(fldefar[0],ar162stress)
len

<function len(obj, /)>

In [68]:
@jit(nopython=True, fastmath=False, parallel=True)
def iterflights(angles,ar162stress,d_fl2code16):
    res = 0.0
    for angle in prange(angles):
        for arfl in d_fl2code16.values():
            serfl = mapcode16(arfl, ar162stress)
            r1,r2 = find_rainflow_cycles(serfl)
            res += mysum(r1)
    return res

In [69]:
iterflights(angles,ar162stress,d_fl2code16)

160166.90226313518

In [71]:
%timeit iterflights(angles,ar162stress,d_fl2code16)

25.6 ms ± 893 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [62]:
@jit(nopython=True, fastmath=False, parallel=True)
def iterflightsar(angles,ar162stress,arfls):
    flights = arfls.shape[0]
    res = 0.0
    for angle in prange(angles):
        for fl in prange(flights):
            arfl = arfls[fl]
            serfl = mapcode16(arfl, ar162stress)
            r1,r2 = find_rainflow_cycles(serfl)
            res += mysum(r1)
    return res

In [63]:
%time iterflightsar(angles,ar162stress,fldefar)

CPU times: user 942 ms, sys: 7.85 ms, total: 950 ms
Wall time: 892 ms


161285.1149598107

In [59]:
%timeit iterflightsar(angles,ar162stress,fldefar)

58.3 ms ± 32.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [35]:
@guvectorize([(float32[:], float32[:])], "(n) -> ()", target="parallel", nopython=True)
def genseq(x, output) :
    d_fl2code16 = createfldictjit()
    output[0] = iterflights(angles,ar162stress,d_fl2code16)

In [36]:
%timeit genseq(data)

193 ms ± 4.21 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [37]:
genseq(data)

array([157710.23, 158461.98, 160061.4 , 160075.12, 165105.45, 160696.8 ,
       163772.6 , 157017.72, 161864.03, 163102.23], dtype=float32)

## flightdef array

In [38]:
@jit(nopython=True, fastmath=False, parallel=True)
def iterflights2(angles,ardig16,arfls):
    flights = arfls.shape[0]
    dam = 0.0
    for angle in prange(angles):
        for fl in prange(flights):
            arfl = arfls[fl]
            serfl = mapcode16(arfl,ardig16)
            r1,r2 = find_rainflow_cycles(serfl)
            dam += mysum(r1)
    return dam

In [39]:
@jit(nopython=True,parallel=True)
def genseq2(arst,arfls,angles):
    elms = arst.shape[0]
    lendig16 = arst.shape[1]
    flights = arfls.shape[0]
    #events = arfls.shape[1]
    res = []
    for elm in prange(elms):
        #dam = 0.0
        ardig16 = arst[elm]
        dam = iterflights2(angles,ardig16,arfls)
        res.append(dam)
    return res

In [40]:
%timeit genseq2(data,fldefar,angles)

216 ms ± 12.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [41]:
genseq2(data,fldefar,angles)

[163269.2997799837,
 164688.97303545562,
 163131.5433344175,
 165067.22486244747,
 162299.466434856,
 159950.58050184074,
 165165.1988693088,
 164902.98677915707,
 162554.23110819538,
 163494.25863193872]

## flightdef guvec

In [42]:
@guvectorize([(float32[:,:], int32[:,:], float32[:])], "(elms,dig16),(fls,sqc) -> (elms)", target="parallel", nopython=True)
def genseq3(arst,arfls, output):
    elms = arst.shape[0]
    ardig16 = arst.shape[0]
    #flights = arfls.shape[0]
    sqc = arfls.shape[0]
    for elm in range(elms):
        ardig16 = arst[elm]
        dam = iterflights2(10,ardig16,arfls)
        output[elm] = dam

In [43]:
%timeit genseq3(data,fldefar)

263 ms ± 14.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [44]:
genseq3(data,fldefar)

array([163131.55, 159950.58, 164902.98, 165165.2 , 162554.23, 163494.27,
       163269.3 , 162299.47, 164688.97, 165067.22], dtype=float32)