In [11]:
# Notebook display defaults: COLUMNS and LINES are set so that rendered output may be
# very wide but stays short.
ENV["COLUMNS"] = 1000
ENV["LINES"] = 20

# Report the Julia version, the number of Julia threads, and the CPU thread count.
(VERSION, Threads.nthreads(), Sys.CPU_THREADS)

(v"1.5.3", 8, 8)

In [12]:
using Random, Distributions, BenchmarkTools, Tullio, LoopVectorization, DataFrames, StaticKernels, SantasLittleHelpers

In [13]:
# Sample size and source distribution for the benchmark data.
N = 100_000_000
d = Normal()
# Single-column table (`data`) holding N draws from `d`.
df = DataFrame(data = rand(d, N))

Unnamed: 0_level_0,data
Unnamed: 0_level_1,Float64
1,-1.61589
2,0.958113
3,0.783623
4,0.0208487
5,0.0630693
6,-0.664731
7,-0.190103
8,-0.503945
9,-0.014282
10,0.924778


In [14]:
# Rolling mean via StaticKernels / SantasLittleHelpers: the kernel covers offsets -9..0,
# i.e. the current sample and the nine before it. As the displayed table shows, the
# first nine rows hold means over the samples available so far rather than `missing`.
k = makekernel(mean, -9:0)
df[!, :sma_sk] = applyrolling(k, df.data)
df

Unnamed: 0_level_0,data,sma_sk
Unnamed: 0_level_1,Float64,Float64
1,-1.61589,-1.61589
2,0.958113,-0.328887
3,0.783623,0.0419497
4,0.0208487,0.0366745
5,0.0630693,0.0419534
6,-0.664731,-0.0758274
7,-0.190103,-0.0921525
8,-0.503945,-0.143627
9,-0.014282,-0.129255
10,0.924778,-0.0238516


In [15]:
# Result buffer for the Tullio variant: one slot per row of `df`, pre-filled with `missing`.
tmp = Vector{Union{Missing, Float64}}(missing, nrow(df));

"""
    f_tullio(data, sma)

Write a 10-sample trailing moving average of `data` into `sma` and return `sma`.

Only positions `10:end` of `sma` are written; positions `1:9` keep whatever the caller
put there (the notebook pre-fills them with `missing`). `sma` is modified in place.
"""
function f_tullio(data, sma)
    # Window onto the part of `sma` that has a full 10-sample history behind it.
    tail = view(sma, 10:lastindex(sma))
    # `o` occurs only on the right-hand side, so Tullio reduces (sums) over it:
    # tail[i] = 0.1 * (data[i] + ... + data[i+9]), which lands at sma[i+9].
    @tullio tail[i] = 0.1 * data[i+o]  (o in 0:9)  # verbose=true
    return sma
end

# Fill `tmp` via the Tullio kernel and attach the result as the `sma_tullio` column.
df[!, :sma_tullio] = f_tullio(df.data, tmp)
df

Unnamed: 0_level_0,data,sma_sk,sma_tullio
Unnamed: 0_level_1,Float64,Float64,Float64?
1,-1.61589,-1.61589,missing
2,0.958113,-0.328887,missing
3,0.783623,0.0419497,missing
4,0.0208487,0.0366745,missing
5,0.0630693,0.0419534,missing
6,-0.664731,-0.0758274,missing
7,-0.190103,-0.0921525,missing
8,-0.503945,-0.143627,missing
9,-0.014282,-0.129255,missing
10,0.924778,-0.0238516,-0.0238516


In [16]:
# Uninitialised length-N result buffer for `rollingmean2`; the element type admits
# `missing` for the leading positions that have no full window.
out = Array{Union{Missing, Float64}, 1}(undef, N)

"""
    rollingmean2(x, m, out)

Write the trailing moving average of `x` with window length `m` into `out` and
return `out` (the same array, modified in place).

`out[1:m-1]` is set to `missing`, `out[m]` is `mean(x[1:m])`, and every later entry is
obtained by sliding the window one step: add the entering sample, drop the leaving one.
Entries of `out` beyond `length(x)` are left untouched.

The incremental update accumulates floating-point rounding, so over very long inputs
the result can drift slightly from a freshly computed window mean.

# Arguments
- `x`: 1-based indexable vector of numbers.
- `m`: window length, `1 <= m <= length(x)`.
- `out`: 1-based output vector with `length(out) >= length(x)`; its element type must
  accept `missing` whenever `m > 1`.

# Throws
- `ArgumentError` if `m` is outside `1:length(x)` or an array is not 1-based.
- `DimensionMismatch` if `out` is shorter than `x`.
"""
function rollingmean2(x, m, out)
    Base.require_one_based_indexing(x, out)
    n = length(x)
    1 <= m <= n ||
        throw(ArgumentError("window length m must satisfy 1 <= m <= length(x) = $n, got $m"))
    # The loop below runs under @inbounds, so the output length must be verified up front.
    length(out) >= n ||
        throw(DimensionMismatch("out has length $(length(out)) but x has length $n"))

    out[1:(m-1)] .= missing
    sm = mean(@view x[1:m])
    out[m] = sm
    # No @simd here: each iteration depends on the previous value of `sm` and stores
    # that intermediate value, so iterations are not independent or reorderable.
    @inbounds for i in (m+1):n
        sm += (x[i] - x[i-m]) / m
        out[i] = sm
    end
    return out
end

rollingmean2 (generic function with 1 method)

In [17]:
# 10-sample rolling mean via the running-update loop, attached as the `sma_simd` column.
df[!, :sma_simd] = rollingmean2(df.data, 10, out)
df

Unnamed: 0_level_0,data,sma_sk,sma_tullio,sma_simd
Unnamed: 0_level_1,Float64,Float64,Float64?,Float64?
1,-1.61589,-1.61589,missing,missing
2,0.958113,-0.328887,missing,missing
3,0.783623,0.0419497,missing,missing
4,0.0208487,0.0366745,missing,missing
5,0.0630693,0.0419534,missing,missing
6,-0.664731,-0.0758274,missing,missing
7,-0.190103,-0.0921525,missing,missing
8,-0.503945,-0.143627,missing,missing
9,-0.014282,-0.129255,missing,missing
10,0.924778,-0.0238516,-0.0238516,-0.0238516


In [18]:
# `$`-interpolate the global arguments so the benchmark times the call itself rather
# than access to untyped global variables.
@benchmark applyrolling($k, $(df.data))

BenchmarkTools.Trial: 
  memory estimate:  762.94 MiB
  allocs estimate:  2
  --------------
  minimum time:     1.107 s (0.13% GC)
  median time:      1.170 s (0.12% GC)
  mean time:        1.184 s (3.59% GC)
  maximum time:     1.294 s (9.84% GC)
  --------------
  samples:          5
  evals/sample:     1

In [19]:
# `$`-interpolate the global arguments so the benchmark times the call itself rather
# than access to untyped global variables.
@benchmark f_tullio($(df.data), $tmp)

BenchmarkTools.Trial: 
  memory estimate:  7.58 KiB
  allocs estimate:  114
  --------------
  minimum time:     133.653 ms (0.00% GC)
  median time:      139.918 ms (0.00% GC)
  mean time:        142.657 ms (0.00% GC)
  maximum time:     172.834 ms (0.00% GC)
  --------------
  samples:          36
  evals/sample:     1

In [20]:
# `$`-interpolate the global arguments so the benchmark times the call itself rather
# than access to untyped global variables.
@benchmark rollingmean2($(df.data), 10, $out)

BenchmarkTools.Trial: 
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     133.842 ms (0.00% GC)
  median time:      139.875 ms (0.00% GC)
  mean time:        143.018 ms (0.00% GC)
  maximum time:     156.371 ms (0.00% GC)
  --------------
  samples:          36
  evals/sample:     1