Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Render on demand #2336

Closed
wants to merge 15 commits into from
6 changes: 4 additions & 2 deletions GLMakie/src/GLAbstraction/AbstractGPUArray.jl
Expand Up @@ -193,8 +193,10 @@ max_dim(t) = error("max_dim not implemented for: $(typeof(t)). This happen

function (::Type{T})(x::Observable; kw...) where T <: GPUArray
gpu_mem = T(x[]; kw...)
on(x-> update!(gpu_mem, x), x)
gpu_mem
# TODO merge these and handle update tracking during construction
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we have this todo, and not just:

  on(data) do new_data
      update!(gpu_mem, new_data)
      gpu_mem.requires_update[] = true
  end

(I already have this locally, will push some changes like this later)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I separated them so the update tracking could be removed again when using fps_renderloop

map!(_ -> true, gpu_mem.requires_update, x)
on(x -> update!(gpu_mem, x), x)
return gpu_mem
end

const BaseSerializer = Serialization.AbstractSerializer
Expand Down
4 changes: 3 additions & 1 deletion GLMakie/src/GLAbstraction/GLBuffer.jl
Expand Up @@ -4,6 +4,8 @@ mutable struct GLBuffer{T} <: GPUArray{T, 1}
buffertype ::GLenum
usage ::GLenum
context ::GLContext
# TODO maybe also delay upload to when render happens?
requires_update::Observable{Bool}

function GLBuffer{T}(ptr::Ptr{T}, buff_length::Int, buffertype::GLenum, usage::GLenum) where T
id = glGenBuffers()
Expand All @@ -13,7 +15,7 @@ mutable struct GLBuffer{T} <: GPUArray{T, 1}
glBufferData(buffertype, buff_length * sizeof(T), ptr, usage)
glBindBuffer(buffertype, 0)

obj = new(id, (buff_length,), buffertype, usage, current_context())
obj = new(id, (buff_length,), buffertype, usage, current_context(), Observable(true))
finalizer(free, obj)
obj
end
Expand Down
6 changes: 4 additions & 2 deletions GLMakie/src/GLAbstraction/GLRender.jl
Expand Up @@ -15,7 +15,7 @@ function render(list::Vector{RenderObject{Pre}}) where Pre
glUseProgram(program.id)
bind(vertexarray)
for renderobject in list
Bool(to_value(renderobject.uniforms[:visible])) || continue # skip invisible
renderobject.visible || continue # skip invisible
# make sure we only bind new programs and vertexarray when it is actually
# different from the previous one
if renderobject.vertexarray != vertexarray
Expand Down Expand Up @@ -55,7 +55,9 @@ So rewriting this function could get us a lot of performance for scenes with
a lot of objects.
"""
function render(renderobject::RenderObject, vertexarray=renderobject.vertexarray)
if Bool(to_value(renderobject.uniforms[:visible]))
if renderobject.visible
renderobject.requires_update = false

renderobject.prerenderfunction()
program = vertexarray.program
glUseProgram(program.id)
Expand Down
10 changes: 9 additions & 1 deletion GLMakie/src/GLAbstraction/GLTexture.jl
Expand Up @@ -17,6 +17,7 @@ mutable struct Texture{T <: GLArrayEltypes, NDIM} <: OpenglTexture{T, NDIM}
parameters ::TextureParameters{NDIM}
size ::NTuple{NDIM, Int}
context ::GLContext
requires_update ::Observable{Bool}
function Texture{T, NDIM}(
id ::GLuint,
texturetype ::GLenum,
Expand All @@ -34,7 +35,8 @@ mutable struct Texture{T <: GLArrayEltypes, NDIM} <: OpenglTexture{T, NDIM}
format,
parameters,
size,
current_context()
current_context(),
Observable(true)
)
finalizer(free, tex)
tex
Expand All @@ -45,6 +47,12 @@ end
mutable struct TextureBuffer{T <: GLArrayEltypes} <: OpenglTexture{T, 1}
texture::Texture{T, 1}
buffer::GLBuffer{T}
requires_update::Observable{Bool}

function TextureBuffer(texture::Texture{T, 1}, buffer::GLBuffer{T}) where T
x = map((_, _) -> true, buffer.requires_update, texture.requires_update)
new{T}(texture, buffer, x)
end
end
Base.size(t::TextureBuffer) = size(t.buffer)
Base.size(t::TextureBuffer, i::Integer) = size(t.buffer, i)
Expand Down
80 changes: 75 additions & 5 deletions GLMakie/src/GLAbstraction/GLTypes.jl
Expand Up @@ -174,9 +174,17 @@ mutable struct GLVertexArray{T}
buffers::Dict{String,GLBuffer}
indices::T
context::GLContext
requires_update::Observable{Bool}

function GLVertexArray{T}(program, id, bufferlength, buffers, indices) where T
new(program, id, bufferlength, buffers, indices, current_context())
va = new(program, id, bufferlength, buffers, indices, current_context(), true)
for (name, buffer) in buffers
on(buffer.requires_update) do _ # only triggers true anyway
va.requires_update[] = true
end
end

return va
end
end

Expand Down Expand Up @@ -297,11 +305,15 @@ mutable struct RenderObject{Pre}
prerenderfunction::Pre
postrenderfunction
id::UInt32
requires_update::Bool
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should consistently use the same type for requires_update, and I don't think we need an observable, so maybe use Base.RefValue consistently instead?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see, some do need to be an observable..So maybe use Observable consistently?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For GPUArray we either need an observable to forward the information to the renderobject, or we need to search through a bunch of stuff on every potential frame. Alternatively, we could make their updates aware of the renderobject, but that might be a lot of work?

Otherwise I went with Bool since the structs are already mutable. I think that's a bit faster than a Refvalue in a mutable struct too...

visible::Bool

function RenderObject{Pre}(
context,
uniforms::Dict{Symbol,Any}, observables::Vector{Observable},
vertexarray::GLVertexArray,
prerenderfunctions, postrenderfunctions
prerenderfunctions, postrenderfunctions,
track_updates = true
) where Pre
fxaa = to_value(pop!(uniforms, :fxaa, true))
RENDER_OBJECT_ID_COUNTER[] += one(UInt32)
Expand All @@ -311,12 +323,64 @@ mutable struct RenderObject{Pre}
# But with this implementation, the fxaa flag can't be changed,
# and since this is a UUID, it shouldn't matter
id = pack_bool(RENDER_OBJECT_ID_COUNTER[], fxaa)
new(
visible = pop!(uniforms, :visible, Observable(true))

robj = new(
context,
uniforms, observables, vertexarray,
prerenderfunctions, postrenderfunctions,
id
id, true, visible[]
)

if track_updates
# visible changes should always trigger updates so that plots
# actually become invisible when visible is changed.
# Other uniforms and buffers don't need to trigger updates when
# visible = false
on(visible) do visible
robj.visible = visible
robj.requires_update = true
end

function request_update(_::Any)
if robj.visible
robj.requires_update = true
end
return
end

# gather update requests for polling in renderloop
for uniform in values(uniforms)
if uniform isa Observable
on(request_update, uniform)
elseif uniform isa GPUArray
on(request_update, uniform.requires_update)
end
end
on(request_update, vertexarray.requires_update)
else
on(visible) do visible
robj.visible = visible
end

# remove tracking from GPUArrays
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure why that's needed, could you elaborate the comment?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ugh, classic case of not seeing the whole picture in the github diff...sorry for the noise

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, it feels cleaner to me, to just let the GPUArrays track their updates, which should be very cheap, but then not connect them further if we don't track updates...

Copy link
Collaborator Author

@ffreyer ffreyer Oct 21, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

on(_ -> robj.requires_update = true, vertexarray.requires_update)

This? If all the buffers are also in uniforms it shouldn't be necessary. I added that early on when I was searching for things that need to be tracked

# remove tracking from GPUArrays

The way I added tracking it gets initialized for every renderloop. That can be a lot of extra useless callbacks, and from my testing it can be noticeable (like 5-10% slower with fps_renderloop in what should be a bad case). I tried to restore performance again by removing tracking when it's not needed. That's what the code under the comment does.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I actually removed all the tracking in one go so I'm not sure how much the GPUArrays contribute. The test case I used also didn't trigger updates there, but in simple observables since that should have a higher ratio of tracking cost / total update cost and I wanted to know how bad it can get.

I was planning to test having GPUArray updates wait for frames in the future, which would probably require something like requires_update too. So maybe this will end up as always on/tracking later anyway...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using BenchmarkTools, GLMakie

scene = Scene()
for _ in 1:100
    heatmap!(scene, rand(16, 16))
end
display(GLMakie.Screen(render_on_demand = false), scene)

function update!(scene)
    for p in scene.plots
        notify(p[3])
    end
end

@benchmark update!($scene)

With cleanup

BenchmarkTools.Trial: 9326 samples with 1 evaluation.
 Range (min … max):  381.502 μs …  32.987 ms  ┊ GC (min … max): 0.00% … 98.11%
 Time  (median):     502.491 μs               ┊ GC (median):    0.00%
 Time  (mean ± σ):   532.654 μs ± 812.194 μs  ┊ GC (mean ± σ):  3.84% ±  2.49%

      ▁           ▃▄▇█▄▃▁▁▁▂▁▁                                   
  ▁▁▅▄█▅▄▄▄▃▄▄▄▄▄▆█████████████▇▇▇▆▇▆▆▅▅▅▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▂▁▁▁ ▄
  382 μs           Histogram: frequency by time          694 μs <

 Memory estimate: 121.88 KiB, allocs estimate: 900.

Without cleanup (the else branch commented out)

BenchmarkTools.Trial: 4238 samples with 1 evaluation.
 Range (min … max):  803.314 μs …  34.882 ms  ┊ GC (min … max): 0.00% … 96.12%
 Time  (median):       1.153 ms               ┊ GC (median):    0.00%
 Time  (mean ± σ):     1.174 ms ± 745.947 μs  ┊ GC (mean ± σ):  1.33% ±  2.08%

          ▂▃▅▅▅█▃▃▃▄▃▃▃▃▄▄▄▃▃▄▃▃▇▃▅▇▄▃▄▄▃▃▁▂                     
  ▁▄▄▆▄▅▅▃███████████████████████████████████▆▇▆▅▅▄▄▃▄▄▃▂▂▃▃▂▂▂ ▅
  803 μs           Histogram: frequency by time          1.6 ms <

 Memory estimate: 128.12 KiB, allocs estimate: 1300.

Without cleanup & single observer function

BenchmarkTools.Trial: 4747 samples with 1 evaluation.
 Range (min … max):  757.975 μs …  31.849 ms  ┊ GC (min … max): 0.00% … 96.00%
 Time  (median):       1.006 ms               ┊ GC (median):    0.00%
 Time  (mean ± σ):     1.048 ms ± 783.416 μs  ┊ GC (mean ± σ):  1.84% ±  2.42%

          ▃▄▇▅▅█▅▅▅▂▃ ▃▁▁▁▁ ▁▁▂▁▂▁▁  ▁                           
  ▅▃▆▅▄▆▅▆████████████████████████████████▆▆▆▄▄▅▆▄▄▄▃▃▃▃▂▂▂▂▁▂▂ ▅
  758 μs           Histogram: frequency by time         1.43 ms <

 Memory estimate: 128.12 KiB, allocs estimate: 1300.

That's much more than I thought it would be...

for uniform in values(uniforms)
if uniform isa GPUArray
foreach(off, uniform.requires_update.inputs)
empty!(uniform.requires_update.inputs)
end
end
for buffer in vertexarray.buffers
if buffer isa GPUArray
foreach(off, buffer.requires_update.inputs)
empty!(buffer.requires_update.inputs)
end
end
foreach(off, vertexarray.requires_update.inputs)
empty!(vertexarray.requires_update.inputs)
end

return robj
end
end

Expand All @@ -328,6 +392,11 @@ function RenderObject(

switch_context!(context)

# This is a lazy workaround for disabling updates of `requires_update` when
# not rendering on demand. A cleaner implementation should probably go
# through @gen_defaults! and adjust constructors instead.
track_updates = to_value(pop!(data, :track_updates, true))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@gen_defaults is pretty much deprecated (without a new implementation yet, but I think it's pretty clear that it's pretty redundant), so I guess that's fair ;)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way it works atm is that GPUArrays always attach the update trackers when they get created. When the renderobject is created it either removes the tracking (i.e. observer functions) or connects the remaining uniforms based on what would usually be interpreted as a uniform. Seems quite hacky to me 🤷

A clean implementation wouldn't connect (and maybe also not define) the tracking observables in GPUArrays. I think with how things are organized atm @gen_defaults would need to be adjusted to forward that information. But I don't really want to mess with that macro. I'd probably just break it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant, @gen_defaults needs to be refactored / removed going forward, so whenever we do that, we can clean up the implementation of track_updates ;)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In other words: please don't waste a second on @gen_defaults ;)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok I think the pr is ready then


targets = get(data, :gl_convert_targets, Dict())
delete!(data, :gl_convert_targets)
passthrough = Dict{Symbol,Any}() # we also save a few non opengl related values in data
Expand Down Expand Up @@ -359,7 +428,7 @@ function RenderObject(
end
buffers = filter(((key, value),) -> isa(value, GLBuffer) || key == :indices, data)
uniforms = filter(((key, value),) -> !isa(value, GLBuffer) && key != :indices, data)
get!(data, :visible, true) # make sure, visibility is set
get!(data, :visible, Observable(true)) # make sure this exists
merge!(data, passthrough) # in the end, we insert back the non opengl data, to keep things simple
p = gl_convert(to_value(program), data) # "compile" lazyshader
vertexarray = GLVertexArray(Dict(buffers), p)
Expand All @@ -370,6 +439,7 @@ function RenderObject(
vertexarray,
pre,
post,
track_updates
)
# automatically integrate object ID, will be discarded if shader doesn't use it
robj[:objectid] = robj.id
Expand Down
7 changes: 4 additions & 3 deletions GLMakie/src/drawing_primitives.jl
Expand Up @@ -83,9 +83,9 @@ function cached_robj!(robj_func, screen, scene, x::AbstractPlot)
robj = get!(screen.cache, objectid(x)) do
filtered = filter(x.attributes) do (k, v)
!in(k, (
:transformation, :tickranges, :ticklabels, :raw, :SSAO,
:lightposition, :material,
:inspector_label, :inspector_hover, :inspector_clear
:transformation, :tickranges, :ticklabels, :raw, :SSAO,
:lightposition, :material,
:inspector_label, :inspector_hover, :inspector_clear, :inspectable
))
end

Expand All @@ -105,6 +105,7 @@ function cached_robj!(robj_func, screen, scene, x::AbstractPlot)
if !isnothing(ambientlight)
gl_attributes[:ambient] = ambientlight.color
end
gl_attributes[:track_updates] = screen.config.render_on_demand

robj = robj_func(gl_attributes)

Expand Down
38 changes: 37 additions & 1 deletion GLMakie/src/screen.jl
Expand Up @@ -17,6 +17,7 @@ function renderloop end

* `pause_renderloop = false`: creates a screen with paused renderloop. Can be started with `GLMakie.start_renderloop!(screen)` or paused again with `GLMakie.pause_renderloop!(screen)`.
* `vsync = false`: enables vsync for the window.
* `render_on_demand = true`: renders the scene only if something has changed in it.
* `framerate = 30.0`: sets the currently rendered frames per second.

## GLFW window attributes
Expand All @@ -40,6 +41,7 @@ mutable struct ScreenConfig
renderloop::Function
pause_renderloop::Bool
vsync::Bool
render_on_demand::Bool
framerate::Float64

# GLFW window attributes
Expand All @@ -62,6 +64,7 @@ mutable struct ScreenConfig
renderloop::Union{Makie.Automatic, Function},
pause_renderloop::Bool,
vsync::Bool,
render_on_demand::Bool,
framerate::Number,
# GLFW window attributes
float::Bool,
Expand All @@ -82,6 +85,7 @@ mutable struct ScreenConfig
renderloop isa Makie.Automatic ? GLMakie.renderloop : renderloop,
pause_renderloop,
vsync,
render_on_demand,
framerate,
# GLFW window attributes
float,
Expand Down Expand Up @@ -674,12 +678,44 @@ function fps_renderloop(screen::Screen)
end
end


function requires_update(screen::Screen)
for (_, _, robj) in screen.renderlist
robj.requires_update && return true
end
return false
end

function on_demand_renderloop(screen::Screen)
while isopen(screen) && !screen.stop_renderloop
t = time_ns()
time_per_frame = 1.0 / screen.config.framerate
pollevents(screen) # GLFW poll

if !screen.config.pause_renderloop && requires_update(screen)
render_frame(screen)
GLFW.SwapBuffers(to_native(screen))
end

t_elapsed = (time_ns() - t) / 1e9
diff = time_per_frame - t_elapsed
if diff > 0.001 # can't sleep less than 0.001
sleep(diff)
else # if we don't sleep, we still need to yield explicitely to other tasks
yield()
end
end
end

function renderloop(screen)
isopen(screen) || error("Screen most be open to run renderloop!")
# Context needs to be current for GLFW.SwapInterval
ShaderAbstractions.switch_context!(screen.glscreen)
try
if screen.config.vsync
if screen.config.render_on_demand
GLFW.SwapInterval(0)
on_demand_renderloop(screen)
elseif screen.config.vsync
GLFW.SwapInterval(1)
vsynced_renderloop(screen)
else
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Expand Up @@ -2,6 +2,8 @@

## master

- Add "render_on_demand" flag for GLMakie. Setting this to `true` will skip rendering until plots get updated. [#2336](https://github.com/MakieOrg/Makie.jl/pull/2336)

## v0.18.1

- fix heatmap interpolation [#2343](https://github.com/MakieOrg/Makie.jl/pull/2343).
Expand Down
1 change: 1 addition & 0 deletions src/theming.jl
Expand Up @@ -106,6 +106,7 @@ const minimal_default = Attributes(
renderloop = automatic,
pause_renderloop = false,
vsync = false,
render_on_demand = true,
framerate = 30.0,

# GLFW window attributes
Expand Down