Skip to content

Commit

Permalink
Improving performance after working with google-prof, to profile CPU.…
Browse files Browse the repository at this point in the history
… Calling OMP_MP_NUM_PROCESSORS less often, since it's quite expensive according to profiling. Adjusting Timeline final_cache to match the video caching thread max_frames, so one doesn't clobber the other. Also, fixing an issue with openshot-player, where a video file with no audio skips horribly.
  • Loading branch information
jonoomph committed Feb 4, 2021
1 parent f7472d5 commit 0c4e1bc
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 35 deletions.
7 changes: 6 additions & 1 deletion examples/qt-demo/main.cpp
Expand Up @@ -29,11 +29,16 @@
*/

#include "Qt/PlayerDemo.h"

#include "ZmqLogger.h"
#include <QApplication>

int main(int argc, char *argv[])
{
// Enable logging for openshot-player since this is primarily used for
// profiling and debugging video playback issues.
openshot::ZmqLogger::Instance()->Enable(true);
openshot::ZmqLogger::Instance()->Path("./player.log");

QApplication app(argc, argv);
PlayerDemo demo;
demo.show();
Expand Down
4 changes: 2 additions & 2 deletions src/CMakeLists.txt
Expand Up @@ -35,8 +35,8 @@ if (POLICY CMP0057)
endif()

############### PROFILING #################
#set(PROFILER "/usr/lib//usr/lib/libprofiler.so.0.4.5")
#set(PROFILER "/usr/lib/libtcmalloc.so.4")
#set(PROFILER "/usr/lib/x86_64-linux-gnu/libprofiler.so.0")
#set(PROFILER "/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4")

if(CMAKE_VERSION VERSION_LESS 3.3)
# IWYU wasn't supported internally in 3.2
Expand Down
24 changes: 12 additions & 12 deletions src/FFmpegReader.cpp
Expand Up @@ -93,11 +93,11 @@ FFmpegReader::FFmpegReader(const std::string& path, bool inspect_reader)
check_fps(false), enable_seek(true), is_open(false), seek_audio_frame_found(0), seek_video_frame_found(0),
prev_samples(0), prev_pts(0), pts_total(0), pts_counter(0), is_duration_known(false), largest_frame_processed(0),
current_video_frame(0), has_missing_frames(false), num_packets_since_video_frame(0), num_checks_since_final(0),
packet(NULL) {
packet(NULL), max_concurrent_frames(OPEN_MP_NUM_PROCESSORS) {

// Configure OpenMP parallelism
// Default number of threads per section
omp_set_num_threads(OPEN_MP_NUM_PROCESSORS);
omp_set_num_threads(max_concurrent_frames);
// Allow nested parallel sections as deeply as supported
omp_set_max_active_levels(OPEN_MP_MAX_ACTIVE);

Expand All @@ -106,9 +106,9 @@ FFmpegReader::FFmpegReader(const std::string& path, bool inspect_reader)
AVCODEC_REGISTER_ALL

// Init cache
working_cache.SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels);
missing_frames.SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * 2, info.width, info.height, info.sample_rate, info.channels);
final_cache.SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * 2, info.width, info.height, info.sample_rate, info.channels);
working_cache.SetMaxBytesFromInfo(max_concurrent_frames * info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels);
missing_frames.SetMaxBytesFromInfo(max_concurrent_frames * 2, info.width, info.height, info.sample_rate, info.channels);
final_cache.SetMaxBytesFromInfo(max_concurrent_frames * 2, info.width, info.height, info.sample_rate, info.channels);

// Open and Close the reader, to populate its attributes (such as height, width, etc...)
if (inspect_reader) {
Expand Down Expand Up @@ -561,9 +561,9 @@ void FFmpegReader::Open() {
previous_packet_location.sample_start = 0;

// Adjust cache size based on size of frame and audio
working_cache.SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels);
missing_frames.SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * 2, info.width, info.height, info.sample_rate, info.channels);
final_cache.SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * 2, info.width, info.height, info.sample_rate, info.channels);
working_cache.SetMaxBytesFromInfo(max_concurrent_frames * info.fps.ToDouble() * 2, info.width, info.height, info.sample_rate, info.channels);
missing_frames.SetMaxBytesFromInfo(max_concurrent_frames * 2, info.width, info.height, info.sample_rate, info.channels);
final_cache.SetMaxBytesFromInfo(max_concurrent_frames * 2, info.width, info.height, info.sample_rate, info.channels);

// Mark as "open"
is_open = true;
Expand Down Expand Up @@ -902,11 +902,11 @@ std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {

// Minimum number of packets to process (for performance reasons)
int packets_processed = 0;
int minimum_packets = OPEN_MP_NUM_PROCESSORS;
int minimum_packets = max_concurrent_frames;
int max_packets = 4096;

// Debug output
ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame, "OPEN_MP_NUM_PROCESSORS", OPEN_MP_NUM_PROCESSORS);
ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame, "max_concurrent_frames", max_concurrent_frames);

#pragma omp parallel
{
Expand Down Expand Up @@ -1763,7 +1763,7 @@ void FFmpegReader::Seek(int64_t requested_frame) {
seek_count++;

// If seeking near frame 1, we need to close and re-open the file (this is more reliable than seeking)
int buffer_amount = std::max(OPEN_MP_NUM_PROCESSORS, 8);
int buffer_amount = std::max(max_concurrent_frames, 8);
if (requested_frame - buffer_amount < 20) {
// Close and re-open file (basically seeking to frame 1)
Close();
Expand Down Expand Up @@ -2208,7 +2208,7 @@ void FFmpegReader::CheckWorkingFrames(bool end_of_stream, int64_t requested_fram
break;

// Remove frames which are too old
if (f && f->number < (requested_frame - (OPEN_MP_NUM_PROCESSORS * 2))) {
if (f && f->number < (requested_frame - (max_concurrent_frames * 2))) {
working_cache.Remove(f->number);
}

Expand Down
1 change: 1 addition & 0 deletions src/FFmpegReader.h
Expand Up @@ -108,6 +108,7 @@ namespace openshot {
bool check_interlace;
bool check_fps;
bool has_missing_frames;
int max_concurrent_frames;

CacheMemory working_cache;
CacheMemory missing_frames;
Expand Down
8 changes: 4 additions & 4 deletions src/Qt/VideoCacheThread.cpp
Expand Up @@ -40,7 +40,7 @@ namespace openshot
// Constructor
VideoCacheThread::VideoCacheThread()
: Thread("video-cache"), speed(1), is_playing(false), position(1)
, reader(NULL), max_frames(std::min(OPEN_MP_NUM_PROCESSORS * 8, 64)), current_display_frame(1)
, reader(NULL), max_concurrent_frames(OPEN_MP_NUM_PROCESSORS * 4), current_display_frame(1)
{
}

Expand Down Expand Up @@ -98,13 +98,13 @@ namespace openshot
// Cache frames up to the max frames. Reset to current position
// if cache gets too far away from display frame. Cache frames
// even when player is paused (i.e. speed 0).
while (((position - current_display_frame) < max_frames) && is_playing)
while (((position - current_display_frame) < max_concurrent_frames) && is_playing)
{
// Only cache up till the max_frames amount... then sleep
// Only cache up till the max_concurrent_frames amount... then sleep
try
{
if (reader) {
ZmqLogger::Instance()->AppendDebugMethod("VideoCacheThread::run (cache frame)", "position", position, "current_display_frame", current_display_frame, "max_frames", max_frames, "needed_frames", (position - current_display_frame));
ZmqLogger::Instance()->AppendDebugMethod("VideoCacheThread::run (cache frame)", "position", position, "current_display_frame", current_display_frame, "max_concurrent_frames", max_concurrent_frames, "needed_frames", (position - current_display_frame));

// Force the frame to be generated
if (reader->GetCache()->GetSmallestFrame()) {
Expand Down
2 changes: 1 addition & 1 deletion src/Qt/VideoCacheThread.h
Expand Up @@ -54,7 +54,7 @@ namespace openshot
bool is_playing;
int64_t current_display_frame;
ReaderBase *reader;
int max_frames;
int max_concurrent_frames;

/// Constructor
VideoCacheThread();
Expand Down
18 changes: 17 additions & 1 deletion src/QtPlayer.cpp
Expand Up @@ -90,7 +90,23 @@ namespace openshot
FFmpegReader *ffreader = new FFmpegReader(source);
ffreader->DisplayInfo();

reader = new Timeline(ffreader->info.width, ffreader->info.height, ffreader->info.fps, ffreader->info.sample_rate, ffreader->info.channels, ffreader->info.channel_layout);
// Use default sample rate (or use the FFmpegReader's audio settings if any)
int sample_rate = 44100;
if (ffreader->info.sample_rate > 0)
sample_rate = ffreader->info.sample_rate;

// Use default channels (or use the FFmpegReader's audio settings if any)
int channels = 2;
if (ffreader->info.channels > 0)
channels = ffreader->info.channels;

// Use default channel layout (or use the FFmpegReader's audio settings if any)
openshot::ChannelLayout channel_layout = openshot::LAYOUT_STEREO;
if (channels != 2)
channel_layout = ffreader->info.channel_layout;

// Create timeline instance (720p, since we have no re-scaling in this player yet)
reader = new Timeline(1280, 720, ffreader->info.fps, sample_rate, channels, channel_layout);
Clip *c = new Clip(source);

Timeline* tm = (Timeline*)reader;
Expand Down
32 changes: 18 additions & 14 deletions src/Timeline.cpp
Expand Up @@ -35,7 +35,8 @@ using namespace openshot;

// Default Constructor for the timeline (which sets the canvas width and height)
Timeline::Timeline(int width, int height, Fraction fps, int sample_rate, int channels, ChannelLayout channel_layout) :
is_open(false), auto_map_clips(true), managed_cache(true), path("")
is_open(false), auto_map_clips(true), managed_cache(true), path(""),
max_concurrent_frames(OPEN_MP_NUM_PROCESSORS)
{
// Create CrashHandler and Attach (incase of errors)
CrashHandler::Instance();
Expand Down Expand Up @@ -70,23 +71,23 @@ Timeline::Timeline(int width, int height, Fraction fps, int sample_rate, int cha
info.acodec = "openshot::timeline";
info.vcodec = "openshot::timeline";

// Init cache
final_cache = new CacheMemory();

// Configure OpenMP parallelism
// Default number of threads per block
omp_set_num_threads(OPEN_MP_NUM_PROCESSORS);
omp_set_num_threads(max_concurrent_frames);
// Allow nested parallel sections as deeply as supported
omp_set_max_active_levels(OPEN_MP_MAX_ACTIVE);

// Init max image size
SetMaxSize(info.width, info.height);

// Init cache
final_cache = new CacheMemory();
final_cache->SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * 2, info.width, info.height, info.sample_rate, info.channels);
}

// Constructor for the timeline (which loads a JSON structure from a file path, and initializes a timeline)
Timeline::Timeline(const std::string& projectPath, bool convert_absolute_paths) :
is_open(false), auto_map_clips(true), managed_cache(true), path(projectPath) {
is_open(false), auto_map_clips(true), managed_cache(true), path(projectPath),
max_concurrent_frames(OPEN_MP_NUM_PROCESSORS) {

// Create CrashHandler and Attach (incase of errors)
CrashHandler::Instance();
Expand Down Expand Up @@ -203,18 +204,17 @@ Timeline::Timeline(const std::string& projectPath, bool convert_absolute_paths)
info.has_video = true;
info.has_audio = true;

// Init cache
final_cache = new CacheMemory();

// Configure OpenMP parallelism
// Default number of threads per section
omp_set_num_threads(OPEN_MP_NUM_PROCESSORS);
omp_set_num_threads(max_concurrent_frames);
// Allow nested parallel sections as deeply as supported
omp_set_max_active_levels(OPEN_MP_MAX_ACTIVE);

// Init max image size
SetMaxSize(info.width, info.height);

// Init cache
final_cache = new CacheMemory();
final_cache->SetMaxBytesFromInfo(OPEN_MP_NUM_PROCESSORS * 2, info.width, info.height, info.sample_rate, info.channels);
}

Timeline::~Timeline() {
Expand Down Expand Up @@ -473,6 +473,7 @@ std::shared_ptr<Frame> Timeline::GetOrCreateFrame(Clip* clip, int64_t number)
new_frame = std::make_shared<Frame>(number, preview_width, preview_height, "#000000", samples_in_frame, info.channels);
#pragma omp critical (T_GetOtCreateFrame)
{
new_frame->AddAudioSilence(samples_in_frame);
new_frame->SampleRate(info.sample_rate);
new_frame->ChannelsLayout(info.channel_layout);
}
Expand Down Expand Up @@ -725,7 +726,7 @@ std::shared_ptr<Frame> Timeline::GetFrame(int64_t requested_frame)
}

// Minimum number of frames to process (for performance reasons)
int minimum_frames = OPEN_MP_NUM_PROCESSORS;
int minimum_frames = max_concurrent_frames;

// Get a list of clips that intersect with the requested section of timeline
// This also opens the readers for intersecting clips, and marks non-intersecting clips as 'needs closing'
Expand All @@ -734,7 +735,7 @@ std::shared_ptr<Frame> Timeline::GetFrame(int64_t requested_frame)
nearby_clips = find_intersecting_clips(requested_frame, minimum_frames, true);

// Debug output
ZmqLogger::Instance()->AppendDebugMethod("Timeline::GetFrame", "requested_frame", requested_frame, "minimum_frames", minimum_frames, "OPEN_MP_NUM_PROCESSORS", OPEN_MP_NUM_PROCESSORS);
ZmqLogger::Instance()->AppendDebugMethod("Timeline::GetFrame", "requested_frame", requested_frame, "minimum_frames", minimum_frames, "max_concurrent_frames", max_concurrent_frames);

// GENERATE CACHE FOR CLIPS (IN FRAME # SEQUENCE)
// Determine all clip frames, and request them in order (to keep resampled audio in sequence)
Expand Down Expand Up @@ -1495,4 +1496,7 @@ void Timeline::SetMaxSize(int width, int height) {
// Update preview settings
preview_width = display_ratio_size.width();
preview_height = display_ratio_size.height();

// Update timeline cache size
final_cache->SetMaxBytesFromInfo(max_concurrent_frames * 4, preview_width, preview_height, info.sample_rate, info.channels);
}
1 change: 1 addition & 0 deletions src/Timeline.h
Expand Up @@ -177,6 +177,7 @@ namespace openshot {
bool managed_cache; ///< Does this timeline instance manage the cache object
std::string path; ///< Optional path of loaded UTF-8 OpenShot JSON project file
std::mutex get_frame_mutex; ///< Mutex to protect GetFrame method from different threads calling it
int max_concurrent_frames; ///< Max concurrent frames to process at one time

/// Process a new layer of video or audio
void add_layer(std::shared_ptr<openshot::Frame> new_frame, openshot::Clip* source_clip, int64_t clip_frame_number, int64_t timeline_frame_number, bool is_top_clip, float max_volume);
Expand Down

0 comments on commit 0c4e1bc

Please sign in to comment.