diff --git a/3rdparty/ffmpeg b/3rdparty/ffmpeg index 9a2df87789eb..10d0ebc0b8c7 160000 --- a/3rdparty/ffmpeg +++ b/3rdparty/ffmpeg @@ -1 +1 @@ -Subproject commit 9a2df87789ebfecf64d35d732e5847662fbd5520 +Subproject commit 10d0ebc0b8c7c4f0b242c9998c8bdc4e55bb5067 diff --git a/rpcs3/Emu/Audio/AudioBackend.h b/rpcs3/Emu/Audio/AudioBackend.h index d6978ea66dd4..497b4c97e665 100644 --- a/rpcs3/Emu/Audio/AudioBackend.h +++ b/rpcs3/Emu/Audio/AudioBackend.h @@ -221,6 +221,45 @@ class AudioBackend } } + static void downmix(u32 sample_cnt, u32 src_ch_cnt, u32 dst_ch_cnt, const f32* src, f32* dst) + { + if (src_ch_cnt <= dst_ch_cnt) + { + return; + } + + if (src_ch_cnt == static_cast(AudioChannelCnt::SURROUND_7_1)) + { + if (dst_ch_cnt == static_cast(AudioChannelCnt::SURROUND_5_1)) + { + AudioBackend::downmix(sample_cnt, src, dst); + } + else if (dst_ch_cnt == static_cast(AudioChannelCnt::STEREO)) + { + AudioBackend::downmix(sample_cnt, src, dst); + } + else + { + fmt::throw_exception("Invalid downmix combination: %u -> %u", src_ch_cnt, dst_ch_cnt); + } + } + else if (src_ch_cnt == static_cast(AudioChannelCnt::SURROUND_5_1)) + { + if (dst_ch_cnt == static_cast(AudioChannelCnt::STEREO)) + { + AudioBackend::downmix(sample_cnt, src, dst); + } + else + { + fmt::throw_exception("Invalid downmix combination: %u -> %u", src_ch_cnt, dst_ch_cnt); + } + } + else + { + fmt::throw_exception("Invalid downmix combination: %u -> %u", src_ch_cnt, dst_ch_cnt); + } + } + protected: AudioSampleSize m_sample_size = AudioSampleSize::FLOAT; AudioFreq m_sampling_rate = AudioFreq::FREQ_48K; diff --git a/rpcs3/Emu/Audio/audio_resampler.cpp b/rpcs3/Emu/Audio/audio_resampler.cpp index c2d318db40d8..b029172dfaee 100644 --- a/rpcs3/Emu/Audio/audio_resampler.cpp +++ b/rpcs3/Emu/Audio/audio_resampler.cpp @@ -33,8 +33,9 @@ void audio_resampler::put_samples(const f32* buf, u32 sample_cnt) std::pair audio_resampler::get_samples(u32 sample_cnt) { + // NOTE: Fetch the buffer pointer before calling receiveSamples. The evaluation order of function arguments is unspecified, and receiveSamples consumes samples from the front of that buffer. f32 *const buf = resampler.bufBegin(); return
std::make_pair(buf, resampler.receiveSamples(sample_cnt)); } u32 audio_resampler::samples_available() const diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.cpp b/rpcs3/Emu/Cell/Modules/cellAudio.cpp index c46b09b8a5ed..117c21ab71eb 100644 --- a/rpcs3/Emu/Cell/Modules/cellAudio.cpp +++ b/rpcs3/Emu/Cell/Modules/cellAudio.cpp @@ -5,6 +5,7 @@ #include "Emu/Cell/lv2/sys_process.h" #include "Emu/Cell/lv2/sys_event.h" #include "cellAudio.h" +#include "util/video_provider.h" #include @@ -69,7 +70,7 @@ void cell_audio_config::reset(bool backend_changed) const AudioFreq freq = AudioFreq::FREQ_48K; const AudioSampleSize sample_size = raw.convert_to_s16 ? AudioSampleSize::S16 : AudioSampleSize::FLOAT; - const auto [req_ch_cnt, downmix] = AudioBackend::get_channel_count_and_downmixer(0); // CELL_AUDIO_OUT_PRIMARY + const auto& [req_ch_cnt, downmix] = AudioBackend::get_channel_count_and_downmixer(0); // CELL_AUDIO_OUT_PRIMARY f64 cb_frame_len = 0.0; u32 ch_cnt = 2; @@ -276,52 +277,26 @@ void audio_ringbuffer::process_resampled_data() { if (!cfg.time_stretching_enabled) return; - const auto [buffer, samples] = resampler.get_samples(static_cast(cb_ringbuf.get_free_size() / (cfg.audio_sample_size * static_cast(cfg.backend_ch_cnt)))); + const auto& [buffer, samples] = resampler.get_samples(static_cast(cb_ringbuf.get_free_size() / (cfg.audio_sample_size * static_cast(cfg.backend_ch_cnt)))); commit_data(buffer, samples); } void audio_ringbuffer::commit_data(f32* buf, u32 sample_cnt) { - sample_cnt *= cfg.audio_channels; + const u32 sample_cnt_in = sample_cnt * cfg.audio_channels; + const u32 sample_cnt_out = sample_cnt * static_cast(cfg.backend_ch_cnt); // Dump audio if enabled - m_dump.WriteData(buf, sample_cnt * static_cast(AudioSampleSize::FLOAT)); + m_dump.WriteData(buf, sample_cnt_in * static_cast(AudioSampleSize::FLOAT)); - if (cfg.backend_ch_cnt < AudioChannelCnt{cfg.audio_channels}) + // Record
audio if enabled + if (utils::video_provider& provider = g_fxo->get(); provider.can_consume_sample()) { - if (AudioChannelCnt{cfg.audio_channels} == AudioChannelCnt::SURROUND_7_1) - { - if (cfg.backend_ch_cnt == AudioChannelCnt::SURROUND_5_1) - { - AudioBackend::downmix(sample_cnt, buf, buf); - } - else if (cfg.backend_ch_cnt == AudioChannelCnt::STEREO) - { - AudioBackend::downmix(sample_cnt, buf, buf); - } - else - { - fmt::throw_exception("Invalid downmix combination: %u -> %u", cfg.audio_channels, static_cast(cfg.backend_ch_cnt)); - } - } - else if (AudioChannelCnt{cfg.audio_channels} == AudioChannelCnt::SURROUND_5_1) - { - if (cfg.backend_ch_cnt == AudioChannelCnt::STEREO) - { - AudioBackend::downmix(sample_cnt, buf, buf); - } - else - { - fmt::throw_exception("Invalid downmix combination: %u -> %u", cfg.audio_channels, static_cast(cfg.backend_ch_cnt)); - } - } - else - { - fmt::throw_exception("Invalid downmix combination: %u -> %u", cfg.audio_channels, static_cast(cfg.backend_ch_cnt)); - } + provider.present_samples(reinterpret_cast(buf), sample_cnt, static_cast(cfg.audio_channels)); } - const u32 sample_cnt_out = sample_cnt / cfg.audio_channels * static_cast(cfg.backend_ch_cnt); + // Downmix if necessary + AudioBackend::downmix(sample_cnt_in, cfg.audio_channels, static_cast(cfg.backend_ch_cnt), buf, buf); if (cfg.backend->get_convert_to_s16()) { diff --git a/rpcs3/Emu/Cell/Modules/cellAudio.h b/rpcs3/Emu/Cell/Modules/cellAudio.h index 0f2bfe7d523d..4225bdbca659 100644 --- a/rpcs3/Emu/Cell/Modules/cellAudio.h +++ b/rpcs3/Emu/Cell/Modules/cellAudio.h @@ -223,8 +223,8 @@ struct cell_audio_config AudioChannelCnt audio_downmix = AudioChannelCnt::SURROUND_7_1; AudioChannelCnt backend_ch_cnt = AudioChannelCnt::SURROUND_7_1; - u32 audio_channels = 0; - u32 audio_sampling_rate = 0; + u32 audio_channels = 2; + u32 audio_sampling_rate = DEFAULT_AUDIO_SAMPLING_RATE; u32 audio_block_period = 0; u32 audio_sample_size = 0; f64 audio_min_buffer_duration = 0.0; diff --git 
a/rpcs3/Emu/Cell/Modules/cellRec.cpp b/rpcs3/Emu/Cell/Modules/cellRec.cpp index 53165ef6a5dd..c793fbca887f 100644 --- a/rpcs3/Emu/Cell/Modules/cellRec.cpp +++ b/rpcs3/Emu/Cell/Modules/cellRec.cpp @@ -5,6 +5,7 @@ #include "Emu/IdManager.h" #include "Emu/system_config.h" #include "Emu/VFS.h" +#include "Emu/Audio/AudioBackend.h" #include "cellRec.h" #include "cellSysutil.h" #include "util/media_utils.h" @@ -136,36 +137,69 @@ struct rec_param video_input, audio_input, audio_input_mix_vol, reduce_memsize, show_xmb, filename, metadata_filename, spurs_param.pSpurs, spurs_param.spu_usage_rate, priority, movie_metadata.to_string(), scene_metadata.to_string()); } + + bool use_external_audio() const + { + return audio_input != CELL_REC_PARAM_AUDIO_INPUT_DISABLE // != DISABLE means that cellRec will add samples on its own + && audio_input_mix_vol > CELL_REC_PARAM_AUDIO_INPUT_MIX_VOL_MIN; // We need to mix cellRec audio with internal audio + } + + bool use_internal_audio() const + { + return audio_input == CELL_REC_PARAM_AUDIO_INPUT_DISABLE // DISABLE means that cellRec won't add samples on its own + || audio_input_mix_vol < CELL_REC_PARAM_AUDIO_INPUT_MIX_VOL_MAX; // We need to mix cellRec audio with internal audio + } + + bool use_internal_video() const + { + return video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE; // DISABLE means that cellRec won't add frames on its own + } }; -constexpr u32 rec_framerate = 30; // Always 30 fps +static constexpr u32 rec_framerate = 30; // Always 30 fps +static constexpr u32 rec_channels = 2; // Always 2 channels -class rec_image_sink : public utils::image_sink +class rec_video_sink : public utils::video_sink { public: - rec_image_sink() : utils::image_sink() + rec_video_sink() : utils::video_sink() { m_framerate = rec_framerate; } + void set_sample_rate(u32 sample_rate) + { + m_sample_rate = sample_rate; + } + void stop(bool flush = true) override { - cellRec.notice("Stopping image sink. 
flush=%d", flush); + cellRec.notice("Stopping video sink. flush=%d", flush); std::lock_guard lock(m_mtx); m_flush = flush; + m_paused = false; m_frames_to_encode.clear(); + m_samples_to_encode.clear(); has_error = false; } - void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) override + void pause(bool flush = true) override { + cellRec.notice("Pausing video sink. flush=%d", flush); + std::lock_guard lock(m_mtx); + m_flush = flush; + m_paused = true; + } - if (m_flush) - return; + void resume() override + { + cellRec.notice("Resuming video sink"); - m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame)); + std::lock_guard lock(m_mtx); + m_flush = false; + m_paused = false; } encoder_frame get_frame() @@ -181,6 +215,20 @@ class rec_image_sink : public utils::image_sink return {}; } + + encoder_sample get_sample() + { + std::lock_guard lock(m_mtx); + + if (!m_samples_to_encode.empty()) + { + encoder_sample block = std::move(m_samples_to_encode.front()); + m_samples_to_encode.pop_front(); + return block; + } + + return {}; + } }; struct rec_info @@ -196,11 +244,21 @@ struct rec_info vm::bptr video_input_buffer{}; // Used by the game to inject a frame right before it would render a frame to the screen. 
vm::bptr audio_input_buffer{}; // Used by the game to inject audio: 2-channel interleaved (left-right) * 256 samples * sizeof(f32) at 48000 kHz - std::vector video_ringbuffer; - std::vector audio_ringbuffer; + // Wrapper for our audio data + struct audio_block + { + // 2-channel interleaved (left-right), 256 samples, float + static constexpr usz block_size = rec_channels * CELL_REC_AUDIO_BLOCK_SAMPLES * sizeof(f32); + std::array block{}; + s64 pts{}; + }; + + std::vector video_ringbuffer; + std::vector audio_ringbuffer; usz video_ring_pos = 0; + usz audio_ring_pos = 0; usz video_ring_frame_count = 0; - usz audio_ring_step = 0; + usz audio_ring_block_count = 0; usz next_video_ring_pos() { @@ -209,11 +267,17 @@ struct rec_info return pos; } - std::shared_ptr image_sink; + usz next_audio_ring_pos() + { + const usz pos = audio_ring_pos; + audio_ring_pos = (audio_ring_pos + 1) % audio_ringbuffer.size(); + return pos; + } + + std::shared_ptr sink; std::shared_ptr encoder; - std::unique_ptr>> image_provider_thread; + std::unique_ptr>> video_provider_thread; atomic_t paused = false; - s64 last_pts = -1; // Video parameters utils::video_encoder::frame_format output_format{}; @@ -221,13 +285,13 @@ struct rec_info u32 video_bps = 512000; s32 video_codec_id = 12; // AV_CODEC_ID_MPEG4 s32 max_b_frames = 2; - const u32 fps = rec_framerate; // Always 30 fps + static constexpr u32 fps = rec_framerate; // Always 30 fps // Audio parameters u32 sample_rate = 48000; u32 audio_bps = 64000; s32 audio_codec_id = 86018; // AV_CODEC_ID_AAC - const u32 channels = 2; // Always 2 channels + static constexpr u32 channels = rec_channels; // Always 2 channels // Recording duration atomic_t recording_time_start = 0; @@ -240,9 +304,9 @@ struct rec_info void set_video_params(s32 video_format); void set_audio_params(s32 audio_format); - void start_image_provider(); - void pause_image_provider(); - void stop_image_provider(bool flush); + void start_video_provider(); + void pause_video_provider(); + 
void stop_video_provider(bool flush); }; void rec_info::set_video_params(s32 video_format) @@ -507,37 +571,42 @@ void rec_info::set_audio_params(s32 audio_format) cellRec.notice("set_audio_params: audio_format=0x%x, audio_codec_id=%d, sample_rate=%d, audio_bps=%d", audio_format, audio_codec_id, sample_rate, audio_bps); } -void rec_info::start_image_provider() +void rec_info::start_video_provider() { const bool was_paused = paused.exchange(false); utils::video_provider& video_provider = g_fxo->get(); - if (image_provider_thread && was_paused) + if (video_provider_thread && was_paused) { // Resume const u64 pause_time_end = get_system_time(); ensure(pause_time_end > pause_time_start); pause_time_total += (pause_time_end - pause_time_start); - video_provider.set_pause_time(pause_time_total / 1000); - cellRec.notice("Resuming image provider."); + video_provider.set_pause_time_us(pause_time_total); + cellRec.notice("Resuming video provider."); return; } - cellRec.notice("Starting image provider."); + cellRec.notice("Starting video provider."); recording_time_start = get_system_time(); + pause_time_start = 0; pause_time_total = 0; - video_provider.set_pause_time(0); + video_provider.set_pause_time_us(0); - image_provider_thread = std::make_unique>>("cellRec Image Provider", [this]() + video_provider_thread = std::make_unique>>("cellRec video provider", [this]() { - const bool use_internal_audio = param.audio_input == CELL_REC_PARAM_AUDIO_INPUT_DISABLE || param.audio_input_mix_vol < 100; - const bool use_external_audio = param.audio_input != CELL_REC_PARAM_AUDIO_INPUT_DISABLE && param.audio_input_mix_vol > 0; - const bool use_external_video = param.video_input != CELL_REC_PARAM_VIDEO_INPUT_DISABLE; + const bool use_internal_audio = param.use_internal_audio(); + const bool use_external_audio = param.use_external_audio(); + const bool use_external_video = !param.use_internal_video(); const bool use_ring_buffer = param.ring_sec > 0; const usz frame_size = input_format.pitch 
* input_format.height; + audio_block buffer_external{}; // for cellRec input + audio_block buffer_internal{}; // for cellAudio input + s64 last_video_pts = -1; + s64 last_audio_pts = -1; - cellRec.notice("image_provider_thread: use_ring_buffer=%d, video_ringbuffer_size=%d, audio_ringbuffer_size=%d, ring_sec=%d, frame_size=%d, use_external_video=%d, use_external_audio=%d, use_internal_audio=%d", use_ring_buffer, video_ringbuffer.size(), audio_ringbuffer.size(), param.ring_sec, frame_size, use_external_video, use_external_audio, use_internal_audio); + cellRec.notice("video_provider_thread: use_ring_buffer=%d, video_ringbuffer_size=%d, audio_ringbuffer_size=%d, ring_sec=%d, frame_size=%d, use_internal_video=%d, use_external_audio=%d, use_internal_audio=%d", use_ring_buffer, video_ringbuffer.size(), audio_ringbuffer.size(), param.ring_sec, frame_size, encoder->use_internal_video, use_external_audio, encoder->use_internal_audio); while (thread_ctrl::state() != thread_state::aborting && encoder) { @@ -563,19 +632,25 @@ void rec_info::start_image_provider() continue; } + // We only care for new video frames or audio samples that can be properly encoded, so we check the timestamps and pts. const usz timestamp_ms = (get_system_time() - recording_time_start - pause_time_total) / 1000; - // We only care for new video frames that can be properly encoded + ///////////////// + // VIDEO // + ///////////////// + // TODO: wait for flip before adding a frame if (use_external_video) { - if (const s64 pts = encoder->get_pts(timestamp_ms); pts > last_pts) + // The video frames originate from cellRec instead of our render pipeline. + if (const s64 pts = encoder->get_pts(timestamp_ms); pts > last_video_pts) { if (video_input_buffer) { if (use_ring_buffer) { - utils::image_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()]; + // The video frames originate from cellRec and are stored in a ringbuffer. 
+ utils::video_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()]; frame_data.pts = pts; frame_data.width = input_format.width; frame_data.height = input_format.height; @@ -586,107 +661,225 @@ void rec_info::start_image_provider() } else { + // The video frames originate from cellRec and are pushed to the encoder immediately. std::vector frame(frame_size); std::memcpy(frame.data(), video_input_buffer.get_ptr(), frame.size()); encoder->add_frame(frame, input_format.pitch, input_format.width, input_format.height, input_format.av_pixel_format, timestamp_ms); } } - last_pts = pts; + last_video_pts = pts; } } - else if (use_ring_buffer && image_sink) + else if (sink) { - utils::image_sink::encoder_frame frame = image_sink->get_frame(); + // The video frames originate from our render pipeline. + utils::video_sink::encoder_frame frame = sink->get_frame(); - if (const s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_pts && frame.data.size() > 0) + if (const s64 pts = encoder->get_pts(frame.timestamp_ms); pts > last_video_pts && !frame.data.empty()) { ensure(frame.data.size() == frame_size); - utils::image_sink::encoder_frame& frame_data = video_ringbuffer[next_video_ring_pos()]; - frame_data = std::move(frame); - frame_data.pts = pts; - last_pts = pts; - video_ring_frame_count++; + + if (use_ring_buffer) + { + // The video frames originate from our render pipeline and are stored in a ringbuffer. + frame.pts = pts; + video_ringbuffer[next_video_ring_pos()] = std::move(frame); + video_ring_frame_count++; + } + else + { + // The video frames originate from our render pipeline and are directly encoded by the encoder. 
+ encoder->add_frame(frame.data, frame.pitch, frame.width, frame.height, frame.av_pixel_format, frame.timestamp_ms); + } + + last_video_pts = pts; } } - if (use_internal_audio) - { - // TODO: fetch audio - } + ///////////////// + // AUDIO // + ///////////////// - if (use_external_audio && audio_input_buffer) + const usz timestamp_us = get_system_time() - recording_time_start - pause_time_total; + bool got_new_samples = false; + + if (use_external_audio) { - // 2-channel interleaved (left-right), 256 samples, float - std::array audio_data{}; - std::memcpy(audio_data.data(), audio_input_buffer.get_ptr(), audio_data.size() * sizeof(f32)); + if (const s64 pts = encoder->get_audio_pts(timestamp_us); pts > last_audio_pts) + { + if (audio_input_buffer) + { + // The audio samples originate from cellRec instead of our render pipeline. + // TODO: This needs to be synchronized with the game somehow if possible. + std::memcpy(buffer_external.block.data(), audio_input_buffer.get_ptr(), buffer_external.block.size()); + buffer_external.pts = pts; + got_new_samples = true; + } - // TODO: mix audio with param.audio_input_mix_vol + last_audio_pts = pts; + } } - if (use_ring_buffer) + if (sink && use_internal_audio) { - // TODO: add audio properly - //std::memcpy(&ringbuffer[get_ring_pos(pts) + ring_audio_offset], audio_data.data(), audio_data.size()); + // The audio samples originate from cellAudio and are stored in a ringbuffer. 
+ utils::video_sink::encoder_sample sample = sink->get_sample(); + + if (!sample.data.empty() && sample.channels >= channels && sample.sample_count >= CELL_REC_AUDIO_BLOCK_SAMPLES) + { + s64 pts = encoder->get_audio_pts(sample.timestamp_us); + + // Each encoder_sample can have more than one block + for (u32 i = 0; i < sample.sample_count; i += CELL_REC_AUDIO_BLOCK_SAMPLES) + { + if (pts > last_audio_pts) + { + const f32* src = reinterpret_cast(&sample.data[i * sample.channels * sizeof(f32)]); + + // Copy the new samples to the internal buffer if we need them for volume mixing below. + // Otherwise copy them directly to the external buffer which is used for output later. + audio_block& dst_buffer = got_new_samples ? buffer_internal : buffer_external; + + if (sample.channels > channels) + { + // Downmix channels + AudioBackend::downmix(CELL_REC_AUDIO_BLOCK_SAMPLES * sample.channels, sample.channels, channels, src, reinterpret_cast(dst_buffer.block.data())); + } + else + { + std::memcpy(dst_buffer.block.data(), src, audio_block::block_size); + } + + // Mix external and internal audio with param.audio_input_mix_vol if we already got samples from cellRec. 
+ if (got_new_samples) + { + const float volume = std::clamp(param.audio_input_mix_vol / 100.0f, 0.0f, 1.0f); + const f32* src = reinterpret_cast(buffer_internal.block.data()); + f32* dst = reinterpret_cast(buffer_external.block.data()); + + for (u32 sample = 0; sample < (CELL_REC_AUDIO_BLOCK_SAMPLES * channels); sample++) + { + *dst = std::clamp(*dst + (*src++ * volume), -1.0f, 1.0f); + ++dst; + } + } + + last_audio_pts = std::max(pts, last_audio_pts); // The cellAudio pts may be older than the pts from cellRec + buffer_external.pts = last_audio_pts; + got_new_samples = true; + } + + // We only take the first sample for simplicity for now + break; + + // Increase pts for each sample block + //pts++; + } + } } - else + + if (got_new_samples) { - // TODO: add audio to encoder + if (use_ring_buffer) + { + // Copy new sample to ringbuffer + audio_ringbuffer[next_audio_ring_pos()] = buffer_external; + audio_ring_block_count++; + } + else + { + // Push new sample to encoder + encoder->add_audio_samples(buffer_external.block.data(), CELL_REC_AUDIO_BLOCK_SAMPLES, channels, timestamp_us); + } } // Update recording time - recording_time_total = encoder->get_timestamp_ms(encoder->last_pts()); + recording_time_total = encoder->get_timestamp_ms(encoder->last_video_pts()); - thread_ctrl::wait_for(100); + thread_ctrl::wait_for(1); } }); } -void rec_info::pause_image_provider() +void rec_info::pause_video_provider() { - cellRec.notice("Pausing image provider."); + cellRec.notice("Pausing video provider."); - if (image_provider_thread) + if (video_provider_thread) { paused = true; pause_time_start = get_system_time(); } } -void rec_info::stop_image_provider(bool flush) +void rec_info::stop_video_provider(bool flush) { - cellRec.notice("Stopping image provider."); + cellRec.notice("Stopping video provider."); - if (image_provider_thread) + if (video_provider_thread) { - auto& thread = *image_provider_thread; + auto& thread = *video_provider_thread; thread = thread_state::aborting; 
thread(); - image_provider_thread.reset(); + video_provider_thread.reset(); } - if (flush && param.ring_sec > 0 && !video_ringbuffer.empty()) + // Flush the ringbuffer if necessary. + // This should only happen if the video sink is not the encoder itself. + // In this case the encoder should have been idle until now. + if (flush && param.ring_sec > 0 && (!video_ringbuffer.empty() || !audio_ringbuffer.empty())) { cellRec.notice("Flushing video ringbuffer."); // Fill encoder with data from ringbuffer // TODO: ideally the encoder should do this on the fly and overwrite old frames in the file. ensure(encoder); + encoder->encode(); const usz frame_count = std::min(video_ringbuffer.size(), video_ring_frame_count); - const usz start_offset = video_ring_frame_count < video_ringbuffer.size() ? 0 : video_ring_frame_count; - const s64 start_pts = video_ringbuffer[start_offset % video_ringbuffer.size()].pts; + const usz video_start_offset = video_ring_frame_count < video_ringbuffer.size() ? 0 : video_ring_frame_count; + const s64 video_start_pts = video_ringbuffer.empty() ? 0 : video_ringbuffer[video_start_offset % video_ringbuffer.size()].pts; + + const usz block_count = std::min(audio_ringbuffer.size(), audio_ring_block_count); + const usz audio_start_offset = audio_ring_block_count < audio_ringbuffer.size() ? 0 : audio_ring_block_count; + const s64 audio_start_pts = audio_ringbuffer.empty() ? 
0 : audio_ringbuffer[audio_start_offset % audio_ringbuffer.size()].pts; - for (usz i = 0; i < frame_count; i++) + // Try to add the frames and samples in proper order + for (usz sync_timestamp_us = 0, frame = 0, block = 0; frame < frame_count || block < block_count; frame++) { - const usz pos = (start_offset + i) % video_ringbuffer.size(); - utils::image_sink::encoder_frame& frame_data = video_ringbuffer[pos]; - encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, encoder->get_timestamp_ms(frame_data.pts - start_pts)); + // Add one frame + if (frame < frame_count) + { + const usz pos = (video_start_offset + frame) % video_ringbuffer.size(); + utils::video_sink::encoder_frame& frame_data = video_ringbuffer[pos]; + const usz timestamp_ms = encoder->get_timestamp_ms(frame_data.pts - video_start_pts); + encoder->add_frame(frame_data.data, frame_data.pitch, frame_data.width, frame_data.height, frame_data.av_pixel_format, timestamp_ms); - // TODO: add audio data to encoder + // Increase sync timestamp + sync_timestamp_us = timestamp_ms * 1000; + } + + // Add all the samples that fit into the last frame + for (usz i = block; i < block_count; i++) + { + const usz pos = (audio_start_offset + i) % audio_ringbuffer.size(); + const audio_block& sample_block = audio_ringbuffer[pos]; + const usz timestamp_us = encoder->get_audio_timestamp_us(sample_block.pts - audio_start_pts); + + // Stop adding new samples if the sync timestamp is exceeded, unless we already added all the frames. 
+ if (timestamp_us > sync_timestamp_us && frame < frame_count) + { + break; + } + + encoder->add_audio_samples(sample_block.block.data(), CELL_REC_AUDIO_BLOCK_SAMPLES, channels, timestamp_us); + block++; + } } video_ringbuffer.clear(); + audio_ringbuffer.clear(); } } @@ -927,11 +1120,11 @@ error_code cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cp if (opt.value.audio_input == CELL_REC_PARAM_AUDIO_INPUT_DISABLE) { - rec.param.audio_input_mix_vol = 0; + rec.param.audio_input_mix_vol = CELL_REC_PARAM_AUDIO_INPUT_MIX_VOL_MIN; } else { - rec.param.audio_input_mix_vol = 100; + rec.param.audio_input_mix_vol = CELL_REC_PARAM_AUDIO_INPUT_MIX_VOL_MAX; } break; } @@ -1051,11 +1244,16 @@ error_code cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cp rec.cb = cb; rec.cbUserData = cbUserData; - rec.last_pts = -1; - rec.audio_ringbuffer.clear(); rec.video_ringbuffer.clear(); - rec.video_ring_frame_count = 0; + rec.audio_ringbuffer.clear(); rec.video_ring_pos = 0; + rec.audio_ring_pos = 0; + rec.video_ring_frame_count = 0; + rec.audio_ring_block_count = 0; + rec.recording_time_start = 0; + rec.recording_time_total = 0; + rec.pause_time_start = 0; + rec.pause_time_total = 0; rec.paused = false; rec.set_video_params(pParam->videoFmt); @@ -1063,25 +1261,32 @@ error_code cellRecOpen(vm::cptr pDirName, vm::cptr pFileName, vm::cp if (rec.param.ring_sec > 0) { - const u32 audio_size_per_sample = rec.channels * sizeof(float); - const u32 audio_size_per_second = rec.sample_rate * audio_size_per_sample; - const usz audio_ring_buffer_size = rec.param.ring_sec * audio_size_per_second; + const usz audio_ring_buffer_size = static_cast(std::ceil((rec.param.ring_sec * rec.sample_rate) / static_cast(CELL_REC_AUDIO_BLOCK_SAMPLES))); const usz video_ring_buffer_size = rec.param.ring_sec * rec.fps; cellRec.notice("Preparing ringbuffer for %d seconds. 
video_ring_buffer_size=%d, audio_ring_buffer_size=%d, pitch=%d, width=%d, height=%d", rec.param.ring_sec, video_ring_buffer_size, audio_ring_buffer_size, rec.input_format.pitch, rec.input_format.width, rec.input_format.height); rec.audio_ringbuffer.resize(audio_ring_buffer_size); - rec.audio_ring_step = audio_size_per_sample; - rec.video_ringbuffer.resize(video_ring_buffer_size, {}); - rec.image_sink = std::make_shared(); + rec.video_ringbuffer.resize(video_ring_buffer_size); + } + + if (rec.param.use_internal_audio() || rec.param.use_internal_video()) + { + rec.sink = std::make_shared(); + rec.sink->use_internal_audio = rec.param.use_internal_audio(); + rec.sink->use_internal_video = rec.param.use_internal_video(); + rec.sink->set_sample_rate(rec.sample_rate); } rec.encoder = std::make_shared(); + rec.encoder->use_internal_audio = false; // We use the other sink + rec.encoder->use_internal_video = false; // We use the other sink rec.encoder->set_path(vfs::get(rec.param.filename)); rec.encoder->set_framerate(rec.fps); rec.encoder->set_video_bitrate(rec.video_bps); rec.encoder->set_video_codec(rec.video_codec_id); rec.encoder->set_sample_rate(rec.sample_rate); + rec.encoder->set_audio_channels(rec.channels); rec.encoder->set_audio_bitrate(rec.audio_bps); rec.encoder->set_audio_codec(rec.audio_codec_id); rec.encoder->set_output_format(rec.output_format); @@ -1113,13 +1318,13 @@ error_code cellRecClose(s32 isDiscard) if (isDiscard) { - // No need to flush - rec.stop_image_provider(false); + // No need to flush the encoder + rec.stop_video_provider(false); rec.encoder->stop(false); - if (rec.image_sink) + if (rec.sink) { - rec.image_sink->stop(false); + rec.sink->stop(true); } if (fs::is_file(rec.param.filename)) @@ -1135,18 +1340,18 @@ error_code cellRecClose(s32 isDiscard) else { // Flush to make sure we encode all remaining frames - rec.stop_image_provider(true); + rec.stop_video_provider(true); rec.encoder->stop(true); - rec.recording_time_total = 
rec.encoder->get_timestamp_ms(rec.encoder->last_pts()); + rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_video_pts()); - if (rec.image_sink) + if (rec.sink) { - rec.image_sink->stop(true); + rec.sink->stop(true); } const s64 start_pts = rec.encoder->get_pts(rec.param.scene_metadata.start_time); const s64 end_pts = rec.encoder->get_pts(rec.param.scene_metadata.end_time); - const s64 last_pts = rec.encoder->last_pts(); + const s64 last_pts = rec.encoder->last_video_pts(); is_valid_range = start_pts >= 0 && end_pts <= last_pts; } @@ -1157,8 +1362,8 @@ error_code cellRecClose(s32 isDiscard) g_fxo->need(); utils::video_provider& video_provider = g_fxo->get(); - // Release the image sink if it was used - if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE) + // Release the video sink if it was used + if (rec.param.use_internal_video() || rec.param.use_internal_audio()) { const recording_mode old_mode = g_recording_mode.exchange(recording_mode::stopped); @@ -1167,15 +1372,15 @@ error_code cellRecClose(s32 isDiscard) cellRec.error("cellRecClose: Unexpected recording mode %s found while stopping video capture.", old_mode); } - if (!video_provider.set_image_sink(nullptr, recording_mode::cell)) + if (!video_provider.set_video_sink(nullptr, recording_mode::cell)) { - cellRec.error("cellRecClose failed to release image sink"); + cellRec.error("cellRecClose failed to release video sink"); } } rec.param = {}; rec.encoder.reset(); - rec.image_sink.reset(); + rec.sink.reset(); rec.audio_ringbuffer.clear(); rec.video_ringbuffer.clear(); rec.state = rec_state::closed; @@ -1207,24 +1412,18 @@ error_code cellRecStop() sysutil_register_cb([&rec](ppu_thread& ppu) -> s32 { - // Disable image sink if it was used - if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE) - { - const recording_mode old_mode = g_recording_mode.exchange(recording_mode::stopped); + // cellRecStop actually just pauses the recording + rec.pause_video_provider(); - 
if (old_mode != recording_mode::cell && old_mode != recording_mode::stopped) - { - cellRec.error("cellRecStop: Unexpected recording mode %s found while stopping video capture. (ring_sec=%d)", old_mode, rec.param.ring_sec); - } + if (rec.sink) + { + rec.sink->pause(true); } - // cellRecStop actually just pauses the recording - rec.pause_image_provider(); - ensure(!!rec.encoder); rec.encoder->pause(true); - rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_pts()); + rec.recording_time_total = rec.encoder->get_timestamp_ms(rec.encoder->last_video_pts()); rec.state = rec_state::stopped; rec.cb(ppu, CELL_REC_STATUS_STOP, CELL_OK, rec.cbUserData); @@ -1249,33 +1448,23 @@ error_code cellRecStart() { // Start/resume the recording ensure(!!rec.encoder); - rec.encoder->encode(); + + if (rec.param.ring_sec == 0) + { + rec.encoder->encode(); + } g_fxo->need(); utils::video_provider& video_provider = g_fxo->get(); - // Setup an image sink if it is needed - if (rec.param.video_input == CELL_REC_PARAM_VIDEO_INPUT_DISABLE) + // Setup a video sink if it is needed + if (rec.param.use_internal_video() || rec.param.use_internal_audio()) { - if (rec.param.ring_sec <= 0) + if (rec.sink && !video_provider.set_video_sink(rec.sink, recording_mode::cell)) { - // Regular recording - if (!video_provider.set_image_sink(rec.encoder, recording_mode::cell)) - { - cellRec.error("Failed to set image sink"); - rec.cb(ppu, CELL_REC_STATUS_ERR, CELL_REC_ERROR_FATAL, rec.cbUserData); - return CELL_OK; - } - } - else - { - // Ringbuffer recording - if (!video_provider.set_image_sink(rec.image_sink, recording_mode::cell)) - { - cellRec.error("Failed to set image sink"); - rec.cb(ppu, CELL_REC_STATUS_ERR, CELL_REC_ERROR_FATAL, rec.cbUserData); - return CELL_OK; - } + cellRec.error("Failed to set video sink"); + rec.cb(ppu, CELL_REC_STATUS_ERR, CELL_REC_ERROR_FATAL, rec.cbUserData); + return CELL_OK; } // Force rsx recording @@ -1287,7 +1476,12 @@ error_code cellRecStart() 
g_recording_mode = recording_mode::stopped; } - rec.start_image_provider(); + rec.start_video_provider(); + + if (rec.sink) + { + rec.sink->resume(); + } if (rec.encoder->has_error) { diff --git a/rpcs3/Emu/Cell/Modules/cellSail.cpp b/rpcs3/Emu/Cell/Modules/cellSail.cpp index 3fd558beb6dc..76a6d3c6fda0 100644 --- a/rpcs3/Emu/Cell/Modules/cellSail.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSail.cpp @@ -639,7 +639,7 @@ error_code cellSailPlayerInitialize2(ppu_thread& ppu, pSelf->paused = true; { - CellSailEvent event; + CellSailEvent event{}; event.u32x2.major = CELL_SAIL_EVENT_PLAYER_STATE_CHANGED; event.u32x2.minor = 0; pSelf->callback(ppu, pSelf->callbackArg, event, CELL_SAIL_PLAYER_STATE_INITIALIZED, 0); @@ -778,7 +778,7 @@ error_code cellSailPlayerBoot(ppu_thread& ppu, vm::ptr pSelf, u6 cellSail.warning("cellSailPlayerBoot(pSelf=*0x%x, userParam=%d)", pSelf, userParam); { - CellSailEvent event; + CellSailEvent event{}; event.u32x2.major = CELL_SAIL_EVENT_PLAYER_STATE_CHANGED; event.u32x2.minor = 0; pSelf->callback(ppu, pSelf->callbackArg, event, CELL_SAIL_PLAYER_STATE_BOOT_TRANSITION, 0); @@ -788,7 +788,7 @@ error_code cellSailPlayerBoot(ppu_thread& ppu, vm::ptr pSelf, u6 pSelf->booted = true; { - CellSailEvent event; + CellSailEvent event{}; event.u32x2.major = CELL_SAIL_EVENT_PLAYER_CALL_COMPLETED; event.u32x2.minor = CELL_SAIL_PLAYER_CALL_BOOT; pSelf->callback(ppu, pSelf->callbackArg, event, 0, 0); diff --git a/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp b/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp index 701d741341e4..b11065748b1b 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsxaudio.cpp @@ -1842,39 +1842,8 @@ u32 rsxaudio_backend_thread::write_data_callback(u32 bytes, void* buf) return bytes; } - if (cb_cfg.input_ch_cnt > cb_cfg.output_ch_cnt) - { - if (cb_cfg.input_ch_cnt == static_cast(AudioChannelCnt::SURROUND_7_1)) - { - if (cb_cfg.output_ch_cnt == static_cast(AudioChannelCnt::SURROUND_5_1)) - { - AudioBackend::downmix(sample_cnt, 
callback_tmp_buf.data(), callback_tmp_buf.data()); - } - else if (cb_cfg.output_ch_cnt == static_cast(AudioChannelCnt::STEREO)) - { - AudioBackend::downmix(sample_cnt, callback_tmp_buf.data(), callback_tmp_buf.data()); - } - else - { - fmt::throw_exception("Invalid downmix combination: %u -> %u", cb_cfg.input_ch_cnt, cb_cfg.output_ch_cnt); - } - } - else if (cb_cfg.input_ch_cnt == static_cast(AudioChannelCnt::SURROUND_5_1)) - { - if (cb_cfg.output_ch_cnt == static_cast(AudioChannelCnt::STEREO)) - { - AudioBackend::downmix(sample_cnt, callback_tmp_buf.data(), callback_tmp_buf.data()); - } - else - { - fmt::throw_exception("Invalid downmix combination: %u -> %u", cb_cfg.input_ch_cnt, cb_cfg.output_ch_cnt); - } - } - else - { - fmt::throw_exception("Invalid downmix combination: %u -> %u", cb_cfg.input_ch_cnt, cb_cfg.output_ch_cnt); - } - } + // Downmix if necessary + AudioBackend::downmix(sample_cnt, cb_cfg.input_ch_cnt, cb_cfg.output_ch_cnt, callback_tmp_buf.data(), callback_tmp_buf.data()); if (cb_cfg.target_volume != cb_cfg.current_volume) { diff --git a/rpcs3/Emu/Io/recording_config.h b/rpcs3/Emu/Io/recording_config.h index f1e2e58242ae..127d24015fd5 100644 --- a/rpcs3/Emu/Io/recording_config.h +++ b/rpcs3/Emu/Io/recording_config.h @@ -8,14 +8,29 @@ struct cfg_recording final : cfg::node bool load(); void save() const; - cfg::uint<0, 60> framerate{this, "Framerate", 30}; - cfg::uint<0, 7680> width{this, "Width", 1280}; - cfg::uint<0, 4320> height{this, "Height", 720}; - cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P - cfg::uint<0, 32813> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4 - cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000}; - cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2}; - cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12}; + struct node_video : cfg::node + { + node_video(cfg::node* _this) : cfg::node(_this, "Video") {} + + cfg::uint<0, 60> 
framerate{this, "Framerate", 30}; + cfg::uint<0, 7680> width{this, "Width", 1280}; + cfg::uint<0, 4320> height{this, "Height", 720}; + cfg::uint<0, 192> pixel_format{this, "AVPixelFormat", 0}; // AVPixelFormat::AV_PIX_FMT_YUV420P + cfg::uint<0, 0xFFFF> video_codec{this, "AVCodecID", 12}; // AVCodecID::AV_CODEC_ID_MPEG4 + cfg::uint<0, 25000000> video_bps{this, "Video Bitrate", 4000000}; + cfg::uint<0, 5> max_b_frames{this, "Max B-Frames", 2}; + cfg::uint<0, 20> gop_size{this, "Group of Pictures Size", 12}; + + } video{ this }; + + struct node_audio : cfg::node + { + node_audio(cfg::node* _this) : cfg::node(_this, "Audio") {} + + cfg::uint<0x10000, 0x17000> audio_codec{this, "AVCodecID", 86018}; // AVCodecID::AV_CODEC_ID_AAC + cfg::uint<0, 25000000> audio_bps{this, "Audio Bitrate", 320000}; + + } audio{ this }; const std::string path; }; diff --git a/rpcs3/Loader/PSF.cpp b/rpcs3/Loader/PSF.cpp index 7d7e878bee5c..e455a71404bd 100644 --- a/rpcs3/Loader/PSF.cpp +++ b/rpcs3/Loader/PSF.cpp @@ -295,7 +295,7 @@ namespace psf for (const auto& entry : psf) { - def_table_t index; + def_table_t index{}; index.key_off = ::narrow(key_offset); index.param_fmt = entry.second.type(); index.param_len = entry.second.size(); @@ -313,7 +313,7 @@ namespace psf key_offset = utils::align(key_offset, 4); // Generate header - header_t header; + header_t header{}; header.magic = "\0PSF"_u32; header.version = 0x101; header.off_key_table = ::narrow(sizeof(header_t) + sizeof(def_table_t) * psf.size()); diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index cc7c60f51d01..0d7a9a74ea1d 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -618,7 +618,7 @@ - + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 5a11ef535faf..447430681ef8 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2275,7 +2275,7 @@ Utilities - + Utilities diff --git a/rpcs3/rpcs3qt/gs_frame.cpp b/rpcs3/rpcs3qt/gs_frame.cpp index 
f14213cb5275..498e4ac3f544 100644 --- a/rpcs3/rpcs3qt/gs_frame.cpp +++ b/rpcs3/rpcs3qt/gs_frame.cpp @@ -12,6 +12,7 @@ #include "Emu/IdManager.h" #include "Emu/Cell/Modules/cellScreenshot.h" #include "Emu/Cell/Modules/cellVideoOut.h" +#include "Emu/Cell/Modules/cellAudio.h" #include "Emu/RSX/rsx_utils.h" #include "Emu/RSX/Overlays/overlay_message.h" #include "Emu/Io/recording_config.h" @@ -445,9 +446,9 @@ void gs_frame::toggle_recording() { m_video_encoder->stop(); - if (!video_provider.set_image_sink(nullptr, recording_mode::rpcs3)) + if (!video_provider.set_video_sink(nullptr, recording_mode::rpcs3)) { - gui_log.warning("The video provider could not release the image sink. A sink with higher priority must have been set."); + gui_log.warning("The video provider could not release the video sink. A sink with higher priority must have been set."); } // Play a sound @@ -489,21 +490,24 @@ void gs_frame::toggle_recording() video_path += "recording_" + date_time::current_time_narrow<'_'>() + ".mp4"; utils::video_encoder::frame_format output_format{}; - output_format.av_pixel_format = static_cast(g_cfg_recording.pixel_format.get()); - output_format.width = g_cfg_recording.width; - output_format.height = g_cfg_recording.height; - output_format.pitch = g_cfg_recording.width * 4; + output_format.av_pixel_format = static_cast(g_cfg_recording.video.pixel_format.get()); + output_format.width = g_cfg_recording.video.width; + output_format.height = g_cfg_recording.video.height; + output_format.pitch = g_cfg_recording.video.width * 4; + m_video_encoder->use_internal_audio = true; + m_video_encoder->use_internal_video = true; m_video_encoder->set_path(video_path); - m_video_encoder->set_framerate(g_cfg_recording.framerate); - m_video_encoder->set_video_bitrate(g_cfg_recording.video_bps); - m_video_encoder->set_video_codec(g_cfg_recording.video_codec); - m_video_encoder->set_max_b_frames(g_cfg_recording.max_b_frames); - m_video_encoder->set_gop_size(g_cfg_recording.gop_size); + 
m_video_encoder->set_framerate(g_cfg_recording.video.framerate); + m_video_encoder->set_video_bitrate(g_cfg_recording.video.video_bps); + m_video_encoder->set_video_codec(g_cfg_recording.video.video_codec); + m_video_encoder->set_max_b_frames(g_cfg_recording.video.max_b_frames); + m_video_encoder->set_gop_size(g_cfg_recording.video.gop_size); m_video_encoder->set_output_format(output_format); - m_video_encoder->set_sample_rate(0); // TODO - m_video_encoder->set_audio_bitrate(0); // TODO - m_video_encoder->set_audio_codec(0); // TODO + m_video_encoder->set_sample_rate(g_fxo->get().cfg.audio_sampling_rate); + m_video_encoder->set_audio_channels(static_cast(g_fxo->get().cfg.audio_channels)); + m_video_encoder->set_audio_bitrate(g_cfg_recording.audio.audio_bps); + m_video_encoder->set_audio_codec(g_cfg_recording.audio.audio_codec); m_video_encoder->encode(); if (m_video_encoder->has_error) @@ -513,15 +517,15 @@ void gs_frame::toggle_recording() return; } - if (!video_provider.set_image_sink(m_video_encoder, recording_mode::rpcs3)) + if (!video_provider.set_video_sink(m_video_encoder, recording_mode::rpcs3)) { - gui_log.warning("The video provider could not set the image sink. A sink with higher priority must have been set."); + gui_log.warning("The video provider could not set the video sink. 
A sink with higher priority must have been set."); rsx::overlays::queue_message(tr("Recording not possible").toStdString()); m_video_encoder->stop(); return; } - video_provider.set_pause_time(0); + video_provider.set_pause_time_us(0); g_recording_mode = recording_mode::rpcs3; diff --git a/rpcs3/rpcs3qt/gs_frame.h b/rpcs3/rpcs3qt/gs_frame.h index 4872de1dad43..a5e69fa958b8 100644 --- a/rpcs3/rpcs3qt/gs_frame.h +++ b/rpcs3/rpcs3qt/gs_frame.h @@ -5,7 +5,6 @@ #include "util/types.hpp" #include "util/atomic.hpp" #include "util/media_utils.h" -#include "util/video_provider.h" #include "Emu/RSX/GSFrameBase.h" #include diff --git a/rpcs3/util/image_sink.h b/rpcs3/util/image_sink.h deleted file mode 100644 index 3c23eca514ce..000000000000 --- a/rpcs3/util/image_sink.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "util/types.hpp" -#include "util/atomic.hpp" -#include "Utilities/mutex.h" - -#include -#include - -namespace utils -{ - class image_sink - { - public: - image_sink() = default; - - virtual void stop(bool flush = true) = 0; - virtual void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) = 0; - - s64 get_pts(usz timestamp_ms) const - { - return static_cast(std::round((timestamp_ms * m_framerate) / 1000.f)); - } - - usz get_timestamp_ms(s64 pts) const - { - return static_cast(std::round((pts * 1000) / static_cast(m_framerate))); - } - - atomic_t has_error{false}; - - struct encoder_frame - { - encoder_frame() = default; - encoder_frame(usz timestamp_ms, u32 pitch, u32 width, u32 height, s32 av_pixel_format, std::vector&& data) - : timestamp_ms(timestamp_ms), pitch(pitch), width(width), height(height), av_pixel_format(av_pixel_format), data(std::move(data)) - {} - - s64 pts = -1; // Optional - usz timestamp_ms = 0; - u32 pitch = 0; - u32 width = 0; - u32 height = 0; - s32 av_pixel_format = 0; // NOTE: Make sure this is a valid AVPixelFormat - std::vector data; - }; - - protected: - shared_mutex m_mtx; - 
std::deque m_frames_to_encode; - atomic_t m_flush = false; - u32 m_framerate = 0; - }; -} diff --git a/rpcs3/util/media_utils.cpp b/rpcs3/util/media_utils.cpp index 3dddd5b0f872..9e21298dabf1 100644 --- a/rpcs3/util/media_utils.cpp +++ b/rpcs3/util/media_utils.cpp @@ -32,6 +32,28 @@ LOG_CHANNEL(media_log, "Media"); namespace utils { + template + static inline void write_byteswapped(const u8* src, u8* dst) + { + *reinterpret_cast(dst) = *reinterpret_cast*>(src); + } + + template + static inline void copy_samples(const u8* src, u8* dst, usz sample_count, bool swap_endianness) + { + if (swap_endianness) + { + for (usz i = 0; i < sample_count; i++) + { + write_byteswapped(src + i * sizeof(T), dst + i * sizeof(T)); + } + } + else + { + std::memcpy(dst, src, sample_count * sizeof(T)); + } + } + template <> std::string media_info::get_metadata(const std::string& key, const std::string& def) const { @@ -204,11 +226,19 @@ namespace utils struct scoped_av { - AVFormatContext* format = nullptr; - const AVCodec* codec = nullptr; - AVCodecContext* context = nullptr; - AVFrame* frame = nullptr; - AVStream* stream = nullptr; + struct ctx + { + const AVCodec* codec = nullptr; + AVCodecContext* context = nullptr; + AVStream* stream = nullptr; + AVPacket* packet = nullptr; + AVFrame* frame = nullptr; + }; + + ctx audio{}; + ctx video{}; + + AVFormatContext* format_context = nullptr; SwrContext* swr = nullptr; SwsContext* sws = nullptr; std::function kill_callback = nullptr; @@ -216,21 +246,38 @@ namespace utils ~scoped_av() { // Clean up - if (frame) + if (audio.frame) + { + av_frame_unref(audio.frame); + av_frame_free(&audio.frame); + } + if (video.frame) + { + av_frame_unref(video.frame); + av_frame_free(&video.frame); + } + if (audio.packet) + { + av_packet_unref(audio.packet); + av_packet_free(&audio.packet); + } + if (video.packet) { - av_frame_unref(frame); - av_frame_free(&frame); + av_packet_unref(video.packet); + av_packet_free(&video.packet); } if (swr) swr_free(&swr); if 
(sws) sws_freeContext(sws); - if (context) - avcodec_close(context); + if (audio.context) + avcodec_close(audio.context); + if (video.context) + avcodec_close(video.context); // AVCodec is managed by libavformat, no need to free it // see: https://stackoverflow.com/a/18047320 - if (format) - avformat_free_context(format); + if (format_context) + avformat_free_context(format_context); //if (stream) // av_free(stream); if (kill_callback) @@ -238,6 +285,107 @@ namespace utils } }; + static std::string channel_layout_name(const AVChannelLayout& ch_layout) + { + std::vector ch_layout_buf(64); + int len = av_channel_layout_describe(&ch_layout, ch_layout_buf.data(), ch_layout_buf.size()); + if (len < 0) + { + media_log.error("av_channel_layout_describe failed. Error: %d='%s'", len, av_error_to_string(len)); + return {}; + } + + if (len > static_cast(ch_layout_buf.size())) + { + // Try again with a bigger buffer + media_log.notice("av_channel_layout_describe needs a bigger buffer: len=%d", len); + ch_layout_buf.clear(); + ch_layout_buf.resize(len); + + len = av_channel_layout_describe(&ch_layout, ch_layout_buf.data(), ch_layout_buf.size()); + if (len < 0) + { + media_log.error("av_channel_layout_describe failed. 
Error: %d='%s'", len, av_error_to_string(len)); + return {}; + } + } + + return ch_layout_buf.data(); + } + + // check that a given sample format is supported by the encoder + static bool check_sample_fmt(const AVCodec* codec, enum AVSampleFormat sample_fmt) + { + if (!codec) return false; + + for (const AVSampleFormat* p = codec->sample_fmts; p && *p != AV_SAMPLE_FMT_NONE; p++) + { + if (*p == sample_fmt) + { + return true; + } + } + return false; + } + + // just pick the highest supported samplerate + static int select_sample_rate(const AVCodec* codec) + { + if (!codec || !codec->supported_samplerates) + return 48000; + + int best_samplerate = 0; + for (const int* samplerate = codec->supported_samplerates; samplerate && *samplerate != 0; samplerate++) + { + if (!best_samplerate || abs(48000 - *samplerate) < abs(48000 - best_samplerate)) + { + best_samplerate = *samplerate; + } + } + return best_samplerate; + } + + AVChannelLayout get_preferred_channel_layout(int channels) + { + switch (channels) + { + case 2: + return AV_CHANNEL_LAYOUT_STEREO; + case 6: + return AV_CHANNEL_LAYOUT_5POINT1; + case 8: + return AV_CHANNEL_LAYOUT_7POINT1; + default: + break; + } + return {}; + } + + static constexpr AVChannelLayout empty_ch_layout = {}; + + // select layout with the exact channel count + static const AVChannelLayout* select_channel_layout(const AVCodec* codec, int channels) + { + if (!codec) return nullptr; + + const AVChannelLayout preferred_ch_layout = get_preferred_channel_layout(channels); + const AVChannelLayout* found_ch_layout = nullptr; + + for (const AVChannelLayout* ch_layout = codec->ch_layouts; + ch_layout && memcmp(ch_layout, &empty_ch_layout, sizeof(AVChannelLayout)) != 0; + ch_layout++) + { + media_log.notice("select_channel_layout: listing channel layout '%s' with %d channels", channel_layout_name(*ch_layout), ch_layout->nb_channels); + + if (ch_layout->nb_channels == channels && memcmp(ch_layout, &preferred_ch_layout, sizeof(AVChannelLayout)) == 0) + 
{ + found_ch_layout = ch_layout; + } + } + + return found_ch_layout; + } + audio_decoder::audio_decoder() { } @@ -263,7 +411,6 @@ namespace utils track_fully_consumed = 0; has_error = false; m_size = 0; - duration_ms = 0; timestamps_ms.clear(); data.clear(); } @@ -295,14 +442,14 @@ namespace utils scoped_av av; // Get format from audio file - av.format = avformat_alloc_context(); - if (int err = avformat_open_input(&av.format, path.c_str(), nullptr, nullptr); err < 0) + av.format_context = avformat_alloc_context(); + if (int err = avformat_open_input(&av.format_context, path.c_str(), nullptr, nullptr); err < 0) { media_log.error("audio_decoder: Could not open file '%s'. Error: %d='%s'", path, err, av_error_to_string(err)); has_error = true; return; } - if (int err = avformat_find_stream_info(av.format, nullptr); err < 0) + if (int err = avformat_find_stream_info(av.format_context, nullptr); err < 0) { media_log.error("audio_decoder: Could not retrieve stream info from file '%s'. Error: %d='%s'", path, err, av_error_to_string(err)); has_error = true; @@ -312,11 +459,11 @@ namespace utils // Find the first audio stream AVStream* stream = nullptr; unsigned int stream_index; - for (stream_index = 0; stream_index < av.format->nb_streams; stream_index++) + for (stream_index = 0; stream_index < av.format_context->nb_streams; stream_index++) { - if (av.format->streams[stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) + if (av.format_context->streams[stream_index]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - stream = av.format->streams[stream_index]; + stream = av.format_context->streams[stream_index]; break; } } @@ -328,8 +475,8 @@ namespace utils } // Find decoder - av.codec = avcodec_find_decoder(stream->codecpar->codec_id); - if (!av.codec) + av.audio.codec = avcodec_find_decoder(stream->codecpar->codec_id); + if (!av.audio.codec) { media_log.error("audio_decoder: Failed to find decoder for stream #%u in file '%s'", stream_index, path); has_error = true; @@ 
-337,8 +484,8 @@ namespace utils } // Allocate context - av.context = avcodec_alloc_context3(av.codec); - if (!av.context) + av.audio.context = avcodec_alloc_context3(av.audio.codec); + if (!av.audio.context) { media_log.error("audio_decoder: Failed to allocate context for stream #%u in file '%s'", stream_index, path); has_error = true; @@ -346,7 +493,7 @@ namespace utils } // Open decoder - if (int err = avcodec_open2(av.context, av.codec, nullptr); err < 0) + if (int err = avcodec_open2(av.audio.context, av.audio.codec, nullptr); err < 0) { media_log.error("audio_decoder: Failed to open decoder for stream #%u in file '%s'. Error: %d='%s'", stream_index, path, err, av_error_to_string(err)); has_error = true; @@ -389,23 +536,21 @@ namespace utils } // Prepare to read data - av.frame = av_frame_alloc(); - if (!av.frame) + av.audio.frame = av_frame_alloc(); + if (!av.audio.frame) { media_log.error("audio_decoder: Error allocating the frame"); has_error = true; return; } - duration_ms = stream->duration / 1000; - AVPacket* packet = av_packet_alloc(); std::unique_ptr packet_(packet); // Iterate through frames - while (thread_ctrl::state() != thread_state::aborting && av_read_frame(av.format, packet) >= 0) + while (thread_ctrl::state() != thread_state::aborting && av_read_frame(av.format_context, packet) >= 0) { - if (int err = avcodec_send_packet(av.context, packet); err < 0) + if (int err = avcodec_send_packet(av.audio.context, packet); err < 0) { media_log.error("audio_decoder: Queuing error: %d='%s'", err, av_error_to_string(err)); has_error = true; @@ -414,7 +559,7 @@ namespace utils while (thread_ctrl::state() != thread_state::aborting) { - if (int err = avcodec_receive_frame(av.context, av.frame); err < 0) + if (int err = avcodec_receive_frame(av.audio.context, av.audio.frame); err < 0) { if (err == AVERROR(EAGAIN) || err == averror_eof) break; @@ -427,7 +572,7 @@ namespace utils // Resample frames u8* buffer; const int align = 1; - const int buffer_size = 
av_samples_alloc(&buffer, nullptr, dst_channels, av.frame->nb_samples, dst_format, align); + const int buffer_size = av_samples_alloc(&buffer, nullptr, dst_channels, av.audio.frame->nb_samples, dst_format, align); if (buffer_size < 0) { media_log.error("audio_decoder: Error allocating buffer: %d='%s'", buffer_size, av_error_to_string(buffer_size)); @@ -435,7 +580,7 @@ namespace utils return; } - const int frame_count = swr_convert(av.swr, &buffer, av.frame->nb_samples, const_cast(av.frame->data), av.frame->nb_samples); + const int frame_count = swr_convert(av.swr, &buffer, av.audio.frame->nb_samples, const_cast(av.audio.frame->data), av.audio.frame->nb_samples); if (frame_count < 0) { media_log.error("audio_decoder: Error converting frame: %d='%s'", frame_count, av_error_to_string(frame_count)); @@ -450,25 +595,10 @@ namespace utils std::scoped_lock lock(m_mtx); data.resize(m_size + buffer_size); - if (m_swap_endianness) - { - // The format is float 32bit per channel. - const auto write_byteswapped = [](const void* src, void* dst) -> void - { - *static_cast(dst) = *static_cast*>(src); - }; + // The format is float 32bit per channel. + copy_samples(buffer, &data[m_size], buffer_size / sizeof(f32), m_swap_endianness); - for (size_t i = 0; i < (buffer_size - sizeof(f32)); i += sizeof(f32)) - { - write_byteswapped(buffer + i, data.data() + m_size + i); - } - } - else - { - memcpy(&data[m_size], buffer, buffer_size); - } - - const s64 timestamp_ms = stream->time_base.den ? (1000 * av.frame->best_effort_timestamp * stream->time_base.num) / stream->time_base.den : 0; + const s64 timestamp_ms = stream->time_base.den ? 
(1000 * av.audio.frame->best_effort_timestamp * stream->time_base.num) / stream->time_base.den : 0; timestamps_ms.push_back({m_size, timestamp_ms}); m_size += buffer_size; } @@ -476,7 +606,7 @@ namespace utils if (buffer) av_free(buffer); - media_log.notice("audio_decoder: decoded frame_count=%d buffer_size=%d timestamp_us=%d", frame_count, buffer_size, av.frame->best_effort_timestamp); + media_log.notice("audio_decoder: decoded frame_count=%d buffer_size=%d timestamp_us=%d", frame_count, buffer_size, av.audio.frame->best_effort_timestamp); } } }; @@ -535,7 +665,7 @@ namespace utils } video_encoder::video_encoder() - : utils::image_sink() + : utils::video_sink() { } @@ -549,9 +679,9 @@ namespace utils return m_path; } - s64 video_encoder::last_pts() const + s64 video_encoder::last_video_pts() const { - return m_last_pts; + return m_last_video_pts; } void video_encoder::set_path(const std::string& path) @@ -594,6 +724,11 @@ namespace utils m_sample_rate = sample_rate; } + void video_encoder::set_audio_channels(u32 channels) + { + m_channels = channels; + } + void video_encoder::set_audio_bitrate(u32 bitrate) { m_audio_bitrate_bps = bitrate; @@ -604,16 +739,6 @@ namespace utils m_audio_codec_id = codec_id; } - void video_encoder::add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) - { - // Do not allow new frames while flushing - if (m_flush) - return; - - std::lock_guard lock(m_mtx); - m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame)); - } - void video_encoder::pause(bool flush) { if (m_thread) @@ -658,24 +783,33 @@ namespace utils std::lock_guard lock(m_mtx); m_frames_to_encode.clear(); + m_samples_to_encode.clear(); has_error = false; m_flush = false; m_paused = false; m_running = false; } + void video_encoder::resume() + { + media_log.notice("video_encoder: Resuming video encoder"); + + m_flush = false; + m_paused = false; + } + void video_encoder::encode() { if 
(m_running) { // Resume - m_flush = false; - m_paused = false; + resume(); media_log.success("video_encoder: resuming recording of '%s'", m_path); return; } - m_last_pts = 0; + m_last_audio_pts = 0; + m_last_video_pts = 0; stop(); @@ -692,7 +826,34 @@ namespace utils { m_running = true; - // TODO: audio encoding + av_log_set_callback([](void* avcl, int level, const char* fmt, va_list vl) -> void + { + if (level > av_log_get_level()) + { + return; + } + + constexpr int line_size = 1024; + char line[line_size]{}; + int print_prefix = 1; + + if (int err = av_log_format_line2(avcl, level, fmt, vl, line, line_size, &print_prefix); err < 0) + { + media_log.error("av_log: av_log_format_line2 failed. Error: %d='%s'", err, av_error_to_string(err)); + return; + } + + std::string msg = line; + fmt::trim_back(msg, "\n\r\t "); + + if (level <= AV_LOG_ERROR) + media_log.error("av_log: %s", msg); + else if (level <= AV_LOG_WARNING) + media_log.warning("av_log: %s", msg); + else + media_log.notice("av_log: %s", msg); + }); + av_log_set_level(AV_LOG_ERROR); // Reset variables at all costs scoped_av av; @@ -702,140 +863,183 @@ namespace utils m_running = false; }; - const AVPixelFormat out_format = static_cast(m_out_format.av_pixel_format); - const char* av_output_format = nullptr; - - const auto find_format = [&](const AVCodec* codec) -> const char* + // Let's list the encoders first + std::vector audio_codecs; + std::vector video_codecs; + void* opaque = nullptr; + while (const AVCodec* codec = av_codec_iterate(&opaque)) { - if (!codec) - return nullptr; + if (codec->type == AVMediaType::AVMEDIA_TYPE_AUDIO) + { + media_log.notice("video_encoder: Found audio codec %d = %s", static_cast(codec->id), codec->name); + audio_codecs.push_back(codec); + } + else if (codec->type == AVMediaType::AVMEDIA_TYPE_VIDEO) + { + media_log.notice("video_encoder: Found video codec %d = %s", static_cast(codec->id), codec->name); + video_codecs.push_back(codec); + } + } + const AVPixelFormat 
out_pix_format = static_cast(m_out_format.av_pixel_format); + + const auto find_format = [&](AVCodecID video_codec, AVCodecID audio_codec) -> const AVOutputFormat* + { // Try to find a preferable output format std::vector oformats; void* opaque = nullptr; for (const AVOutputFormat* oformat = av_muxer_iterate(&opaque); !!oformat; oformat = av_muxer_iterate(&opaque)) { - if (avformat_query_codec(oformat, codec->id, FF_COMPLIANCE_STRICT) == 1) + media_log.notice("video_encoder: Listing output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(oformat->video_codec), static_cast(oformat->audio_codec)); + if (avformat_query_codec(oformat, video_codec, FF_COMPLIANCE_NORMAL) == 1 && + avformat_query_codec(oformat, audio_codec, FF_COMPLIANCE_NORMAL) == 1) { - media_log.notice("video_encoder: Found output format '%s'", oformat->name); + oformats.push_back(oformat); + } + } - switch (codec->id) - { - case AV_CODEC_ID_MPEG4: - if (strcmp(oformat->name, "avi") == 0) - return oformat->name; - break; - case AV_CODEC_ID_H264: - case AV_CODEC_ID_MJPEG: - // TODO - break; - default: - break; - } + for (const AVOutputFormat* oformat : oformats) + { + if (!oformat) continue; + media_log.notice("video_encoder: Found compatible output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(oformat->video_codec), static_cast(oformat->audio_codec)); + } - oformats.push_back(oformat); + // Select best match + for (const AVOutputFormat* oformat : oformats) + { + if (oformat && oformat->video_codec == video_codec && oformat->audio_codec == audio_codec) + { + media_log.notice("video_encoder: Using matching output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(oformat->video_codec), static_cast(oformat->audio_codec)); + return oformat; } } // Fallback to first found format - if (!oformats.empty() && oformats.front()) + if (const AVOutputFormat* oformat = oformats.empty() ? 
nullptr : oformats.front()) { - const AVOutputFormat* oformat = oformats.front(); - media_log.notice("video_encoder: Falling back to output format '%s'", oformat->name); - return oformat->name; + media_log.notice("video_encoder: Using suboptimal output format '%s' (video_codec=%d, audio_codec=%d)", oformat->name, static_cast(oformat->video_codec), static_cast(oformat->audio_codec)); + return oformat; } return nullptr; }; - AVCodecID used_codec = static_cast(m_video_codec_id); + const AVCodecID video_codec = static_cast(m_video_codec_id); + const AVCodecID audio_codec = static_cast(m_audio_codec_id); + const AVOutputFormat* out_format = find_format(video_codec, audio_codec); - // Find specified codec first - if (const AVCodec* encoder = avcodec_find_encoder(used_codec); !!encoder) + if (out_format) { - media_log.success("video_encoder: Found requested video_codec %d = %s", static_cast(used_codec), encoder->name); - av_output_format = find_format(encoder); - - if (av_output_format) - { - media_log.success("video_encoder: Found requested output format '%s'", av_output_format); - } - else - { - media_log.error("video_encoder: Could not find a format for the requested video_codec %d = %s", static_cast(used_codec), encoder->name); - } + media_log.success("video_encoder: Found requested output format '%s'", out_format->name); } else { - media_log.error("video_encoder: Could not find requested video_codec %d", static_cast(used_codec)); - } + media_log.error("video_encoder: Could not find a format for the requested video_codec %d and audio_codec %d", m_video_codec_id, m_audio_codec_id); - // Fallback to some other codec - if (!av_output_format) - { - void* opaque = nullptr; - for (const AVCodec* codec = av_codec_iterate(&opaque); !!codec; codec = av_codec_iterate(&opaque)) + // Fallback to some other codec + for (const AVCodec* video_codec : video_codecs) { - if (av_codec_is_encoder(codec)) + for (const AVCodec* audio_codec : audio_codecs) { - 
media_log.notice("video_encoder: Found video_codec %d = %s", static_cast(codec->id), codec->name); - av_output_format = find_format(codec); + out_format = find_format(video_codec->id, audio_codec->id); - if (av_output_format) + if (out_format) { - media_log.success("video_encoder: Found fallback output format '%s'", av_output_format); + media_log.success("video_encoder: Found fallback output format '%s'", out_format->name); break; } } + + if (out_format) + { + break; + } } } - if (!av_output_format) + if (!out_format) { media_log.error("video_encoder: Could not find any output format"); has_error = true; return; } - if (int err = avformat_alloc_output_context2(&av.format, nullptr, av_output_format, path.c_str()); err < 0) + if (int err = avformat_alloc_output_context2(&av.format_context, out_format, nullptr, nullptr); err < 0) { - media_log.error("video_encoder: avformat_alloc_output_context2 failed. Error: %d='%s'", err, av_error_to_string(err)); + media_log.error("video_encoder: avformat_alloc_output_context2 for '%s' failed. Error: %d='%s'", out_format->name, err, av_error_to_string(err)); has_error = true; return; } - if (!av.format) + if (!av.format_context) { media_log.error("video_encoder: avformat_alloc_output_context2 failed"); has_error = true; return; } - if (!(av.codec = avcodec_find_encoder(av.format->oformat->video_codec))) + const auto create_context = [this, &av](bool is_video) -> bool { - media_log.error("video_encoder: avcodec_find_encoder failed"); - has_error = true; - return; - } + const std::string type = is_video ? "video" : "audio"; + scoped_av::ctx& ctx = is_video ? av.video : av.audio; + + if (is_video) + { + if (!(ctx.codec = avcodec_find_encoder(av.format_context->oformat->video_codec))) + { + media_log.error("video_encoder: avcodec_find_encoder for video failed. 
video_codec=%d", static_cast(av.format_context->oformat->video_codec)); + return false; + } + } + else + { + if (!(ctx.codec = avcodec_find_encoder(av.format_context->oformat->audio_codec))) + { + media_log.error("video_encoder: avcodec_find_encoder for audio failed. audio_codec=%d", static_cast(av.format_context->oformat->audio_codec)); + return false; + } + } + + if (!(ctx.stream = avformat_new_stream(av.format_context, nullptr))) + { + media_log.error("video_encoder: avformat_new_stream for %s failed", type); + return false; + } + + ctx.stream->id = is_video ? 0 : 1; + + if (!(ctx.context = avcodec_alloc_context3(ctx.codec))) + { + media_log.error("video_encoder: avcodec_alloc_context3 for %s failed", type); + return false; + } + + if (av.format_context->oformat->flags & AVFMT_GLOBALHEADER) + { + ctx.context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + } + + return true; + }; - if (!(av.stream = avformat_new_stream(av.format, nullptr))) + if (!create_context(true)) { - media_log.error("video_encoder: avformat_new_stream failed"); has_error = true; return; } - av.stream->id = static_cast(av.format->nb_streams - 1); - - if (!(av.context = avcodec_alloc_context3(av.codec))) + if (!create_context(false)) { - media_log.error("video_encoder: avcodec_alloc_context3 failed"); has_error = true; return; } - media_log.notice("video_encoder: using video_codec = %d", static_cast(av.format->oformat->video_codec)); + media_log.notice("video_encoder: using audio_codec = %d", static_cast(av.format_context->oformat->audio_codec)); + media_log.notice("video_encoder: using sample_rate = %d", m_sample_rate); + media_log.notice("video_encoder: using audio_bitrate = %d", m_audio_bitrate_bps); + media_log.notice("video_encoder: using audio channels = %d", m_channels); + media_log.notice("video_encoder: using video_codec = %d", static_cast(av.format_context->oformat->video_codec)); media_log.notice("video_encoder: using video_bitrate = %d", m_video_bitrate_bps); 
media_log.notice("video_encoder: using out width = %d", m_out_format.width); media_log.notice("video_encoder: using out height = %d", m_out_format.height); @@ -843,67 +1047,186 @@ namespace utils media_log.notice("video_encoder: using gop_size = %d", m_gop_size); media_log.notice("video_encoder: using max_b_frames = %d", m_max_b_frames); - av.context->codec_id = av.format->oformat->video_codec; - av.context->bit_rate = m_video_bitrate_bps; - av.context->width = static_cast(m_out_format.width); - av.context->height = static_cast(m_out_format.height); - av.context->time_base = {.num = 1, .den = static_cast(m_framerate)}; - av.context->framerate = {.num = static_cast(m_framerate), .den = 1}; - av.context->pix_fmt = out_format; - av.context->gop_size = m_gop_size; - av.context->max_b_frames = m_max_b_frames; - - if (av.format->oformat->flags & AVFMT_GLOBALHEADER) + // select audio parameters supported by the encoder + if (av.audio.context) { - av.context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; - } + if (const AVChannelLayout* ch_layout = select_channel_layout(av.audio.codec, m_channels)) + { + media_log.notice("video_encoder: found channel layout '%s' with %d channels", channel_layout_name(*ch_layout), ch_layout->nb_channels); - if (int err = avcodec_open2(av.context, av.codec, nullptr); err != 0) - { - media_log.error("video_encoder: avcodec_open2 failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; + if (int err = av_channel_layout_copy(&av.audio.context->ch_layout, ch_layout); err != 0) + { + media_log.error("video_encoder: av_channel_layout_copy failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + } + else + { + media_log.notice("video_encoder: select_channel_layout returned nullptr, trying with own layout..."); + + const AVChannelLayout new_ch_layout = get_preferred_channel_layout(m_channels); + + if (memcmp(&new_ch_layout, &empty_ch_layout, sizeof(AVChannelLayout)) == 0) + { + media_log.error("video_encoder: unsupported audio channel count: %d", m_channels); + has_error = true; + return; + } + + if (int err = av_channel_layout_copy(&av.audio.context->ch_layout, &new_ch_layout); err != 0) + { + media_log.error("video_encoder: av_channel_layout_copy failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + } + + m_sample_rate = select_sample_rate(av.audio.codec); + + av.audio.context->codec_id = av.format_context->oformat->audio_codec; + av.audio.context->codec_type = AVMEDIA_TYPE_AUDIO; + av.audio.context->bit_rate = m_audio_bitrate_bps; + av.audio.context->sample_rate = m_sample_rate; + av.audio.context->time_base = {.num = 1, .den = av.audio.context->sample_rate}; + av.audio.context->sample_fmt = AV_SAMPLE_FMT_FLTP; // AV_SAMPLE_FMT_FLT is not supported in regular AC3 + av.audio.stream->time_base = av.audio.context->time_base; + + // check that the encoder supports the format + if (!check_sample_fmt(av.audio.codec, av.audio.context->sample_fmt)) + { + media_log.error("video_encoder: Audio encoder does not support sample format %s", av_get_sample_fmt_name(av.audio.context->sample_fmt)); + has_error = true; + return; + } + + if (int err = avcodec_open2(av.audio.context, av.audio.codec, nullptr); err != 0) + { + media_log.error("video_encoder: avcodec_open2 for audio failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (!(av.audio.packet = av_packet_alloc())) + { + media_log.error("video_encoder: av_packet_alloc for audio packet failed"); + has_error = true; + return; + } + + if (!(av.audio.frame = av_frame_alloc())) + { + media_log.error("video_encoder: av_frame_alloc for audio frame failed"); + has_error = true; + return; + } + + av.audio.frame->format = AV_SAMPLE_FMT_FLTP; + av.audio.frame->nb_samples = av.audio.context->frame_size; + + if (int err = av_channel_layout_copy(&av.audio.frame->ch_layout, &av.audio.context->ch_layout); err < 0) + { + media_log.error("video_encoder: av_channel_layout_copy for audio frame failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (int err = av_frame_get_buffer(av.audio.frame, 0); err < 0) + { + media_log.error("video_encoder: av_frame_get_buffer for audio frame failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (int err = avcodec_parameters_from_context(av.audio.stream->codecpar, av.audio.context); err < 0) + { + media_log.error("video_encoder: avcodec_parameters_from_context for audio failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + // Log channel layout + media_log.notice("video_encoder: av_channel_layout='%s'", channel_layout_name(av.audio.frame->ch_layout)); } - if (!(av.frame = av_frame_alloc())) + // select video parameters supported by the encoder + if (av.video.context) { - media_log.error("video_encoder: av_frame_alloc failed"); - has_error = true; - return; - } + av.video.context->codec_id = av.format_context->oformat->video_codec; + av.video.context->codec_type = AVMEDIA_TYPE_VIDEO; + av.video.context->frame_number = 0; + av.video.context->bit_rate = m_video_bitrate_bps; + av.video.context->width = static_cast(m_out_format.width); + av.video.context->height = static_cast(m_out_format.height); + av.video.context->time_base = {.num = 1, .den = static_cast(m_framerate)}; + av.video.context->framerate = {.num = static_cast(m_framerate), .den = 1}; + av.video.context->pix_fmt = out_pix_format; + av.video.context->gop_size = m_gop_size; + av.video.context->max_b_frames = m_max_b_frames; + av.video.stream->time_base = av.video.context->time_base; + + if (int err = avcodec_open2(av.video.context, av.video.codec, nullptr); err != 0) + { + media_log.error("video_encoder: avcodec_open2 for video failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } - av.frame->format = av.context->pix_fmt; - av.frame->width = av.context->width; - av.frame->height = av.context->height; + if (!(av.video.packet = av_packet_alloc())) + { + media_log.error("video_encoder: av_packet_alloc for video packet failed"); + has_error = true; + return; + } - if (int err = av_frame_get_buffer(av.frame, 32); err < 0) - { - media_log.error("video_encoder: av_frame_get_buffer failed. 
Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; + if (!(av.video.frame = av_frame_alloc())) + { + media_log.error("video_encoder: av_frame_alloc for video frame failed"); + has_error = true; + return; + } + + av.video.frame->format = av.video.context->pix_fmt; + av.video.frame->width = av.video.context->width; + av.video.frame->height = av.video.context->height; + + if (int err = av_frame_get_buffer(av.video.frame, 0); err < 0) + { + media_log.error("video_encoder: av_frame_get_buffer for video frame failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + if (int err = avcodec_parameters_from_context(av.video.stream->codecpar, av.video.context); err < 0) + { + media_log.error("video_encoder: avcodec_parameters_from_context for video failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } } - if (int err = avcodec_parameters_from_context(av.stream->codecpar, av.context); err < 0) + media_log.notice("video_encoder: av_dump_format"); + for (u32 i = 0; i < av.format_context->nb_streams; i++) { - media_log.error("video_encoder: avcodec_parameters_from_context failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; + av_dump_format(av.format_context, i, path.c_str(), 1); } - av_dump_format(av.format, 0, path.c_str(), 1); - - if (int err = avio_open(&av.format->pb, path.c_str(), AVIO_FLAG_WRITE); err != 0) + // open the output file, if needed + if (!(av.format_context->flags & AVFMT_NOFILE)) { - media_log.error("video_encoder: avio_open failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - return; + if (int err = avio_open(&av.format_context->pb, path.c_str(), AVIO_FLAG_WRITE); err != 0) + { + media_log.error("video_encoder: avio_open failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } } - if (int err = avformat_write_header(av.format, nullptr); err < 0) + if (int err = avformat_write_header(av.format_context, nullptr); err < 0) { media_log.error("video_encoder: avformat_write_header failed. Error: %d='%s'", err, av_error_to_string(err)); - if (int err = avio_close(av.format->pb); err != 0) + if (int err = avio_close(av.format_context->pb); err != 0) { media_log.error("video_encoder: avio_close failed. Error: %d='%s'", err, av_error_to_string(err)); } @@ -912,21 +1235,11 @@ namespace utils return; } - const auto flush = [&]() + const auto flush = [&](scoped_av::ctx& ctx) { - while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error) + while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error && ctx.context) { - AVPacket* packet = av_packet_alloc(); - std::unique_ptr packet_(packet); - - if (!packet) - { - media_log.error("video_encoder: av_packet_alloc failed"); - has_error = true; - return; - } - - if (int err = avcodec_receive_packet(av.context, packet); err < 0) + if (int err = avcodec_receive_packet(ctx.context, ctx.packet); err < 0) { if (err == AVERROR(EAGAIN) || err == averror_eof) break; @@ -936,133 +1249,363 @@ namespace utils return; } - av_packet_rescale_ts(packet, av.context->time_base, av.stream->time_base); - packet->stream_index = av.stream->index; + av_packet_rescale_ts(ctx.packet, ctx.context->time_base, ctx.stream->time_base); + ctx.packet->stream_index = ctx.stream->index; - if (int err = av_interleaved_write_frame(av.format, packet); err < 0) + if (int err = av_interleaved_write_frame(av.format_context, ctx.packet); err < 0) { - media_log.error("video_encoder: av_interleaved_write_frame failed. Error: %d='%s'", err, av_error_to_string(err)); + media_log.error("video_encoder: av_write_frame failed. 
Error: %d='%s'", err, av_error_to_string(err)); has_error = true; return; } } }; - s64 last_pts = -1; + u32 audio_sample_remainder = 0; + s64 last_audio_pts = -1; + s64 last_audio_frame_pts = 0; + s64 last_video_pts = -1; + + // Allocate audio buffer for our audio frame + std::vector audio_frame; + u32 audio_frame_sample_count = 0; + const bool sample_fmt_is_planar = av.audio.context && av_sample_fmt_is_planar(av.audio.context->sample_fmt) != 0; + const int sample_fmt_bytes = av.audio.context ? av_get_bytes_per_sample(av.audio.context->sample_fmt) : 0; + ensure(sample_fmt_bytes == sizeof(f32)); // We only support FLT or FLTP for now + + if (av.audio.frame) + { + audio_frame.resize(av.audio.frame->nb_samples * av.audio.frame->ch_layout.nb_channels * sizeof(f32)); + last_audio_frame_pts -= av.audio.frame->nb_samples; + } + + encoder_sample last_samples; + u32 leftover_sample_count = 0; while ((thread_ctrl::state() != thread_state::aborting || m_flush) && !has_error) { + // Fetch video frame encoder_frame frame_data; + bool got_frame = false; { m_mtx.lock(); if (m_frames_to_encode.empty()) { m_mtx.unlock(); + } + else + { + frame_data = std::move(m_frames_to_encode.front()); + m_frames_to_encode.pop_front(); + m_mtx.unlock(); + + got_frame = true; + + // Calculate presentation timestamp. + const s64 pts = get_pts(frame_data.timestamp_ms); - if (m_flush) + // We need to skip this frame if it has the same timestamp. + if (pts <= last_video_pts) { - m_flush = false; + media_log.trace("video_encoder: skipping frame. last_pts=%d, pts=%d, timestamp_ms=%d", last_video_pts, pts, frame_data.timestamp_ms); + } + else if (av.video.context) + { + media_log.trace("video_encoder: adding new frame. timestamp_ms=%d", frame_data.timestamp_ms); - if (!m_paused) + if (int err = av_frame_make_writable(av.video.frame); err < 0) { - // We only stop the thread after a flush if we are not paused + media_log.error("video_encoder: av_frame_make_writable failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; break; } - } - // We only actually pause after we process all frames - const u64 sleeptime = m_paused ? 10000 : 1; - thread_ctrl::wait_for(sleeptime); - continue; - } + u8* in_data[4]{}; + int in_line[4]{}; - frame_data = std::move(m_frames_to_encode.front()); - m_frames_to_encode.pop_front(); + const AVPixelFormat in_format = static_cast(frame_data.av_pixel_format); - m_mtx.unlock(); + if (int ret = av_image_fill_linesizes(in_line, in_format, frame_data.width); ret < 0) + { + fmt::throw_exception("video_encoder: av_image_fill_linesizes failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); + } - media_log.trace("video_encoder: adding new frame. timestamp=%d", frame_data.timestamp_ms); - } + if (int ret = av_image_fill_pointers(in_data, in_format, frame_data.height, frame_data.data.data(), in_line); ret < 0) + { + fmt::throw_exception("video_encoder: av_image_fill_pointers failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); + } - // Calculate presentation timestamp. - const s64 pts = get_pts(frame_data.timestamp_ms); + // Update the context in case the frame format has changed + av.sws = sws_getCachedContext(av.sws, frame_data.width, frame_data.height, in_format, + av.video.context->width, av.video.context->height, out_pix_format, SWS_BICUBIC, nullptr, nullptr, nullptr); + if (!av.sws) + { + media_log.error("video_encoder: sws_getCachedContext failed"); + has_error = true; + break; + } - // We need to skip this frame if it has the same timestamp. - if (pts <= last_pts) - { - media_log.notice("video_encoder: skipping frame. last_pts=%d, pts=%d", last_pts, pts); - continue; - } + if (int err = sws_scale(av.sws, in_data, in_line, 0, frame_data.height, av.video.frame->data, av.video.frame->linesize); err < 0) + { + media_log.error("video_encoder: sws_scale failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + break; + } - if (int err = av_frame_make_writable(av.frame); err < 0) - { - media_log.error("video_encoder: av_frame_make_writable failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - break; - } + av.video.frame->pts = pts; - u8* in_data[4]{}; - int in_line[4]{}; + if (int err = avcodec_send_frame(av.video.context, av.video.frame); err < 0) + { + media_log.error("video_encoder: avcodec_send_frame for video failed. Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + break; + } - const AVPixelFormat in_format = static_cast(frame_data.av_pixel_format); + flush(av.video); - if (int ret = av_image_fill_linesizes(in_line, in_format, frame_data.width); ret < 0) - { - fmt::throw_exception("video_encoder: av_image_fill_linesizes failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); + last_video_pts = av.video.frame->pts; + m_last_video_pts = last_video_pts; + } + } } - if (int ret = av_image_fill_pointers(in_data, in_format, frame_data.height, frame_data.data.data(), in_line); ret < 0) + // Fetch audio sample + encoder_sample sample_data; + bool got_sample = false; { - fmt::throw_exception("video_encoder: av_image_fill_pointers failed (ret=0x%x): %s", ret, utils::av_error_to_string(ret)); - } + m_audio_mtx.lock(); - // Update the context in case the frame format has changed - av.sws = sws_getCachedContext(av.sws, frame_data.width, frame_data.height, in_format, - av.context->width, av.context->height, out_format, SWS_BICUBIC, nullptr, nullptr, nullptr); - if (!av.sws) - { - media_log.error("video_encoder: sws_getCachedContext failed"); - has_error = true; - break; - } + if (m_samples_to_encode.empty()) + { + m_audio_mtx.unlock(); + } + else + { + sample_data = std::move(m_samples_to_encode.front()); + m_samples_to_encode.pop_front(); + m_audio_mtx.unlock(); - if (int err = sws_scale(av.sws, in_data, in_line, 0, frame_data.height, av.frame->data, 
av.frame->linesize); err < 0) - { - media_log.error("video_encoder: sws_scale failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - break; - } + got_sample = true; - av.frame->pts = pts; + if (sample_data.channels != av.audio.frame->ch_layout.nb_channels) + { + fmt::throw_exception("video_encoder: Audio sample channel count %d does not match frame channel count %d", sample_data.channels, av.audio.frame->ch_layout.nb_channels); + } - if (int err = avcodec_send_frame(av.context, av.frame); err < 0) - { - media_log.error("video_encoder: avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err)); - has_error = true; - break; + // Calculate presentation timestamp. + const s64 pts = get_audio_pts(sample_data.timestamp_us); + + // We need to skip this frame if it has the same timestamp. + if (pts <= last_audio_pts) + { + media_log.trace("video_encoder: skipping sample. last_pts=%d, pts=%d, timestamp_us=%d", last_audio_pts, pts, sample_data.timestamp_us); + } + else if (av.audio.context) + { + media_log.trace("video_encoder: adding new sample. timestamp_us=%d", sample_data.timestamp_us); + + static constexpr bool swap_endianness = false; + + const auto send_frame = [&]() + { + if (audio_frame_sample_count < static_cast(av.audio.frame->nb_samples)) + { + return; + } + + audio_frame_sample_count = 0; + + if (int err = av_frame_make_writable(av.audio.frame); err < 0) + { + media_log.error("video_encoder: av_frame_make_writable failed. 
Error: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + // NOTE: The ffmpeg channel layout should match our downmix channel layout + if (sample_fmt_is_planar) + { + const int channels = av.audio.frame->ch_layout.nb_channels; + const int samples = av.audio.frame->nb_samples; + + for (int ch = 0; ch < channels; ch++) + { + f32* dst = reinterpret_cast(av.audio.frame->data[ch]); + + for (int sample = 0; sample < samples; sample++) + { + dst[sample] = *reinterpret_cast(&audio_frame[(sample * channels + ch) * sizeof(f32)]); + } + } + } + else + { + std::memcpy(av.audio.frame->data[0], audio_frame.data(), audio_frame.size()); + } + + av.audio.frame->pts = last_audio_frame_pts + av.audio.frame->nb_samples; + + if (int err = avcodec_send_frame(av.audio.context, av.audio.frame); err < 0) + { + media_log.error("video_encoder: avcodec_send_frame failed: %d='%s'", err, av_error_to_string(err)); + has_error = true; + return; + } + + flush(av.audio); + + last_audio_frame_pts = av.audio.frame->pts; + }; + + const auto add_encoder_sample = [&](bool add_new_sample, u32 silence_to_add = 0) + { + const auto update_last_pts = [&](u32 samples_to_add) + { + const u32 sample_count = audio_sample_remainder + samples_to_add; + const u32 pts_to_add = sample_count / m_samples_per_block; + audio_sample_remainder = sample_count % m_samples_per_block; + last_audio_pts += pts_to_add; + }; + + // Copy as many old samples to our audio frame as possible + if (leftover_sample_count > 0) + { + const u32 samples_to_add = std::min(leftover_sample_count, av.audio.frame->nb_samples - audio_frame_sample_count); + + if (samples_to_add > 0) + { + const u8* src = &last_samples.data[(last_samples.sample_count - leftover_sample_count) * last_samples.channels * sizeof(f32)]; + u8* dst = &audio_frame[audio_frame_sample_count * last_samples.channels * sizeof(f32)]; + copy_samples(src, dst, samples_to_add * last_samples.channels, swap_endianness); + audio_frame_sample_count += 
samples_to_add; + leftover_sample_count -= samples_to_add; + update_last_pts(samples_to_add); + } + + if (samples_to_add < leftover_sample_count) + { + media_log.error("video_encoder: audio frame buffer is already filled entirely by last sample package..."); + } + } + else if (silence_to_add > 0) + { + const u32 samples_to_add = std::min(silence_to_add, av.audio.frame->nb_samples - audio_frame_sample_count); + + if (samples_to_add > 0) + { + u8* dst = &audio_frame[audio_frame_sample_count * av.audio.frame->ch_layout.nb_channels * sizeof(f32)]; + std::memset(dst, 0, samples_to_add * sample_data.channels * sizeof(f32)); + audio_frame_sample_count += samples_to_add; + update_last_pts(samples_to_add); + } + } + else if (add_new_sample) + { + // Copy as many new samples to our audio frame as possible + const u32 samples_to_add = std::min(sample_data.sample_count, av.audio.frame->nb_samples - audio_frame_sample_count); + + if (samples_to_add > 0) + { + const u8* src = sample_data.data.data(); + u8* dst = &audio_frame[audio_frame_sample_count * sample_data.channels * sizeof(f32)]; + copy_samples(src, dst, samples_to_add * sample_data.channels, swap_endianness); + audio_frame_sample_count += samples_to_add; + update_last_pts(samples_to_add); + } + + if (samples_to_add < sample_data.sample_count) + { + // Save this sample package for the next loop if it wasn't fully used. + leftover_sample_count = sample_data.sample_count - samples_to_add; + } + else + { + // Mark this sample package as fully used. 
+ leftover_sample_count = 0; + } + + last_samples = std::move(sample_data); + } + + send_frame(); + }; + + for (u32 sample = 0; !has_error;) + { + if (leftover_sample_count > 0) + { + // Add leftover samples + add_encoder_sample(false); + } + else if (pts > (last_audio_pts + 1)) + { + // Add silence to fill the gap + const u32 silence_to_add = static_cast(pts - (last_audio_pts + 1)); + add_encoder_sample(false, silence_to_add); + } + else if (sample == 0) + { + // Add new samples + add_encoder_sample(true); + sample++; + } + else + { + break; + } + } + + m_last_audio_pts = last_audio_pts; + } + } } - flush(); + if (!got_frame && !got_sample) + { + if (m_flush) + { + m_flush = false; + + if (!m_paused) + { + // We only stop the thread after a flush if we are not paused + break; + } + } - last_pts = av.frame->pts; + // We only actually pause after we process all frames + const u64 sleeptime_us = m_paused ? 10000 : 1; + thread_ctrl::wait_for(sleeptime_us); + continue; + } + } - m_last_pts = last_pts; + if (av.video.context) + { + if (int err = avcodec_send_frame(av.video.context, nullptr); err != 0) + { + media_log.error("video_encoder: final avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err)); + } } - if (int err = avcodec_send_frame(av.context, nullptr); err != 0) + if (av.audio.context) { - media_log.error("video_encoder: final avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err)); + if (int err = avcodec_send_frame(av.audio.context, nullptr); err != 0) + { + media_log.error("video_encoder: final avcodec_send_frame failed. Error: %d='%s'", err, av_error_to_string(err)); + } } - flush(); + flush(av.video); + flush(av.audio); - if (int err = av_write_trailer(av.format); err != 0) + if (int err = av_write_trailer(av.format_context); err != 0) { media_log.error("video_encoder: av_write_trailer failed. 
Error: %d='%s'", err, av_error_to_string(err)); } - if (int err = avio_close(av.format->pb); err != 0) + if (int err = avio_close(av.format_context->pb); err != 0) { media_log.error("video_encoder: avio_close failed. Error: %d='%s'", err, av_error_to_string(err)); } diff --git a/rpcs3/util/media_utils.h b/rpcs3/util/media_utils.h index 2718a8061765..9666e53d8644 100644 --- a/rpcs3/util/media_utils.h +++ b/rpcs3/util/media_utils.h @@ -73,10 +73,9 @@ namespace utils u32 set_next_index(bool next); shared_mutex m_mtx; - const s32 sample_rate = 48000; + static constexpr s32 sample_rate = 48000; std::vector data; atomic_t m_size = 0; - atomic_t duration_ms = 0; atomic_t track_fully_decoded{0}; atomic_t track_fully_consumed{0}; atomic_t has_error{false}; @@ -88,7 +87,7 @@ namespace utils std::unique_ptr>> m_thread; }; - class video_encoder : public utils::image_sink + class video_encoder : public utils::video_sink { public: video_encoder(); @@ -108,7 +107,7 @@ namespace utils }; std::string path() const; - s64 last_pts() const; + s64 last_video_pts() const; void set_path(const std::string& path); void set_framerate(u32 framerate); @@ -118,32 +117,33 @@ namespace utils void set_max_b_frames(s32 max_b_frames); void set_gop_size(s32 gop_size); void set_sample_rate(u32 sample_rate); + void set_audio_channels(u32 channels); void set_audio_bitrate(u32 bitrate); void set_audio_codec(s32 codec_id); - void add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) override; - void pause(bool flush = true); + void pause(bool flush = true) override; void stop(bool flush = true) override; + void resume() override; void encode(); private: std::string m_path; - s64 m_last_pts = 0; + s64 m_last_audio_pts = 0; + s64 m_last_video_pts = 0; // Thread control std::unique_ptr>> m_thread; atomic_t m_running = false; - atomic_t m_paused = false; // Video parameters u32 m_video_bitrate_bps = 0; - s32 m_video_codec_id = 12; // AV_CODEC_ID_MPEG4; + s32 
m_video_codec_id = 12; // AV_CODEC_ID_MPEG4 s32 m_max_b_frames = 2; s32 m_gop_size = 12; frame_format m_out_format{}; // Audio parameters - u32 m_sample_rate = 48000; - u32 m_audio_bitrate_bps = 96000; + u32 m_channels = 2; + u32 m_audio_bitrate_bps = 320000; s32 m_audio_codec_id = 86018; // AV_CODEC_ID_AAC }; } diff --git a/rpcs3/util/video_provider.cpp b/rpcs3/util/video_provider.cpp index d919137733b6..a5888daddef2 100644 --- a/rpcs3/util/video_provider.cpp +++ b/rpcs3/util/video_provider.cpp @@ -34,37 +34,37 @@ namespace utils g_recording_mode = recording_mode::stopped; } - bool video_provider::set_image_sink(std::shared_ptr sink, recording_mode type) + bool video_provider::set_video_sink(std::shared_ptr sink, recording_mode type) { - media_log.notice("video_provider: setting new image sink. sink=%d, type=%s", !!sink, type); + media_log.notice("video_provider: setting new video sink. sink=%d, type=%s", !!sink, type); if (type == recording_mode::stopped) { // Prevent misuse. type is supposed to be a valid state. - media_log.error("video_provider: cannot set image sink with type %s", type); + media_log.error("video_provider: cannot set video sink with type %s", type); return false; } std::lock_guard lock(m_mutex); - if (m_image_sink) + if (m_video_sink) { // cell has preference if (m_type == recording_mode::cell && m_type != type) { - media_log.warning("video_provider: cannot set image sink with type %s if type %s is active", type, m_type); + media_log.warning("video_provider: cannot set video sink with type %s if type %s is active", type, m_type); return false; } - if (m_type != type || m_image_sink != sink) + if (m_type != type || m_video_sink != sink) { - media_log.warning("video_provider: stopping current image sink of type %s", m_type); - m_image_sink->stop(); + media_log.warning("video_provider: stopping current video sink of type %s", m_type); + m_video_sink->stop(); } } m_type = sink ? 
type : recording_mode::stopped; - m_image_sink = sink; + m_video_sink = sink; if (m_type == recording_mode::stopped) { @@ -74,64 +74,132 @@ namespace utils return true; } - void video_provider::set_pause_time(usz pause_time_ms) + void video_provider::set_pause_time_us(usz pause_time_us) { std::lock_guard lock(m_mutex); - m_pause_time_ms = pause_time_ms; + m_pause_time_us = pause_time_us; + } + + recording_mode video_provider::check_mode() + { + if (!m_video_sink || m_video_sink->has_error) + { + g_recording_mode = recording_mode::stopped; + rsx::overlays::queue_message(localized_string_id::RECORDING_ABORTED); + } + + if (g_recording_mode == recording_mode::stopped) + { + m_active = false; + return g_recording_mode; + } + + if (!m_active.exchange(true)) + { + m_current_encoder_frame = 0; + m_current_encoder_sample = 0; + m_last_video_pts_incoming = -1; + m_last_audio_pts_incoming = -1; + } + + if (m_current_encoder_frame == 0 && m_current_encoder_sample == 0) + { + m_encoder_start = steady_clock::now(); + } + + return g_recording_mode; } bool video_provider::can_consume_frame() { std::lock_guard lock(m_mutex); - if (!m_image_sink) + if (!m_video_sink || !m_video_sink->use_internal_video) return false; - const usz timestamp_ms = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms; - const s64 pts = m_image_sink->get_pts(timestamp_ms); - return pts > m_last_pts_incoming; + const usz elapsed_us = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count(); + ensure(elapsed_us >= m_pause_time_us); + + const usz timestamp_ms = (elapsed_us - m_pause_time_us) / 1000; + const s64 pts = m_video_sink->get_pts(timestamp_ms); + return pts > m_last_video_pts_incoming; } void video_provider::present_frame(std::vector& data, u32 pitch, u32 width, u32 height, bool is_bgra) { std::lock_guard lock(m_mutex); - if (!m_image_sink || m_image_sink->has_error) + if (check_mode() == recording_mode::stopped) { - g_recording_mode = 
recording_mode::stopped; - rsx::overlays::queue_message(localized_string_id::RECORDING_ABORTED); + return; } - if (g_recording_mode == recording_mode::stopped) + // Calculate presentation timestamp. + const usz elapsed_us = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count(); + ensure(elapsed_us >= m_pause_time_us); + + const usz timestamp_ms = (elapsed_us - m_pause_time_us) / 1000; + const s64 pts = m_video_sink->get_pts(timestamp_ms); + + // We can just skip this frame if it has the same timestamp. + if (pts <= m_last_video_pts_incoming) { - m_active = false; return; } - if (!m_active.exchange(true)) + if (m_video_sink->add_frame(data, pitch, width, height, is_bgra ? AVPixelFormat::AV_PIX_FMT_BGRA : AVPixelFormat::AV_PIX_FMT_RGBA, timestamp_ms)) { - m_current_encoder_frame = 0; - m_last_pts_incoming = -1; + m_last_video_pts_incoming = pts; + m_current_encoder_frame++; + } + } + + bool video_provider::can_consume_sample() + { + std::lock_guard lock(m_mutex); + + if (!m_video_sink || !m_video_sink->use_internal_audio) + return false; + + const usz elapsed_us = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count(); + ensure(elapsed_us >= m_pause_time_us); + + const usz timestamp_us = elapsed_us - m_pause_time_us; + const s64 pts = m_video_sink->get_audio_pts(timestamp_us); + return pts > m_last_audio_pts_incoming; + } + + void video_provider::present_samples(u8* buf, u32 sample_count, u16 channels) + { + if (!buf || !sample_count || !channels) + { + return; } - if (m_current_encoder_frame == 0) + std::lock_guard lock(m_mutex); + + if (check_mode() == recording_mode::stopped) { - m_encoder_start = steady_clock::now(); + return; } // Calculate presentation timestamp. 
- const usz timestamp_ms = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count() - m_pause_time_ms; - const s64 pts = m_image_sink->get_pts(timestamp_ms); + const usz elapsed_us = std::chrono::duration_cast(steady_clock::now() - m_encoder_start).count(); + ensure(elapsed_us >= m_pause_time_us); - // We can just skip this frame if it has the same timestamp. - if (pts <= m_last_pts_incoming) + const usz timestamp_us = elapsed_us - m_pause_time_us; + const s64 pts = m_video_sink->get_audio_pts(timestamp_us); + + // We can just skip this sample if it has the same timestamp. + if (pts <= m_last_audio_pts_incoming) { return; } - m_last_pts_incoming = pts; - - m_current_encoder_frame++; - m_image_sink->add_frame(data, pitch, width, height, is_bgra ? AVPixelFormat::AV_PIX_FMT_BGRA : AVPixelFormat::AV_PIX_FMT_RGBA, timestamp_ms); + if (m_video_sink->add_audio_samples(buf, sample_count, channels, timestamp_us)) + { + m_last_audio_pts_incoming = pts; + m_current_encoder_sample += sample_count; + } } } diff --git a/rpcs3/util/video_provider.h b/rpcs3/util/video_provider.h index 31a051a11283..0e30b01f7ec1 100644 --- a/rpcs3/util/video_provider.h +++ b/rpcs3/util/video_provider.h @@ -1,6 +1,6 @@ #pragma once -#include "image_sink.h" +#include "video_sink.h" enum class recording_mode { @@ -17,20 +17,28 @@ namespace utils video_provider() = default; ~video_provider(); - bool set_image_sink(std::shared_ptr sink, recording_mode type); - void set_pause_time(usz pause_time_ms); + bool set_video_sink(std::shared_ptr sink, recording_mode type); + void set_pause_time_us(usz pause_time_us); + bool can_consume_frame(); void present_frame(std::vector& data, u32 pitch, u32 width, u32 height, bool is_bgra); + bool can_consume_sample(); + void present_samples(u8* buf, u32 sample_count, u16 channels); + private: + recording_mode check_mode(); + recording_mode m_type = recording_mode::stopped; - std::shared_ptr m_image_sink; + std::shared_ptr m_video_sink; shared_mutex 
m_mutex{}; atomic_t m_active{false}; atomic_t m_current_encoder_frame{0}; + atomic_t m_current_encoder_sample{0}; steady_clock::time_point m_encoder_start{}; - s64 m_last_pts_incoming = -1; - usz m_pause_time_ms = 0; + s64 m_last_video_pts_incoming = -1; + s64 m_last_audio_pts_incoming = -1; + usz m_pause_time_us = 0; }; } // namespace utils diff --git a/rpcs3/util/video_sink.h b/rpcs3/util/video_sink.h new file mode 100644 index 000000000000..9f1aadd65e21 --- /dev/null +++ b/rpcs3/util/video_sink.h @@ -0,0 +1,115 @@ +#pragma once + +#include "util/types.hpp" +#include "util/atomic.hpp" +#include "Utilities/mutex.h" + +#include +#include + +namespace utils +{ + class video_sink + { + public: + video_sink() = default; + + virtual void stop(bool flush = true) = 0; + virtual void pause(bool flush = true) = 0; + virtual void resume() = 0; + + bool add_frame(std::vector& frame, u32 pitch, u32 width, u32 height, s32 pixel_format, usz timestamp_ms) + { + // Do not allow new frames while flushing or paused + if (m_flush || m_paused) + return false; + + std::lock_guard lock(m_mtx); + m_frames_to_encode.emplace_back(timestamp_ms, pitch, width, height, pixel_format, std::move(frame)); + return true; + } + + bool add_audio_samples(const u8* buf, u32 sample_count, u16 channels, usz timestamp_us) + { + // Do not allow new samples while flushing or paused + if (m_flush || m_paused || !buf || !sample_count || !channels) + return false; + + std::vector sample(buf, buf + sample_count * channels * sizeof(f32)); + std::lock_guard lock(m_audio_mtx); + m_samples_to_encode.emplace_back(timestamp_us, sample_count, channels, std::move(sample)); + return true; + } + + s64 get_pts(usz timestamp_ms) const + { + return static_cast(std::round((timestamp_ms * m_framerate) / 1000.0)); + } + + s64 get_audio_pts(usz timestamp_us) const + { + static constexpr f64 us_per_sec = 1000000.0; + const f64 us_per_block = us_per_sec / (m_sample_rate / static_cast(m_samples_per_block)); + return 
static_cast(std::round(timestamp_us / us_per_block)); + } + + usz get_timestamp_ms(s64 pts) const + { + return static_cast(std::round((pts * 1000) / static_cast(m_framerate))); + } + + usz get_audio_timestamp_us(s64 pts) const + { + static constexpr f64 us_per_sec = 1000000.0; + const f64 us_per_block = us_per_sec / (m_sample_rate / static_cast(m_samples_per_block)); + return static_cast(pts * us_per_block); + } + + atomic_t has_error{false}; + + struct encoder_frame + { + encoder_frame() = default; + encoder_frame(usz timestamp_ms, u32 pitch, u32 width, u32 height, s32 av_pixel_format, std::vector&& data) + : timestamp_ms(timestamp_ms), pitch(pitch), width(width), height(height), av_pixel_format(av_pixel_format), data(std::move(data)) + {} + + s64 pts = -1; // Optional + usz timestamp_ms = 0; + u32 pitch = 0; + u32 width = 0; + u32 height = 0; + s32 av_pixel_format = 0; // NOTE: Make sure this is a valid AVPixelFormat + std::vector data; + }; + + struct encoder_sample + { + encoder_sample() = default; + encoder_sample(usz timestamp_us, u32 sample_count, u16 channels, std::vector&& data) + : timestamp_us(timestamp_us), sample_count(sample_count), channels(channels), data(std::move(data)) + { + } + + usz timestamp_us = 0; + u32 sample_count = 0; + u16 channels = 0; + std::vector data; + }; + + // These two variables should only be set once before we start encoding, so we don't need mutexes or atomics. + bool use_internal_audio = false; // True if we want to fetch samples from cellAudio + bool use_internal_video = false; // True if we want to fetch frames from rsx + + protected: + shared_mutex m_mtx; + std::deque m_frames_to_encode; + shared_mutex m_audio_mtx; + std::deque m_samples_to_encode; + atomic_t m_paused = false; + atomic_t m_flush = false; + u32 m_framerate = 30; + u32 m_sample_rate = 48000; + static constexpr u32 m_samples_per_block = 256; + }; +}