Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FrameMapper Audio Crackles/Popping Fixes #653

Merged
merged 3 commits into from
Apr 9, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/CVObjectDetection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,10 +477,6 @@ bool CVObjectDetection::_LoadObjDetectdData(){
detectionsData[id] = CVDetectionData(classIds, confidences, boxes, id);
}

// Show the time stamp from the last update in object detector data file
if (objMessage.has_last_updated())
cout << " Loaded Data. Saved Time Stamp: " << TimeUtil::ToString(objMessage.last_updated()) << endl;

// Delete all global objects allocated by libprotobuf.
google::protobuf::ShutdownProtobufLibrary();

Expand Down
11 changes: 0 additions & 11 deletions src/CVStabilization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,6 @@ void CVStabilization::stabilizeClip(openshot::Clip& video, size_t _start, size_t
// Update progress
processingController->SetProgress(uint(100*(frame_number-start)/(end-start)));
}
// Show average and max transformation parameters
std::cout<<"\nAVERAGE DX: "<<avr_dx/(frame_number-1)<<" AVERAGE DY: "<<avr_dy/(frame_number-1)<<" AVERAGE A: "<<avr_da/(frame_number-1)<<"\n";
std::cout<<"MAX X: "<<max_dx<<" MAX Y: "<<max_dy<<" MAX A: "<<max_da<<"\n\n";

// Calculate trajectory data
std::vector <CamTrajectory> trajectory = ComputeFramesTrajectory();
Expand Down Expand Up @@ -194,9 +191,6 @@ bool CVStabilization::TrackFrameFeatures(cv::Mat frame, size_t frameNum){
prev_to_cur_transform.push_back(TransformParam(dx, dy, da));
frame.copyTo(prev_grey);

// Show processing info
cout << "Frame: " << frameNum << " - good optical flow: " << prev_corner2.size() << endl;

return true;
}

Expand Down Expand Up @@ -423,11 +417,6 @@ bool CVStabilization::_LoadStabilizedData(){
transformationData[id] = TransformParam(dx,dy,da);
}

// Show the time stamp from the last update in stabilization data file
if (stabilizationMessage.has_last_updated()) {
cout << " Loaded Data. Saved Time Stamp: " << TimeUtil::ToString(stabilizationMessage.last_updated()) << endl;
}

// Delete all global objects allocated by libprotobuf.
google::protobuf::ShutdownProtobufLibrary();

Expand Down
5 changes: 0 additions & 5 deletions src/CVTracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,6 @@ bool CVTracker::_LoadTrackedData(){
trackedDataById[id] = FrameData(id, rotation, x1, y1, x2, y2);
}

// Show the time stamp from the last update in tracker data file
if (trackerMessage.has_last_updated()) {
cout << " Loaded Data. Saved Time Stamp: " << TimeUtil::ToString(trackerMessage.last_updated()) << endl;
}

// Delete all global objects allocated by libprotobuf.
google::protobuf::ShutdownProtobufLibrary();

Expand Down
6 changes: 2 additions & 4 deletions src/FFmpegWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1669,10 +1669,8 @@ void FFmpegWriter::write_audio_packets(bool is_final) {
av_opt_set_int(avr, "out_channels", info.channels, 0);
SWR_INIT(avr);
}
int nb_samples = 0;

// Convert audio samples
nb_samples = SWR_CONVERT(
int nb_samples = SWR_CONVERT(
avr, // audio resample context
audio_converted->data, // output data pointers
audio_converted->linesize[0], // output plane size, in bytes. (0 if unknown)
Expand All @@ -1683,7 +1681,7 @@ void FFmpegWriter::write_audio_packets(bool is_final) {
);

// Set remaining samples
remaining_frame_samples = nb_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
remaining_frame_samples = total_frame_samples;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fixes a bug where MONO channel layout crashes libopenshot


// Create a new array (to hold all resampled S16 audio samples)
all_resampled_samples = (int16_t *) av_malloc(
Expand Down
32 changes: 27 additions & 5 deletions src/FrameMapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ using namespace std;
using namespace openshot;

FrameMapper::FrameMapper(ReaderBase *reader, Fraction target, PulldownType target_pulldown, int target_sample_rate, int target_channels, ChannelLayout target_channel_layout) :
reader(reader), target(target), pulldown(target_pulldown), is_dirty(true), avr(NULL)
reader(reader), target(target), pulldown(target_pulldown), is_dirty(true), avr(NULL), parent_position(0.0)
{
// Set the original frame rate from the reader
original = Fraction(reader->info.fps.num, reader->info.fps.den);
Expand Down Expand Up @@ -112,6 +112,16 @@ void FrameMapper::Init()
fields.clear();
frames.clear();

// Find parent position (if any)
Clip *parent = (Clip *) ParentClip();
if (parent) {
parent_position = parent->Position();
parent_start = parent->Start();
} else {
parent_position = 0.0;
parent_start = 0.0;
}

// Mark as not dirty
is_dirty = false;

Expand Down Expand Up @@ -263,8 +273,9 @@ void FrameMapper::Init()

while (remaining_samples > 0)
{
// get original samples
int original_samples = Frame::GetSamplesPerFrame(AdjustFrameNumber(end_samples_frame), original, reader->info.sample_rate, reader->info.channels) - end_samples_position;
// Get original samples (with NO framerate adjustments)
// This is the original reader's frame numbers
int original_samples = Frame::GetSamplesPerFrame(end_samples_frame, original, reader->info.sample_rate, reader->info.channels) - end_samples_position;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing the AdjustFrameNumber() method from the original samples calculation, since this accesses the original reader and is always in the original frame #'s... never mapped to the Timeline offset.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great! Does this also fix the export issue, where we were getting deformed waveforms when exporting to a different FPS?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably! But I would love to test that again. The new unit test had hundreds of thousands of failures without this fix, and zero failures with it. But I didn't notice the failures until the Timeline merged the audio: some frames had the wrong # of samples, which introduced some high-frequency pops.


// Enough samples
if (original_samples >= remaining_samples)
Expand Down Expand Up @@ -395,9 +406,20 @@ std::shared_ptr<Frame> FrameMapper::GetFrame(int64_t requested_frame)
// Create a scoped lock, allowing only a single thread to run the following code at one time
const GenericScopedLock<CriticalSection> lock(getFrameCriticalSection);

// Check if mappings are dirty (and need to be recalculated)
// Find parent properties (if any)
Clip *parent = (Clip *) ParentClip();
if (parent) {
float position = parent->Position();
float start = parent->Start();
if (parent_position != position || parent_start != start) {
// Force dirty if parent clip has moved or been trimmed
// since this heavily affects frame #s and audio mappings
is_dirty = true;
}
}

// Check if mappings are dirty (and need to be recalculated)
if (is_dirty)
// Recalculate mappings
Init();

// Check final cache a 2nd time (due to potential lock already generating this frame)
Expand Down
2 changes: 2 additions & 0 deletions src/FrameMapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ namespace openshot
ReaderBase *reader; // The source video reader
CacheMemory final_cache; // Cache of actual Frame objects
bool is_dirty; // When this is true, the next call to GetFrame will re-init the mapping
float parent_position; // Position of parent clip (which is used to generate the audio mapping)
float parent_start; // Start of parent clip (which is used to generate the audio mapping)
SWRCONTEXT *avr; // Audio resampling context object

// Internal methods used by init
Expand Down
5 changes: 0 additions & 5 deletions src/effects/Stabilizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,6 @@ bool Stabilizer::LoadStabilizedData(std::string inputFilePath){
transformationData[id] = EffectTransformParam(dx,dy,da);
}

// Show the time stamp from the last update in stabilization data file
if (stabilizationMessage.has_last_updated()) {
cout << " Loaded Data. Saved Time Stamp: " << TimeUtil::ToString(stabilizationMessage.last_updated()) << endl;
}

// Delete all global objects allocated by libprotobuf.
google::protobuf::ShutdownProtobufLibrary();

Expand Down
5 changes: 0 additions & 5 deletions src/effects/Tracker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,6 @@ bool Tracker::LoadTrackedData(std::string inputFilePath){
trackedDataById[id] = EffectFrameData(id, rotation, x1, y1, x2, y2);
}

// Show the time stamp from the last update in tracker data file
if (trackerMessage.has_last_updated()) {
cout << " Loaded Data. Saved Time Stamp: " << TimeUtil::ToString(trackerMessage.last_updated()) << endl;
}

// Delete all global objects allocated by libprotobuf.
google::protobuf::ShutdownProtobufLibrary();

Expand Down
131 changes: 130 additions & 1 deletion tests/FrameMapper_Tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ TEST(redistribute_samples_per_frame) {
// (i.e. same exact audio sample data). We use a Timeline to overlap these clips
// (and offset 1 clip by 1 frame), and we verify that the correct # of samples is returned by each
// Clip Frame instance. In the past, FrameMappers would sometimes generate the wrong # of samples
// in a frame, and the Timeline recieve mismatching # of audio samples from 2 or more clips...
// in a frame, and the Timeline receive mismatching # of audio samples from 2 or more clips...
// causing audio data to be truncated and lost (i.e. creating a pop).

// Create cache object to hold test frames
Expand Down Expand Up @@ -485,6 +485,135 @@ TEST(redistribute_samples_per_frame) {
r.Close();
}

TEST(distribute_samples) {
// This test verifies that audio data can be redistributed correctly
// between common and uncommon frame rates
int sample_rate = 48000;
int channels = 2;
int num_seconds = 1;

// Source frame rates (varies the # of samples per frame)
vector<openshot::Fraction> rates = { openshot::Fraction(30,1),
openshot::Fraction(24,1) ,
openshot::Fraction(119,4),
openshot::Fraction(30000,1001) };
for (auto& frame_rate : rates) {
// Init sin wave variables
int OFFSET = 0;
float AMPLITUDE = 0.75;
double ANGLE = 0.0;
int NUM_SAMPLES = 100;

// Create cache object to hold test frames
CacheMemory cache;

// Let's create some test frames
for (int64_t frame_number = 1; frame_number <= (frame_rate.ToFloat() * num_seconds * 2); frame_number++) {
// Create blank frame (with specific frame #, samples, and channels)
int sample_count = openshot::Frame::GetSamplesPerFrame(frame_number, frame_rate, sample_rate, channels);
std::shared_ptr<openshot::Frame> f(new openshot::Frame(frame_number, sample_count, channels));
f->SampleRate(sample_rate);

// Create test samples with sin wave (predictable values)
float *audio_buffer = new float[sample_count * 2];
for (int sample_number = 0; sample_number < sample_count; sample_number++) {
// Calculate sin wave
float sample_value = float(AMPLITUDE * sin(ANGLE) + OFFSET);
audio_buffer[sample_number] = abs(sample_value);
ANGLE += (2 * M_PI) / NUM_SAMPLES;
}

// Add custom audio samples to Frame (bool replaceSamples, int destChannel, int destStartSample, const float* source,
f->AddAudio(true, 0, 0, audio_buffer, sample_count, 1.0); // add channel 1
f->AddAudio(true, 1, 0, audio_buffer, sample_count, 1.0); // add channel 2

// Add test frame to dummy reader
cache.Add(f);
}

// Create a default fraction (should be 1/1)
openshot::DummyReader r(frame_rate, 1920, 1080, sample_rate, channels, 30.0, &cache);
r.Open(); // Open the reader

// Target frame rates
vector<openshot::Fraction> mapped_rates = { openshot::Fraction(30,1),
openshot::Fraction(24,1) ,
openshot::Fraction(119,4),
openshot::Fraction(30000,1001) };
for (auto &mapped_rate : mapped_rates) {
// Reset SIN wave
ANGLE = 0.0;

// Map to different fps
FrameMapper map(&r, mapped_rate, PULLDOWN_NONE, sample_rate, channels, LAYOUT_STEREO);
map.info.has_audio = true;
map.Open();

// Loop through samples, and verify FrameMapper didn't mess up individual sample values
int num_samples = 0;
for (int frame_index = 1; frame_index <= (map.info.fps.ToInt() * num_seconds); frame_index++) {
int sample_count = map.GetFrame(frame_index)->GetAudioSamplesCount();
for (int sample_index = 0; sample_index < sample_count; sample_index++) {

// Calculate sin wave
float predicted_value = abs(float(AMPLITUDE * sin(ANGLE) + OFFSET));
ANGLE += (2 * M_PI) / NUM_SAMPLES;

// Verify each mapped sample value is correct (after being redistributed by the FrameMapper)
float mapped_value = map.GetFrame(frame_index)->GetAudioSample(0, sample_index, 1.0);
CHECK_CLOSE(predicted_value, mapped_value, 0.001);
}
// Increment sample value
num_samples += map.GetFrame(frame_index)->GetAudioSamplesCount();
}

float clip_position = 3.77;
int starting_clip_frame = round(clip_position * map.info.fps.ToFloat()) + 1;

// Create Timeline (same specs as reader)
Timeline t1(map.info.width, map.info.height, map.info.fps, map.info.sample_rate, map.info.channels,
map.info.channel_layout);

Clip c1;
c1.Reader(&map);
c1.Layer(1);
c1.Position(clip_position);
c1.Start(0.0);
c1.End(10.0);

// Add clips
t1.AddClip(&c1);
t1.Open();

// Reset SIN wave
ANGLE = 0.0;

for (int frame_index = starting_clip_frame; frame_index < (starting_clip_frame + (t1.info.fps.ToFloat() * num_seconds)); frame_index++) {
for (int sample_index = 0; sample_index < t1.GetFrame(frame_index)->GetAudioSamplesCount(); sample_index++) {
// Calculate sin wave
float predicted_value = abs(float(AMPLITUDE * sin(ANGLE) + OFFSET));
ANGLE += (2 * M_PI) / NUM_SAMPLES;

// Verify each mapped sample value is correct (after being redistributed by the FrameMapper)
float timeline_value = t1.GetFrame(frame_index)->GetAudioSample(0, sample_index, 1.0);

// Testing wave value X 2, since we have 2 overlapping clips
CHECK_CLOSE(predicted_value, timeline_value, 0.001);
}
}

// Close mapper
map.Close();
t1.Close();
}

// Clean up reader
r.Close();
cache.Clear();

} // for rates
}

TEST(Json)
{
DummyReader r(Fraction(30,1), 1280, 720, 48000, 2, 5.0);
Expand Down