Discontinuous timestamp support

When we index a format that is allowed to have discontinuous timestamps, try to break it down into distinct sections based on where the discontinuities occur, and reorder by PTS within each section. We define a discontinuity as a packet timestamp difference that cannot possibly be frame reordering; in this case, a backwards jump of more than 16 frames, where the frame duration is estimated from the PTS difference between the first two frames of the track. This probably breaks on insane VFR content. The correct way to do this is probably to fully decode, but that is a non-starter for a lot of use cases. By default, we try to normalize the sections into fully continuous timestamps. This adds a new field, OriginalPTS, to the public FFMS_FrameInfo struct to denote the original un-massaged PTS, and is thus an ABI break. It also adds a new field to the ffindex for this. The soversion, FFMS_VERSION, and ffindex version have been bumped. Implements #258. Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
FFMS · Jul 20, 2017 · f3fb0e0 · f3fb0e0
1 parent 5eb741e
commit f3fb0e0
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 9 deletions.
diff --git a/include/ffms.h b/include/ffms.h
@@ -402,6 +402,7 @@ typedef struct FFMS_FrameInfo {
     int64_t PTS;
     int RepeatPict;
     int KeyFrame;
+    int64_t OriginalPTS;
 } FFMS_FrameInfo;
 
 typedef struct FFMS_VideoProperties {

diff --git a/src/core/indexing.cpp b/src/core/indexing.cpp
@@ -35,7 +35,7 @@ extern "C" {
 }
 
 #define INDEXID 0x53920873
-#define INDEX_VERSION 3
+#define INDEX_VERSION 4
 
 SharedAVContext::~SharedAVContext() {
     avcodec_free_context(&CodecContext);
@@ -387,6 +387,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
         TrackIndices->emplace_back((int64_t)FormatContext->streams[i]->time_base.num * 1000,
             FormatContext->streams[i]->time_base.den,
             static_cast<FFMS_TrackType>(FormatContext->streams[i]->codecpar->codec_type),
+            !!(FormatContext->iformat->flags & AVFMT_TS_DISCONT),
             UseDTS);
 
         if (IndexMask.count(i) && FormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {

diff --git a/src/core/track.cpp b/src/core/track.cpp
@@ -36,6 +36,7 @@ namespace {
 FrameInfo ReadFrame(ZipFile &stream, FrameInfo const& prev, const FFMS_TrackType TT) {
     FrameInfo f{};
     f.PTS = stream.Read<int64_t>() + prev.PTS;
+    f.OriginalPTS = stream.Read<int64_t>() + prev.OriginalPTS;
     f.KeyFrame = !!stream.Read<int8_t>();
     f.FilePos = stream.Read<int64_t>() + prev.FilePos;
     f.Hidden = !!stream.Read<int8_t>();
@@ -52,6 +53,7 @@ FrameInfo ReadFrame(ZipFile &stream, FrameInfo const& prev, const FFMS_TrackType
 
 static void WriteFrame(ZipFile &stream, FrameInfo const& f, FrameInfo const& prev, const FFMS_TrackType TT) {
     stream.Write(f.PTS - prev.PTS);
+    stream.Write(f.OriginalPTS - prev.OriginalPTS);
     stream.Write<int8_t>(f.KeyFrame);
     stream.Write(f.FilePos - prev.FilePos);
     stream.Write<uint8_t>(f.Hidden);
@@ -70,11 +72,12 @@ FFMS_Track::FFMS_Track()
 {
 }
 
-FFMS_Track::FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool UseDTS, bool HasTS)
+FFMS_Track::FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool HasDiscontTS, bool UseDTS, bool HasTS) // Flag order must match the declaration in track.h: all three are bool, so a mismatch still compiles but silently swaps them.
     : Data(std::make_shared<TrackData>())
     , TT(TT)
     , UseDTS(UseDTS)
-    , HasTS(HasTS) {
+    , HasTS(HasTS)
+    , HasDiscontTS(HasDiscontTS) {
     TB.Num = Num;
     TB.Den = Den;
 }
@@ -119,12 +122,12 @@ void FFMS_Track::Write(ZipFile &stream) const {
 }
 
 void FFMS_Track::AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool Hidden) {
-    Data->Frames.push_back({ PTS, FilePos, 0, 0, 0, FrameType, RepeatPict, KeyFrame, Hidden });
+    Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, FrameType, RepeatPict, KeyFrame, Hidden }); // Second field is OriginalPTS; the 0 is a placeholder that FinalizeTrack() overwrites with the frame's PTS for video tracks.
 }
 
 void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool Hidden) {
     if (SampleCount > 0) {
-        Data->Frames.push_back({ PTS, FilePos, SampleStart, SampleCount,
+        Data->Frames.push_back({ PTS, 0, FilePos, SampleStart, SampleCount, // Second field is OriginalPTS, stored as 0 here. NOTE(review): FinalizeTrack() returns early for non-video tracks before copying PTS into OriginalPTS - confirm audio frames are meant to keep 0.
             0, 0, 0, KeyFrame, Hidden });
     }
 }
@@ -309,12 +312,46 @@ void FFMS_Track::FinalizeTrack() {
     if (TT != FFMS_TYPE_VIDEO)
         return;
 
-    for (size_t i = 0; i < size(); i++)
+    for (size_t i = 0; i < size(); i++) {
         Frames[i].OriginalPos = i;
+        Frames[i].OriginalPTS = Frames[i].PTS;
+    }
 
     MaybeReorderFrames();
 
-    sort(Frames.begin(), Frames.end(), PTSComparison);
+    if (size() > 2 && HasDiscontTS) {
+        std::vector<size_t> secs = { 0 };
+
+        auto lastPTS = Frames[0].PTS;
+        const auto thresh = (Frames[1].PTS - Frames[0].PTS) * 16; // A bad approximation of 16 frames, the max reorder buffer size.
+        for (size_t i = 0; i < size(); i++) {
+            if (Frames[i].PTS < lastPTS && (lastPTS - Frames[i].PTS) > thresh && i + 1 < size()) {
+                secs.push_back(i);
+                i++; // Sections must be at least 2 frames long.
+            }
+            lastPTS = Frames[i].PTS;
+        }
+
+        // We need to sort each distinct sections by PTS to account for any reordering.
+        for (size_t i = 0; i < secs.size() - 1; i++)
+            sort(Frames.begin() + secs[i], Frames.begin() + secs[i + 1], PTSComparison);
+        sort(Frames.begin() + secs.back(), Frames.end(), PTSComparison);
+
+        // Try and make up some sane timestamps based on previous sections, while
+        // keeping the same frame durations.
+        for (size_t i = 1; i < secs.size(); i++) {
+            const auto shift = -(Frames[secs[i]].PTS) + (Frames[secs[i] + 1].PTS - Frames[secs[i]].PTS) + Frames[secs[i] - 1].PTS;
+            size_t end;
+            if (i == secs.size() - 1)
+                end = Frames.size();
+            else
+                end = secs[i + 1];
+            for (size_t j = secs[i]; j < end; j++)
+                Frames[j].PTS += shift;
+        }
+    } else {
+        sort(Frames.begin(), Frames.end(), PTSComparison);
+    }
 
     std::vector<size_t> ReorderTemp;
     ReorderTemp.reserve(size());
@@ -339,7 +376,7 @@ void FFMS_Track::GeneratePublicInfo() {
             continue;
         RealFrameNumbers.push_back(static_cast<int>(i));
 
-        FFMS_FrameInfo info = { Frames[i].PTS, Frames[i].RepeatPict, Frames[Frames[i].OriginalPos].KeyFrame };
+        FFMS_FrameInfo info = { Frames[i].PTS, Frames[i].RepeatPict, Frames[Frames[i].OriginalPos].KeyFrame, Frames[i].OriginalPTS };
         PublicFrameInfo.push_back(info);
     }
 }

diff --git a/src/core/track.h b/src/core/track.h
@@ -31,6 +31,7 @@ class ZipFile;
 
 struct FrameInfo {
     int64_t PTS;
+    int64_t OriginalPTS;
     int64_t FilePos;
     int64_t SampleStart;
     uint32_t SampleCount;
@@ -62,6 +63,7 @@ struct FFMS_Track {
     int MaxBFrames = 0;
     bool UseDTS = false;
     bool HasTS = false;
+    bool HasDiscontTS = false;
     int SampleRate = 0; // not persisted
 
     void AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos = 0, bool Invisible = false);
@@ -105,7 +107,7 @@ struct FFMS_Track {
 
     FFMS_Track();
     FFMS_Track(ZipFile &Stream);
-    FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool UseDTS, bool HasTS = true);
+    FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool HasDiscontTS, bool UseDTS, bool HasTS = true);
 };
 
 #endif