From f3fb0e0c8910321bfef542836e4391e57ad3c6fb Mon Sep 17 00:00:00 2001
From: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Date: Tue, 11 Apr 2017 15:47:58 -0400
Subject: [PATCH] Discontinuous timestamp support

When we index a format that is allowed to have discontinuous
timestamps, try and break it down into distinct sections based
on where the discontinuities occur, and reorder by PTS within
them. We define a discontinuity as a packet timestamp difference
that cannot possibly be frame reordering; in this case, more than
16 frames away, based on the duration of the first frame in the
section. This probably breaks on insane VFR content. The correct
way to do this is probably to fully decode, but that is a
non-starter for a lot of use cases.

By default, we try and normalize the sections into fully continuous
timestamps.

This adds a new field to the public FrameInfo struct to denote
the original un-massaged PTS, and is thus an ABI break. It also
adds a new field to the ffindex for this. The soversion, FFMS_VERSION,
and ffindex version have been bumped.

Implements #258.

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 include/ffms.h        |  1 +
 src/core/indexing.cpp |  3 ++-
 src/core/track.cpp    | 51 +++++++++++++++++++++++++++++++++++++------
 src/core/track.h      |  4 +++-
 4 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/include/ffms.h b/include/ffms.h
index dfbed85479..0820533295 100644
--- a/include/ffms.h
+++ b/include/ffms.h
@@ -402,6 +402,7 @@ typedef struct FFMS_FrameInfo {
     int64_t PTS;
     int RepeatPict;
     int KeyFrame;
+    int64_t OriginalPTS;
 } FFMS_FrameInfo;
 
 typedef struct FFMS_VideoProperties {
diff --git a/src/core/indexing.cpp b/src/core/indexing.cpp
index acf3f3059a..5b56d4abc8 100644
--- a/src/core/indexing.cpp
+++ b/src/core/indexing.cpp
@@ -35,7 +35,7 @@ extern "C" {
 }
 
 #define INDEXID 0x53920873
-#define INDEX_VERSION 3
+#define INDEX_VERSION 4
 
 SharedAVContext::~SharedAVContext() {
     avcodec_free_context(&CodecContext);
@@ -387,6 +387,7 @@ FFMS_Index *FFMS_Indexer::DoIndexing() {
         TrackIndices->emplace_back((int64_t)FormatContext->streams[i]->time_base.num * 1000,
             FormatContext->streams[i]->time_base.den,
             static_cast<FFMS_TrackType>(FormatContext->streams[i]->codecpar->codec_type),
+            !!(FormatContext->iformat->flags & AVFMT_TS_DISCONT),
             UseDTS);
 
         if (IndexMask.count(i) && FormatContext->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
diff --git a/src/core/track.cpp b/src/core/track.cpp
index 63bebb802f..ddd0555932 100644
--- a/src/core/track.cpp
+++ b/src/core/track.cpp
@@ -36,6 +36,7 @@ namespace {
 FrameInfo ReadFrame(ZipFile &stream, FrameInfo const& prev, const FFMS_TrackType TT) {
     FrameInfo f{};
     f.PTS = stream.Read<int64_t>() + prev.PTS;
+    f.OriginalPTS = stream.Read<int64_t>() + prev.OriginalPTS;
     f.KeyFrame = !!stream.Read<int8_t>();
     f.FilePos = stream.Read<int64_t>() + prev.FilePos;
     f.Hidden = !!stream.Read<int8_t>();
@@ -52,6 +53,7 @@ FrameInfo ReadFrame(ZipFile &stream, FrameInfo const& prev, const FFMS_TrackType
 
 static void WriteFrame(ZipFile &stream, FrameInfo const& f, FrameInfo const& prev, const FFMS_TrackType TT) {
     stream.Write(f.PTS - prev.PTS);
+    stream.Write(f.OriginalPTS - prev.OriginalPTS);
     stream.Write<int8_t>(f.KeyFrame);
     stream.Write(f.FilePos - prev.FilePos);
     stream.Write<uint8_t>(f.Hidden);
@@ -70,11 +72,12 @@ FFMS_Track::FFMS_Track()
 {
 }
 
-FFMS_Track::FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool UseDTS, bool HasTS)
+FFMS_Track::FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool HasDiscontTS, bool UseDTS, bool HasTS)
     : Data(std::make_shared<TrackData>())
     , TT(TT)
     , UseDTS(UseDTS)
-    , HasTS(HasTS) {
+    , HasTS(HasTS)
+    , HasDiscontTS(HasDiscontTS) {
     TB.Num = Num;
     TB.Den = Den;
 }
@@ -119,12 +122,12 @@ void FFMS_Track::Write(ZipFile &stream) const {
 }
 
 void FFMS_Track::AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos, bool Hidden) {
-    Data->Frames.push_back({ PTS, FilePos, 0, 0, 0, FrameType, RepeatPict, KeyFrame, Hidden });
+    Data->Frames.push_back({ PTS, 0, FilePos, 0, 0, 0, FrameType, RepeatPict, KeyFrame, Hidden });
 }
 
 void FFMS_Track::AddAudioFrame(int64_t PTS, int64_t SampleStart, uint32_t SampleCount, bool KeyFrame, int64_t FilePos, bool Hidden) {
     if (SampleCount > 0) {
-        Data->Frames.push_back({ PTS, FilePos, SampleStart, SampleCount,
+        Data->Frames.push_back({ PTS, 0, FilePos, SampleStart, SampleCount,
             0, 0, 0, KeyFrame, Hidden });
     }
 }
@@ -309,12 +312,46 @@ void FFMS_Track::FinalizeTrack() {
     if (TT != FFMS_TYPE_VIDEO)
         return;
 
-    for (size_t i = 0; i < size(); i++)
+    for (size_t i = 0; i < size(); i++) {
         Frames[i].OriginalPos = i;
+        Frames[i].OriginalPTS = Frames[i].PTS;
+    }
 
     MaybeReorderFrames();
 
-    sort(Frames.begin(), Frames.end(), PTSComparison);
+    if (size() > 2 && HasDiscontTS) {
+        std::vector<size_t> secs = { 0 };
+
+        auto lastPTS = Frames[0].PTS;
+        const auto thresh = (Frames[1].PTS < Frames[0].PTS ? Frames[0].PTS - Frames[1].PTS : Frames[1].PTS - Frames[0].PTS) * 16; // Magnitude of the first frame delta times 16: a bad approximation of 16 frames, the max reorder buffer size.
+        for (size_t i = 0; i < size(); i++) {
+            if (Frames[i].PTS < lastPTS && (lastPTS - Frames[i].PTS) > thresh && i + 1 < size()) {
+                secs.push_back(i);
+                i++; // Sections must be at least 2 frames long.
+            }
+            lastPTS = Frames[i].PTS;
+        }
+
+        // We need to sort each distinct section by PTS to account for any reordering.
+        for (size_t i = 0; i < secs.size() - 1; i++)
+            sort(Frames.begin() + secs[i], Frames.begin() + secs[i + 1], PTSComparison);
+        sort(Frames.begin() + secs.back(), Frames.end(), PTSComparison);
+
+        // Try and make up some sane timestamps based on previous sections, while
+        // keeping the same frame durations.
+        for (size_t i = 1; i < secs.size(); i++) {
+            const auto shift = -(Frames[secs[i]].PTS) + (Frames[secs[i] + 1].PTS - Frames[secs[i]].PTS) + Frames[secs[i] - 1].PTS;
+            size_t end;
+            if (i == secs.size() - 1)
+                end = Frames.size();
+            else
+                end = secs[i + 1];
+            for (size_t j = secs[i]; j < end; j++)
+                Frames[j].PTS += shift;
+        }
+    } else {
+        sort(Frames.begin(), Frames.end(), PTSComparison);
+    }
 
     std::vector<size_t> ReorderTemp;
     ReorderTemp.reserve(size());
@@ -339,7 +376,7 @@ void FFMS_Track::GeneratePublicInfo() {
             continue;
         RealFrameNumbers.push_back(static_cast<int>(i));
 
-        FFMS_FrameInfo info = { Frames[i].PTS, Frames[i].RepeatPict, Frames[Frames[i].OriginalPos].KeyFrame };
+        FFMS_FrameInfo info = { Frames[i].PTS, Frames[i].RepeatPict, Frames[Frames[i].OriginalPos].KeyFrame, Frames[i].OriginalPTS };
         PublicFrameInfo.push_back(info);
     }
 }
diff --git a/src/core/track.h b/src/core/track.h
index 6973ea2ed4..7ad148e5a2 100644
--- a/src/core/track.h
+++ b/src/core/track.h
@@ -31,6 +31,7 @@ class ZipFile;
 
 struct FrameInfo {
     int64_t PTS;
+    int64_t OriginalPTS;
     int64_t FilePos;
     int64_t SampleStart;
     uint32_t SampleCount;
@@ -62,6 +63,7 @@ struct FFMS_Track {
     int MaxBFrames = 0;
     bool UseDTS = false;
     bool HasTS = false;
+    bool HasDiscontTS = false;
     int SampleRate = 0; // not persisted
 
     void AddVideoFrame(int64_t PTS, int RepeatPict, bool KeyFrame, int FrameType, int64_t FilePos = 0, bool Invisible = false);
@@ -105,7 +107,7 @@ struct FFMS_Track {
 
     FFMS_Track();
     FFMS_Track(ZipFile &Stream);
-    FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool UseDTS, bool HasTS = true);
+    FFMS_Track(int64_t Num, int64_t Den, FFMS_TrackType TT, bool HasDiscontTS, bool UseDTS, bool HasTS = true);
 };
 
 #endif