ffmpeg: use new decode API

This is a bit messy, mainly due to timestamp handling. decode_video() relied on the fact that it could set dts on a flush/drain packet. This is not possible with the old API, and won't be. (I think doing this was very questionable with the old API. Flush packets should not contain any information; they just cause a FIFO to be emptied.) This is replaced with checking the best_effort_timestamp for AV_NOPTS_VALUE, and using the suggested DTS in the drain case. The fate-cavs test still fails due to dropping the last frame. This happens because the timestamp of the last frame goes backwards (ffprobe -show_frames shows the same thing). I suspect that this "worked" due to the best effort timestamp logic picking the DTS over the decreasing PTS. Since this logic is in libavcodec (where it probably shouldn't be), this can't be easily fixed. The timestamps of the cavs samples are weird anyway, so I chose not to fix it. Another strange thing is the timestamp handling in the video path of process_input_packet (after the decode_video() call). It looks like the code to increase next_dts and next_pts should be run every time a frame is decoded - but it's needed even if output is skipped.
BtbN · Sep 18, 2016 · 5bc339b · 5bc339b
1 parent 8fab5c7
commit 5bc339b
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 52 deletions.
diff --git a/ffmpeg.c b/ffmpeg.c
@@ -1943,6 +1943,33 @@ static void check_decode_result(InputStream *ist, int *got_output, int ret)
     }
 }
 
+// This does not quite work like avcodec_decode_audio4/avcodec_decode_video2.
+// There is the following difference: if you got a frame, you must call
+// it again with pkt=NULL. pkt==NULL is treated differently from pkt.size==0
+// (pkt==NULL means get more output, pkt.size==0 is a flush/drain packet)
+static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacket *pkt)
+{
+    int ret;
+
+    *got_frame = 0;
+
+    if (pkt) {
+        ret = avcodec_send_packet(avctx, pkt);
+        // In particular, we don't expect AVERROR(EAGAIN), because we read all
+        // decoded frames with avcodec_receive_frame() until done.
+        if (ret < 0)
+            return ret == AVERROR_EOF ? 0 : ret;
+    }
+
+    ret = avcodec_receive_frame(avctx, frame);
+    if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
+        return ret;
+    if (ret >= 0)
+        *got_frame = 1;
+
+    return 0;
+}
+
 static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
 {
     AVFrame *decoded_frame, *f;
@@ -1957,7 +1984,7 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
     decoded_frame = ist->decoded_frame;
 
     update_benchmark(NULL);
-    ret = avcodec_decode_audio4(avctx, decoded_frame, got_output, pkt);
+    ret = decode(avctx, decoded_frame, got_output, pkt);
     update_benchmark("decode_audio %d.%d", ist->file_index, ist->st->index);
 
     if (ret >= 0 && avctx->sample_rate <= 0) {
@@ -2033,14 +2060,13 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
     } else if (decoded_frame->pkt_pts != AV_NOPTS_VALUE) {
         decoded_frame->pts = decoded_frame->pkt_pts;
         decoded_frame_tb   = ist->st->time_base;
-    } else if (pkt->pts != AV_NOPTS_VALUE) {
+    } else if (pkt && pkt->pts != AV_NOPTS_VALUE) {
         decoded_frame->pts = pkt->pts;
         decoded_frame_tb   = ist->st->time_base;
     }else {
         decoded_frame->pts = ist->dts;
         decoded_frame_tb   = AV_TIME_BASE_Q;
     }
-    pkt->pts           = AV_NOPTS_VALUE;
     if (decoded_frame->pts != AV_NOPTS_VALUE)
         decoded_frame->pts = av_rescale_delta(decoded_frame_tb, decoded_frame->pts,
                                               (AVRational){1, avctx->sample_rate}, decoded_frame->nb_samples, &ist->filter_in_rescale_delta_last,
@@ -2068,23 +2094,28 @@ static int decode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
     return err < 0 ? err : ret;
 }
 
-static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
+static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output, int eof)
 {
     AVFrame *decoded_frame, *f;
     int i, ret = 0, err = 0, resample_changed;
     int64_t best_effort_timestamp;
+    int64_t dts;
     AVRational *frame_sample_aspect;
+    AVPacket avpkt;
 
     if (!ist->decoded_frame && !(ist->decoded_frame = av_frame_alloc()))
         return AVERROR(ENOMEM);
     if (!ist->filter_frame && !(ist->filter_frame = av_frame_alloc()))
         return AVERROR(ENOMEM);
     decoded_frame = ist->decoded_frame;
-    pkt->dts  = av_rescale_q(ist->dts, AV_TIME_BASE_Q, ist->st->time_base);
+    dts = av_rescale_q(ist->dts, AV_TIME_BASE_Q, ist->st->time_base);
+    if (pkt) {
+        avpkt = *pkt;
+        avpkt.dts = dts;
+    }
 
     update_benchmark(NULL);
-    ret = avcodec_decode_video2(ist->dec_ctx,
-                                decoded_frame, got_output, pkt);
+    ret = decode(ist->dec_ctx, decoded_frame, got_output, pkt ? &avpkt : NULL);
     update_benchmark("decode_video %d.%d", ist->file_index, ist->st->index);
 
     // The following line may be required in some cases where there is no parser
@@ -2134,6 +2165,8 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
     ist->hwaccel_retrieved_pix_fmt = decoded_frame->format;
 
     best_effort_timestamp= av_frame_get_best_effort_timestamp(decoded_frame);
+    if(eof && best_effort_timestamp == AV_NOPTS_VALUE)
+        best_effort_timestamp = dts;
     if(best_effort_timestamp != AV_NOPTS_VALUE) {
         int64_t ts = av_rescale_q(decoded_frame->pts = best_effort_timestamp, ist->st->time_base, AV_TIME_BASE_Q);
 
@@ -2152,8 +2185,6 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
                ist->st->time_base.num, ist->st->time_base.den);
     }
 
-    pkt->size = 0;
-
     if (ist->st->sample_aspect_ratio.num)
         decoded_frame->sample_aspect_ratio = ist->st->sample_aspect_ratio;
 
@@ -2192,12 +2223,12 @@ static int decode_video(InputStream *ist, AVPacket *pkt, int *got_output)
                 break;
         } else
             f = decoded_frame;
-        ret = av_buffersrc_add_frame_flags(ist->filters[i]->filter, f, AV_BUFFERSRC_FLAG_PUSH);
-        if (ret == AVERROR_EOF) {
-            ret = 0; /* ignore */
-        } else if (ret < 0) {
+        err = av_buffersrc_add_frame_flags(ist->filters[i]->filter, f, AV_BUFFERSRC_FLAG_PUSH);
+        if (err == AVERROR_EOF) {
+            err = 0; /* ignore */
+        } else if (err < 0) {
             av_log(NULL, AV_LOG_FATAL,
-                   "Failed to inject frame into filter network: %s\n", av_err2str(ret));
+                   "Failed to inject frame into filter network: %s\n", av_err2str(err));
             exit_program(1);
         }
     }
@@ -2282,7 +2313,8 @@ static int send_filter_eof(InputStream *ist)
 static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eof)
 {
     int ret = 0, i;
-    int got_output = 0;
+    int repeating = 0;
+    int eof_reached = !pkt;
 
     AVPacket avpkt;
     if (!ist->saw_first_ts) {
@@ -2305,50 +2337,52 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
         av_init_packet(&avpkt);
         avpkt.data = NULL;
         avpkt.size = 0;
-        goto handle_eof;
     } else {
         avpkt = *pkt;
     }
 
-    if (pkt->dts != AV_NOPTS_VALUE) {
+    if (pkt && pkt->dts != AV_NOPTS_VALUE) {
         ist->next_dts = ist->dts = av_rescale_q(pkt->dts, ist->st->time_base, AV_TIME_BASE_Q);
         if (ist->dec_ctx->codec_type != AVMEDIA_TYPE_VIDEO || !ist->decoding_needed)
             ist->next_pts = ist->pts = ist->dts;
     }
 
     // while we have more to decode or while the decoder did output something on EOF
-    while (ist->decoding_needed && (avpkt.size > 0 || (!pkt && got_output))) {
-        int duration;
-    handle_eof:
+    while (ist->decoding_needed && (!pkt || avpkt.size > 0)) {
+        int duration = 0;
+        int got_output = 0;
 
         ist->pts = ist->next_pts;
         ist->dts = ist->next_dts;
 
         switch (ist->dec_ctx->codec_type) {
         case AVMEDIA_TYPE_AUDIO:
-            ret = decode_audio    (ist, &avpkt, &got_output);
+            ret = decode_audio    (ist, repeating ? NULL : &avpkt, &got_output);
             break;
         case AVMEDIA_TYPE_VIDEO:
-            ret = decode_video    (ist, &avpkt, &got_output);
-            if (avpkt.duration) {
-                duration = av_rescale_q(avpkt.duration, ist->st->time_base, AV_TIME_BASE_Q);
-            } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
-                int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict+1 : ist->dec_ctx->ticks_per_frame;
-                duration = ((int64_t)AV_TIME_BASE *
-                                ist->dec_ctx->framerate.den * ticks) /
-                                ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
-            } else
-                duration = 0;
+            ret = decode_video    (ist, repeating ? NULL : &avpkt, &got_output, !pkt);
+            if (!repeating || !pkt || got_output) {
+                if (pkt && pkt->duration) {
+                    duration = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
+                } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
+                    int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict+1 : ist->dec_ctx->ticks_per_frame;
+                    duration = ((int64_t)AV_TIME_BASE *
+                                    ist->dec_ctx->framerate.den * ticks) /
+                                    ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
+                }
 
-            if(ist->dts != AV_NOPTS_VALUE && duration) {
-                ist->next_dts += duration;
-            }else
-                ist->next_dts = AV_NOPTS_VALUE;
+                if(ist->dts != AV_NOPTS_VALUE && duration) {
+                    ist->next_dts += duration;
+                }else
+                    ist->next_dts = AV_NOPTS_VALUE;
+            }
 
             if (got_output)
                 ist->next_pts += duration; //FIXME the duration is not correct in some cases
             break;
         case AVMEDIA_TYPE_SUBTITLE:
+            if (repeating)
+                break;
             ret = transcode_subtitles(ist, &avpkt, &got_output);
             break;
         default:
@@ -2363,26 +2397,16 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
             break;
         }
 
-        avpkt.dts=
-        avpkt.pts= AV_NOPTS_VALUE;
-
-        // touch data and size only if not EOF
-        if (pkt) {
-            if(ist->dec_ctx->codec_type != AVMEDIA_TYPE_AUDIO)
-                ret = avpkt.size;
-            avpkt.data += ret;
-            avpkt.size -= ret;
-        }
-        if (!got_output) {
-            continue;
-        }
-        if (got_output && !pkt)
+        if (!got_output)
             break;
+
+        eof_reached = 0;
+        repeating = 1;
     }
 
     /* after flushing, send an EOF on all the filter inputs attached to the stream */
     /* except when looping we need to flush but not to send an EOF */
-    if (!pkt && ist->decoding_needed && !got_output && !no_eof) {
+    if (!pkt && ist->decoding_needed && eof_reached && !no_eof) {
         int ret = send_filter_eof(ist);
         if (ret < 0) {
             av_log(NULL, AV_LOG_FATAL, "Error marking filters as finished\n");
@@ -2426,7 +2450,7 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
         do_streamcopy(ist, ost, pkt);
     }
 
-    return got_output;
+    return !eof_reached;
 }
 
 static void print_sdp(void)

diff --git a/tests/ref/fate/cavs b/tests/ref/fate/cavs
@@ -173,4 +173,3 @@
 0,        167,        167,        1,   622080, 0xdcb4cee8
 0,        168,        168,        1,   622080, 0xb41172e5
 0,        169,        169,        1,   622080, 0x56c72478
-0,        170,        170,        1,   622080, 0x84ff3af9