forked from imkira/go-libav
/
transcoder.go
435 lines (386 loc) · 11.5 KB
/
transcoder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
// In this example, we show a very simplistic way of transcoding the frames of
// the first video stream of an input file.
// Please note that transcoding is a difficult subject and full of corner
// cases. This sample is far from perfect, and it will easily break/crash
// depending on the specified input and output files.
//
// Tested with
//
// go run transcoder.go --input=https://bintray.com/imkira/go-libav/download_file?file_path=sample_iPod.m4v --output=output.mp4
// go run transcoder.go --input=https://bintray.com/imkira/go-libav/download_file?file_path=sample_iPod.m4v --output=output.avi
package main
import (
"flag"
"log"
"github.com/imkira/go-libav/avcodec"
"github.com/imkira/go-libav/avfilter"
"github.com/imkira/go-libav/avformat"
"github.com/imkira/go-libav/avutil"
)
// Command-line options, filled in by init from the process arguments.
var (
	inputFileName  string // value of --input: the file to decode
	outputFileName string // value of --output: the file to encode
)

// init registers the --input and --output flags and parses the command line
// so that both variables are populated before main starts.
func init() {
	flag.StringVar(&inputFileName, "input", "", "source file to decode")
	flag.StringVar(&outputFileName, "output", "", "target file to encode")
	flag.Parse()
}
// main validates the command-line flags, sets up the decoding and encoding
// pipelines, and runs the transcode loop until neither the decode step nor the
// encode step reports progress. Every failure is fatal (log.Fatalf).
func main() {
	if inputFileName == "" {
		log.Fatalf("Missing --input=file\n")
	}
	if outputFileName == "" {
		log.Fatalf("Missing --output=file\n")
	}
	avutil.SetLogLevel(avutil.LogLevelDebug)

	// allocate the packets, frames and filter graph shared by both pipelines
	ctx, err := newContext()
	if err != nil {
		log.Fatalf("Failed to create context: %v\n", err)
	}
	defer ctx.free()

	// the input has to be opened first: the output setup links the sink
	// filter to the source filter and reuses the decoder's time base
	openInput(ctx)
	openOutput(ctx)
	writeHeader(ctx)

	// alternate one decode step and one encode step per iteration
	for reading, writing := true, true; reading || writing; {
		reading = decodeStream(ctx)
		writing = encodeStream(ctx)
	}
	writeTrailer(ctx)
}
// openInput opens inputFileName for demuxing, reads its stream information,
// and prepares the first video stream for decoding. The resulting format
// context is stored in ctx.decFmt. Any failure terminates the program.
func openInput(ctx *context) {
	// format (container) context for the input side
	decFmt, err := avformat.NewContextForInput()
	if err != nil {
		log.Fatalf("Failed to open input context: %v\n", err)
	}
	ctx.decFmt = decFmt

	// options passed along when opening the file
	opts := avutil.NewDictionary()
	defer opts.Free()
	err = opts.Set("scan_all_pmts", "1")
	if err != nil {
		log.Fatalf("Failed to set input options: %v\n", err)
	}

	// open the file for decoding
	err = ctx.decFmt.OpenInput(inputFileName, nil, opts)
	if err != nil {
		log.Fatalf("Failed to open input file: %v\n", err)
	}

	// populate the context with stream information
	err = ctx.decFmt.FindStreamInfo(nil)
	if err != nil {
		log.Fatalf("Failed to find stream info: %v\n", err)
	}

	// print a description of the input streams
	ctx.decFmt.Dump(0, inputFileName, false)

	// set up the decoder and the source filter for the first video stream
	openFirstInputVideoStream(ctx)
}
// openFirstInputVideoStream selects the first video stream of ctx.decFmt,
// opens a decoder for it (stored in ctx.decCodec), and creates the "buffer"
// source filter (stored in ctx.srcFilter) that decoded frames are pushed into.
// Any failure terminates the program.
func openFirstInputVideoStream(ctx *context) {
	var err error

	// find first video stream
	if ctx.decStream = firstVideoStream(ctx.decFmt); ctx.decStream == nil {
		log.Fatalf("Could not find a video stream. Aborting...\n")
	}

	// look up a decoder matching the stream's codec
	codecCtx := ctx.decStream.CodecContext()
	codec := avcodec.FindDecoderByID(codecCtx.CodecID())
	if codec == nil {
		log.Fatalf("Could not find decoder: %v\n", codecCtx.CodecID())
	}

	// decode through our own codec context, seeded with the stream's parameters
	if ctx.decCodec, err = avcodec.NewContextWithCodec(codec); err != nil {
		log.Fatalf("Failed to create codec context: %v\n", err)
	}
	if err := codecCtx.CopyTo(ctx.decCodec); err != nil {
		log.Fatalf("Failed to copy codec context: %v\n", err)
	}
	if err := ctx.decCodec.SetInt64Option("refcounted_frames", 1); err != nil {
		// report the step that actually failed (this is not a context copy)
		log.Fatalf("Failed to set refcounted_frames option: %v\n", err)
	}
	if err := ctx.decCodec.OpenWithCodec(codec, nil); err != nil {
		log.Fatalf("Failed to open codec: %v\n", err)
	}

	// we need a video filter to push the decoded frames to; it is configured
	// with the decoder's frame size, pixel format and time base
	ctx.srcFilter = addFilter(ctx, "buffer", "in")
	if err = ctx.srcFilter.SetImageSizeOption("video_size", ctx.decCodec.Width(), ctx.decCodec.Height()); err != nil {
		log.Fatalf("Failed to set filter option: %v\n", err)
	}
	if err = ctx.srcFilter.SetPixelFormatOption("pix_fmt", ctx.decCodec.PixelFormat()); err != nil {
		log.Fatalf("Failed to set filter option: %v\n", err)
	}
	if err = ctx.srcFilter.SetRationalOption("time_base", ctx.decCodec.TimeBase()); err != nil {
		log.Fatalf("Failed to set filter option: %v\n", err)
	}
	if err = ctx.srcFilter.Init(); err != nil {
		log.Fatalf("Failed to initialize buffer filter: %v\n", err)
	}
}
// firstVideoStream returns the first stream of fmtCtx whose codec type is
// video, or nil when the container has no video stream.
func firstVideoStream(fmtCtx *avformat.Context) *avformat.Stream {
	for _, candidate := range fmtCtx.Streams() {
		if candidate.CodecContext().CodecType() == avutil.MediaTypeVideo {
			return candidate
		}
	}
	return nil
}
// openOutput creates the output format context for outputFileName (stored in
// ctx.encFmt), sets up the video stream to encode into, and, unless the
// output format carries the FlagNoFile flag, opens the output file for
// writing (stored in ctx.encIO). Any failure terminates the program.
func openOutput(ctx *context) {
	// guess the container format from the output file name
	outFmt := avformat.GuessOutputFromFileName(outputFileName)
	if outFmt == nil {
		log.Fatalf("Failed to guess output for output file: %s\n", outputFileName)
	}

	var err error
	ctx.encFmt, err = avformat.NewContextForOutput(outFmt)
	if err != nil {
		log.Fatalf("Failed to open output context: %v\n", err)
	}
	ctx.encFmt.SetFileName(outputFileName)

	// prepare the video stream for encoding
	openOutputVideoStream(ctx, outFmt)

	// formats flagged FlagNoFile get no I/O context
	if outFmt.Flags()&avformat.FlagNoFile != 0 {
		return
	}

	// prepare I/O
	ctx.encIO, err = avformat.OpenIOContext(outputFileName, avformat.IOFlagWrite, nil, nil)
	if err != nil {
		log.Fatalf("Failed to open I/O context: %v\n", err)
	}
	ctx.encFmt.SetIOContext(ctx.encIO)
}
// openOutputVideoStream adds a video stream to ctx.encFmt, creates and opens
// an encoder for it (stored in ctx.encCodec), and completes the filter graph
// by linking ctx.srcFilter to a new "buffersink" filter (ctx.sinkFilter).
// The encoder takes its size and pixel format from the sink's input pad and
// its time base from the decoder. Any failure terminates the program.
func openOutputVideoStream(ctx *context, fmt *avformat.Output) {
	stream, err := ctx.encFmt.NewStreamWithCodec(nil)
	if err != nil {
		log.Fatalf("Failed to open output video stream: %v\n", err)
	}
	ctx.encStream = stream

	// pick the encoder the output format suggests for video
	streamCodec := ctx.encStream.CodecContext()
	streamCodec.SetCodecType(avutil.MediaTypeVideo)
	codecID := fmt.GuessCodecID(outputFileName, streamCodec.CodecType())
	encoder := avcodec.FindEncoderByID(codecID)
	if encoder == nil {
		log.Fatalf("Could not find encoder: %v\n", codecID)
	}
	ctx.encCodec, err = avcodec.NewContextWithCodec(encoder)
	if err != nil {
		log.Fatalf("Failed to create codec context: %v\n", err)
	}
	ctx.encCodec.SetCodecType(streamCodec.CodecType())

	// we need a video filter to pull the frames to encode from
	ctx.sinkFilter = addFilter(ctx, "buffersink", "out")
	err = ctx.sinkFilter.Init()
	if err != nil {
		log.Fatalf("Failed to initialize buffersink filter: %v\n", err)
	}
	err = ctx.srcFilter.Link(0, ctx.sinkFilter, 0)
	if err != nil {
		log.Fatalf("Failed to link filters: %v\n", err)
	}
	err = ctx.filterGraph.Config()
	if err != nil {
		log.Fatalf("Failed to config filter graph: %v\n", err)
	}

	// configure the encoder from the first input pad of the sink
	inPad := ctx.sinkFilter.Inputs()[0]
	ctx.encCodec.SetWidth(inPad.Width())
	ctx.encCodec.SetHeight(inPad.Height())
	ctx.encCodec.SetPixelFormat(inPad.PixelFormat())
	ctx.encCodec.SetTimeBase(ctx.decCodec.TimeBase())
	ctx.encCodec.SetStrictStdCompliance(avcodec.ComplianceNormal)
	if fmt.Flags()&avformat.FlagGlobalHeader != 0 {
		ctx.encCodec.SetFlags(ctx.encCodec.Flags() | avcodec.FlagGlobalHeader)
	}
	err = ctx.encCodec.OpenWithCodec(encoder, nil)
	if err != nil {
		log.Fatalf("Failed to open codec: %v\n", err)
	}

	// mirror the opened encoder's settings onto the stream
	err = ctx.encCodec.CopyTo(ctx.encStream.CodecContext())
	if err != nil {
		log.Fatalf("Failed to copy codec context: %v\n", err)
	}
	ctx.encStream.SetTimeBase(ctx.encCodec.TimeBase())
	ctx.encStream.CodecContext().SetCodec(ctx.encCodec.Codec())
}
// decodeStream reads one packet from the input and, when it belongs to the
// selected video stream, decodes it and pushes the frames into the filter
// graph. It returns false when the input has no more packets or when nothing
// could be decoded from the packet, and true otherwise (including packets of
// other streams, which are skipped).
func decodeStream(ctx *context) bool {
	// read packet from input file
	more, err := ctx.decFmt.ReadFrame(ctx.decPkt)
	if err != nil {
		log.Fatalf("Failed to read packet: %v\n", err)
	}
	if !more {
		return false
	}
	defer ctx.decPkt.Unref()

	// skip packets that do not belong to the stream we are interested in
	if ctx.decStream.Index() != ctx.decPkt.StreamIndex() {
		return true
	}
	ctx.decPkt.RescaleTime(ctx.decStream.TimeBase(), ctx.decCodec.TimeBase())

	// keep decoding until the packet is used up or the decoder stalls
	decoded := false
	for ctx.decPkt.Size() > 0 && decodeFrame(ctx) {
		decoded = true
	}
	return decoded
}
// decodeFrame runs one decode call on ctx.decPkt. When a frame is produced it
// is pushed into the filter graph and true is returned; otherwise the result
// tells whether any packet data was consumed. Consumed bytes are removed from
// the packet when the function returns.
func decodeFrame(ctx *context) bool {
	gotFrame, consumed, err := ctx.decCodec.DecodeVideo(ctx.decPkt, ctx.decFrame)
	if err != nil {
		log.Fatalf("Failed to decode packet: %v\n", err)
	}
	ctx.decFrame.SetPTS(ctx.decFrame.BestEffortTimestamp())

	progressed := consumed > 0
	if progressed {
		// deferred so the packet is advanced only after the frame is handled
		defer ctx.decPkt.ConsumeData(consumed)
	}
	if gotFrame {
		defer ctx.decFrame.Unref()
		pushFrame(ctx)
		return true
	}
	return progressed
}
// pushFrame hands ctx.decFrame to the source filter and then asks the filter
// graph to process its oldest pending request. An end-of-file result from the
// graph is tolerated; every other error terminates the program.
func pushFrame(ctx *context) {
	// KeepRef: the caller keeps ownership of decFrame and unrefs it afterwards
	flags := avfilter.BufferSrcFlagKeepRef
	if err := ctx.srcFilter.AddFrameWithFlags(ctx.decFrame, flags); err != nil {
		log.Fatalf("Failed to add frame to filter graph: %v\n", err)
	}
	if err := ctx.filterGraph.RequestOldest(); err != nil {
		// Use the comma-ok form: an error that is not *avutil.Error must be
		// reported as a failure rather than crash on a failed type assertion.
		averr, ok := err.(*avutil.Error)
		if !ok || averr.Code() != avutil.ErrorCodeEOF {
			log.Fatalf("Failed to request frame from filter graph: %v\n", err)
		}
	}
}
// encodeStream pulls one filtered frame from the sink, encodes it, and writes
// the resulting packet to the output. It returns false when no frame was
// available or the encoder produced no packet, and true once a packet has
// been written.
func encodeStream(ctx *context) bool {
	if !pullFrame(ctx) {
		return false
	}
	// release the frame and packet contents whichever way we leave
	defer ctx.encFrame.Unref()
	defer ctx.encPkt.Unref()

	if !encodeFrame(ctx) {
		return false
	}

	// address the packet to our output stream and convert its timing from
	// the encoder's time base to the stream's
	ctx.encPkt.SetPosition(-1)
	ctx.encPkt.SetStreamIndex(ctx.encStream.Index())
	ctx.encPkt.RescaleTime(ctx.encCodec.TimeBase(), ctx.encStream.TimeBase())

	err := ctx.encFmt.InterleavedWriteFrame(ctx.encPkt)
	if err != nil {
		log.Fatalf("Failed to write packet: %v\n", err)
	}
	return true
}
// pullFrame fetches the next frame from the sink filter into ctx.encFrame and
// reports whether one was available. A fetched frame has its picture type
// reset to PictureTypeNone before it is handed to the encoder.
func pullFrame(ctx *context) bool {
	got, err := ctx.sinkFilter.GetFrame(ctx.encFrame)
	if err != nil {
		log.Fatalf("Failed to get frame from filter graph: %v\n", err)
	}
	if !got {
		return false
	}
	ctx.encFrame.SetPictureType(avutil.PictureTypeNone)
	return true
}
// encodeFrame encodes ctx.encFrame into ctx.encPkt and reports whether the
// encoder produced a packet. An encoding error terminates the program.
func encodeFrame(ctx *context) bool {
	produced, encErr := ctx.encCodec.EncodeVideo(ctx.encPkt, ctx.encFrame)
	if encErr != nil {
		log.Fatalf("Failed to encode frame: %v\n", encErr)
	}
	return produced
}
// addFilter looks up the filter called name and adds an instance of it,
// identified by id, to ctx.filterGraph. It returns the new filter context;
// an unknown filter or a failed insertion terminates the program.
func addFilter(ctx *context, name, id string) *avfilter.Context {
	def := avfilter.FindFilterByName(name)
	if def == nil {
		log.Fatalf("Could not find %s/%s filter\n", name, id)
	}
	instance, addErr := ctx.filterGraph.AddFilter(def, id)
	if addErr != nil {
		log.Fatalf("Failed to add %s/%s filter: %v\n", name, id, addErr)
	}
	return instance
}
// writeHeader writes the container header to the output; a failure
// terminates the program.
func writeHeader(ctx *context) {
	err := ctx.encFmt.WriteHeader(nil)
	if err != nil {
		log.Fatalf("Failed to write header: %v\n", err)
	}
}
// writeTrailer writes the container trailer to the output; a failure
// terminates the program.
func writeTrailer(ctx *context) {
	err := ctx.encFmt.WriteTrailer()
	if err != nil {
		log.Fatalf("Failed to write trailer: %v\n", err)
	}
}
// context bundles every libav object used by the transcoder: the input
// (decoding) side, the output (encoding) side, and the filter graph that
// connects them. Packets, frames and the graph are allocated by alloc; the
// remaining fields are filled in while the input and output are opened.
type context struct {
// decoding
// input format (container) context
decFmt *avformat.Context
// first video stream found in the input
decStream *avformat.Stream
// opened decoder context for decStream
decCodec *avcodec.Context
// packet read from the input
decPkt *avcodec.Packet
// frame produced by the decoder
decFrame *avutil.Frame
// "buffer" filter that decoded frames are pushed into
srcFilter *avfilter.Context
// encoding
// output format (container) context
encFmt *avformat.Context
// video stream created in the output
encStream *avformat.Stream
// opened encoder context
encCodec *avcodec.Context
// I/O context of the output file; stays nil for formats flagged FlagNoFile
encIO *avformat.IOContext
// packet produced by the encoder
encPkt *avcodec.Packet
// frame pulled from the filter graph for encoding
encFrame *avutil.Frame
// "buffersink" filter that frames to encode are pulled from
sinkFilter *avfilter.Context
// transcoding
// graph holding srcFilter and sinkFilter
filterGraph *avfilter.Graph
}
// newContext returns a context whose packets, frames and filter graph are
// allocated. If any allocation fails, whatever was already allocated is
// released and the error is returned with a nil context.
func newContext() (*context, error) {
	ctx := new(context)
	err := ctx.alloc()
	if err == nil {
		return ctx, nil
	}
	ctx.free()
	return nil, err
}
// alloc allocates the decode/encode packets and frames and the filter graph.
// It stops at the first failure and returns that error; objects allocated
// before the failure stay in ctx for the caller to free.
func (ctx *context) alloc() error {
	var err error

	// decoding side
	ctx.decPkt, err = avcodec.NewPacket()
	if err != nil {
		return err
	}
	ctx.decFrame, err = avutil.NewFrame()
	if err != nil {
		return err
	}

	// encoding side
	ctx.encPkt, err = avcodec.NewPacket()
	if err != nil {
		return err
	}
	ctx.encFrame, err = avutil.NewFrame()
	if err != nil {
		return err
	}

	// graph connecting the two sides
	ctx.filterGraph, err = avfilter.NewGraph()
	return err
}
// free releases every resource held by ctx. Each field is checked for nil and
// reset to nil after release, so free is safe on a partially initialized
// context and may be called more than once.
func (ctx *context) free() {
	// output side
	if ctx.encIO != nil {
		ctx.encIO.Close()
		ctx.encIO = nil
	}
	if ctx.encCodec != nil {
		// the encoder context is allocated separately from the stream's own
		// codec context, so it has to be released explicitly
		ctx.encCodec.Free()
		ctx.encCodec = nil
	}
	if ctx.encFmt != nil {
		ctx.encFmt.Free()
		ctx.encFmt = nil
	}

	// filter graph
	if ctx.filterGraph != nil {
		ctx.filterGraph.Free()
		ctx.filterGraph = nil
	}

	// packets and frames
	if ctx.encPkt != nil {
		ctx.encPkt.Free()
		ctx.encPkt = nil
	}
	if ctx.encFrame != nil {
		ctx.encFrame.Free()
		ctx.encFrame = nil
	}
	if ctx.decPkt != nil {
		ctx.decPkt.Free()
		ctx.decPkt = nil
	}
	if ctx.decFrame != nil {
		ctx.decFrame.Free()
		ctx.decFrame = nil
	}

	// input side
	if ctx.decCodec != nil {
		ctx.decCodec.Free()
		ctx.decCodec = nil
	}
	if ctx.decFmt != nil {
		ctx.decFmt.CloseInput()
		ctx.decFmt.Free()
		ctx.decFmt = nil
	}
}