-
Notifications
You must be signed in to change notification settings - Fork 9
/
mp4.go
2421 lines (2040 loc) · 107 KB
/
mp4.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
package codec
import (
"github.com/Monibuca/utils/v3"
)
// MP4 is an empty placeholder interface: it declares no methods, so every
// type satisfies it.
type MP4 interface {
}
// MP4Box is implemented by box types that can expose their header and body.
type MP4Box interface {
// Header returns a pointer to the box's MP4Header.
Header() *MP4Header
// Body returns a pointer to the box's MP4Body.
Body() *MP4Body
}
//
// ISO_IEC_14496-12_2012.pdf Page/17
//
// The standard boxes all use compact types (32-bit) and most boxes will use the compact (32-bit) size
// standard header
// MP4BoxHeader is the compact (standard) box header: a 32-bit size followed
// by a 32-bit type.
type MP4BoxHeader struct {
BoxSize uint32 // 32 bits, the number of bytes in this box, including all its fields and contained boxes. If size is 1 the actual size is in the 64-bit largesize field; if size is 0 this box is the last one in the file and its contents extend to the end of the file (normally only used for a Media Data Box).
BoxType uint32 // 32 bits, identifies the box type. Standard boxes use a compact type, normally four printable characters. User extensions use an extended type, in which case this field is set to 'uuid'.
}
//
// ISO_IEC_14496-12_2012.pdf Page/17
//
// Many objects also contain a version number and flags field
// full box header
// MP4FullBoxHeader holds the version and flags fields that a "full box"
// carries in addition to the standard box header.
type MP4FullBoxHeader struct {
Version uint8 // 8 bits, an integer that specifies the version of this format of the box
Flags [3]byte // 24 bits, a map of flags
}
//
// ISO_IEC_14496-12_2012.pdf Page/17
//
// Typically only the Media Data Box(es) need the 64-bit size.
// largesize box header
// MP4BoxLargeHeader holds the optional header extensions of a box: the
// 64-bit size and the 16-byte extended (user) type.
type MP4BoxLargeHeader struct {
LargeSize uint64 // 64 bits, the actual box size; used when the 32-bit size field of the standard header is 1
UUIDs [16]uint8 // 128 bits, the extended user type; used when the box type is 'uuid'
}
// if(size == 1)
// {
// unsigned int(64) largesize;
// }
// else if(size == 0)
// {
// // box extends to end of file
// }
// if(boxtype == ‘uuid’)
// {
// unsigned int(8)[16] usertype = extended_type;
// }
// MP4Header is the header type returned by MP4Box.Header. It consists solely
// of the embedded standard box header.
type MP4Header struct {
MP4BoxHeader // standard header
}
// MP4Body is the body type returned by MP4Box.Body. It currently has no fields.
type MP4Body struct{}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/18
//
// Box Type: ftyp
// Container: File
// Mandatory: Yes
// Quantity: Exactly one (but see below)
//
// Each brand is a printable four-character code, registered with ISO, that identifies a precise specification
// FileTypeBox models the 'ftyp' box: a major brand, its minor version and
// the list of compatible brands.
type FileTypeBox struct {
MP4BoxHeader // standard header
MajorBrand uint32 // 32 bits, the brand identifier of the major brand
MinorVersion uint32 // 32 bits, an informative integer for the minor version of the major brand
CompatibleBrands []uint32 // array of 32-bit values, the list of compatible brands, running to the end of the box
}
// NewFileTypeBox allocates a FileTypeBox whose header BoxType is set to the
// 32-bit value that utils.ByteToUint32 derives from the ASCII bytes "ftyp".
// All other fields are left at their zero values.
func NewFileTypeBox() (box *FileTypeBox) {
	box = &FileTypeBox{}

	// The conversion error is discarded, exactly as before.
	// NOTE(review): the second argument presumably selects big-endian byte
	// order, and a fixed 4-byte input presumably cannot fail — confirm
	// against the utils package.
	fourCC, _ := utils.ByteToUint32([]byte("ftyp"), true)
	box.BoxType = fourCC

	return box
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/30
//
// Box Types: pdin
// Container: File
// Mandatory: No
// Quantity: Zero or One
// ProgressiveDownloadInformationBox models the 'pdin' box.
//
// NOTE(review): in ISO/IEC 14496-12 the box body is a list of
// (rate, initial_delay) pairs running to the end of the box; this struct
// holds a single pair — confirm whether that is intentional.
type ProgressiveDownloadInformationBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
Rate uint32 // 32 bits, a download rate expressed in bytes/second
InitialDelay uint32 // 32 bits, the suggested delay to use when playing the file, such that if download continues at the given rate, all data within the file will arrive in time for its use and playback should not need to stall
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/30
//
// Box Type: moov
// Container: File
// Mandatory: Yes
// Quantity: Exactly one
//
// The metadata for a presentation is stored in the single Movie Box which occurs at the top-level of a file.
// Normally this box is close to the beginning or end of the file, though this is not required
// MovieBox models the 'moov' container box. Only the standard header is
// stored here; the movie header child box field is currently commented out.
type MovieBox struct {
MP4BoxHeader // standard header
//Mhb MovieHeaderBox // the first child box(header box)
}
// NewMovieBox allocates a MovieBox whose header BoxType is set to the 32-bit
// value that utils.ByteToUint32 derives from the ASCII bytes "moov".
// All other fields are left at their zero values.
func NewMovieBox() (box *MovieBox) {
	box = &MovieBox{}

	// The conversion error is discarded, exactly as before.
	// NOTE(review): the second argument presumably selects big-endian byte
	// order, and a fixed 4-byte input presumably cannot fail — confirm
	// against the utils package.
	fourCC, _ := utils.ByteToUint32([]byte("moov"), true)
	box.BoxType = fourCC

	return box
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/31
//
// Box Type: mvhd
// Container: Movie Box ('moov')
// Mandatory: Yes
// Quantity: Exactly one
//
// This box defines overall information which is media-independent, and relevant to the entire presentation
// considered as a whole
// MovieHeaderBox models the 'mvhd' box, which carries media-independent
// information about the presentation as a whole. The time and duration
// fields are declared as interface{} because their width (32 or 64 bits)
// varies; NOTE(review): presumably selected by MP4FullBoxHeader.Version —
// confirm against the reader/writer code.
type MovieHeaderBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
CreationTime interface{} // uint64 or uint32, the creation time of the presentation (in seconds since midnight, Jan. 1, 1904, in UTC time)
ModificationTime interface{} // uint64 or uint32, the most recent time the presentation was modified (in seconds since midnight, Jan. 1, 1904, in UTC time)
TimeScale uint32 // 32 bits, the time-scale for the entire presentation: the number of time units that pass in one second. For example, a time coordinate system that measures time in sixtieths of a second has a time scale of 60.
Duration interface{} // uint64 or uint32, the length of the presentation (in the indicated timescale). It is derived from the presentation's tracks: the value corresponds to the duration of the longest track. If the duration cannot be determined it is set to all 1s.
Rate int32 // 32 bits, a fixed point 16.16 number that indicates the preferred rate to play the presentation; 1.0 (0x00010000) is normal forward playback
Volume int16 // 16 bits, a fixed point 8.8 number that indicates the preferred playback volume; 1.0 (0x0100) is full volume
Reserved1 int16 // 16 bits, reserved, bit(16)
Reserved2 [2]uint32 // two 32-bit values, reserved, const unsigned int(32)[2]
Matrix [9]int32 // nine 32-bit values, a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex values (0,0,0x40000000)
PreDefined [6]int32 // six 32-bit values, bit(32)[6] pre_defined
NextTrackID uint32 // 32 bits, a non-zero integer that indicates a value to use for the track ID of the next track to be added to this presentation. Zero is not a valid track ID value. The value shall be larger than the largest track ID in use. If it equals all 1s (32-bit maxint) and a new media track is to be added, a search must be made in the file for an unused track identifier.
}
// CreationTime : 创建时间(相对于UTC时间1904-01-01零点的秒数)
// ModificationTime : 修改时间
// TimeScale : 文件媒体在1秒时间内的刻度值,可以理解为1秒长度的时间单元数
// Duration : 该track的时间长度,用duration和time scale值可以计算track时长,比如audio track的time scale = 8000, duration = 560128,时长为70.016,video track的time scale = 600, duration = 42000,时长为70
// Rate : 推荐播放速率,高16位和低16位分别为小数点整数部分和小数部分,即[16.16] 格式,该值为1.0(0x00010000)表示正常前向播放
// Volume : 与rate类似,[8.8] 格式,1.0(0x0100)表示最大音量
// Matrix : 视频变换矩阵 { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 }
// NextTrackID : 下一个track使用的id号
// PreDefined:
// Preview Time : 开始预览此movie的时间
// Preview Duration : 以movie的time scale为单位,预览的duration
// Poster Time : The time value of the time of the movie poster.
// Selection Time : The time value for the start time of the current selection.
// Selection Duration : The duration of the current selection in movie time scale units.
// Current Time : 当前时间
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/32
//
// Box Type: trak
// Container: Movie Box ('moov')
// Mandatory: Yes
// Quantity: One or more
// TrackBox models the 'trak' container box together with its track header.
type TrackBox struct {
MP4BoxHeader // standard header
Thb TrackHeaderBox // the first child box (the track header box)
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/32
//
// Box Type: tkhd
// Container: Track Box ('trak')
// Mandatory: Yes
// Quantity: Exactly one
// TrackHeaderBox models the 'tkhd' box, which describes the characteristics
// of a single track. The time and duration fields are declared as
// interface{} because their width (32 or 64 bits) varies;
// NOTE(review): presumably selected by MP4FullBoxHeader.Version — confirm.
type TrackHeaderBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
CreationTime interface{} // uint64 or uint32, the creation time of this track
ModificationTime interface{} // uint64 or uint32, the most recent time this track was modified
TrackID uint32 // 32 bits, an integer that uniquely identifies this track over the entire life-time of this presentation. Track IDs are never re-used and cannot be zero.
Reserved1 uint32 // 32 bits, reserved
Duration interface{} // uint64 or uint32, the duration of this track
Reserved2 [2]uint32 // two 32-bit values, reserved
Layer int16 // 16 bits, the front-to-back ordering of video tracks; tracks with lower numbers are closer to the viewer. 0 is the normal value, and -1 would be in front of track 0, and so on.
AlternateGroup int16 // 16 bits, track grouping information; the default 0 means the track has no group relationship with other tracks
Volume int16 // 16 bits, fixed point 8.8; 0x0100 (1.0, full volume) if the track is audio, else 0
Reserved3 uint16 // 16 bits, reserved
Matrix [9]int32 // nine 32-bit values, a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex (0,0,0x40000000). { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 }
Width uint32 // 32 bits, fixed point 16.16, the presentation width of the track
Height uint32 // 32 bits, fixed point 16.16, the presentation height of the track
}
// CreationTime : 创建时间
// ModificationTime : 修改时间
// TrackID : id号,不能重复且不能为0
// Reserved1 : 保留位
// Duration : track的时间长度
// Reserved2 : 保留位
// Layer : 视频层,默认为0,值小的在上层
// AlternateGroup : track分组信息,默认为0表示该track未与其他track有群组关系
// Volume : [8.8] 格式,如果为音频track,1.0(0x0100)表示最大音量;否则为0
// Reserved3 : 保留位
// Matrix : 视频变换矩阵 { 0x00010000,0,0,0,0x00010000,0,0,0,0x40000000 }
// Width : 宽
// Height : 高,均为 [16.16] 格式值,与sample描述中的实际画面大小比值,用于播放时的展示宽高
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/34
//
// Box Type: tref
// Container: Track Box (‘trak’)
// Mandatory: No
// Quantity: Zero or one
// TrackReferenceBox models the 'tref' container box. Only the standard
// header is stored here.
type TrackReferenceBox struct {
MP4BoxHeader // standard header
}
// TrackReferenceTypeBox holds a list of referenced track IDs.
// NOTE(review): presumably a child of TrackReferenceBox whose box type
// names the kind of reference — confirm against the spec.
type TrackReferenceTypeBox struct {
MP4BoxHeader // standard header
TrackIDs []uint32 // array of 32-bit values, each a reference from the containing track to another track in the presentation. Track IDs are never re-used and cannot be equal to zero.
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/35
//
// Box Type: trgr
// Container: Track Box (‘trak’)
// Mandatory: No
// Quantity: Zero or one
// TrackGroupBox models the 'trgr' container box. Only the standard header
// is stored here.
type TrackGroupBox struct {
MP4BoxHeader // standard header
}
// TrackGroupTypeBox is a full box carrying a track group identifier.
// NOTE(review): presumably a child of TrackGroupBox whose box type names
// the grouping type — confirm against the spec.
type TrackGroupTypeBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
TrackGroupID uint32 // 32 bits, the track group identifier. NOTE(review): the previous comment here ("indicates the grouping type and shall be set to one of the following values, or a value registered, or a value from a derived specification or registration") appears to describe the grouping type rather than this ID — confirm against the spec.
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/54
//
// Box Type: edts
// Container: Track Box (‘trak’)
// Mandatory: No
// Quantity: Zero or one
// EditBox models the 'edts' container box. Only the standard header is
// stored here.
type EditBox struct {
MP4BoxHeader // standard header
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/54
//
// Box Type: elst
// Container: Edit Box (‘edts’)
// Mandatory: No
// Quantity: Zero or one
// EditListBox models the 'elst' box: an entry count followed by a table of
// edit segments.
type EditListBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
EntryCount uint32 // 32 bits, the number of entries in the following table
Tables []EditListTable // the edit list entries; presumably EntryCount elements — confirm against the parser
}
// EditListTable is one entry (one edit segment) of an EditListBox.
type EditListTable struct {
SegmentDuration interface{} // uint64 or uint32, the duration of this edit segment in units of the timescale in the Movie Header Box
MediaTime interface{} // 64 or 32 bits, the starting time within the media of this edit segment (in media time scale units, in composition time). If this field is set to -1, it is an empty edit, so the value is signed (int64 or int32 in the spec — NOTE(review): the original comment said uint64 or uint32; confirm what the parser stores). The last edit in a track shall never be an empty edit. Any difference between the duration in the Movie Header Box and the track's duration is expressed as an implicit empty edit at the end.
MediaRateInteger int16 // 16 bits, integer part of the media rate for this edit segment
MediaRateFraction int16 // 16 bits, fractional part of the media rate for this edit segment
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/36
//
// Box Type: mdia
// Container: Track Box ('trak')
// Mandatory: Yes
// Quantity: Exactly one
//
// The media declaration container contains all the objects that declare information about the media data within a track.
// MediaBox models the 'mdia' container box together with its media header.
type MediaBox struct {
MP4BoxHeader // standard header
Mhb MediaHeaderBox // the first child box (the media header box)
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/36
//
// Box Type: mdhd
// Container: Media Box ('mdia')
// Mandatory: Yes
// Quantity: Exactly one
//
// The media header declares overall information that is media-independent, and relevant to characteristics of the media in a track.
// MediaHeaderBox models the 'mdhd' box, which declares media-independent
// information about the media in a track. The time and duration fields are
// declared as interface{} because their width (32 or 64 bits) varies;
// NOTE(review): presumably selected by MP4FullBoxHeader.Version — confirm.
type MediaHeaderBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
CreationTime interface{} // 64 or 32 bits (unsigned in the spec; the original comment said int64 or int32 — confirm what the parser stores), the creation time of the media in this track (in seconds since midnight, Jan. 1, 1904, in UTC time)
ModificationTime interface{} // 64 or 32 bits (unsigned in the spec), the most recent time the media in this track was modified (in seconds since midnight, Jan. 1, 1904, in UTC time)
TimeScale uint32 // 32 bits, the time-scale for this media: the number of time units that pass in one second. For example, a time coordinate system that measures time in sixtieths of a second has a time scale of 60.
Duration interface{} // 64 or 32 bits (unsigned in the spec), the duration of this media (in the scale of TimeScale). If the duration cannot be determined it is set to all 1s.
Pad byte // 1 bit, padding
Language [2]byte // 15 bits, unsigned int(5)[3], the language code for this media. See ISO 639-2/T for the set of three character codes. Each character is packed as the difference between its ASCII value and 0x60. Since the code is confined to being three lower-case letters, these values are strictly positive.
PreDefined uint16 // 16 bits, pre_defined
}
// Language : 媒体的语言码
// PreDefined : 媒体的回放质量???怎样生成此质量,什么是参照点
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/37
//
// Box Type: hdlr
// Container: Media Box ('mdia') or Meta Box ('meta')
// Mandatory: Yes
// Quantity: Exactly one
// HandlerBox models the 'hdlr' box, which declares the handler type of a
// media box or meta box.
type HandlerBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
PreDefined uint32 // 32 bits, pre_defined
HandlerType uint32 // 32 bits. When present in a media box, it is one of 'vide', 'soun', 'hint', 'meta', 'auxv', or a value from a derived specification. When present in a meta box, it contains an appropriate value to indicate the format of the meta box contents; the value 'null' can be used in the primary meta box to indicate that it is merely being used to hold resources.
Reserved [3]uint32 // three 32-bit values, reserved
Name string // a null-terminated string in UTF-8 characters which gives a human-readable name for the track type (for debugging and inspection purposes)
}
// handler_type when present in a media box, is an integer containing one of the following values, or a value from a derived specification:
// 'vide' Video track
// 'soun' Audio track
// 'hint' Hint track
// 'meta' Timed Metadata track
// 'auxv' Auxiliary Video track
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/38
//
// Box Type: minf
// Container: Media Box ('mdia')
// Mandatory: Yes
// Quantity: Exactly one
//
// This box contains all the objects that declare characteristic information of the media in the track.
// MediaInformationBox models the 'minf' container box.
//
// NOTE(review): ISO/IEC 14496-12 defines 'minf' as a plain Box (no version
// or flags), yet a full box header is embedded here — confirm whether
// MP4FullBoxHeader is intentional before relying on it.
type MediaInformationBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/38
//
// Box Types: vmhd, smhd, hmhd, nmhd
// Container: Media Information Box (‘minf’)
// Mandatory: Yes
// Quantity: Exactly one specific media header shall be present
//
// There is a different media information header for each track type (corresponding to the media handler-type);
// the matching header shall be present, which may be one of those defined here, or one defined in a derived specification
// MediaInformationHeaderBoxes is an empty placeholder struct for the media
// information header variants (vmhd, smhd, hmhd, nmhd); it currently has
// no fields.
type MediaInformationHeaderBoxes struct {
// VideoMediaHeaderBox
//
}
// Box Types: vmhd
// The video media header contains general presentation information, independent of the coding, for video media.
// Note that the flags field has the value 1.
// VideoMediaHeaderBox models the 'vmhd' box: general presentation
// information for video media, independent of the coding.
type VideoMediaHeaderBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
GraphicsMode uint16 // 16 bits, a composition mode for this video track, from an enumerated set which may be extended by derived specifications: copy = 0 (copy over the existing image)
Opcolor [3]uint16 // three 16-bit values, a set of 3 colour values (red, green, blue) available for use by graphics modes
}
// Box Types: smhd
// The sound media header contains general presentation information, independent of the coding, for audio media.
// This header is used for all tracks containing audio.
// SoundMediaHeaderBox models the 'smhd' box: general presentation
// information for audio media, independent of the coding.
type SoundMediaHeaderBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
Balance int16 // 16 bits, a fixed-point 8.8 number that places mono audio tracks in a stereo space; 0 is centre (the normal value); full left is -1.0 and full right is 1.0
Reserved uint16 // 16 bits, reserved
}
// Box Types: hmhd
// The hint media header contains general information, independent of the protocol, for hint tracks.
// (A PDU is a Protocol Data Unit.)
// HintMediaHeaderBox models the 'hmhd' box: general information for hint
// tracks, independent of the protocol. A PDU is a Protocol Data Unit.
type HintMediaHeaderBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
MaxPDUSize uint16 // 16 bits, the size in bytes of the largest PDU in this (hint) stream
AvgPDUSize uint16 // 16 bits, the average size of a PDU over the entire presentation
MaxBitrate uint32 // 32 bits, the maximum rate in bits/second over any window of one second
AvgBitrate uint32 // 32 bits, the average rate in bits/second over the entire presentation
Reserved uint32 // 32 bits, reserved
}
// Box Types: nmhd
// Streams other than visual and audio (e.g., timed metadata streams) may use a null Media Header Box, as defined here.
// NullMediaHeaderBox models the 'nmhd' box, which carries only the box
// headers and no further fields.
type NullMediaHeaderBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/56
//
// Box Type: dinf
// Container: Media Information Box ('minf') or Meta Box ('meta')
// Mandatory: Yes (required within 'minf' box) and No (optional within 'meta' box)
// Quantity: Exactly one
//
// The data information box contains objects that declare the location of the media information in a track
// DataInformationBox models the 'dinf' container box. Only the standard
// header is stored here.
type DataInformationBox struct {
MP4BoxHeader // standard header
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/56
//
// Box Types: url, urn, dref
// Container: Data Information Box ('dinf')
// Mandatory: Yes
// Quantity: Exactly one
// DataReferenceBox models the 'dref' box: an entry count followed by data
// entry boxes.
//
// NOTE(review): the spec defines entry_count data entries, while this
// struct holds a single DataEntry value — confirm how multiple entries are
// represented.
type DataReferenceBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
EntryCount uint32 // 32 bits, the number of entries in the following table
DataEntry interface{} // DataEntryUrlBox or DataEntryUrnBox
}
// aligned(8) class DataReferenceBox
// extends FullBox('dref', version = 0, 0) {
// unsigned int(32) entry_count;
// for (i=1; i <= entry_count; i++) {
// DataEntryBox(entry_version, entry_flags) data_entry;
// }
// }
// DataEntryUrlBox is a data entry that locates media data by URL; it is one
// of the two types a DataReferenceBox.DataEntry may hold.
type DataEntryUrlBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
Location string // string, the location (presumably a URL — confirm against the spec)
}
// DataEntryUrnBox is a data entry that identifies media data by URN; it is
// one of the two types a DataReferenceBox.DataEntry may hold.
type DataEntryUrnBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
Name string // string, the name (presumably a URN — confirm against the spec)
Location string // string, the location (presumably a URL where the named resource can be found — confirm)
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/40
//
// Box Type: stbl
// Container: Media Information Box ('minf')
// Mandatory: Yes
// Quantity: Exactly one
// SampleTableBox models the 'stbl' container box. Only the standard header
// is stored here.
type SampleTableBox struct {
MP4BoxHeader // standard header
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/40
//
// Box Types: stsd
// Container: Sample Table Box ('stbl')
// Mandatory: Yes
// Quantity: Exactly one
// SampleDescriptionBox models the 'stsd' box header fields. The sample
// entries that follow the entry count are not stored in this struct.
type SampleDescriptionBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
EntryCount uint32 // 32 bits, the number of entries in the following table
}
// for (i = 1 ; i <= entry_count ; i++) {
// switch (handler_type){
// case ‘soun’: // for audio tracks
// AudioSampleEntry();
// break;
// case ‘vide’: // for video tracks
// VisualSampleEntry();
// break;
// case ‘hint’: // Hint track
// HintSampleEntry();
// break;
// case ‘meta’: // Metadata track
// MetadataSampleEntry();
// break;
// }
// }
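// After the box header and version fields there is an entry count field. Each entry carries type information such as 'vide' or 'soun',
// and the sample description provides different information depending on that type: a video track has "VisualSampleEntry" information,
// and an audio track has "AudioSampleEntry" information.
// The video codec type, width, height and length, as well as the audio channel count, sampling and similar information, all appear in this box.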
// is the appropriate sample entry
// SampleEntry holds the fields common to sample entries: reserved bytes and
// the data reference index.
type SampleEntry struct {
Reserved [6]uint8 // 48 bits, reserved
DataReferenceIndex uint16 // 16 bits, the index of the data reference to use to retrieve data associated with samples that use this sample description. Data references are stored in Data Reference Boxes. The index ranges from 1 to the number of data references.
}
// HintSampleEntry is the sample entry for hint tracks; its payload is kept
// as raw bytes.
type HintSampleEntry struct {
Data []uint8 // array of 8-bit values, the raw entry data
}
// Box Types: btrt
// BitRateBox models the 'btrt' box: decoding buffer size and bit rates.
type BitRateBox struct {
MP4BoxHeader // standard header
BufferSizeDB uint32 // 32 bits, the size of the decoding buffer for the elementary stream in bytes
MaxBitrate uint32 // 32 bits, the maximum rate in bits/second over any window of one second
AvgBitrate uint32 // 32 bits, the average rate in bits/second over the entire presentation
}
// MetaDataSampleEntry is an empty placeholder for the metadata sample
// entry; it currently has no fields.
type MetaDataSampleEntry struct{}
// XMLMetaDataSampleEntry describes timed XML metadata.
type XMLMetaDataSampleEntry struct {
ContentEncoding string // optional, a null-terminated string in UTF-8 characters that provides a MIME type identifying the content encoding of the timed metadata
NameSpace string // string, the namespace of the schema for the timed XML metadata
SchemaLocation string // optional, a URL to find the schema corresponding to the namespace. This is needed for decoding of the timed metadata by XML aware encoding mechanisms such as BiM.
Brb BitRateBox // optional bit rate box
}
// TextMetaDataSampleEntry describes timed text metadata.
type TextMetaDataSampleEntry struct {
ContentEncoding string // optional, a null-terminated string in UTF-8 characters that provides a MIME type identifying the content encoding of the timed metadata
MimeFormat string // string, a MIME type which identifies the content format of the timed metadata. Examples for this field are 'text/html' and 'text/plain'.
Brb BitRateBox // optional bit rate box
}
// URIBox is a full box that carries a single URI string.
type URIBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
TheURI string // string, a URI formatted according to the rules in 6.2.4 of the spec
}
// URIInitBox is a full box that carries opaque URI initialization data.
type URIInitBox struct {
MP4BoxHeader // standard header
MP4FullBoxHeader // full box header
UriInitializationData []uint8 // array of 8-bit values, opaque data whose form is defined in the documentation of the URI form
}
// URIMetaSampleEntry describes URI-identified metadata: a URI box plus an
// optional initialization box. The MPEG-4 bit rate box field is currently
// commented out.
type URIMetaSampleEntry struct {
TheLabel URIBox // the URI box
Init URIInitBox // optional
//Mpeg4 MPEG4BitRateBox // optional
}
// Box Types: pasp
// PixelAspectRatioBox models the 'pasp' box. HSpacing and VSpacing together
// define the relative width and height of a pixel.
type PixelAspectRatioBox struct {
MP4BoxHeader // standard header
HSpacing uint32 // 32 bits, the horizontal term of the relative pixel width and height
VSpacing uint32 // 32 bits, the vertical term of the relative pixel width and height
}
// Box Types: clap
// Visual Sequences
// CleanApertureBox models the 'clap' box. Each quantity is a fractional
// number stored as a pair of fields; NOTE(review): the N/D suffixes
// presumably mean numerator and denominator — confirm against the spec.
type CleanApertureBox struct {
MP4BoxHeader // standard header
CleanApertureWidthN uint32 // 32 bits, N part of the fractional number which defines the exact clean aperture width, in counted pixels, of the video image
CleanApertureWidthD uint32 // 32 bits, D part of the fractional number which defines the exact clean aperture width, in counted pixels, of the video image
CleanApertureHeightN uint32 // 32 bits, N part of the fractional number which defines the exact clean aperture height, in counted pixels, of the video image
CleanApertureHeightD uint32 // 32 bits, D part of the fractional number which defines the exact clean aperture height, in counted pixels, of the video image
HorizOffN uint32 // 32 bits, N part of the fractional number which defines the horizontal offset of clean aperture centre minus (width-1)/2. Typically 0.
HorizOffD uint32 // 32 bits, D part of the fractional number which defines the horizontal offset of clean aperture centre minus (width-1)/2. Typically 0.
VertOffN uint32 // 32 bits, N part of the fractional number which defines the vertical offset of clean aperture centre minus (height-1)/2. Typically 0.
VertOffD uint32 // 32 bits, D part of the fractional number which defines the vertical offset of clean aperture centre minus (height-1)/2. Typically 0.
}
// Box Types: colr
// ColourInformationBox models the 'colr' box. Only the colour type is
// stored; the type-dependent payload (the 'nclx' fields or an ICC profile)
// is not represented in this struct.
type ColourInformationBox struct {
MP4BoxHeader // standard header
ColourType uint32 // 32 bits, an indication of the type of colour information supplied. For colour_type 'nclx' the following fields are exactly the four bytes defined for PTM_COLOR_INFO( ) in A.7.2 of ISO/IEC 29199-2, but note that the full range flag is here in a different bit position.
}
// if (colour_type == ‘nclx’) /* on-screen colours */
// {
// unsigned int(16) colour_primaries;
// unsigned int(16) transfer_characteristics;
// unsigned int(16) matrix_coefficients;
// unsigned int(1) full_range_flag;
// unsigned int(7) reserved = 0;
// }
// else if (colour_type == ‘rICC’)
// {
// ICC_profile; // restricted ICC profile
// }
// else if (colour_type == ‘prof’)
// {
// ICC_profile; // unrestricted ICC profile
// }
// ICC_profile : an ICC profile as defined in ISO 15076-1 or ICC.1:2010 is supplied.
// VisualSampleEntry is the sample entry for video tracks: dimensions,
// resolution, frame count, compressor name and depth, plus optional clean
// aperture and pixel aspect ratio boxes.
type VisualSampleEntry struct {
PreDefined1 uint16 // 16 bits, pre_defined
Reserved1 uint16 // 16 bits, reserved
PreDefined2 [3]uint32 // 96 bits, pre_defined
Width uint16 // 16 bits, the maximum visual width of the stream described by this sample description, in pixels
Height uint16 // 16 bits, the maximum visual height of the stream described by this sample description, in pixels
HorizreSolution uint32 // 32 bits, the horizontal resolution of the image in pixels-per-inch, as a fixed 16.16 number
VertreSolution uint32 // 32 bits, the vertical resolution of the image in pixels-per-inch, as a fixed 16.16 number
Reserved3 uint32 // 32 bits, reserved
FrameCount uint16 // 16 bits, how many frames of compressed video are stored in each sample. The default is 1, for one frame per sample; it may be more than 1 for multiple frames per sample.
CompressorName [32]string // a name, for informative purposes. On the wire it is a fixed 32-byte field, with the first byte set to the number of bytes to be displayed, followed by that number of bytes of displayable data, and then padding to complete 32 bytes total (including the size byte). The field may be set to 0. NOTE(review): this is declared as an array of 32 strings rather than 32 bytes — likely meant to be [32]byte; confirm before changing, as callers may depend on the type.
Depth uint16 // 16 bits, takes one of the following values: 0x0018 - images are in colour with no alpha
PreDefined3 int16 // 16 bits, pre_defined
Cab CleanApertureBox // optional clean aperture box
Parb PixelAspectRatioBox // optional pixel aspect ratio box
}
// Audio Sequences
// AudioSampleEntry is the sample entry for audio tracks: channel count,
// sample size and sample rate.
type AudioSampleEntry struct {
Reserved1 [2]uint32 // two 32-bit values, reserved
ChannelCount uint16 // 16 bits, the number of channels such as 1 (mono) or 2 (stereo)
SampleSize uint16 // 16 bits, the sample size in bits; takes the default value of 16
PreDefined uint16 // 16 bits, pre_defined
Reserved2 uint16 // 16 bits, reserved
SampleRate uint32 // 32 bits, the sampling rate expressed as a 16.16 fixed-point number (hi.lo)
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/48
//
// Box Type: stts
// Container: Sample Table Box ('stbl')
// Mandatory: Yes
// Quantity: Exactly one
//
// TimeToSampleBox is the 'stts' box: the box headers, an entry count, and the
// time-to-sample table.
type TimeToSampleBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// EntryCount gives the number of entries in the following table (32 bits).
	EntryCount uint32
	// Table is the time-to-sample table, EntryCount elements.
	Table []TimeToSampleTable
}
// TimeToSampleTable holds the sample counts and sample deltas of the 'stts'
// time-to-sample table.
//
// NOTE(review): both fields are slices, although TimeToSampleBox.Table is
// already a slice with EntryCount elements and each value is described as a
// single 32-bit integer per entry — confirm which layout the parser fills.
type TimeToSampleTable struct {
	// SampleCount counts the number of consecutive samples that have the given
	// duration (32 bits per value).
	SampleCount []uint32
	// SampleDelta gives the delta of these samples in the time-scale of the
	// media (32 bits per value).
	SampleDelta []uint32
}
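// "stts" stores the duration of each sample and describes how time maps to samples; with it, the sample for any given time can be found.
// "stts" may contain a compressed table that maps time to sample numbers; other tables provide the size of and pointer to each sample.
// Each table entry gives the number of consecutive samples that share the same time delta, together with that delta.
// Accumulating these deltas builds the complete time-to-sample table.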
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/49
//
// Box Type: ctts
// Container: Sample Table Box (‘stbl’)
// Mandatory: No
// Quantity: Zero or one
//
// CompositionOffsetBox is the 'ctts' box: the box headers, an entry count, and
// the composition offset table.
type CompositionOffsetBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// EntryCount gives the number of entries in the following table (32 bits).
	EntryCount uint32
	// Table is the composition offset table, EntryCount elements.
	Table []CompositionOffsetTable
}
// CompositionOffsetTable is one entry of the 'ctts' composition offset table:
// a run of consecutive samples that share the same composition offset.
type CompositionOffsetTable struct {
	// SampleCount counts the number of consecutive samples that have the given
	// offset (32 bits).
	SampleCount uint32
	// SampleOffset gives the offset between CT and DT, such that
	// CT(n) = DT(n) + CTTS(n). It holds either an int32 or a uint32.
	// NOTE(review): presumably the signedness follows the box version — confirm
	// against the code that populates this field.
	SampleOffset interface{}
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/50
//
// Box Type: cslg
// Container: Sample Table Box (‘stbl’)
// Mandatory: No
// Quantity: Zero or one
//
// CompositionToDecodeBox is the 'cslg' box: the box headers followed by five
// signed 32-bit values relating composition times to decode times.
type CompositionToDecodeBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// CompositionToDTSShift is a signed 32-bit shift. If this value is added to
	// the composition times (as calculated by the CTS offsets from the DTS),
	// then for all samples their CTS is guaranteed to be greater than or equal
	// to their DTS, and the buffer model implied by the indicated profile/level
	// will be honoured. If LeastDecodeToDisplayDelta is positive or zero, this
	// field can be 0; otherwise it should be at least
	// (-LeastDecodeToDisplayDelta).
	CompositionToDTSShift int32
	// LeastDecodeToDisplayDelta is the smallest composition offset in the
	// CompositionTimeToSample box in this track (signed 32 bits).
	LeastDecodeToDisplayDelta int32
	// GreatestDecodeToDisplayDelta is the largest composition offset in the
	// CompositionTimeToSample box in this track (signed 32 bits).
	GreatestDecodeToDisplayDelta int32
	// CompositionStartTime is the smallest computed composition time (CTS) for
	// any sample in the media of this track (signed 32 bits).
	CompositionStartTime int32
	// CompositionEndTime is the composition time plus the composition duration
	// of the sample with the largest computed composition time (CTS) in the
	// media of this track (signed 32 bits). If this field takes the value 0,
	// the composition end time is unknown.
	CompositionEndTime int32
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/58
//
// Box Type: stsc
// Container: Sample Table Box ('stbl')
// Mandatory: Yes
// Quantity: Exactly one
//
// SampleToChunkBox is the 'stsc' box: the box headers, an entry count, and the
// sample-to-chunk table.
type SampleToChunkBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// EntryCount gives the number of entries in the following table (32 bits).
	EntryCount uint32
	// Table is the sample-to-chunk table, EntryCount elements.
	Table []SampleToChunkTable
}
// SampleToChunkTable holds the first-chunk, samples-per-chunk and
// sample-description-index values of the 'stsc' sample-to-chunk table.
//
// NOTE(review): all three fields are slices, although SampleToChunkBox.Table is
// already a slice with EntryCount elements and each value is described as a
// single 32-bit integer per entry — confirm which layout the parser fills.
type SampleToChunkTable struct {
	// FirstChunk gives the index of the first chunk in this run of chunks that
	// share the same samples-per-chunk and sample-description-index (32 bits
	// per value). The index of the first chunk in a track has the value 1: the
	// first_chunk field in the first record of this box has the value 1,
	// identifying that the first sample maps to the first chunk.
	FirstChunk []uint32
	// SamplesPerChunk gives the number of samples in each of these chunks
	// (32 bits per value).
	SamplesPerChunk []uint32
	// SampleDescriptionIndex gives the index of the sample entry that describes
	// the samples in this chunk (32 bits per value). The index ranges from 1 to
	// the number of sample entries in the Sample Description Box.
	SampleDescriptionIndex []uint32
}
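// Organizing samples into chunks makes it easier to optimize data access; a chunk contains one or more samples.
// "stsc" uses a table to describe the mapping between samples and chunks; looking up this table locates the chunk that contains a given sample, and from there the sample itself.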
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/57
//
// Box Type: stsz, stz2
// Container: Sample Table Box (‘stbl’)
// Mandatory: Yes
// Quantity: Exactly one variant must be present
//
// SampleSizeBoxes is an empty struct with no fields. The two box variants are
// declared separately as SampleSizeBox ('stsz') and CompactSampleSizeBox
// ('stz2').
// NOTE(review): this type appears to serve only as a named marker for that
// pair — confirm whether anything references it.
type SampleSizeBoxes struct{}
// Box Type: stsz
//
// SampleSizeBox is the 'stsz' box: the box headers, a default sample size, the
// sample count, and an optional per-sample size table.
type SampleSizeBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// SampleSize specifies the default sample size (32 bits). If all the
	// samples are the same size, this field contains that size value. If it is
	// 0, the samples have different sizes and those sizes are stored in the
	// sample size table. If it is not 0, it specifies the constant sample size
	// and no array follows.
	SampleSize uint32
	// SampleCount gives the number of samples in the track (32 bits); if
	// SampleSize is 0, it is also the number of entries in the following table.
	SampleCount uint32
	// EntrySize is the sample size table: an array of 32-bit integers with
	// SampleCount elements, each specifying the size of a sample, indexed by
	// its number. The concrete Go type stored here is not fixed by this
	// declaration.
	EntrySize interface{}
}
// if (sample_size == 0) {
// for (i = 1; i <= sample_count; i++) {
// unsigned int(32) entry_size;
// }
// }
// Box Type: stz2
//
// CompactSampleSizeBox is the 'stz2' box: the box headers, a field size in
// bits, the sample count, and a table of sample sizes of that field size.
type CompactSampleSizeBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// Reserved is a 24-bit reserved field, stored as three bytes.
	Reserved [3]uint8
	// FieldSize specifies the size in bits of the entries in the following
	// table (8 bits); it shall take the value 4, 8 or 16. If the value 4 is
	// used, each byte contains two values: entry[i]<<4 + entry[i+1]; if the
	// sizes do not fill an integral number of bytes, the last byte is padded
	// with zeros.
	FieldSize uint8
	// SampleCount gives the number of entries in the following table (32 bits).
	SampleCount uint32
	// EntrySize is the table of sample sizes: SampleCount entries, each
	// FieldSize bits wide in the file. The concrete Go type stored here is not
	// fixed by this declaration.
	EntrySize interface{}
}
// for (i = 1; i <= sample_count; i++) {
// unsigned int(field_size) entry_size;
// }
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/59
//
// Box Type: stco, co64
// Container: Sample Table Box (‘stbl’)
// Mandatory: Yes
// Quantity: Exactly one variant must be present
//
// ChunkOffsetBox is the 'stco' variant of the chunk offset box: the box
// headers, an entry count, and a table of 32-bit chunk offsets.
type ChunkOffsetBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// EntryCount gives the number of entries in the following table (32 bits).
	EntryCount uint32
	// ChunkOffset is the array of 32-bit chunk offsets, EntryCount elements.
	ChunkOffset []uint32
}
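// "stco" defines the position of each chunk in the media stream. There are two possible offset widths, 32-bit and 64-bit; the latter is useful for very large movies.
// Only one of the two appears in a given table. Offsets are relative to the whole file, not to any box, so media data can be located directly in the file
// without interpreting boxes. Note that whenever any preceding box changes, this table must be rebuilt, because the positions have changed.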
// Box Type: co64
//
// ChunkLargeOffsetBox is the 'co64' variant of the chunk offset box: the box
// headers, an entry count, and a table of 64-bit chunk offsets.
type ChunkLargeOffsetBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// EntryCount gives the number of entries in the following table (32 bits).
	EntryCount uint32
	// ChunkOffset is the array of 64-bit chunk offsets, EntryCount elements.
	ChunkOffset []uint64
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/51
//
// Box Type: stss
// Container: Sample Table Box (‘stbl’)
// Mandatory: No
// Quantity: Zero or one
//
// This box provides a compact marking of the sync samples within the stream. The table is arranged in strictly increasing order of sample number.
// If the sync sample box is not present, every sample is a sync sample.
type SyncSampleBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// EntryCount gives the number of entries in the following table (32 bits).
	// If it is zero, there are no sync samples within the stream and the
	// following table is empty.
	EntryCount uint32
	// SampleNumber gives the numbers of the samples that are sync samples in
	// the stream: an array of 32-bit integers, EntryCount elements.
	SampleNumber []uint32
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/52
//
// Box Type: stsh
// Container: Sample Table Box (‘stbl’)
// Mandatory: No
// Quantity: Zero or one
//
// ShadowSyncSampleBox is the 'stsh' box: the box headers, an entry count, and
// the shadow sync sample table.
type ShadowSyncSampleBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// EntryCount gives the number of entries in the following table (32 bits).
	EntryCount uint32
	// Table is the shadow sync sample table, EntryCount elements.
	Table []ShadowSyncSampleTable
}
// ShadowSyncSampleTable is one entry of the 'stsh' table: it pairs a sample
// with the number of its alternative sync sample.
type ShadowSyncSampleTable struct {
	// ShadowedSampleNumber gives the number of a sample for which there is an
	// alternative sync sample (32 bits).
	ShadowedSampleNumber uint32
	// SyncSampleNumber gives the number of the alternative sync sample
	// (32 bits).
	SyncSampleNumber uint32
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/60
//
// Box Type: padb
// Container: Sample Table (‘stbl’)
// Mandatory: No
// Quantity: Zero or one
//
// In some streams the media samples do not occupy all bits of the bytes given by the sample size, and are
// padded at the end to a byte boundary. In some cases, it is necessary to record externally the number of
// padding bits used. This table supplies that information.
type PaddingBitsBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// SampleCount counts the number of samples in the track (32 bits); it
	// should match the count in other tables.
	SampleCount uint32
	// Table is the padding bits table, (SampleCount + 1) / 2 elements; each
	// element covers two samples.
	Table []PaddingBitsTable
}
// PaddingBitsTable is one entry of the 'padb' table, describing the padding of
// two consecutive samples. Each field is held in a full Go byte here, although
// it is only 1 or 3 bits wide in the file.
type PaddingBitsTable struct {
	// Reserved1 is a 1-bit reserved field.
	Reserved1 byte
	// Pad1 is a 3-bit value from 0 to 7, indicating the number of bits at the
	// end of sample (i*2)+1.
	Pad1 byte
	// Reserved2 is a 1-bit reserved field.
	Reserved2 byte
	// Pad2 is a 3-bit value from 0 to 7, indicating the number of bits at the
	// end of sample (i*2)+2.
	Pad2 byte
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/46
//
// Box Type: stdp
// Container: Sample Table Box (‘stbl’).
// Mandatory: No.
// Quantity: Zero or one.
//
// This box contains the degradation priority of each sample. The values are stored in the table, one for each
// sample. The size of the table, sample_count is taken from the sample_count in the Sample Size Box
// ('stsz'). Specifications derived from this define the exact meaning and acceptable range of the priority field.
type DegradationPriorityBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// Priority specifies the degradation priority for each sample: an array of
	// 16-bit integers with sample count elements.
	Priority []uint16
}
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/53
//
// Box Types: sdtp
// Container: Sample Table Box (‘stbl’)
// Mandatory: No
// Quantity: Zero or one
//
// IndependentAndDisposableSamplesBox is the 'sdtp' box: the box headers
// followed by one table entry of dependency flags per sample.
type IndependentAndDisposableSamplesBox struct {
	// MP4BoxHeader is the standard box header.
	MP4BoxHeader
	// MP4FullBoxHeader is the full box header.
	MP4FullBoxHeader
	// Table is the independent and disposable samples table, sample count
	// elements.
	Table []IndependentAndDisposableSamplesTable
}
// IndependentAndDisposableSamplesTable is one entry of the 'sdtp' table: four
// 2-bit flags, each held in a full Go byte here. The meaning of the values
// 0-3 for each flag is listed in the comment that follows this type.
type IndependentAndDisposableSamplesTable struct {
	// IsLeading is the 2-bit is_leading value.
	IsLeading byte
	// SampleDependsOn is the 2-bit sample_depends_on value.
	SampleDependsOn byte
	// SampleIsDependedOn is the 2-bit sample_is_depended_on value.
	SampleIsDependedOn byte
	// SampleHasTedundancy is the 2-bit sample_has_redundancy value.
	// NOTE(review): the name is presumably a typo for "Redundancy"; it is left
	// unchanged because renaming an exported field would break callers.
	SampleHasTedundancy byte
}
// is_leading takes one of the following four values:
// 0: the leading nature of this sample is unknown;
// 1: this sample is a leading sample that has a dependency before the referenced I-picture (and is
// therefore not decodable);
// 2: this sample is not a leading sample;
// 3: this sample is a leading sample that has no dependency before the referenced I-picture (and is
// therefore decodable);
// sample_depends_on takes one of the following four values:
// 0: the dependency of this sample is unknown;
// 1: this sample does depend on others (not an I picture);
// 2: this sample does not depend on others (I picture);
// 3: reserved
// sample_is_depended_on takes one of the following four values:
// 0: the dependency of other samples on this sample is unknown;
// 1: other samples may depend on this one (not disposable);
// 2: no other sample depends on this one (disposable);
// 3: reserved
// sample_has_redundancy takes one of the following four values:
// 0: it is unknown whether there is redundant coding in this sample;
// 1: there is redundant coding in this sample;
// 2: there is no redundant coding in this sample;
// 3: reserved
// -------------------------------------------------------------------------------------------------------
//
// ISO_IEC_14496-12_2012.pdf Page/75
//
// Box Type: sbgp
// Container: Sample Table Box (‘stbl’) or Track Fragment Box (‘traf’)