Skip to content
Permalink
Browse files

[FEATURE] Added support for DVB inside MKV (#1082)

* [FIX] Fix incorrect comparison of strings for AVC codec id in .mkv

* Initial work on adding DVB support to .mkv

* [REQUEST] Finished adding support for DVB inside MKV (#1000)

* Update CHANGES.TXT
  • Loading branch information...
thelastpolaris authored and cfsmp3 committed Mar 23, 2019
1 parent 4d24568 commit 718cf55131b8276647be1aadcc751d9c71afd2a7
@@ -10,6 +10,9 @@
- Fix: Many typos in comments and output messages
- Fix: Ignore Visual Studio temporary project files
- New: Add support for non-Latin characters in stdout
- Fix: Check whether stream is empty
- New: Add support for EIA-608 inside .mkv
- New: Add support for DVB inside .mkv

0.87 (2018-10-23)
-----------------
@@ -535,6 +535,7 @@ void free_encoder_context(struct encoder_ctx *ctx)
freep(&ctx->start_credits_text);
freep(&ctx->end_credits_text);
freep(&ctx->prev);
freep(&ctx->last_string);
freep(&ctx);
}
void free_decoder_context(struct lib_cc_decode *ctx)
@@ -988,6 +988,9 @@ struct encoder_ctx *init_encoder(struct encoder_cfg *opt)
ctx->encoding = opt->encoding;
ctx->write_format = opt->write_format;

ctx->is_mkv = 0;
ctx->last_string = NULL;

ctx->transcript_settings = &opt->transcript_settings;
ctx->no_bom = opt->no_bom;
ctx->sentence_cap = opt->sentence_cap;
@@ -128,6 +128,9 @@ struct encoder_ctx
//for dvb subs
struct encoder_ctx* prev;
int write_previous;
//for dvb in .mkv
int is_mkv; //are we working with .mkv file
char* last_string; //last recognized DVB sub

// Segmenting
int segment_pending;
@@ -110,23 +110,28 @@ int write_cc_bitmap_as_srt(struct cc_subtitle *sub, struct encoder_ctx *context)
str = paraof_ocrtext(sub, context->encoded_crlf, context->encoded_crlf_length);
if (str)
{
if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER))
{
millis_to_time (ms_start,&h1,&m1,&s1,&ms1);
millis_to_time (ms_end-1,&h2,&m2,&s2,&ms2); // -1 To prevent overlapping with next line.
context->srt_counter++;
sprintf(timeline, "%u%s", context->srt_counter, context->encoded_crlf);
used = encode_line(context, context->buffer,(unsigned char *) timeline);
write(context->out->fh, context->buffer, used);
sprintf (timeline, "%02u:%02u:%02u,%03u --> %02u:%02u:%02u,%03u%s",
h1, m1, s1, ms1, h2, m2, s2, ms2, context->encoded_crlf);
used = encode_line(context, context->buffer,(unsigned char *) timeline);
write (context->out->fh, context->buffer, used);
len = strlen(str);
write (context->out->fh, str, len);
write (context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
}
freep(&str);
if(context->is_mkv == 1) {
// Save recognized string for later use in matroska.c
context->last_string = str;
} else {
if (context->prev_start != -1 || !(sub->flags & SUB_EOD_MARKER))
{
millis_to_time (ms_start,&h1,&m1,&s1,&ms1);
millis_to_time (ms_end-1,&h2,&m2,&s2,&ms2); // -1 To prevent overlapping with next line.
context->srt_counter++;
sprintf(timeline, "%u%s", context->srt_counter, context->encoded_crlf);
used = encode_line(context, context->buffer,(unsigned char *) timeline);
write(context->out->fh, context->buffer, used);
sprintf (timeline, "%02u:%02u:%02u,%03u --> %02u:%02u:%02u,%03u%s",
h1, m1, s1, ms1, h2, m2, s2, ms2, context->encoded_crlf);
used = encode_line(context, context->buffer,(unsigned char *) timeline);
write (context->out->fh, context->buffer, used);
len = strlen(str);
write (context->out->fh, str, len);
write (context->out->fh, context->encoded_crlf, context->encoded_crlf_length);
}
freep(&str);
}
}
for(i = 0, rect = sub->data; i < sub->nb_data; i++, rect++)
{
@@ -1685,9 +1685,14 @@ void dvbsub_handle_display_segment(struct encoder_ctx *enc_ctx,
return;
if (enc_ctx->write_previous) //this condition is used for the first subtitle - write_previous will be 0 first so we don't encode a non-existing previous sub
{
enc_ctx->prev->last_string = NULL; // Reset last recognized sub text
sub->prev->end_time = (dec_ctx->timing->current_pts - dec_ctx->timing->min_pts) / (MPEG_CLOCK_FREQ / 1000); //we set the end time of the previous sub the current pts
encode_sub(enc_ctx->prev, sub->prev); //we encode it
enc_ctx->srt_counter = enc_ctx->prev->srt_counter; //for dvb subs we need to update the current srt counter because we always encode the previous subtitle (and the counter is increased for the previous context)

enc_ctx->last_string = enc_ctx->prev->last_string; // Update last recognized string (used in Matroska)
enc_ctx->prev->last_string = NULL;

enc_ctx->srt_counter = enc_ctx->prev->srt_counter; //for dvb subs we need to update the current srt counter because we always encode the previous subtitle (and the counter is increased for the previous context)
enc_ctx->prev_start = enc_ctx->prev->prev_start;
sub->prev->got_output = 0;
if (enc_ctx->write_format == CCX_OF_WEBVTT) { // we already wrote header, but since we encoded last sub, we must prevent multiple headers in future
@@ -1,10 +1,11 @@
#include "lib_ccx.h"
#include "lib_ccx.h"
#include "utility.h"
#include "matroska.h"
#include "ccx_encoders_helpers.h"
#include "ccx_common_timing.h"
#include <limits.h>
#include <assert.h>
#include "dvb_subtitle_decoder.h"

void skip_bytes(FILE* file, ULLONG n) {
FSEEK(file, n, SEEK_CUR);
@@ -256,14 +257,50 @@ struct matroska_sub_sentence* parse_segment_cluster_block_group_block(struct mat

ULLONG size = pos + len - get_current_byte(file);
char* message = read_bytes_signed(file, size);
struct matroska_sub_track* track = mkv_ctx->sub_tracks[sub_track_index];

struct matroska_sub_sentence* sentence = malloc(sizeof(struct matroska_sub_sentence));
sentence->text = message;
sentence->text_size = size;
sentence->time_start = timecode + cluster_timecode;
ULLONG timestamp = timecode + cluster_timecode;
sentence->blockaddition = NULL;
sentence->time_end = 0; // Initialize time_end so that it is updated if it was not set

if(strcmp(track->codec_id_string, dvb_codec_id) == 0) {
struct encoder_ctx *enc_ctx = update_encoder_list(mkv_ctx->ctx);
struct lib_cc_decode *dec_ctx = update_decoder_list(mkv_ctx->ctx);

set_current_pts(dec_ctx->timing, timestamp * (MPEG_CLOCK_FREQ/1000));

int ret = dvbsub_decode(enc_ctx, dec_ctx, message, size, &mkv_ctx->dec_sub);
// We use string produced by enc_ctx as a message
free(message);

/* Bear in mind that with DVB we are handling the text of the previous block.
There are two layouts of DVB in .mkv. In the first, each display block is followed by an empty block in order to
allow gaps in time between display blocks. In the second, a display block is followed directly by another display block.
This code handles both cases, but empty blocks are never saved as sentences; only their timestamps are kept. */
char* dvb_message = enc_ctx->last_string;
if (ret<0 || dvb_message == NULL) {
// No text - no sentence is returned. Free the memory
free(sentence);
if(ret < 0) mprint ("Return from dvbsub_decode: %d\n", ret);
else track->last_timestamp = timestamp; // We save timestamp because we need to use it for the next sub as a timestart
return NULL;
}
sentence->text = dvb_message;
sentence->text_size = strlen(dvb_message);

/* Update time.
Time start - timestamp of the previous block
Time end - timestamp of the current block */
sentence->time_start = track->last_timestamp;
sentence->time_end = timestamp;
track->last_timestamp = timestamp;
} else {
sentence->time_start = timestamp;
sentence->text = message;
sentence->text_size = size;
}

struct matroska_sub_track* track = mkv_ctx->sub_tracks[sub_track_index];
if (track->sentence_count==0){
track->sentences = malloc(sizeof(struct matroska_sub_sentence*));
}
@@ -418,7 +455,9 @@ void parse_segment_cluster_block_group(struct matroska_ctx* mkv_ctx, ULLONG clus
// between the timestamp of this Block and the timestamp of the next Block in "display" order
if (block_duration == ULONG_MAX)
sentence_list[i]->time_end = ULONG_MAX;
else
else if(sentence_list[i]->time_end == 0)
// If no time_end is set, set it according to block_duration.
// We need this check for correct DVB timecodes
sentence_list[i]->time_end = sentence_list[i]->time_start + block_duration;

if (ccx_options.gui_mode_reports) {
@@ -684,17 +723,15 @@ void parse_segment_track_entry(struct matroska_ctx* mkv_ctx) {
codec_id_string = read_vint_block_string(file);
codec_id = get_track_subtitle_codec_id(codec_id_string);
mprint(" Codec ID: %s\n", codec_id_string);
//We only support AVC by now
if( *codec_id_string == *avc_codec_id) mkv_ctx->avc_track_number = track_number;
else free(codec_id_string);
// For now, AVC is the only codec we support for EIA-608
if( strcmp((const char *)codec_id_string, (const char *)avc_codec_id) == 0 ) mkv_ctx->avc_track_number = track_number;
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_PRIVATE:
if (track_type == MATROSKA_TRACK_TYPE_SUBTITLE)
// We handle DVB's private data differently
if (track_type == MATROSKA_TRACK_TYPE_SUBTITLE && strcmp((const char *)codec_id_string, (const char *)dvb_codec_id) != 0 )
header = read_vint_block_string(file);
else if( *codec_id_string == *avc_codec_id && mkv_ctx->avc_track_number == track_number)
parse_private_codec_data(mkv_ctx);
else
read_vint_block_skip(file);
parse_private_codec_data(mkv_ctx, codec_id_string, track_number, lang);
MATROSKA_SWITCH_BREAK(code, code_len);
case MATROSKA_SEGMENT_TRACK_CODEC_NAME:
read_vint_block_skip(file);
@@ -785,32 +822,70 @@ void parse_segment_track_entry(struct matroska_ctx* mkv_ctx) {
sub_track->track_number = track_number;
sub_track->lang_index = 0;
sub_track->codec_id = codec_id;
sub_track->codec_id_string = codec_id_string;
sub_track->sentence_count = 0;
sub_track->last_timestamp = 0;
for (int i = 0; i < mkv_ctx->sub_tracks_count; i++)
if (strcmp((const char *)mkv_ctx->sub_tracks[i]->lang, (const char *)lang) == 0)
sub_track->lang_index++;
mkv_ctx->sub_tracks = realloc(mkv_ctx->sub_tracks, sizeof(struct matroska_sub_track*) * (mkv_ctx->sub_tracks_count + 1));
mkv_ctx->sub_tracks[mkv_ctx->sub_tracks_count] = sub_track;
mkv_ctx->sub_tracks_count++;
}
else
else {
free(lang);
if(codec_id_string) free(codec_id_string);
}
}

// Handle the CodecPrivate payload of a track entry.
//
// Reads the payload length from the file, then dispatches on codec_id_string:
//  - equal to avc_codec_id and track_number is mkv_ctx->avc_track_number:
//    skip 8 reserved bytes and hand the remaining bytes to do_NAL
//    (the sequence parameter set for AVC).
//  - equal to dvb_codec_id: mark the encoder context as working on .mkv, repack the
//    private data plus lang into an 8-byte descriptor, parse it with
//    parse_dvb_description and store the decoder returned by dvbsub_init_decoder
//    in dec_ctx->private_data.
//  - anything else: skip the payload and return.
//
// Parameters:
//  mkv_ctx         - Matroska context; supplies the file, the lib context and dec_sub.
//  codec_id_string - codec id of the track the payload belongs to.
//  track_number    - number of that track.
//  lang            - language string of the track, copied into the DVB descriptor.
//
// NOTE(review): this listing shows two signatures, two declarations of `data` and the
// reserved-bytes skip both outside and inside the AVC branch. It looks like the lines
// from before and after the change are interleaved; confirm against the real file
// which of them are live.
void parse_private_codec_data(struct matroska_ctx* mkv_ctx)
void parse_private_codec_data(struct matroska_ctx* mkv_ctx, char* codec_id_string, ULLONG track_number, char* lang)
{
FILE* file = mkv_ctx->file;
ULLONG len = read_vint_length(file);
// Skip reserved data
ULLONG reserved_len = 8;
skip_bytes(file, reserved_len);
unsigned char* data = NULL;

struct lib_cc_decode *dec_ctx = update_decoder_list(mkv_ctx->ctx);
ULLONG size = len - reserved_len;

unsigned char* data = read_byte_block(file, size);
do_NAL(dec_ctx, data, size, &mkv_ctx->dec_sub);
// AVC: only the track previously recorded in avc_track_number is processed
if( (strcmp((const char *)codec_id_string, (const char *)avc_codec_id) == 0) && mkv_ctx->avc_track_number == track_number) {
// Skip reserved data
ULLONG reserved_len = 8;
skip_bytes(file, reserved_len);

// NOTE(review): no check that len >= reserved_len; if ULLONG is unsigned (TODO confirm)
// a shorter payload makes this subtraction wrap to a huge size.
ULLONG size = len - reserved_len;

data = read_byte_block(file, size);
do_NAL(dec_ctx, data, size, &mkv_ctx->dec_sub);
} else if (strcmp((const char *)codec_id_string, (const char *)dvb_codec_id) == 0) {
struct encoder_ctx *enc_ctx = update_encoder_list(mkv_ctx->ctx);
// Nothing has been decoded yet, so there is no previous subtitle to write
enc_ctx->write_previous = 0;
// Tell the encoder it is working with a .mkv file
enc_ctx->is_mkv = 1;

data = read_byte_block(file, len);

// 8-byte descriptor: language (3) + subtitling type (1) + composition page id (2) + ancillary page id (2)
// NOTE(review): the malloc result is not checked before it is written to.
unsigned char* codec_data = malloc(sizeof(char)*8);
// 1.ISO_639_language_code (3 bytes)
// NOTE(review): strcpy copies all of lang plus its terminator with no bound; this fits
// only if lang is at most 7 characters, and matches the layout only if it is exactly 3
// (the terminator at index 3 is then overwritten below). Verify what callers pass.
strcpy(codec_data, lang);
// NOTE(review): data[0]..data[4] are read without checking that len >= 5 or that
// read_byte_block returned a usable buffer - confirm.
// 2.subtitling_type (1 byte)
codec_data[3] = data[4];
// 3.composition_page_id (2 bytes)
codec_data[4] = data[0];
codec_data[5] = data[1];
// 4.ancillary_page_id (2 bytes)
codec_data[6] = data[2];
codec_data[7] = data[3];

// Start from an all-zero config before parsing the descriptor into it
struct dvb_config cnf;
memset((void*)&cnf, 0, sizeof(struct dvb_config));

parse_dvb_description(&cnf, codec_data, 8);
dec_ctx->private_data = dvbsub_init_decoder(&cnf, 0);

free(codec_data);
} else {
// Unsupported codec: skip the whole payload; nothing was read into data on this path
skip_bytes(file, len);
return;
}

// Release the payload buffer read by the AVC or DVB branch
free(data);
}
@@ -1038,7 +1113,12 @@ void save_sub_track(struct matroska_ctx* mkv_ctx, struct matroska_sub_track* tra
while (*(sentence->text+size)=='\n' || *(sentence->text+size)=='\r' )
size++;
write(desc, sentence->text+size, sentence->text_size-size);
write(desc, "\n\n", 2);

if(sentence->text[sentence->text_size - 1] == '\n') {
write(desc, "\n", 1);
} else {
write(desc, "\n\n", 2);
}

free(timestamp_start);
free(timestamp_end);
@@ -1074,6 +1154,8 @@ void free_sub_track(struct matroska_sub_track* track)
free(track->header);
if (track->lang != NULL)
free(track->lang);
if (track->codec_id_string != NULL)
free(track->codec_id_string);
for (int i = 0; i < track->sentence_count; i++)
{
struct matroska_sub_sentence* sentence = track->sentences[i];
@@ -1183,7 +1265,7 @@ int matroska_loop(struct lib_ccx_ctx *ctx)
mkv_ctx->file = create_file(ctx);
mkv_ctx->sub_tracks = malloc(sizeof(struct matroska_sub_track**));
//EIA-608
memset(&mkv_ctx->dec_sub,0,sizeof(mkv_ctx->dec_sub));
memset(&mkv_ctx->dec_sub, 0, sizeof(mkv_ctx->dec_sub));
mkv_ctx->avc_track_number = -1;

matroska_parse(mkv_ctx);
@@ -178,6 +178,7 @@ char* matroska_track_text_subtitle_id_extensions[] = {
};

// Codec id strings that a track's codec id is compared against (with strcmp)
// to select AVC (EIA-608) or DVB subtitle handling.
// NOTE(review): these are definitions, not extern declarations; if this file is a
// header included from more than one translation unit they would be defined
// multiple times - confirm.
char* avc_codec_id = "V_MPEG4/ISO/AVC";
char* dvb_codec_id = "S_DVBSUB";

/* Messages */
#define MATROSKA_INFO "\nMatroska parser info: "
@@ -218,6 +219,8 @@ struct matroska_sub_track {
ULLONG track_number;
ULLONG lang_index;
enum matroska_track_subtitle_codec_id codec_id;
char* codec_id_string;
ULLONG last_timestamp;

int sentence_count;
struct matroska_sub_sentence** sentences;
@@ -259,7 +262,7 @@ void parse_segment_cluster(struct matroska_ctx* mkv_ctx);
void parse_simple_block(struct matroska_ctx* mkv_ctx, ULLONG frame_timestamp);
int process_avc_frame_mkv(struct matroska_ctx* mkv_ctx, struct matroska_avc_frame frame);
void parse_segment_track_entry(struct matroska_ctx* mkv_ctx);
void parse_private_codec_data(struct matroska_ctx* mkv_ctx);
void parse_private_codec_data(struct matroska_ctx* mkv_ctx, char* codec_id_string, ULLONG track_number, char* lang);
void parse_segment_tracks(struct matroska_ctx* mkv_ctx);
void parse_segment(struct matroska_ctx* mkv_ctx);

0 comments on commit 718cf55

Please sign in to comment.
You can’t perform that action at this time.