From c9b74c39e90f214be59438eaa57a6ac16aa680a8 Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Sat, 25 Apr 2015 19:21:26 +0530 Subject: [PATCH 1/4] Teletext capitalization fix Fix also detects presence of ' I ' https://github.com/CCExtractor/ccextractor/issues/139 --- src/lib_ccx/telxcc.c | 79 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 4 deletions(-) diff --git a/src/lib_ccx/telxcc.c b/src/lib_ccx/telxcc.c index 570a1065f..30ae7b918 100644 --- a/src/lib_ccx/telxcc.c +++ b/src/lib_ccx/telxcc.c @@ -33,6 +33,7 @@ Werner Brückner -- Teletext in digital television #include "hamming.h" #include "teletext.h" #include +#include "ccx_common_char_encoding.c" #ifdef I18N #include #include @@ -235,6 +236,22 @@ static unsigned ucs2_buffer_prev_used=0; static uint64_t prev_hide_timestamp; static uint64_t prev_show_timestamp; +char cap_telx(char *a) + { static int flag=1; + + if(*a == '\r') + {flag = 1; } + + if(flag && (*a != '\r')) + { + flag = 0; + return cctoupper(*a); + } + else + return cctolower(*a); + +} + void page_buffer_add_string (const char *s) { if (page_buffer_cur_size<(page_buffer_cur_used+strlen (s)+1)) @@ -245,9 +262,22 @@ void page_buffer_add_string (const char *s) if (!page_buffer_cur) fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to process teletext page.\n"); } - memcpy (page_buffer_cur+page_buffer_cur_used, s, strlen (s)); - page_buffer_cur_used+=strlen (s); + + if(strlen(s) <= 2) + {char t = cap_telx(s); + s = &t; + memcpy (page_buffer_cur+page_buffer_cur_used, s,1); + + page_buffer_cur_used+=1; + page_buffer_cur[page_buffer_cur_used]=0; + } + + else + {memcpy (page_buffer_cur+page_buffer_cur_used, s,strlen(s)); + + page_buffer_cur_used+=strlen(s); page_buffer_cur[page_buffer_cur_used]=0; + } } void ucs2_buffer_add_char (uint64_t c) @@ -460,6 +490,7 @@ void process_page(struct lib_ccx_ctx *ctx, teletext_page_t *page) { timecode_show[12] = 0; timestamp_to_srttime(page->hide_timestamp, timecode_hide); timecode_hide[12] = 0; + int i_flag=0; // flag to detect presence of " I " - matter of Capitalization // process data for (uint8_t row = 1; row < 25; row++) { @@ -588,8 +619,48 @@ void process_page(struct lib_ccx_ctx *ctx, teletext_page_t *page) { if (v >= 0x20) { - //if (ctx->wbout1.fh!=-1) fdprintf(ctx->wbout1.fh, "%s", u); - page_buffer_add_string (u); + + if(u[0] == ' '){ //just to detect presence of " I " + if(i_flag == 0){ + i_flag = 1; + continue; + } + else if (i_flag == 2) + i_flag++; + } + + if(i_flag == 1 && u[0] == 'I'){ + i_flag = 2; + continue; + } + + if(i_flag==3 && u[0]== ' '){ //" I " - detected. Add " I ". + char p[3] = {' ','I',' '} ; + page_buffer_add_string(p); + i_flag=0; + continue; + } + + if(i_flag == 2 && u[0]!=' '){ //" I" detected earlier but next char i.e. u, + char temp_1[2] = {'I','\0'}; // contains a char, therefore "I" is a part of word. + char temp_2[2] = {' ','\0'}; + page_buffer_add_string(temp_2); //recover string that was skipped order to detect "I" + page_buffer_add_string(temp_1); + page_buffer_add_string(u); //after adding skipped string, add current char now + i_flag=0; + continue; + } + + if(i_flag==1){ + char l[2]= {' ','\0'}; + page_buffer_add_string(l); + i_flag=0; + } + + + page_buffer_add_string (u); //u contains non-"I" character here.add that. + i_flag=0; + if (ccx_options.gui_mode_reports) // For now we just handle the easy stuff fprintf (stderr,"%s",u); } From c346e36d4e681518f2958f7531b493e0ae0dc7fe Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Sat, 25 Apr 2015 20:51:44 +0530 Subject: [PATCH 2/4] Enhancement of Capitalization in teletext --- src/lib_ccx/telxcc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib_ccx/telxcc.c b/src/lib_ccx/telxcc.c index 30ae7b918..c5495a24c 100644 --- a/src/lib_ccx/telxcc.c +++ b/src/lib_ccx/telxcc.c @@ -239,10 +239,10 @@ static uint64_t prev_show_timestamp; char cap_telx(char *a) { static int flag=1; - if(*a == '\r') - {flag = 1; } + if(*a == '\r' || *a == '?' || *a=='.' || *a == '!' || *a == ':') + {flag = 1; return *a; } - if(flag && (*a != '\r')) + if(flag && (*a != '\r' || *a != '?' || *a != '.' || *a != '!' || *a != ':')) { flag = 0; return cctoupper(*a); @@ -263,7 +263,7 @@ void page_buffer_add_string (const char *s) fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to process teletext page.\n"); } - if(strlen(s) <= 2) + if(strlen(s) <= 2 && ( (*s >= 'A' && *s <= 'Z') || (*s >= 'a' && *s <= 'z')) || (*s == '\r') ) {char t = cap_telx(s); s = &t; memcpy (page_buffer_cur+page_buffer_cur_used, s,1); From 0d39b789f23fe5c4841a19fa1ad390fe33e8d178 Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Wed, 3 Jun 2015 19:30:43 +0530 Subject: [PATCH 3/4] Capitalization after punctuation --- src/lib_ccx/telxcc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib_ccx/telxcc.c b/src/lib_ccx/telxcc.c index c5495a24c..482efe928 100644 --- a/src/lib_ccx/telxcc.c +++ b/src/lib_ccx/telxcc.c @@ -236,13 +236,13 @@ static unsigned ucs2_buffer_prev_used=0; static uint64_t prev_hide_timestamp; static uint64_t prev_show_timestamp; -char cap_telx(char *a) +char cap_telx(const char *a) { static int flag=1; - if(*a == '\r' || *a == '?' || *a=='.' || *a == '!' || *a == ':') + if(*a == '?' || *a=='.' || *a == '!' || *a == ':') {flag = 1; return *a; } - if(flag && (*a != '\r' || *a != '?' || *a != '.' || *a != '!' || *a != ':')) + if(flag && (*a != '?' || *a != '.' || *a != '!' || *a != ':')) { flag = 0; return cctoupper(*a); @@ -263,7 +263,7 @@ void page_buffer_add_string (const char *s) fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to process teletext page.\n"); } - if(strlen(s) <= 2 && ( (*s >= 'A' && *s <= 'Z') || (*s >= 'a' && *s <= 'z')) || (*s == '\r') ) + if(strlen(s) <= 2 && ( (*s >= 'A' && *s <= 'Z') || (*s >= 'a' && *s <= 'z')) || (*s == '.') ) {char t = cap_telx(s); s = &t; memcpy (page_buffer_cur+page_buffer_cur_used, s,1); From 27d2f0fac8b49e962d4a69043ce9e2d321aadfd5 Mon Sep 17 00:00:00 2001 From: Umang Jain Date: Sat, 6 Jun 2015 21:55:10 +0530 Subject: [PATCH 4/4] Added -sc parameter --- src/lib_ccx/lib_ccx.h | 1 + src/lib_ccx/params.c | 2 ++ src/lib_ccx/telxcc.c | 15 ++++++++------- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/lib_ccx/lib_ccx.h b/src/lib_ccx/lib_ccx.h index 6db19bb2b..e89117976 100644 --- a/src/lib_ccx/lib_ccx.h +++ b/src/lib_ccx/lib_ccx.h @@ -126,6 +126,7 @@ struct ccx_s_teletext_config { // uint8_t se_mode : 1; // search engine compatible mode => Uses CCExtractor's write_format // uint64_t utc_refvalue; // UTC referential value => Moved to ccx_decoders_common, so can be used for other decoders (608/xds) too uint16_t user_page; // Page selected by user, which MIGHT be different to 'page' depending on autodetection stuff + int sentence_cap; }; #define MAX_PID 65536 struct lib_ccx_ctx diff --git a/src/lib_ccx/params.c b/src/lib_ccx/params.c index 7abc59890..5d29eafc5 100644 --- a/src/lib_ccx/params.c +++ b/src/lib_ccx/params.c @@ -1100,6 +1100,8 @@ void parse_parameters (struct ccx_s_options *opt, int argc, char *argv[]) strcmp (argv[i],"-sc")==0) { ccx_options.sentence_cap=1; + // Teletext needs to be set as well + tlt_config.sentence_cap = 1; continue; } if ((strcmp (argv[i],"--capfile")==0 || diff --git a/src/lib_ccx/telxcc.c b/src/lib_ccx/telxcc.c index 482efe928..ef6b73727 100644 --- a/src/lib_ccx/telxcc.c +++ b/src/lib_ccx/telxcc.c @@ -263,7 +263,7 @@ void page_buffer_add_string (const char *s) fatal (EXIT_NOT_ENOUGH_MEMORY, "Not enough memory to process teletext page.\n"); } - if(strlen(s) <= 2 && ( (*s >= 'A' && *s <= 'Z') || (*s >= 'a' && *s <= 'z')) || (*s == '.') ) + if(strlen(s) <= 2 && ((*s >= 'A' && *s <= 'Z') || (*s >= 'a' && *s <= 'z')) && tlt_config.sentence_cap || (*s == '.')) {char t = cap_telx(s); s = &t; memcpy (page_buffer_cur+page_buffer_cur_used, s,1); @@ -619,8 +619,9 @@ void process_page(struct lib_ccx_ctx *ctx, teletext_page_t *page) { if (v >= 0x20) { - + if(u[0] == ' '){ //just to detect presence of " I " + if(i_flag == 0){ i_flag = 1; continue; @@ -642,9 +643,9 @@ void process_page(struct lib_ccx_ctx *ctx, teletext_page_t *page) { } if(i_flag == 2 && u[0]!=' '){ //" I" detected earlier but next char i.e. u, - char temp_1[2] = {'I','\0'}; // contains a char, therefore "I" is a part of word. + char temp_1[2] = {'I','\0'}; // contains a char, therefore "I" is a part of word. char temp_2[2] = {' ','\0'}; - page_buffer_add_string(temp_2); //recover string that was skipped order to detect "I" + page_buffer_add_string(temp_2); //recover string that was skipped order to detect "I" page_buffer_add_string(temp_1); page_buffer_add_string(u); //after adding skipped string, add current char now i_flag=0; @@ -658,10 +659,10 @@ void process_page(struct lib_ccx_ctx *ctx, teletext_page_t *page) { } - page_buffer_add_string (u); //u contains non-"I" character here.add that. + page_buffer_add_string (u); //u contains non-"I" character here.add that. i_flag=0; - - if (ccx_options.gui_mode_reports) // For now we just handle the easy stuff + + if (ccx_options.gui_mode_reports) // For now we just handle the easy stuff fprintf (stderr,"%s",u); } }