Skip to content

Commit

Permalink
snapshot of project "lynx", label v2-9-0dev_6k
Browse files Browse the repository at this point in the history
  • Loading branch information
ThomasDickey committed Jun 30, 2021
1 parent 4c627e4 commit 5111b53
Show file tree
Hide file tree
Showing 20 changed files with 4,343 additions and 4,153 deletions.
7 changes: 5 additions & 2 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
-- $LynxId: CHANGES,v 1.1073 2021/06/10 00:37:08 tom Exp $
-- $LynxId: CHANGES,v 1.1076 2021/06/30 20:28:04 tom Exp $
===============================================================================
Changes since Lynx 2.8 release
===============================================================================

2021-06-09 (2.9.0dev.7)
2021-06-30 (2.9.0dev.7)
* add chinese-utf8 optional feature for gb2312 changes -TD
* enable cjk and japanese-utf8 features by default, renaming the symbol
EXP_JAPANESEUTF8_SUPPORT -> USE_JAPANESEUTF8_SUPPORT -TD
* fix warnings from scan-build -TD
* update configure script to work with _Noreturn changes in ncurses 20210320
development snapshot -TD
Expand Down
16 changes: 9 additions & 7 deletions INSTALLATION
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,9 @@ II. Compile instructions -- UNIX
--disable-bibp-urls (define DISABLE_BIBP)
Disable (do not compile code) support for bibp: URLs.

--disable-cjk (define CJK_EX)
Disable logic for supporting CJK documents.

--disable-color-style (define USE_COLOR_STYLE)
Use this option to disable optional color style. This is implemented
for modern curses implementations, e.g., those that support color.
Expand Down Expand Up @@ -320,6 +323,9 @@ II. Compile instructions -- UNIX
requested, the configure script will otherwise use the messages in the
./po subdirectory.

--disable-japanese-utf8 (define USE_JAPANESEUTF8_SUPPORT)
Do not use Japanese UTF-8 logic.

--disable-justify-elts (define USE_JUSTIFY_ELTS)
Do not use element-justification logic.

Expand Down Expand Up @@ -420,9 +426,8 @@ II. Compile instructions -- UNIX
lynx.cfg, allowing user to configure a subset of the compiled-in
charsets for normal use.

--enable-cjk (define CJK_EX)
Add experimental logic for supporting CJK documents. (This is not
necessary for CJK support and may go away in a future release.)
--enable-chinese-utf8 (define EXP_CHINESEUTF8_SUPPORT)
use Chinese UTF-8 logic.

--enable-debug (The symbol DEBUG is always defined.)
Use this option to compile-in support for debugging.
Expand Down Expand Up @@ -483,9 +488,6 @@ II. Compile instructions -- UNIX
--enable-ipv6 (define ENABLE_IPV6)
use IPV6 (with IPV4) logic.

--enable-japanese-utf8 (define EXP_JAPANESEUTF8_SUPPORT)
use experimental Japanese UTF-8 logic.

--enable-kbd-layout (define EXP_KEYBOARD_LAYOUT)
Disabled by default, this option allows you to use translation
tables on the input keystrokes. Current tables include
Expand Down Expand Up @@ -1491,4 +1493,4 @@ VIII. Acknowledgment

-- 1999/04/24 - H. Nelson <lynx-admin@irm.nara.kindai.ac.jp>
-- vile:txtmode
-- $LynxId: INSTALLATION,v 1.132 2021/03/23 00:37:21 tom Exp $
-- $LynxId: INSTALLATION,v 1.134 2021/06/29 22:59:06 tom Exp $
1 change: 1 addition & 0 deletions PACKAGE/debian/rules
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ configure-stamp:
--enable-gzip-help \
--enable-htmlized-cfg \
--enable-ipv6 \
--enable-chinese-utf8 \
--enable-japanese-utf8 \
--enable-justify-elts \
--enable-local-docs \
Expand Down
3 changes: 2 additions & 1 deletion PACKAGE/lynx.spec
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# $LynxId: lynx.spec,v 1.59 2020/11/26 19:25:38 tom Exp $
# $LynxId: lynx.spec,v 1.60 2021/06/30 21:01:24 tom Exp $
Summary: A text-based Web browser
Name: lynx-dev
Version: 2.9.0
Expand Down Expand Up @@ -69,6 +69,7 @@ HTTP, FTP, WAIS, and NNTP servers.
--enable-htmlized-cfg \
--enable-internal-links \
--enable-ipv6 \
--enable-chinese-utf8 \
--enable-japanese-utf8 \
--enable-justify-elts \
--enable-kbd-layout \
Expand Down
34 changes: 21 additions & 13 deletions WWW/Library/Implementation/HTCJK.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* $LynxId: HTCJK.h,v 1.19 2012/08/15 23:11:03 tom Exp $
* $LynxId: HTCJK.h,v 1.21 2021/06/30 17:16:36 tom Exp $
*
* CJK character converter HTCJK.h
* =======================
Expand Down Expand Up @@ -32,21 +32,29 @@ extern "C" {
#define TO_HANJI "\033$A"
#define TO_HANGUL "\033$(C"
#define TO_ASCII "\033(B"
#define IS_SJIS_LO(lo) ((0x40<=lo)&&(lo!=0x7F)&&(lo<=0xFC))

#define IS_GBK_LO(lo) ((0xA1 <= (lo)) && ((lo) <= 0xFE))
#define IS_GBK_HI(hi) ((0xA1 <= (hi)) && ((hi) <= 0xF7))

#define IS_SJIS_LO(lo) ((0x40 <= (lo)) && ((lo) != 0x7F) && ((lo) <= 0xFC))
#define IS_SJIS_HI1(hi) ((0x81 <= (hi)) && ((hi) <= 0x9F)) /* 1st lev. */
#define IS_SJIS_HI2(hi) ((0xE0 <= (hi)) && ((hi) <= 0xEF)) /* 2nd lev. */
#define IS_SJIS(hi,lo,in_sjis) (!IS_SJIS_LO(lo)?0:IS_SJIS_HI1(hi)?(in_sjis=1):in_sjis&&IS_SJIS_HI2(hi))
#define IS_SJIS_2BYTE(hi,lo) (IS_SJIS_LO(lo)&&(IS_SJIS_HI1(hi)||IS_SJIS_HI2(hi)))
#define IS_SJIS_X0201KANA(lo) ((0xA1<=lo)&&(lo<=0xDF))
#define IS_EUC_LOX(lo) ((0xA1<=lo)&&(lo<=0xFE)) /* extended */
#define IS_EUC_HI(hi) ((0xA1<=hi)&&(hi<=0xFE))
#define IS_EUC_X0201KANA(hi,lo) ((hi==0x8E)&&(0xA1<=lo)&&(lo<=0xDF))
#define IS_EUC(hi,lo) ((IS_EUC_HI(hi) && IS_EUC_LOX(lo))||IS_EUC_X0201KANA(hi,lo))
#define IS_SJIS(hi,lo,in_sjis) (!IS_SJIS_LO(lo) ? 0 : IS_SJIS_HI1(hi) ? (in_sjis=1) : in_sjis && IS_SJIS_HI2(hi))
#define IS_SJIS_2BYTE(hi,lo) (IS_SJIS_LO(lo) && (IS_SJIS_HI1(hi) || IS_SJIS_HI2(hi)))
#define IS_SJIS_X0201KANA(lo) ((0xA1 <= (lo)) && ((lo) <= 0xDF))

#define IS_EUC_LOX(lo) ((0xA1 <= (lo)) && ((lo) <= 0xFE)) /* extended */
#define IS_EUC_HI(hi) ((0xA1 <= (hi)) && ((hi) <= 0xFE))
#define IS_EUC_X0201KANA(hi,lo) (((hi) == 0x8E) && (0xA1 <= (lo)) && ((lo) <= 0xDF))
#define IS_EUC(hi,lo) ((IS_EUC_HI(hi) && IS_EUC_LOX(lo)) || IS_EUC_X0201KANA(hi,lo))

#define IS_JAPANESE_2BYTE(hi,lo) (IS_SJIS_2BYTE(hi,lo) || IS_EUC(hi,lo))
#define IS_BIG5_LOS(lo) ((0x40<=lo)&&(lo<=0x7E)) /* standard */
#define IS_BIG5_LOX(lo) ((0xA1<=lo)&&(lo<=0xFE)) /* extended */
#define IS_BIG5_HI(hi) ((0xA1<=hi)&&(hi<=0xFE))
#define IS_BIG5(hi,lo) (IS_BIG5_HI(hi) && (IS_BIG5_LOS(lo) || IS_BIG5_LOX(lo)))

#define IS_BIG5_LOS(lo) ((0x40 <= (lo)) && ((lo) <= 0x7E)) /* standard */
#define IS_BIG5_LOX(lo) ((0xA1 <= (lo)) && ((lo) <= 0xFE)) /* extended */
#define IS_BIG5_HI(hi) ((0xA1 <= (hi)) && ((hi) <= 0xFE))
#define IS_BIG5(hi,lo) (IS_BIG5_HI(hi) && (IS_BIG5_LOS(lo) || IS_BIG5_LOX(lo)))

typedef enum {
NOKANJI = 0, EUC, SJIS, JIS
} HTkcode;
Expand Down
4 changes: 2 additions & 2 deletions WWW/Library/Implementation/HTMIME.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* $LynxId: HTMIME.c,v 1.100 2018/03/11 21:32:38 tom Exp $
* $LynxId: HTMIME.c,v 1.101 2021/06/29 22:01:12 tom Exp $
*
* MIME Message Parse HTMIME.c
* ==================
Expand Down Expand Up @@ -389,7 +389,7 @@ static int pumpData(HTStream *me)
UCT_SETBY_DEFAULT);
}
if ((p_in->enc != UCT_ENC_CJK)
#ifdef EXP_JAPANESEUTF8_SUPPORT
#ifdef USE_JAPANESEUTF8_SUPPORT
&& ((p_in->enc != UCT_ENC_UTF8)
|| (p_out->enc != UCT_ENC_CJK))
#endif
Expand Down
4 changes: 2 additions & 2 deletions WWW/Library/Implementation/HTUtils.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* $LynxId: HTUtils.h,v 1.133 2021/06/09 22:17:19 tom Exp $
* $LynxId: HTUtils.h,v 1.134 2021/06/29 22:01:12 tom Exp $
*
* Utility macros for the W3 code library
* MACROS FOR GENERAL USE
Expand Down Expand Up @@ -145,7 +145,7 @@ char *alloca();
#endif

#ifndef HAVE_ICONV
#undef EXP_JAPANESEUTF8_SUPPORT
#undef USE_JAPANESEUTF8_SUPPORT
#endif

#ifndef lynx_srand
Expand Down
104 changes: 80 additions & 24 deletions WWW/Library/Implementation/SGML.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* $LynxId: SGML.c,v 1.169 2020/01/21 22:06:39 tom Exp $
* $LynxId: SGML.c,v 1.172 2021/06/30 20:25:01 tom Exp $
*
* General SGML Parser code SGML.c
* ========================
Expand Down Expand Up @@ -38,6 +38,12 @@
# include <LYPrettySrc.h>
#endif

/* a global variable doesn't work with info-stages which convert encoding */
#if defined(EXP_CHINESEUTF8_SUPPORT)
#undef IS_CJK_TTY
#define IS_CJK_TTY me->T.do_cjk
#endif

#define AssumeCP1252(me) \
(((me)->inUCLYhndl == LATIN1 \
|| (me)->inUCLYhndl == US_ASCII) \
Expand Down Expand Up @@ -93,7 +99,7 @@ static void fake_put_character(HTStream *p GCC_UNUSED,
/*the following macros are used for pretty source view. */
#define IS_C(attr) (attr.type == HTMLA_CLASS)

#if defined(EXP_JAPANESEUTF8_SUPPORT)
#if defined(USE_JAPANESEUTF8_SUPPORT)
# define UTF8_TTY_ISO2022JP (me->T.output_utf8)
#else
# define UTF8_TTY_ISO2022JP 0
Expand Down Expand Up @@ -402,7 +408,7 @@ static void set_chartrans_handling(HTStream *me,
* would be better to call a Lynx_HTML_parser function to set an element in
* its HTStructured object, itself, if this were needed. - FM
*/
#ifndef EXP_JAPANESEUTF8_SUPPORT
#ifndef USE_JAPANESEUTF8_SUPPORT
if (IS_CJK_TTY) {
me->current_tag_charset = -1;
} else
Expand Down Expand Up @@ -1640,6 +1646,12 @@ static void SGML_character(HTStream *me, int c_in)
c = UCH(c_in);
clong = UCH(c);

#if 0
CTRACE((tfp, "%s:%d PUTC %02x %c\n",
LYCharSet_UC[me->inUCLYhndl].MIMEname, me->T.do_cjk, c, (c > 32 &&
c < 127)
? c : '#'));
#endif
if (me->T.decode_utf8) {
switch (HTDecodeUTF8(&(me->U), &c_in, &clong)) {
case dUTF8_ok:
Expand All @@ -1665,7 +1677,7 @@ static void SGML_character(HTStream *me, int c_in)
/*
* If we want the raw input converted to Unicode, try that now. - FM
*/
#ifdef EXP_JAPANESEUTF8_SUPPORT
#ifdef USE_JAPANESEUTF8_SUPPORT
/* Convert ISO-2022-JP to Unicode (charset=iso-2022-jp is unrecognized) */
#define IS_JIS7_HILO(c) (0x20<(c)&&(c)<0x7F)
if (UTF8_TTY_ISO2022JP && (me->state == S_nonascii_text
Expand Down Expand Up @@ -1698,18 +1710,18 @@ static void SGML_character(HTStream *me, int c_in)
}
goto top1;
}
#endif /* EXP_JAPANESEUTF8_SUPPORT */
#endif /* USE_JAPANESEUTF8_SUPPORT */
#ifdef USE_JAPANESEUTF8_SUPPORT
if (me->T.trans_to_uni &&
#ifdef EXP_JAPANESEUTF8_SUPPORT
((strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-jp") == 0) ||
(strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "shift_jis") == 0))) {
if (strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "shift_jis") == 0) {
if (me->U.utf_count == 0) {
if (IS_SJIS_HI1((unsigned char) c) ||
IS_SJIS_HI2((unsigned char) c)) {
if (IS_SJIS_HI1(c) ||
IS_SJIS_HI2(c)) {
me->U.utf_buf[0] = (char) c;
me->U.utf_count = 1;
clong = -11;
clong = ucCannotConvert;
} else if (IS_SJIS_X0201KANA(c)) {
if (conv_jisx0201kana) {
JISx0201TO0208_SJIS(c,
Expand All @@ -1721,34 +1733,67 @@ static void SGML_character(HTStream *me, int c_in)
}
}
} else {
if (IS_SJIS_LO((unsigned char) c)) {
if (IS_SJIS_LO(c)) {
me->U.utf_buf[1] = (char) c;
clong = UCTransJPToUni(me->U.utf_buf, 2, me->inUCLYhndl);
}
me->U.utf_count = 0;
}
} else {
if (me->U.utf_count == 0) {
if (IS_EUC_HI((unsigned char) c) || c == 0x8E) {
if (IS_EUC_HI(c) || c == 0x8E) {
me->U.utf_buf[0] = (char) c;
me->U.utf_count = 1;
clong = -11;
clong = ucCannotConvert;
}
} else {
if (IS_EUC_LOX((unsigned char) c)) {
if (IS_EUC_LOX(c)) {
me->U.utf_buf[1] = (char) c;
clong = UCTransJPToUni(me->U.utf_buf, 2, me->inUCLYhndl);
}
me->U.utf_count = 0;
}
}
goto top1;
} else if (me->T.trans_to_uni &&
#endif /* EXP_JAPANESEUTF8_SUPPORT */
} else
#endif /* USE_JAPANESEUTF8_SUPPORT */
#ifdef EXP_CHINESEUTF8_SUPPORT
if (me->T.trans_to_uni &&
((strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-cn") == 0))) {
if (me->U.utf_count == 0) {
if (IS_GBK_HI(c) ||
IS_GBK_HI(c)) {
me->U.utf_buf[0] = (char) c;
me->U.utf_count = 1;
clong = ucCannotConvert;
CTRACE((tfp, "Get EUC-CN: 0x%02X\n", c & 0xff));
}
} else {
if (IS_GBK_LO(c)) {
me->U.utf_buf[1] = (char) c;
clong = UCTransJPToUni(me->U.utf_buf, 2, me->inUCLYhndl);
if (clong > 0) {
CTRACE((tfp, "... second: [%02X%02X] U+%04lX\n",
me->U.utf_buf[0] & 0xff,
me->U.utf_buf[1] & 0xff,
clong));
} else {
CTRACE((tfp, "... second: [%02X%02X] %ld\n",
me->U.utf_buf[0] & 0xff,
me->U.utf_buf[1] & 0xff,
clong));
}
}
me->U.utf_count = 0;
}
goto top1;
} else
#endif /* EXP_CHINESEUTF8_SUPPORT */
if (me->T.trans_to_uni &&
/* S/390 -- gil -- 0744 */
((TOASCII(clong) >= LYlowest_eightbit[me->inUCLYhndl]) ||
(clong < ' ' && clong != 0 &&
me->T.trans_C0_to_uni))) {
((TOASCII(clong) >= LYlowest_eightbit[me->inUCLYhndl]) ||
(clong < ' ' && clong != 0 &&
me->T.trans_C0_to_uni))) {
/*
* Convert the octet to Unicode. - FM
*/
Expand Down Expand Up @@ -1890,7 +1935,7 @@ static void SGML_character(HTStream *me, int c_in)
*/
if ((HTCJK == JAPANESE) && (me->state == S_in_kanji) &&
!IS_JAPANESE_2BYTE(me->kanji_buf, UCH(c))
#ifdef EXP_JAPANESEUTF8_SUPPORT
#ifdef USE_JAPANESEUTF8_SUPPORT
&& !me->T.decode_utf8
#endif
) {
Expand Down Expand Up @@ -1944,9 +1989,22 @@ static void SGML_character(HTStream *me, int c_in)
}
/* FALLTHRU */
case S_text:
if (IS_CJK_TTY && ((TOASCII(c) & 0200) != 0)
#ifdef EXP_JAPANESEUTF8_SUPPORT
&& !me->T.decode_utf8
#ifdef EXP_CHINESEUTF8_SUPPORT
if (IS_CJK_TTY &&
!strcmp(LYCharSet_UC[me->inUCLYhndl].MIMEname, "euc-cn")) {
/*
* Leave the case statement if we have not collected both of the
* bytes for the EUC-CN character. If we have, then continue on
* to convert it to Unicode.
*/
if (clong == ucCannotConvert) {
break;
}
} else
#endif
if (IS_CJK_TTY && ((TOASCII(c) & 0200) != 0)
#ifdef USE_JAPANESEUTF8_SUPPORT
&& !me->T.decode_utf8
#endif
) { /* S/390 -- gil -- 0864 */
/*
Expand Down Expand Up @@ -2461,8 +2519,6 @@ static void SGML_character(HTStream *me, int c_in)
#ifdef USE_PRETTYSRC
entity_string = string->data;
#endif
/* S/390 -- gil -- 1039 */
/* CTRACE((tfp, "%s: %d: %s\n", __FILE__, __LINE__, string->data)); */
if (!strcmp(string->data, "zwnj") &&
(!me->element_stack ||
(me->element_stack->tag &&
Expand Down
4 changes: 2 additions & 2 deletions WWW/Library/Implementation/UCDefs.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* $LynxId: UCDefs.h,v 1.17 2009/03/10 20:02:44 tom Exp $
* $LynxId: UCDefs.h,v 1.18 2021/06/29 00:21:51 tom Exp $
*
* Definitions for Unicode character-translations
*/
Expand Down Expand Up @@ -46,7 +46,7 @@ typedef enum {
#define UCT_REP_SUPERSETOF_LAT1 0x02
#define UCT_REP_IS_LAT1 UCT_REP_SUBSETOF_LAT1 | UCT_REP_SUPERSETOF_LAT1
/*
* Assume everything we deal with is included in the UCS2 reperoire,
* Assume everything we deal with is included in the UCS2 repertoire,
* so a flag for _REP_SUBSETOF_UCS2 would be redundant.
*/

Expand Down
Loading

0 comments on commit 5111b53

Please sign in to comment.