diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ANTLRParser.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ANTLRParser.scala index 9bafa234f..5f6b832a9 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ANTLRParser.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ANTLRParser.scala @@ -29,13 +29,14 @@ import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmi import java.nio.charset.Charset -class ThrowErrorStrategy() extends DefaultErrorStrategy { +class ThrowErrorStrategy(posAdjustment: Int) extends DefaultErrorStrategy { override def recover(recognizer: Parser, e: RecognitionException): Unit = { throw new SyntaxErrorException( e.getOffendingToken.getLine, - "", + Option(e.getOffendingToken.getCharPositionInLine + posAdjustment), + None, "Invalid input " + getTokenErrorDisplay(e.getOffendingToken) + " at position " + e.getOffendingToken.getLine - + ":" + (e.getOffendingToken.getCharPositionInLine + 6) + + ":" + (e.getOffendingToken.getCharPositionInLine + posAdjustment) ) } @@ -65,8 +66,9 @@ object ANTLRParser extends Logging { isUtf16BigEndian: Boolean, floatingPointFormat: FloatingPointFormat, fieldCodePageMap: Map[String, String]): CopybookAST = { - val visitor = new ParserVisitor(enc, stringTrimmingPolicy, isDisplayAlwaysString, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, strictSignOverpunch, improvedNullDetection, strictIntegralPrecision, decodeBinaryAsHex, fieldCodePageMap) + val visitor = new ParserVisitor(enc, stringTrimmingPolicy, commentPolicy, isDisplayAlwaysString, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, strictSignOverpunch, improvedNullDetection, strictIntegralPrecision, decodeBinaryAsHex, fieldCodePageMap) + val adjPos = if (commentPolicy.truncateComments) commentPolicy.commentsUpToChar else 0 val strippedContents = filterSpecialCharacters(copyBookContents).split("\\r?\\n").map( line => truncateComments(line, commentPolicy) @@ -81,7 +83,7 @@ object ANTLRParser extends Logging { val parser = new copybookParser(tokens) parser.removeErrorListeners() parser.addErrorListener(new LogErrorListener(logger)) - parser.setErrorHandler(new ThrowErrorStrategy()) + parser.setErrorHandler(new ThrowErrorStrategy(adjPos)) visitor.visitMain(parser.main()) visitor.ast diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala index c1f615246..063c7efe3 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/antlr/ParserVisitor.scala @@ -28,6 +28,7 @@ import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPoint import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage import za.co.absa.cobrix.cobol.parser.encoding._ import za.co.absa.cobrix.cobol.parser.exceptions.SyntaxErrorException +import za.co.absa.cobrix.cobol.parser.policies.CommentPolicy import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy import za.co.absa.cobrix.cobol.parser.position.{Left, Position, Right} @@ -41,6 +42,7 @@ sealed trait Expr class ParserVisitor(enc: Encoding, stringTrimmingPolicy: StringTrimmingPolicy, + commentPolicy: CommentPolicy, isDisplayAlwaysString: Boolean, ebcdicCodePage: CodePage, asciiCharset: Charset, @@ -158,11 +160,11 @@ class ParserVisitor(enc: Encoding, pic.value match { case dec: Decimal => if (dec.compact.isDefined && !dec.compact.contains(usageVal)) - throw new SyntaxErrorException(ctx.start.getLine, "", s"Field USAGE (${dec.compact.get}) doesn't match group's USAGE ($usageVal).") + throw new SyntaxErrorException(ctx.start.getLine, Option(ctx.start.getCharPositionInLine), None, s"Field USAGE (${dec.compact.get}) doesn't match group's USAGE ($usageVal).") dec.copy(compact=usage) case int: Integral => if (int.compact.isDefined && !int.compact.contains(usageVal)) - throw new SyntaxErrorException(ctx.start.getLine, "", s"Field USAGE (${int.compact.get}) doesn't match group's USAGE ($usageVal).") + throw new SyntaxErrorException(ctx.start.getLine, Option(ctx.start.getCharPositionInLine), None, s"Field USAGE (${int.compact.get}) doesn't match group's USAGE ($usageVal).") int.copy(compact=usage) case x: AlphaNumeric if usageVal == COMP3U() => Integral(x.pic, x.length*2, None, false, None, Some(COMP3U()), None, x.originalPic) @@ -170,7 +172,7 @@ class ParserVisitor(enc: Encoding, val enc = if (decodeBinaryAsHex) HEX else RAW x.copy(compact=usage, enc=Some(enc)) case x: AlphaNumeric => - throw new SyntaxErrorException(ctx.start.getLine, "", s"Field USAGE $usageVal is not supported with this PIC: ${x.pic}. The field should be numeric.") + throw new SyntaxErrorException(ctx.start.getLine, Option(ctx.start.getCharPositionInLine), None, s"Field USAGE $usageVal is not supported with this PIC: ${x.pic}. The field should be numeric.") } ) } @@ -226,7 +228,7 @@ class ParserVisitor(enc: Encoding, case None => addLevel(section) case Some(s) if s > section => addLevel(section) case _ => - throw new SyntaxErrorException(levels.top.el.children.last.lineNumber, levels.top.el.children.last.name, + throw new SyntaxErrorException(levels.top.el.children.last.lineNumber, None, Option(levels.top.el.children.last.name), s"The field is a leaf element and cannot contain nested fields.") } @@ -556,35 +558,37 @@ class ParserVisitor(enc: Encoding, } def checkBounds(ctx: ParserRuleContext, expr: PicExpr): PicExpr = { + val adjustPos = if (commentPolicy.truncateComments) commentPolicy.commentsUpToChar + 1 else 1 + val pos = Option(ctx.stop.getCharPositionInLine + adjustPos) expr.value match { case x: Decimal => if (x.isSignSeparate && x.compact.isDefined) - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, None, Option(getIdentifier(ctx.parent)), s"SIGN SEPARATE clause is not supported for ${x.compact.get}. It is only supported for DISPLAY formatted fields.") if(x.scale > Constants.maxDecimalScale) - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)), s"Decimal numbers with scale bigger than ${Constants.maxDecimalScale} are not supported.") if(x.precision > Constants.maxDecimalPrecision) - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)), s"Decimal numbers with precision bigger than ${Constants.maxDecimalPrecision} are not supported.") if (x.compact.isDefined && x.explicitDecimal) - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)), s"Explicit decimal point in 'PIC ${expr.value.originalPic.get}' is not supported for ${x.compact.get}. It is only supported for DISPLAY formatted fields.") case x: Integral => if (x.isSignSeparate && x.compact.isDefined) { - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, None, Option(getIdentifier(ctx.parent)), s"SIGN SEPARATE clause is not supported for ${x.compact.get}. It is only supported for DISPLAY formatted fields.") } if (x.precision > Constants.maxBinIntPrecision && x.compact.contains(COMP4())) { - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)), s"BINARY-encoded integers with precision bigger than ${Constants.maxBinIntPrecision} are not supported.") } if (x.precision < 1 || x.precision >= Constants.maxFieldLength) - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)), s"Incorrect field size of ${x.precision} for PIC ${expr.value.originalPic.get}. Supported size is in range from 1 to ${Constants.maxFieldLength}.") case x: AlphaNumeric => if (x.length < 1 || x.length >= Constants.maxFieldLength) - throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent), + throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)), s"Incorrect field size of ${x.length} for PIC ${expr.value.originalPic.get}. Supported size is in range from 1 to ${Constants.maxFieldLength}.") } expr diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/BinaryPropertiesAdder.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/BinaryPropertiesAdder.scala index aa79dfcc8..a47f6d152 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/BinaryPropertiesAdder.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/asttransform/BinaryPropertiesAdder.scala @@ -53,10 +53,10 @@ class BinaryPropertiesAdder extends AstTransformer { redefinedNames.clear() case Some(redefines) => if (i == 0) { - throw new SyntaxErrorException(child.lineNumber, child.name, s"The first field of a group cannot use REDEFINES keyword.") + throw new SyntaxErrorException(child.lineNumber, None, Option(child.name), s"The first field of a group cannot use REDEFINES keyword.") } if (!redefinedNames.contains(redefines.toUpperCase)) { - throw new SyntaxErrorException(child.lineNumber, child.name, s"The field ${child.name} redefines $redefines, which is not part if the redefined fields block.") + throw new SyntaxErrorException(child.lineNumber, None, Option(child.name), s"The field ${child.name} redefines $redefines, which is not part of the redefined fields block.") } newChildren(i - 1) = newChildren(i - 1).withUpdatedIsRedefined(newIsRedefined = true) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037.scala index 7da09113c..99c363461 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page 37 contains all of the standard Latin-1 characters. * */ -class CodePage037 extends SingleByteCodePage(CodePage037.ebcdicToAsciiMapping) { +class CodePage037 extends SingleByteCodePage(CodePage037.ebcdicToAsciiMapping, CodePage037.asciiToEbcdicMapping) { override def codePageShortName: String = "cp037" } @@ -60,4 +60,6 @@ object CodePage037 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037Ext.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037Ext.scala index 79d690268..1d44b8b0c 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037Ext.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage037Ext.scala @@ -21,7 +21,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * * In addition to "cp037" code page it contains conversions for non-printable characters. */ -class CodePage037Ext extends SingleByteCodePage(CodePage037Ext.ebcdicToAsciiMapping) { +class CodePage037Ext extends SingleByteCodePage(CodePage037Ext.ebcdicToAsciiMapping, CodePage037Ext.asciiToEbcdicMapping) { override def codePageShortName: String = "cp037_extended" } @@ -53,4 +53,6 @@ object CodePage037Ext { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1025.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1025.scala index 5becb3ab7..b255c533f 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1025.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1025.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page with full Cyrillic-charset */ -class CodePage1025 extends SingleByteCodePage(CodePage1025.ebcdicToAsciiMapping) { +class CodePage1025 extends SingleByteCodePage(CodePage1025.ebcdicToAsciiMapping, CodePage1025.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1025" } @@ -56,4 +56,6 @@ object CodePage1025 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1047.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1047.scala index d118c7ccf..c9ec7d987 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1047.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1047.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page 1047 contains all of the Latin-1/Open System characters. * */ -class CodePage1047 extends SingleByteCodePage(CodePage1047.ebcdicToAsciiMapping) { +class CodePage1047 extends SingleByteCodePage(CodePage1047.ebcdicToAsciiMapping, CodePage1047.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1047" } @@ -55,4 +55,6 @@ object CodePage1047 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1140.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1140.scala index 5a09a0509..7ceb1974f 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1140.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1140.scala @@ -23,7 +23,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * It corresponds to code page 037 and only differs from it in position 9F, where the euro sign € is located instead * of the international currency symbol ¤. */ -class CodePage1140 extends SingleByteCodePage(CodePage1140.ebcdicToAsciiMapping) { +class CodePage1140 extends SingleByteCodePage(CodePage1140.ebcdicToAsciiMapping, CodePage1140.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1140" } @@ -55,4 +55,6 @@ object CodePage1140 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1141.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1141.scala index e58f5c03d..1f62489b5 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1141.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1141.scala @@ -23,7 +23,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * It corresponds to code page 273 and only differs from it in position 9F, where the euro sign € is located instead * of the international currency symbol ¤. */ -class CodePage1141 extends SingleByteCodePage(CodePage1141.ebcdicToAsciiMapping) { +class CodePage1141 extends SingleByteCodePage(CodePage1141.ebcdicToAsciiMapping, CodePage1141.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1141" } @@ -55,4 +55,6 @@ object CodePage1141 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1142.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1142.scala index f3c449771..09c69b2f8 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1142.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1142.scala @@ -22,7 +22,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * It corresponds to code page 277 and only differs from it in position 5A, where the euro sign € is located instead * of the international currency symbol ¤. */ -class CodePage1142 extends SingleByteCodePage(CodePage1142.ebcdicToAsciiMapping) { +class CodePage1142 extends SingleByteCodePage(CodePage1142.ebcdicToAsciiMapping, CodePage1142.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1142" } @@ -54,4 +54,6 @@ object CodePage1142 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1143.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1143.scala index 27b92ee29..c82fe04bc 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1143.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1143.scala @@ -22,7 +22,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * It corresponds to code page 278 and only differs from it in position 5A, where the euro sign € is located instead * of the international currency symbol ¤. */ -class CodePage1143 extends SingleByteCodePage(CodePage1143.ebcdicToAsciiMapping) { +class CodePage1143 extends SingleByteCodePage(CodePage1143.ebcdicToAsciiMapping, CodePage1143.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1143" } @@ -54,4 +54,6 @@ object CodePage1143 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1144.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1144.scala index 01e2ee83a..f84b2a36b 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1144.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1144.scala @@ -22,7 +22,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * It corresponds to code page 280 and only differs from it in position 9F, where the euro sign € is located instead * of the international currency symbol ¤. */ -class CodePage1144 extends SingleByteCodePage(CodePage1144.ebcdicToAsciiMapping, Some(CodePage1144.asciiToEbcdicMapping)) { +class CodePage1144 extends SingleByteCodePage(CodePage1144.ebcdicToAsciiMapping, CodePage1144.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1144" } @@ -53,268 +53,7 @@ object CodePage1144 { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '³', 'Û', 'Ü', 'Ù', 'Ú', spc) // 240 - 255 } ebcdic2ascii - } - /** - * To generate conversion mapping use the python script shared in the PR. - */ - val asciiToEbcdicMapping: Int => Byte = { - case 0 => 0x00.toByte - case 1 => 0x01.toByte - case 2 => 0x02.toByte - case 3 => 0x03.toByte - case 156 => 0x04.toByte - case 9 => 0x05.toByte - case 134 => 0x06.toByte - case 127 => 0x07.toByte - case 151 => 0x08.toByte - case 141 => 0x09.toByte - case 142 => 0x0a.toByte - case 11 => 0x0b.toByte - case 12 => 0x0c.toByte - case 13 => 0x0d.toByte - case 14 => 0x0e.toByte - case 15 => 0x0f.toByte - case 16 => 0x10.toByte - case 17 => 0x11.toByte - case 18 => 0x12.toByte - case 19 => 0x13.toByte - case 157 => 0x14.toByte - case 8 => 0x16.toByte - case 135 => 0x17.toByte - case 24 => 0x18.toByte - case 25 => 0x19.toByte - case 146 => 0x1a.toByte - case 143 => 0x1b.toByte - case 28 => 0x1c.toByte - case 29 => 0x1d.toByte - case 30 => 0x1e.toByte - case 31 => 0x1f.toByte - case 128 => 0x20.toByte - case 129 => 0x21.toByte - case 130 => 0x22.toByte - case 131 => 0x23.toByte - case 132 => 0x24.toByte - case 10 => 0x25.toByte //NL and LF EBCDIC representation map to LF in Unicode. Choosing to always map LF to LF - case 23 => 0x26.toByte - case 27 => 0x27.toByte - case 136 => 0x28.toByte - case 137 => 0x29.toByte - case 138 => 0x2a.toByte - case 139 => 0x2b.toByte - case 140 => 0x2c.toByte - case 5 => 0x2d.toByte - case 6 => 0x2e.toByte - case 7 => 0x2f.toByte - case 144 => 0x30.toByte - case 145 => 0x31.toByte - case 22 => 0x32.toByte - case 147 => 0x33.toByte - case 148 => 0x34.toByte - case 149 => 0x35.toByte - case 150 => 0x36.toByte - case 4 => 0x37.toByte - case 152 => 0x38.toByte - case 153 => 0x39.toByte - case 154 => 0x3a.toByte - case 155 => 0x3b.toByte - case 20 => 0x3c.toByte - case 21 => 0x3d.toByte - case 158 => 0x3e.toByte - case 26 => 0x3f.toByte - case 32 => 0x40.toByte - case 160 => 0x41.toByte - case 226 => 0x42.toByte - case 228 => 0x43.toByte - case 123 => 0x44.toByte - case 225 => 0x45.toByte - case 227 => 0x46.toByte - case 229 => 0x47.toByte - case 92 => 0x48.toByte - case 241 => 0x49.toByte - case 176 => 0x4a.toByte - case 46 => 0x4b.toByte - case 60 => 0x4c.toByte - case 40 => 0x4d.toByte - case 43 => 0x4e.toByte - case 33 => 0x4f.toByte - case 38 => 0x50.toByte - case 93 => 0x51.toByte - case 234 => 0x52.toByte - case 235 => 0x53.toByte - case 125 => 0x54.toByte - case 237 => 0x55.toByte - case 238 => 0x56.toByte - case 239 => 0x57.toByte - case 126 => 0x58.toByte - case 223 => 0x59.toByte - case 233 => 0x5a.toByte - case 36 => 0x5b.toByte - case 42 => 0x5c.toByte - case 41 => 0x5d.toByte - case 59 => 0x5e.toByte - case 94 => 0x5f.toByte - case 45 => 0x60.toByte - case 47 => 0x61.toByte - case 194 => 0x62.toByte - case 196 => 0x63.toByte - case 192 => 0x64.toByte - case 193 => 0x65.toByte - case 195 => 0x66.toByte - case 197 => 0x67.toByte - case 199 => 0x68.toByte - case 209 => 0x69.toByte - case 242 => 0x6a.toByte - case 44 => 0x6b.toByte - case 37 => 0x6c.toByte - case 95 => 0x6d.toByte - case 62 => 0x6e.toByte - case 63 => 0x6f.toByte - case 248 => 0x70.toByte - case 201 => 0x71.toByte - case 202 => 0x72.toByte - case 203 => 0x73.toByte - case 200 => 0x74.toByte - case 205 => 0x75.toByte - case 206 => 0x76.toByte - case 207 => 0x77.toByte - case 204 => 0x78.toByte - case 249 => 0x79.toByte - case 58 => 0x7a.toByte - case 163 => 0x7b.toByte - case 167 => 0x7c.toByte - case 39 => 0x7d.toByte - case 61 => 0x7e.toByte - case 34 => 0x7f.toByte - case 216 => 0x80.toByte - case 97 => 0x81.toByte - case 98 => 0x82.toByte - case 99 => 0x83.toByte - case 100 => 0x84.toByte - case 101 => 0x85.toByte - case 102 => 0x86.toByte - case 103 => 0x87.toByte - case 104 => 0x88.toByte - case 105 => 0x89.toByte - case 171 => 0x8a.toByte - case 187 => 0x8b.toByte - case 240 => 0x8c.toByte - case 253 => 0x8d.toByte - case 254 => 0x8e.toByte - case 177 => 0x8f.toByte - case 91 => 0x90.toByte - case 106 => 0x91.toByte - case 107 => 0x92.toByte - case 108 => 0x93.toByte - case 109 => 0x94.toByte - case 110 => 0x95.toByte - case 111 => 0x96.toByte - case 112 => 0x97.toByte - case 113 => 0x98.toByte - case 114 => 0x99.toByte - case 170 => 0x9a.toByte - case 186 => 0x9b.toByte - case 230 => 0x9c.toByte - case 184 => 0x9d.toByte - case 198 => 0x9e.toByte - case 8364 => 0x9f.toByte - case 181 => 0xa0.toByte - case 236 => 0xa1.toByte - case 115 => 0xa2.toByte - case 116 => 0xa3.toByte - case 117 => 0xa4.toByte - case 118 => 0xa5.toByte - case 119 => 0xa6.toByte - case 120 => 0xa7.toByte - case 121 => 0xa8.toByte - case 122 => 0xa9.toByte - case 161 => 0xaa.toByte - case 191 => 0xab.toByte - case 208 => 0xac.toByte - case 221 => 0xad.toByte - case 222 => 0xae.toByte - case 174 => 0xaf.toByte - case 162 => 0xb0.toByte - case 35 => 0xb1.toByte - case 165 => 0xb2.toByte - case 183 => 0xb3.toByte - case 169 => 0xb4.toByte - case 64 => 0xb5.toByte - case 182 => 0xb6.toByte - case 188 => 0xb7.toByte - case 189 => 0xb8.toByte - case 190 => 0xb9.toByte - case 172 => 0xba.toByte - case 124 => 0xbb.toByte - case 175 => 0xbc.toByte - case 168 => 0xbd.toByte - case 180 => 0xbe.toByte - case 215 => 0xbf.toByte - case 224 => 0xc0.toByte - case 65 => 0xc1.toByte - case 66 => 0xc2.toByte - case 67 => 0xc3.toByte - case 68 => 0xc4.toByte - case 69 => 0xc5.toByte - case 70 => 0xc6.toByte - case 71 => 0xc7.toByte - case 72 => 0xc8.toByte - case 73 => 0xc9.toByte - case 173 => 0xca.toByte - case 244 => 0xcb.toByte - case 246 => 0xcc.toByte - case 166 => 0xcd.toByte - case 243 => 0xce.toByte - case 245 => 0xcf.toByte - case 232 => 0xd0.toByte - case 74 => 0xd1.toByte - case 75 => 0xd2.toByte - case 76 => 0xd3.toByte - case 77 => 0xd4.toByte - case 78 => 0xd5.toByte - case 79 => 0xd6.toByte - case 80 => 0xd7.toByte - case 81 => 0xd8.toByte - case 82 => 0xd9.toByte - case 185 => 0xda.toByte - case 251 => 0xdb.toByte - case 252 => 0xdc.toByte - case 96 => 0xdd.toByte - case 250 => 0xde.toByte - case 255 => 0xdf.toByte - case 231 => 0xe0.toByte - case 247 => 0xe1.toByte - case 83 => 0xe2.toByte - case 84 => 0xe3.toByte - case 85 => 0xe4.toByte - case 86 => 0xe5.toByte - case 87 => 0xe6.toByte - case 88 => 0xe7.toByte - case 89 => 0xe8.toByte - case 90 => 0xe9.toByte - case 178 => 0xea.toByte - case 212 => 0xeb.toByte - case 214 => 0xec.toByte - case 210 => 0xed.toByte - case 211 => 0xee.toByte - case 213 => 0xef.toByte - case 48 => 0xf0.toByte - case 49 => 0xf1.toByte - case 50 => 0xf2.toByte - case 51 => 0xf3.toByte - case 52 => 0xf4.toByte - case 53 => 0xf5.toByte - case 54 => 0xf6.toByte - case 55 => 0xf7.toByte - case 56 => 0xf8.toByte - case 57 => 0xf9.toByte - case 179 => 0xfa.toByte - case 219 => 0xfb.toByte - case 220 => 0xfc.toByte - case 217 => 0xfd.toByte - case 218 => 0xfe.toByte - case 159 => 0xff.toByte - case _ => 0x40.toByte // defaults to space if mapping not available. - } + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1145.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1145.scala index 36568bc36..baf986902 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1145.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1145.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page 1145 is used to represent characters of Spain and Latin America * with € at the position of the international currency symbol ¤. */ -class CodePage1145 extends SingleByteCodePage(CodePage1145.ebcdicToAsciiMapping) { +class CodePage1145 extends SingleByteCodePage(CodePage1145.ebcdicToAsciiMapping, CodePage1145.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1145" } @@ -52,4 +52,6 @@ object CodePage1145 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1146.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1146.scala index 0dce6b10a..4d4a8e710 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1146.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1146.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page 1146 is used to represent characters of the United Kingdom * with € at the position of the international currency symbol ¤. */ -class CodePage1146 extends SingleByteCodePage(CodePage1146.ebcdicToAsciiMapping) { +class CodePage1146 extends SingleByteCodePage(CodePage1146.ebcdicToAsciiMapping, CodePage1146.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1146" } @@ -52,4 +52,6 @@ object CodePage1146 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1147.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1147.scala index 3e3d33c0c..848d19405 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1147.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1147.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page 1147 of France is the same as 297. * with € at the position of the international currency symbol ¤. */ -class CodePage1147 extends SingleByteCodePage(CodePage1147.ebcdicToAsciiMapping) { +class CodePage1147 extends SingleByteCodePage(CodePage1147.ebcdicToAsciiMapping, CodePage1147.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1147" } @@ -52,4 +52,6 @@ object CodePage1147 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1148.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1148.scala index f38295d3e..dc29b81b2 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1148.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1148.scala @@ -23,7 +23,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * It corresponds to code page 500 and only differs from it in position 9F, where the euro sign € is located instead * of the international currency symbol ¤. */ -class CodePage1148 extends SingleByteCodePage(CodePage1148.ebcdicToAsciiMapping) { +class CodePage1148 extends SingleByteCodePage(CodePage1148.ebcdicToAsciiMapping, CodePage1148.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1148" } @@ -55,4 +55,6 @@ object CodePage1148 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1160.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1160.scala index 19e38cd14..fddbc4d96 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1160.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage1160.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page 1160 with support for Thai script used in IBM mainframes which is same as 838 * with € at the position 0xFE. */ -class CodePage1160 extends SingleByteCodePage(CodePage1160.ebcdicToAsciiMapping) { +class CodePage1160 extends SingleByteCodePage(CodePage1160.ebcdicToAsciiMapping, CodePage1160.asciiToEbcdicMapping) { override def codePageShortName: String = "cp1160" } @@ -69,4 +69,6 @@ object CodePage1160 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage273.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage273.scala index 95b9e7eb9..a83cdcc95 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage273.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage273.scala @@ -21,7 +21,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * * Er wird zur Darstellung des Zeichensatzes der deutschen Sprache verwendet. */ -class CodePage273 extends SingleByteCodePage(CodePage273.ebcdicToAsciiMapping) { +class CodePage273 extends SingleByteCodePage(CodePage273.ebcdicToAsciiMapping, CodePage273.asciiToEbcdicMapping) { override def codePageShortName: String = "cp273" } @@ -53,4 +53,6 @@ object CodePage273 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage274.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage274.scala index 8cfc9706c..a24401ee0 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage274.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage274.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 274. Belgium. */ -class CodePage274 extends SingleByteCodePage(CodePage274.ebcdicToAsciiMapping) { +class CodePage274 extends SingleByteCodePage(CodePage274.ebcdicToAsciiMapping, CodePage274.asciiToEbcdicMapping) { override def codePageShortName: String = "cp274" } @@ -51,4 +51,6 @@ object CodePage274 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage275.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage275.scala index cf834fd81..60c3a0b55 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage275.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage275.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 277 is used to represent characters of Brazil. */ -class CodePage275 extends SingleByteCodePage(CodePage275.ebcdicToAsciiMapping) { +class CodePage275 extends SingleByteCodePage(CodePage275.ebcdicToAsciiMapping, CodePage275.asciiToEbcdicMapping) { override def codePageShortName: String = "cp275" } @@ -51,4 +51,6 @@ object CodePage275 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage277.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage277.scala index d8c3c0bbd..a7428399e 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage277.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage277.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 277 is used to represent characters of Denmark and Norway. */ -class CodePage277 extends SingleByteCodePage(CodePage277.ebcdicToAsciiMapping) { +class CodePage277 extends SingleByteCodePage(CodePage277.ebcdicToAsciiMapping, CodePage277.asciiToEbcdicMapping) { override def codePageShortName: String = "cp277" } @@ -51,4 +51,6 @@ object CodePage277 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage278.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage278.scala index f2700a64c..b411a76c0 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage278.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage278.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 278 is used to represent characters of Finland and Sweden. */ -class CodePage278 extends SingleByteCodePage(CodePage278.ebcdicToAsciiMapping) { +class CodePage278 extends SingleByteCodePage(CodePage278.ebcdicToAsciiMapping, CodePage278.asciiToEbcdicMapping) { override def codePageShortName: String = "cp278" } @@ -51,4 +51,6 @@ object CodePage278 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage280.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage280.scala index 1746297b6..9beb8365b 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage280.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage280.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 280 is used to represent characters of Italy. */ -class CodePage280 extends SingleByteCodePage(CodePage280.ebcdicToAsciiMapping) { +class CodePage280 extends SingleByteCodePage(CodePage280.ebcdicToAsciiMapping, CodePage280.asciiToEbcdicMapping) { override def codePageShortName: String = "cp280" } @@ -51,4 +51,6 @@ object CodePage280 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage284.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage284.scala index da040eacb..024d15719 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage284.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage284.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 284 is used to represent characters of Spain and Latin America. */ -class CodePage284 extends SingleByteCodePage(CodePage284.ebcdicToAsciiMapping) { +class CodePage284 extends SingleByteCodePage(CodePage284.ebcdicToAsciiMapping, CodePage284.asciiToEbcdicMapping) { override def codePageShortName: String = "cp284" } @@ -51,4 +51,6 @@ object CodePage284 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage285.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage285.scala index b7124a850..2b5da31ad 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage285.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage285.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 285 is used to represent characters of the United Kingdom. */ -class CodePage285 extends SingleByteCodePage(CodePage285.ebcdicToAsciiMapping) { +class CodePage285 extends SingleByteCodePage(CodePage285.ebcdicToAsciiMapping, CodePage285.asciiToEbcdicMapping) { override def codePageShortName: String = "cp285" } @@ -51,4 +51,6 @@ object CodePage285 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage297.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage297.scala index 10f0bfbb1..dece1bfe3 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage297.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage297.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 285 is used to represent characters of France. */ -class CodePage297 extends SingleByteCodePage(CodePage297.ebcdicToAsciiMapping) { +class CodePage297 extends SingleByteCodePage(CodePage297.ebcdicToAsciiMapping, CodePage297.asciiToEbcdicMapping) { override def codePageShortName: String = "cp297" } @@ -51,4 +51,6 @@ object CodePage297 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage500.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage500.scala index 5b6f94a8b..52edbc511 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage500.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage500.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page 500. Belgium, Canada, Switzerland, International. */ -class CodePage500 extends SingleByteCodePage(CodePage500.ebcdicToAsciiMapping) { +class CodePage500 extends SingleByteCodePage(CodePage500.ebcdicToAsciiMapping, CodePage500.asciiToEbcdicMapping) { override def codePageShortName: String = "cp500" } @@ -51,4 +51,6 @@ object CodePage500 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage838.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage838.scala index 9bfd5cfb3..c735d371a 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage838.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage838.scala @@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage /** * EBCDIC code page with support for Thai script used in IBM mainframes */ -class CodePage838 extends SingleByteCodePage(CodePage838.ebcdicToAsciiMapping) { +class CodePage838 extends SingleByteCodePage(CodePage838.ebcdicToAsciiMapping, CodePage838.asciiToEbcdicMapping) { override def codePageShortName: String = "cp838" } @@ -69,4 +69,6 @@ object CodePage838 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage870.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage870.scala index 920965fa7..ad8d6a432 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage870.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage870.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page with full Latin-2-charset used in IBM mainframes * in Albania, Bosnia and Herzegovina, Croatia, Czech Republic, Hungary, Poland, Romania, Slovakia, and Slovenia */ -class CodePage870 extends SingleByteCodePage(CodePage870.ebcdicToAsciiMapping) { +class CodePage870 extends SingleByteCodePage(CodePage870.ebcdicToAsciiMapping, CodePage870.asciiToEbcdicMapping) { override def codePageShortName: String = "cp870" } @@ -57,4 +57,6 @@ object CodePage870 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala index 916cf073b..15ae5fa98 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePage875.scala @@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * EBCDIC code page 875 contains all of the Greek characters. * */ -class CodePage875 extends SingleByteCodePage(CodePage875.ebcdicToAsciiMapping) { +class CodePage875 extends SingleByteCodePage(CodePage875.ebcdicToAsciiMapping, CodePage875.asciiToEbcdicMapping) { override def codePageShortName: String = "cp875" } @@ -55,4 +55,6 @@ object CodePage875 { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala index a8854bd8a..e5883ea38 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommon.scala @@ -21,7 +21,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * * It is an "invariant" subset of EBCDIC. Each converted symbol should be present in all EBCDIC pages. */ -class CodePageCommon extends SingleByteCodePage(CodePageCommon.ebcdicToAsciiMapping, Some(CodePageCommon.asciiToEbcdicMapping)) { +class CodePageCommon extends SingleByteCodePage(CodePageCommon.ebcdicToAsciiMapping, CodePageCommon.asciiToEbcdicMapping) { override def codePageShortName: String = "common" } @@ -56,28 +56,5 @@ object CodePageCommon { ebcdic2ascii } - private val data = Array[Byte]( - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x0D.toByte, 0x00.toByte, 0x00.toByte, 0x25.toByte, 0x00.toByte, 0x00.toByte, // 0 - 15 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 16 - 31 - 0x40.toByte, 0x5A.toByte, 0x7F.toByte, 0x7B.toByte, 0x5B.toByte, 0x6C.toByte, 0x50.toByte, 0x7D.toByte, 0x4D.toByte, 0x5D.toByte, 0x5C.toByte, 0x4E.toByte, 0x6B.toByte, 0x60.toByte, 0x4B.toByte, 0x61.toByte, // 32 - 47 - 0xF0.toByte, 0xF1.toByte, 0xF2.toByte, 0xF3.toByte, 0xF4.toByte, 0xF5.toByte, 0xF6.toByte, 0xF7.toByte, 0xF8.toByte, 0xF9.toByte, 0x7A.toByte, 0x5E.toByte, 0x4C.toByte, 0x7E.toByte, 0x6E.toByte, 0x6F.toByte, // 48 - 63 - 0x7C.toByte, 0xC1.toByte, 0xC2.toByte, 0xC3.toByte, 0xC4.toByte, 0xC5.toByte, 0xC6.toByte, 0xC7.toByte, 0xC8.toByte, 0xC9.toByte, 0xD1.toByte, 0xD2.toByte, 0xD3.toByte, 0xD4.toByte, 0xD5.toByte, 0xD6.toByte, // 64 - 79 - 0xD7.toByte, 0xD8.toByte, 0xD9.toByte, 0xE2.toByte, 0xE3.toByte, 0xE4.toByte, 0xE5.toByte, 0xE6.toByte, 0xE7.toByte, 0xE8.toByte, 0xE9.toByte, 0xBA.toByte, 0xE0.toByte, 0xBB.toByte, 0xB0.toByte, 0x6D.toByte, // 80 - 95 - 0x79.toByte, 0x81.toByte, 0x82.toByte, 0x83.toByte, 0x84.toByte, 0x85.toByte, 0x86.toByte, 0x87.toByte, 0x88.toByte, 0x89.toByte, 0x91.toByte, 0x92.toByte, 0x93.toByte, 0x94.toByte, 0x95.toByte, 0x96.toByte, // 96 - 111 - 0x97.toByte, 0x98.toByte, 0x99.toByte, 0xA2.toByte, 0xA3.toByte, 0xA4.toByte, 0xA5.toByte, 0xA6.toByte, 0xA7.toByte, 0xA8.toByte, 0xA9.toByte, 0xC0.toByte, 0x6A.toByte, 0xD0.toByte, 0xA1.toByte, 0x00.toByte, // 112 - 127 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 128 - 143 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 144 - 159 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 160 - 175 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 176 - 191 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 192 - 207 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 208 - 223 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, // 224 - 239 - 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte, 0x00.toByte // 240 - 255 - ) - /** - * This is the table for converting basic ASCII symbols to EBCDIC common code page - */ - def asciiToEbcdicMapping: Int => Byte = (y : Int) => { - data.applyOrElse(y, (x : Int) => 0x40.toByte) - } + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommonExt.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommonExt.scala index e8dba1192..18bc2345a 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommonExt.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/CodePageCommonExt.scala @@ -22,7 +22,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage * It is an "invariant" subset of EBCDIC. Each converted symbol should be present in all EBCDIC pages. * In addition to "common" code page it contains conversions for non-printable characters. */ -class CodePageCommonExt extends SingleByteCodePage(CodePageCommonExt.ebcdicToAsciiMapping) { +class CodePageCommonExt extends SingleByteCodePage(CodePageCommonExt.ebcdicToAsciiMapping, CodePageCommonExt.asciiToEbcdicMapping) { override def codePageShortName: String = "common_extended" } @@ -89,4 +89,6 @@ object CodePageCommonExt { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/SingleByteCodePage.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/SingleByteCodePage.scala index d25aa0ef6..97279051b 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/SingleByteCodePage.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/SingleByteCodePage.scala @@ -21,7 +21,7 @@ import java.util /** * The base class for all single-byte EBCDIC decoders. */ -abstract class SingleByteCodePage(ebcdicToAsciiMapping: Array[Char], asciiToEbcdicMapping: Option[Int => Byte]=None) +abstract class SingleByteCodePage(ebcdicToAsciiMapping: Array[Char], asciiToEbcdicMapping: Array[Byte]) extends CodePage { private val ConversionTableElements = 256 private val conversionTable = ebcdicToAsciiMapping @@ -34,7 +34,7 @@ abstract class SingleByteCodePage(ebcdicToAsciiMapping: Array[Char], asciiToEbcd /** * Decodes bytes encoded as single byte EBCDIC code page to string. */ - final def convert(bytes: Array[Byte]): String = { + final override def convert(bytes: Array[Byte]): String = { var i = 0 val buf = new StringBuffer(bytes.length) while (i < bytes.length) { @@ -51,24 +51,46 @@ abstract class SingleByteCodePage(ebcdicToAsciiMapping: Array[Char], asciiToEbcd * @param length The length of the output (in bytes) * @return A string representation of the binary data */ - def convert(string: String, length: Int): Array[Byte] = { + final override def convert(string: String, length: Int): Array[Byte] = { require(length >= 0, s"Field length cannot be negative, got $length") - require(asciiToEbcdicMapping.isDefined, s"Cannot encode strings for Code Page without ASCII to EBCDIC " + - s"mapping ${this.getClass.getSimpleName}") - var i = 0 val buf = new Array[Byte](length) // PIC X fields are space-filled on mainframe. Use EBCDIC space 0x40. util.Arrays.fill(buf, 0x40.toByte) - while (i < string.length && i < length) { - val unicodeCodePoint: Int = string.codePointAt(i) - buf(i) = asciiToEbcdicMapping.get(unicodeCodePoint) - i = i + 1 + val conversionTable = asciiToEbcdicMapping + val maxChar = conversionTable.length - 1 + + var inPos = 0 + var outPos = 0 + while (inPos < string.length && outPos < length) { + val unicodeCodePoint = string.codePointAt(inPos) + if (unicodeCodePoint <= maxChar) { + buf(outPos) = conversionTable(unicodeCodePoint) + } + outPos += 1 + inPos += Character.charCount(unicodeCodePoint) } buf } - override def supportsEncoding: Boolean = asciiToEbcdicMapping.isDefined + override def supportsEncoding: Boolean = true +} + +object SingleByteCodePage { + def getReverseTable(ebcdicToAsciiMapping: Array[Char]): Array[Byte] = { + val maxVal = ebcdicToAsciiMapping.map(_.toInt).max + + val reverseMap = new Array[Byte](maxVal + 1) + + util.Arrays.fill(reverseMap, 0x40.toByte) + + for (i <- ebcdicToAsciiMapping.indices) { + val asciiChar = ebcdicToAsciiMapping(i) + reverseMap(asciiChar.toInt) = i.toByte + } + reverseMap(32) = 0x40.toByte // space character + reverseMap + } } diff --git a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/exceptions/SyntaxErrorException.scala b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/exceptions/SyntaxErrorException.scala index 9e6a5f441..d176d4f5a 100644 --- a/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/exceptions/SyntaxErrorException.scala +++ b/cobol-parser/src/main/scala/za/co/absa/cobrix/cobol/parser/exceptions/SyntaxErrorException.scala @@ -16,15 +16,26 @@ package za.co.absa.cobrix.cobol.parser.exceptions -class SyntaxErrorException(val lineNumber: Int, val field: String, val msg: String) - extends Exception(SyntaxErrorException.constructErrorMessage(lineNumber, field, msg)) { +class SyntaxErrorException(val lineNumber: Int, val posOpt: Option[Int], val fieldOpt: Option[String], val msg: String) + extends Exception(SyntaxErrorException.constructErrorMessage(lineNumber, posOpt, fieldOpt, msg)) { } object SyntaxErrorException { - private def constructErrorMessage(lineNumber: Int, field: String, msg: String): String = { - val atLine = if (lineNumber > 0) s" at line $lineNumber" - val atField = if (field.nonEmpty) s", field $field" else "" + private def constructErrorMessage(lineNumber: Int, pos: Option[Int], fieldOpt: Option[String], msg: String): String = { + val atLine = if (lineNumber > 0) { + pos match { + case Some(p) => s" at line $lineNumber:$p" + case None => s" at line $lineNumber" + } + } + else + "" + + val atField = fieldOpt match { + case Some(f) => s", field $f" + case None => "" + } s"Syntax error in the copybook$atLine$atField: $msg" } -} \ No newline at end of file +} diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala index 613b12de4..9f12d5cfd 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/decoders/StringDecodersSpec.scala @@ -83,145 +83,181 @@ class StringDecodersSpec extends AnyWordSpec { "EBCDIC with code pages" should { "decode a CP273 string special characters" in { - val expected = " {Ä!~Ü^[ö§¤ß¢@ä¦ü}Ö\\] " - val bytes = Array(0x40, 0x43, 0x4A, 0x4F, 0x59, 0x5A, 0x5F, 0x63, 0x6A, 0x7C, 0x9F, + val expectedUnicode = " {Ä!~Ü^[ö§¤ß¢@ä¦ü}Ö\\] " + val ebcdicBytes = Array(0x40, 0x43, 0x4A, 0x4F, 0x59, 0x5A, 0x5F, 0x63, 0x6A, 0x7C, 0x9F, 0xA1, 0xB0, 0xB5, 0xC0, 0xCC, 0xD0, 0xDC, 0xE0, 0xEC, 0xFC, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage273, improvedNullDetection = false) + val enc = new CodePage273 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP273 string example" in { - val expected = "Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich" + val expectedUnicode = "Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich" - val bytes = Array(0xE5, 0x89, 0x83, 0xA3, 0x96, 0x99, 0x40, 0x91, 0x81, 0x87, 0xA3, 0x40, 0xA9, 0xA6, + val ebcdicBytes = Array(0xE5, 0x89, 0x83, 0xA3, 0x96, 0x99, 0x40, 0x91, 0x81, 0x87, 0xA3, 0x40, 0xA9, 0xA6, 0x6A, 0x93, 0x86, 0x40, 0xC2, 0x96, 0xA7, 0x92, 0xC0, 0x94, 0x97, 0x86, 0x85, 0x99, 0x40, 0x98, 0xA4, 0x85, 0x99, 0x40, 0xD0, 0x82, 0x85, 0x99, 0x40, 0x84, 0x85, 0x95, 0x40, 0x87, 0x99, 0x96, 0xA1, 0x85, 0x95, 0x40, 0xE2, 0xA8, 0x93, 0xA3, 0x85, 0x99, 0x40, 0xC4, 0x85, 0x89, 0x83, 0x88).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage273, improvedNullDetection = false) + val enc = new CodePage273 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP274 string special characters" in { - val expected = " æÄ!üÜ^Æö]ߢ§Øäèéø¨åÖ#àÅ[ç¤Ç " - val bytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, + val expectedUnicode = " æÄ!üÜ^Æö]ߢ§Øäèéø¨åÖ#àÅ[ç¤Ç " + val ebcdicBytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, 0xB0, 0xB5, 0x80, 0x43, 0xD0, 0xC0, 0x70, 0xA1, 0x47, 0xEC, 0x7B, 0x7C, 0x67, 0x4A, 0xE0, 0x9F, 0x68, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage274, improvedNullDetection = false) + val enc = new CodePage274 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP275 string special characters" in { - val expected = " æÄ!üÜ^Æö$ߢ§Øäéõø~åÖÕÃÅÉ\\¤] " - val bytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, + val expectedUnicode = " æÄ!üÜ^Æö$ߢ§Øäéõø~åÖÕÃÅÉ\\¤] " + val ebcdicBytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, 0xB0, 0xB5, 0x80, 0x43, 0xD0, 0xC0, 0x70, 0xA1, 0x47, 0xEC, 0x7B, 0x7C, 0x67, 0x4A, 0xE0, 0x9F, 0x68, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage275, improvedNullDetection = false) + val enc = new CodePage275 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP277 string special characters" in { - val expected = " {Ä!~Ü^[ö¤ß¢§@ä忦ü}ÖÆØ$#\\] " - val bytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, + val expectedUnicode = " {Ä!~Ü^[ö¤ß¢§@ä忦ü}ÖÆØ$#\\] " + val ebcdicBytes = Array(0x40, 0x9C, 0x63, 0x4F, 0xDC, 0xFC, 0x5F, 0x9E, 0xCC, 0x5A, 0x59, 0xB0, 0xB5, 0x80, 0x43, 0xD0, 0xC0, 0x70, 0xA1, 0x47, 0xEC, 0x7B, 0x7C, 0x67, 0x4A, 0xE0, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage277, improvedNullDetection = false) + val enc = new CodePage277 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP277 string example" in { - val expected = "Ægte ørreder svømmer i åen, mens små ællinger leger ved søen." + val expectedUnicode = "Ægte ørreder svømmer i åen, mens små ællinger leger ved søen." - val bytes = Array(0x7B, 0x87, 0xA3, 0x85, 0x40, 0x6A, 0x99, 0x99, 0x85, 0x84, 0x85, 0x99, 0x40, + val ebcdicBytes = Array(0x7B, 0x87, 0xA3, 0x85, 0x40, 0x6A, 0x99, 0x99, 0x85, 0x84, 0x85, 0x99, 0x40, 0xA2, 0xA5, 0x6A, 0x94, 0x94, 0x85, 0x99, 0x40, 0x89, 0x40, 0xD0, 0x85, 0x95, 0x6B, 0x40, 0x94, 0x85, 0x95, 0xA2, 0x40, 0xA2, 0x94, 0xD0, 0x40, 0xC0, 0x93, 0x93, 0x89, 0x95, 0x87, 0x85, 0x99, 0x40, 0x93, 0x85, 0x87, 0x85, 0x99, 0x40, 0xA5, 0x85, 0x84, 0x40, 0xA2, 0x6A, 0x85, 0x95, 0x4B).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage277, improvedNullDetection = false) + val enc = new CodePage277 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP278 string special characters" in { - val expected = " {Ä!~Ü^[ö¤ß¢§@ä忦ü}ÖÆØ$#\\] " - val bytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " {Ä!~Ü^[ö¤ß¢§@ä忦ü}ÖÆØ$#\\] " + val ebcdicBytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage278, improvedNullDetection = false) + val enc = new CodePage278 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP278 string example" in { - val expected = "Ångbåten är över sjön med färggranna blommor." + val expectedUnicode = "Ångbåten är över sjön med färggranna blommor." - val bytes = Array(0x5B, 0x95, 0x87, 0x82, 0xD0, 0xA3, 0x85, 0x95, 0x40, 0xC0, 0x99, 0x40, 0x6A, + val ebcdicBytes = Array(0x5B, 0x95, 0x87, 0x82, 0xD0, 0xA3, 0x85, 0x95, 0x40, 0xC0, 0x99, 0x40, 0x6A, 0xA5, 0x85, 0x99, 0x40, 0xA2, 0x91, 0x6A, 0x95, 0x40, 0x94, 0x85, 0x84, 0x40, 0x86, 0xC0, 0x99, 0x87, 0x87, 0x99, 0x81, 0x95, 0x95, 0x81, 0x40, 0x82, 0x93, 0x96, 0x94, 0x94, 0x96, 0x99, 0x4B).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage278, improvedNullDetection = false) + val enc = new CodePage278 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP280 string special characters" in { - val expected = " ä£!üÜ^@òéߢ°Öàèæöìå§ÆØÅÄɤ " - val bytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " ä£!üÜ^@òéߢ°Öàèæöìå§ÆØÅÄɤ " + val ebcdicBytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage280, improvedNullDetection = false) + val enc = new CodePage280 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP284 string special characters" in { - val expected = " äÑ|üܬ§ñ]ߢ[Ö{}æö¨å@ÆØÅÄɤ " - val bytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " äÑ|üܬ§ñ]ߢ[Ö{}æö¨å@ÆØÅÄɤ " + val ebcdicBytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage284, improvedNullDetection = false) + val enc = new CodePage284 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP285 string special characters" in { - val expected = " $£¯¢[^~ä#|üܬ§¦!ߢ$Ö{}æö¯å@ÆØÅÄɤ " - val bytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " $£¯¢[^~ä#|üܬ§¦!ߢ$Ö{}æö¯å@ÆØÅÄɤ " + val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage285, improvedNullDetection = false) + val enc = new CodePage285 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP297 string special characters" in { - val expected = " °$¨¢#¬¯ä£!üÜ^]ù§ß¢°Öéèæö¨åàÆØÅÄɤ " - val bytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " °$¨¢#¬¯ä£!üÜ^]ù§ß¢°Öéèæö¨åàÆØÅÄɤ " + val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage297, improvedNullDetection = false) + val enc = new CodePage297 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP500 string special characters" in { - val expected = "âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" - val bytes = Array( + val expectedUnicode = "âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ¤µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" + val ebcdicBytes = Array( 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, @@ -236,24 +272,50 @@ class StringDecodersSpec extends AnyWordSpec { 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE ).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage500, improvedNullDetection = false) + val enc = new CodePage500 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP838 string special characters" in { - val expected = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ " - val bytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ " + val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage838, improvedNullDetection = false) + val enc = new CodePage838 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) + } + + "decode a CP1025 string special characters" in { + val expectedUnicode = "АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяABCDEFGHIJKLMNOPQRSTUVWXYZ123456789[]\r\n" + val ebcdicBytes = Array( + 0xB9, 0xBA, 0xED, 0xBF, 0xBC, 0xBD, 0xEC, 0xFA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xDA, 0xDB, 0xDC, + 0xDE, 0xDF, 0xEA, 0xEB, 0xBE, 0xCA, 0xBB, 0xFE, 0xFB, 0xFD, 0x57, 0xEF, 0xEE, 0xFC, 0xB8, 0xDD, + 0x77, 0x78, 0xAF, 0x8D, 0x8A, 0x8B, 0xAE, 0xB2, 0x8F, 0x90, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xAA, 0xAB, 0xAC, 0xAD, 0x8C, 0x8E, 0x80, 0xB6, 0xB3, 0xB5, 0xB7, 0xB1, 0xB0, 0xB4, 0x76, 0xA0, + 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, + 0xF7, 0xF8, 0xF9, 0x4A, 0x5A, 0x25, 0x0D + ).map(_.toByte) + + val enc = new CodePage1025 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) + + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1140 string special characters" in { - val expected = "âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);¬-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®^£¥·©§¶¼½¾[]¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" - val bytes = Array( + val expectedUnicode = "âäàáãåçñ¢.<(+|&éêëèíîïìß!$*);¬-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®^£¥·©§¶¼½¾[]¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" + val ebcdicBytes = Array( 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, @@ -268,14 +330,17 @@ class StringDecodersSpec extends AnyWordSpec { 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE ).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1140, improvedNullDetection = false) + val enc = new CodePage1140 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1141 string special characters" in { - val expected = "â{àáãåçñÄ.<(+!&éêëèíîïì~Ü$*);^-/Â[ÀÁÃÅÇÑö,%_>?øÉÊËÈÍÎÏÌ`:#§'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µßstuvwxyz¡¿ÐÝÞ®¢£¥·©@¶¼½¾¬|¯¨´×äABCDEFGHI\u00ADô¦òóõüJKLMNOPQR¹û}ùúÿÖ÷STUVWXYZ²Ô\\ÒÓÕ0123456789³Û]ÙÚ" - val bytes = Array( + val expectedUnicode = "â{àáãåçñÄ.<(+!&éêëèíîïì~Ü$*);^-/Â[ÀÁÃÅÇÑö,%_>?øÉÊËÈÍÎÏÌ`:#§'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µßstuvwxyz¡¿ÐÝÞ®¢£¥·©@¶¼½¾¬|¯¨´×äABCDEFGHI\u00ADô¦òóõüJKLMNOPQR¹û}ùúÿÖ÷STUVWXYZ²Ô\\ÒÓÕ0123456789³Û]ÙÚ" + val ebcdicBytes = Array( 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, @@ -290,14 +355,17 @@ class StringDecodersSpec extends AnyWordSpec { 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE ).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1141, improvedNullDetection = false) + val enc = new CodePage1141 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1142 string example" in { - val expected = "âäàáã}çñ#.<(+!&éêëèíîïì߀Å*);^-/ÂÄÀÁÃ$ÇÑø,%_>?¦ÉÊËÈÍÎÏÌ`:ÆØ'=\"@abcdefghi«»ðýþ±°jklmnopqrªº{¸[]µüstuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×æABCDEFGHI\u00ADôöòóõåJKLMNOPQR¹û~ùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" - val bytes = Array( + val expectedUnicode = "âäàáã}çñ#.<(+!&éêëèíîïì߀Å*);^-/ÂÄÀÁÃ$ÇÑø,%_>?¦ÉÊËÈÍÎÏÌ`:ÆØ'=\"@abcdefghi«»ðýþ±°jklmnopqrªº{¸[]µüstuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×æABCDEFGHI\u00ADôöòóõåJKLMNOPQR¹û~ùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" + val ebcdicBytes = Array( 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, @@ -312,14 +380,17 @@ class StringDecodersSpec extends AnyWordSpec { 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE ).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1142, improvedNullDetection = false) + val enc = new CodePage1142 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1143 string example" in { - val expected = "â{àáã}çñ§.<(+!&`êëèíîïì߀Å*);^-/Â#ÀÁÃ$ÇÑö,%_>?ø\\ÊËÈÍÎÏÌé:ÄÖ'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ]µüstuvwxyz¡¿ÐÝÞ®¢£¥·©[¶¼½¾¬|¯¨´×äABCDEFGHI\u00ADô¦òóõåJKLMNOPQR¹û~ùúÿÉ÷STUVWXYZ²Ô@ÒÓÕ0123456789³ÛÜÙÚ" - val bytes = Array( + val expectedUnicode = "â{àáã}çñ§.<(+!&`êëèíîïì߀Å*);^-/Â#ÀÁÃ$ÇÑö,%_>?ø\\ÊËÈÍÎÏÌé:ÄÖ'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ]µüstuvwxyz¡¿ÐÝÞ®¢£¥·©[¶¼½¾¬|¯¨´×äABCDEFGHI\u00ADô¦òóõåJKLMNOPQR¹û~ùúÿÉ÷STUVWXYZ²Ô@ÒÓÕ0123456789³ÛÜÙÚ" + val ebcdicBytes = Array( 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, @@ -334,14 +405,17 @@ class StringDecodersSpec extends AnyWordSpec { 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE ).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1143, improvedNullDetection = false) + val enc = new CodePage1143 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1144 string example" in { - val expected = "âä{áãå\\ñ°.<(+!&]êë}íîï~ßé$*);^-/ÂÄÀÁÃÅÇÑò,%_>?øÉÊËÈÍÎÏÌù:£§'=\"Øabcdefghi«»ðýþ±[jklmnopqrªºæ¸Æ€µìstuvwxyz¡¿ÐÝÞ®¢#¥·©@¶¼½¾¬|¯¨´×àABCDEFGHI\u00ADôö¦óõèJKLMNOPQR¹ûü`úÿç÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" - val bytes = Array( + val expectedUnicode = "âä{áãå\\ñ°.<(+!&]êë}íîï~ßé$*);^-/ÂÄÀÁÃÅÇÑò,%_>?øÉÊËÈÍÎÏÌù:£§'=\"Øabcdefghi«»ðýþ±[jklmnopqrªºæ¸Æ€µìstuvwxyz¡¿ÐÝÞ®¢#¥·©@¶¼½¾¬|¯¨´×àABCDEFGHI\u00ADôö¦óõèJKLMNOPQR¹ûü`úÿç÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" + val ebcdicBytes = Array( 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, @@ -356,47 +430,59 @@ class StringDecodersSpec extends AnyWordSpec { 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE ).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1144, improvedNullDetection = false) + val enc = new CodePage1144 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1145 string special characters" in { - val expected = " äÑ|üܬ§ñ]ߢ[Ö{}æö¨å@ÆØÅÄÉ€ " - val bytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " äÑ|üܬ§ñ]ߢ[Ö{}æö¨å@ÆØÅÄÉ€ " + val ebcdicBytes = Array(0x40, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1145, improvedNullDetection = false) + val enc = new CodePage1145 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1146 string special characters" in { - val expected = " $£¯¢[^~ä#|üܬ§¦!ߢ$Ö{}æö¯å@ÆØÅÄÉ€ " - val bytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " $£¯¢[^~ä#|üܬ§¦!ߢ$Ö{}æö¯å@ÆØÅÄÉ€ " + val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1146, improvedNullDetection = false) + val enc = new CodePage1146 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1147 string special characters" in { - val expected = " °$¨¢#¬¯ä£!üÜ^]ù§ß¢°Öéèæö¨åàÆØÅÄÉ€ " - val bytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " °$¨¢#¬¯ä£!üÜ^]ù§ß¢°Öéèæö¨åàÆØÅÄÉ€ " + val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0x71, 0x9F, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1147, improvedNullDetection = false) + val enc = new CodePage1147 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1148 string special characters" in { - val expected = "âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" - val bytes = Array( + val expectedUnicode = "âäàáãåçñ[.<(+!&éêëèíîïìß]$*);^-/ÂÄÀÁÃÅÇѦ,%_>?øÉÊËÈÍÎÏÌ`:#@'=\"Øabcdefghi«»ðýþ±°jklmnopqrªºæ¸Æ€µ~stuvwxyz¡¿ÐÝÞ®¢£¥·©§¶¼½¾¬|¯¨´×{ABCDEFGHI\u00ADôöòóõ}JKLMNOPQR¹ûüùúÿ\\÷STUVWXYZ²ÔÖÒÓÕ0123456789³ÛÜÙÚ" + val ebcdicBytes = Array( 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, @@ -411,19 +497,25 @@ class StringDecodersSpec extends AnyWordSpec { 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE ).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1148, improvedNullDetection = false) + val enc = new CodePage1148 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } "decode a CP1160 string special characters" in { - val expected = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ€ " - val bytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, + val expectedUnicode = " ¢$~๐๑ฯัข#|แํ¬๕¦!]๐¢็{}ลึ~ฆ@ว๏ดฐ€ " + val ebcdicBytes = Array(0x40, 0x4A, 0x5B, 0xA1, 0xB0, 0xB1, 0xBA, 0xBC, 0x43, 0x7B, 0x4F, 0xDC, 0xFC, 0x5F, 0xB5, 0x6A, 0x5A, 0x59, 0xB0, 0x4A, 0xEC, 0xC0, 0xD0, 0x9C, 0xCC, 0xA1, 0x47, 0x7C, 0x9E, 0x80, 0x67, 0x63, 0xFE, 0x40).map(_.toByte) - val actual = decodeEbcdicString(bytes, KeepAll, new CodePage1160, improvedNullDetection = false) + val enc = new CodePage1160 + val actualUnicode = decodeEbcdicString(ebcdicBytes, KeepAll, enc, improvedNullDetection = false) + val actualEbcdicBytes = enc.convert(expectedUnicode, expectedUnicode.length) - assert(actual == expected) + assert(actualUnicode == expectedUnicode) + assert(actualEbcdicBytes.sameElements(ebcdicBytes)) } } } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/FakeCodePage.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/FakeCodePage.scala index 7e612c0fb..671314e82 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/FakeCodePage.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/encoding/codepage/FakeCodePage.scala @@ -16,7 +16,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage -class FakeCodePage extends SingleByteCodePage(FakeCodePage.ebcdicToAsciiMapping) { +class FakeCodePage extends SingleByteCodePage(FakeCodePage.ebcdicToAsciiMapping, FakeCodePage.asciiToEbcdicMapping) { /** * A short name is used to distinguish between different code pages, so it must be unique */ @@ -25,4 +25,6 @@ class FakeCodePage extends SingleByteCodePage(FakeCodePage.ebcdicToAsciiMapping) object FakeCodePage { val ebcdicToAsciiMapping: Array[Char] = CodePageCommon.ebcdicToAsciiMapping + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/DataSizeSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/DataSizeSpec.scala index cbc25d306..3ce1f4c15 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/DataSizeSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/DataSizeSpec.scala @@ -25,7 +25,7 @@ import za.co.absa.cobrix.cobol.parser.ast.{Group, Primitive} import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat import za.co.absa.cobrix.cobol.parser.encoding.ASCII import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage -import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy +import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, StringTrimmingPolicy} import java.nio.charset.StandardCharsets @@ -35,6 +35,7 @@ class DataSizeSpec extends AnyFunSuite { private def parse(pic: String): Primitive = { val visitor = new ParserVisitor(ASCII, StringTrimmingPolicy.TrimNone, + CommentPolicy(), isDisplayAlwaysString = false, CodePage.getCodePageByName("common"), StandardCharsets.US_ASCII, @@ -55,7 +56,7 @@ class DataSizeSpec extends AnyFunSuite { val parser = new copybookParser(tokens) parser.removeErrorListeners() parser.addErrorListener(new LogErrorListener(logger)) - parser.setErrorHandler(new ThrowErrorStrategy()) + parser.setErrorHandler(new ThrowErrorStrategy(6)) visitor.visit(parser.main()) visitor.ast.children.head.asInstanceOf[Group].children.head.asInstanceOf[Primitive] } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/PicValidationSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/PicValidationSpec.scala index e5004c118..5228bb6f2 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/PicValidationSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/PicValidationSpec.scala @@ -26,7 +26,7 @@ import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat import za.co.absa.cobrix.cobol.parser.encoding.ASCII import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage import za.co.absa.cobrix.cobol.parser.exceptions.SyntaxErrorException -import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy +import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, StringTrimmingPolicy} class PicValidationSpec extends AnyFunSuite { private val logger: Logger = LoggerFactory.getLogger(this.getClass) @@ -35,6 +35,7 @@ class PicValidationSpec extends AnyFunSuite { val visitor = new ParserVisitor(ASCII, StringTrimmingPolicy.TrimNone, + CommentPolicy(), isDisplayAlwaysString = false, CodePage.getCodePageByName("common"), StandardCharsets.UTF_8, @@ -55,7 +56,7 @@ class PicValidationSpec extends AnyFunSuite { val parser = new copybookParser(tokens) parser.removeErrorListeners() parser.addErrorListener(new LogErrorListener(logger)) - parser.setErrorHandler(new ThrowErrorStrategy()) + parser.setErrorHandler(new ThrowErrorStrategy(6)) visitor.visit(parser.main()) } diff --git a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/SyntaxErrorsSpec.scala b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/SyntaxErrorsSpec.scala index 68a2f4f2f..cabc80a2b 100644 --- a/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/SyntaxErrorsSpec.scala +++ b/cobol-parser/src/test/scala/za/co/absa/cobrix/cobol/parser/parse/SyntaxErrorsSpec.scala @@ -41,9 +41,27 @@ class SyntaxErrorsSpec extends AnyFunSuite { } assert(syntaxErrorException.lineNumber == 5) + assert(syntaxErrorException.posOpt.isEmpty) + assert(syntaxErrorException.fieldOpt.contains("GRP_FIELD")) assert(syntaxErrorException.msg.contains("The field is a leaf element")) } + test("Test handle malformed statement") { + val copyBookContents: String = + """ 01 RECORD. + | 07 SUB-FLD1 PIC X(30). + |""".stripMargin + + val syntaxErrorException = intercept[SyntaxErrorException] { + CopybookParser.parseTree(copyBookContents) + } + + assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.contains(13)) + assert(syntaxErrorException.fieldOpt.isEmpty) + assert(syntaxErrorException.msg == "Invalid input 'SUB-FLD1' at position 2:13") + } + test("Test handle malformed redefines") { val copyBookContents: String = """ 01 RECORD. @@ -57,7 +75,9 @@ class SyntaxErrorsSpec extends AnyFunSuite { } assert(syntaxErrorException.lineNumber == 4) - assert(syntaxErrorException.msg.contains("The field SUB_FLD2 redefines SUB_FLD1, which is not part if the redefined fields block")) + assert(syntaxErrorException.posOpt.isEmpty) + assert(syntaxErrorException.fieldOpt.contains("SUB_FLD2")) + assert(syntaxErrorException.msg.contains("The field SUB_FLD2 redefines SUB_FLD1, which is not part of the redefined fields block")) } test("Test too big decimal precision") { @@ -70,6 +90,8 @@ class SyntaxErrorsSpec extends AnyFunSuite { CopybookParser.parseTree(copyBookContents) } assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.contains(36)) + assert(syntaxErrorException.fieldOpt.contains("FIELD")) assert(syntaxErrorException.msg.contains("Decimal numbers with precision bigger")) } @@ -83,6 +105,8 @@ class SyntaxErrorsSpec extends AnyFunSuite { CopybookParser.parseTree(copyBookContents) } assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.contains(36)) + assert(syntaxErrorException.fieldOpt.contains("FIELD")) assert(syntaxErrorException.msg.contains("Decimal numbers with scale bigger")) } @@ -95,7 +119,10 @@ class SyntaxErrorsSpec extends AnyFunSuite { val syntaxErrorException = intercept[SyntaxErrorException] { CopybookParser.parseTree(copyBookContents) } + assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.contains(8)) + assert(syntaxErrorException.fieldOpt.isEmpty) assert(syntaxErrorException.msg.contains("Invalid input '/' at position 2:8")) } @@ -110,6 +137,8 @@ class SyntaxErrorsSpec extends AnyFunSuite { CopybookParser.parseTree(copyBookContents) } assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.isEmpty) + assert(syntaxErrorException.fieldOpt.contains("FIELD")) assert(syntaxErrorException.msg.contains("SIGN SEPARATE clause is not supported for COMP-3")) } @@ -123,6 +152,8 @@ class SyntaxErrorsSpec extends AnyFunSuite { CopybookParser.parseTree(copyBookContents) } assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.contains(38)) + assert(syntaxErrorException.fieldOpt.contains("FIELD")) assert(syntaxErrorException.msg.contains("Explicit decimal point in 'PIC 9(8).9(9)' is not supported for COMP-3.")) } @@ -148,6 +179,8 @@ class SyntaxErrorsSpec extends AnyFunSuite { CopybookParser.parseTree(copyBookContents) } assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.contains(32)) + assert(syntaxErrorException.fieldOpt.isEmpty) assert(syntaxErrorException.msg.contains("Invalid input")) assert(syntaxErrorException.msg.contains("at position 2:32")) } @@ -162,6 +195,8 @@ class SyntaxErrorsSpec extends AnyFunSuite { CopybookParser.parseTree(copyBookContents) } assert(syntaxErrorException.lineNumber == 2) + assert(syntaxErrorException.posOpt.contains(29)) + assert(syntaxErrorException.fieldOpt.isEmpty) assert(syntaxErrorException.msg.contains("Invalid input '(' at position 2:29")) } diff --git a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/utils/CustomCodePage.scala b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/utils/CustomCodePage.scala index f9b42443d..9f7fda7a7 100644 --- a/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/utils/CustomCodePage.scala +++ b/spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/utils/CustomCodePage.scala @@ -18,7 +18,7 @@ package za.co.absa.cobrix.spark.cobol.source.utils import za.co.absa.cobrix.cobol.parser.encoding.codepage.SingleByteCodePage -class CustomCodePage extends SingleByteCodePage(CustomCodePage.ebcdicToAsciiMapping) { +class CustomCodePage extends SingleByteCodePage(CustomCodePage.ebcdicToAsciiMapping, CustomCodePage.asciiToEbcdicMapping) { override def codePageShortName: String = "custom_test" } @@ -50,4 +50,6 @@ object CustomCodePage { } ebcdic2ascii } + + lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping) }