AbsaOSS · yruslan · Nov 6, 2025 · Nov 3, 2025 · Nov 4, 2025 · Nov 5, 2025
@@ -29,13 +29,14 @@ import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmi
 import java.nio.charset.Charset
 
 
-class ThrowErrorStrategy() extends DefaultErrorStrategy {
+class ThrowErrorStrategy(posAdjustment: Int) extends DefaultErrorStrategy {
   override def recover(recognizer: Parser, e: RecognitionException): Unit = {
     throw new SyntaxErrorException(
       e.getOffendingToken.getLine,
-      "",
+      Option(e.getOffendingToken.getCharPositionInLine + posAdjustment),
+      None,
       "Invalid input " + getTokenErrorDisplay(e.getOffendingToken) + " at position " + e.getOffendingToken.getLine
-      + ":" + (e.getOffendingToken.getCharPositionInLine + 6)
+      + ":" + (e.getOffendingToken.getCharPositionInLine + posAdjustment)
     )
   }
 
@@ -65,8 +66,9 @@ object ANTLRParser extends Logging {
             isUtf16BigEndian: Boolean,
             floatingPointFormat: FloatingPointFormat,
             fieldCodePageMap: Map[String, String]): CopybookAST = {
-    val visitor = new ParserVisitor(enc, stringTrimmingPolicy, isDisplayAlwaysString, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, strictSignOverpunch, improvedNullDetection, strictIntegralPrecision, decodeBinaryAsHex, fieldCodePageMap)
+    val visitor = new ParserVisitor(enc, stringTrimmingPolicy, commentPolicy, isDisplayAlwaysString, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat, strictSignOverpunch, improvedNullDetection, strictIntegralPrecision, decodeBinaryAsHex, fieldCodePageMap)
 
+    val adjPos = if (commentPolicy.truncateComments) commentPolicy.commentsUpToChar else 0
     val strippedContents = filterSpecialCharacters(copyBookContents).split("\\r?\\n").map(
       line =>
         truncateComments(line, commentPolicy)
@@ -81,7 +83,7 @@ object ANTLRParser extends Logging {
     val parser = new copybookParser(tokens)
     parser.removeErrorListeners()
     parser.addErrorListener(new LogErrorListener(logger))
-    parser.setErrorHandler(new ThrowErrorStrategy())
+    parser.setErrorHandler(new ThrowErrorStrategy(adjPos))
 
     visitor.visitMain(parser.main())
     visitor.ast

@@ -28,6 +28,7 @@ import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPoint
 import za.co.absa.cobrix.cobol.parser.encoding.codepage.CodePage
 import za.co.absa.cobrix.cobol.parser.encoding._
 import za.co.absa.cobrix.cobol.parser.exceptions.SyntaxErrorException
+import za.co.absa.cobrix.cobol.parser.policies.CommentPolicy
 import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy
 import za.co.absa.cobrix.cobol.parser.position.{Left, Position, Right}
 
@@ -41,6 +42,7 @@ sealed trait Expr
 
 class ParserVisitor(enc: Encoding,
                     stringTrimmingPolicy: StringTrimmingPolicy,
+                    commentPolicy: CommentPolicy,
                     isDisplayAlwaysString: Boolean,
                     ebcdicCodePage: CodePage,
                     asciiCharset: Charset,
@@ -158,19 +160,19 @@ class ParserVisitor(enc: Encoding,
           pic.value match {
             case dec: Decimal =>
               if (dec.compact.isDefined && !dec.compact.contains(usageVal))
-                throw  new SyntaxErrorException(ctx.start.getLine, "", s"Field USAGE (${dec.compact.get}) doesn't match group's USAGE ($usageVal).")
+                throw  new SyntaxErrorException(ctx.start.getLine, Option(ctx.start.getCharPositionInLine), None, s"Field USAGE (${dec.compact.get}) doesn't match group's USAGE ($usageVal).")
               dec.copy(compact=usage)
             case int: Integral =>
               if (int.compact.isDefined && !int.compact.contains(usageVal))
-                throw  new SyntaxErrorException(ctx.start.getLine, "", s"Field USAGE (${int.compact.get}) doesn't match group's USAGE ($usageVal).")
+                throw  new SyntaxErrorException(ctx.start.getLine, Option(ctx.start.getCharPositionInLine), None, s"Field USAGE (${int.compact.get}) doesn't match group's USAGE ($usageVal).")
               int.copy(compact=usage)
             case x: AlphaNumeric if usageVal == COMP3U() =>
               Integral(x.pic, x.length*2, None, false, None, Some(COMP3U()), None, x.originalPic)
             case x: AlphaNumeric if usageVal == COMP1() || usageVal == COMP4() =>
               val enc = if (decodeBinaryAsHex) HEX else RAW
               x.copy(compact=usage, enc=Some(enc))
             case x: AlphaNumeric =>
-              throw new SyntaxErrorException(ctx.start.getLine, "", s"Field USAGE $usageVal is not supported with this PIC: ${x.pic}. The field should be numeric.")
+              throw new SyntaxErrorException(ctx.start.getLine, Option(ctx.start.getCharPositionInLine), None, s"Field USAGE $usageVal is not supported with this PIC: ${x.pic}. The field should be numeric.")
           }
         )
     }
@@ -226,7 +228,7 @@ class ParserVisitor(enc: Encoding,
       case None => addLevel(section)
       case Some(s) if s > section => addLevel(section)
       case _ =>
-        throw new SyntaxErrorException(levels.top.el.children.last.lineNumber, levels.top.el.children.last.name,
+        throw new SyntaxErrorException(levels.top.el.children.last.lineNumber, None, Option(levels.top.el.children.last.name),
           s"The field is a leaf element and cannot contain nested fields.")
     }
 
@@ -556,35 +558,37 @@ class ParserVisitor(enc: Encoding,
   }
 
   def checkBounds(ctx: ParserRuleContext, expr: PicExpr): PicExpr = {
+    val adjustPos = if (commentPolicy.truncateComments) commentPolicy.commentsUpToChar + 1 else 1
+    val pos = Option(ctx.stop.getCharPositionInLine + adjustPos)
     expr.value match {
       case x: Decimal =>
         if (x.isSignSeparate && x.compact.isDefined)
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, None, Option(getIdentifier(ctx.parent)),
             s"SIGN SEPARATE clause is not supported for ${x.compact.get}. It is only supported for DISPLAY formatted fields.")
         if(x.scale > Constants.maxDecimalScale)
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)),
             s"Decimal numbers with scale bigger than ${Constants.maxDecimalScale} are not supported.")
         if(x.precision > Constants.maxDecimalPrecision)
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)),
             s"Decimal numbers with precision bigger than ${Constants.maxDecimalPrecision} are not supported.")
         if (x.compact.isDefined && x.explicitDecimal)
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)),
             s"Explicit decimal point in 'PIC ${expr.value.originalPic.get}' is not supported for ${x.compact.get}. It is only supported for DISPLAY formatted fields.")
       case x: Integral =>
         if (x.isSignSeparate && x.compact.isDefined) {
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, None, Option(getIdentifier(ctx.parent)),
             s"SIGN SEPARATE clause is not supported for ${x.compact.get}. It is only supported for DISPLAY formatted fields.")
         }
         if (x.precision > Constants.maxBinIntPrecision && x.compact.contains(COMP4())) {
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)),
             s"BINARY-encoded integers with precision bigger than ${Constants.maxBinIntPrecision} are not supported.")
         }
         if (x.precision < 1 || x.precision >= Constants.maxFieldLength)
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)),
             s"Incorrect field size of ${x.precision} for PIC ${expr.value.originalPic.get}. Supported size is in range from 1 to ${Constants.maxFieldLength}.")
       case x: AlphaNumeric =>
         if (x.length < 1 || x.length >= Constants.maxFieldLength)
-          throw new SyntaxErrorException(ctx.start.getLine, getIdentifier(ctx.parent),
+          throw new SyntaxErrorException(ctx.start.getLine, pos, Option(getIdentifier(ctx.parent)),
             s"Incorrect field size of ${x.length} for PIC ${expr.value.originalPic.get}. Supported size is in range from 1 to ${Constants.maxFieldLength}.")
     }
     expr

@@ -53,10 +53,10 @@ class BinaryPropertiesAdder extends AstTransformer {
           redefinedNames.clear()
         case Some(redefines) =>
           if (i == 0) {
-            throw new SyntaxErrorException(child.lineNumber, child.name, s"The first field of a group cannot use REDEFINES keyword.")
+            throw new SyntaxErrorException(child.lineNumber, None, Option(child.name), s"The first field of a group cannot use REDEFINES keyword.")
           }
           if (!redefinedNames.contains(redefines.toUpperCase)) {
-            throw new SyntaxErrorException(child.lineNumber, child.name, s"The field ${child.name} redefines $redefines, which is not part if the redefined fields block.")
+            throw new SyntaxErrorException(child.lineNumber, None, Option(child.name), s"The field ${child.name} redefines $redefines, which is not part of the redefined fields block.")
           }
           newChildren(i - 1) = newChildren(i - 1).withUpdatedIsRedefined(newIsRedefined = true)
       }

@@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
   * EBCDIC code page 37 contains all of the standard Latin-1 characters.
   *
   */
-class CodePage037 extends SingleByteCodePage(CodePage037.ebcdicToAsciiMapping) {
+class CodePage037 extends SingleByteCodePage(CodePage037.ebcdicToAsciiMapping, CodePage037.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp037"
 }
 
@@ -60,4 +60,6 @@ object CodePage037 {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }
@@ -21,7 +21,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
   *
   * In addition to "cp037" code page it contains conversions for non-printable characters.
   */
-class CodePage037Ext extends SingleByteCodePage(CodePage037Ext.ebcdicToAsciiMapping) {
+class CodePage037Ext extends SingleByteCodePage(CodePage037Ext.ebcdicToAsciiMapping, CodePage037Ext.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp037_extended"
 }
 
@@ -53,4 +53,6 @@ object CodePage037Ext {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }
@@ -19,7 +19,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
 /**
   * EBCDIC code page with full Cyrillic-charset
   */
-class CodePage1025 extends SingleByteCodePage(CodePage1025.ebcdicToAsciiMapping) {
+class CodePage1025 extends SingleByteCodePage(CodePage1025.ebcdicToAsciiMapping, CodePage1025.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp1025"
 }
 
@@ -56,4 +56,6 @@ object CodePage1025 {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }
@@ -20,7 +20,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
   * EBCDIC code page 1047 contains all of the Latin-1/Open System characters.
   *
   */
-class CodePage1047 extends SingleByteCodePage(CodePage1047.ebcdicToAsciiMapping) {
+class CodePage1047 extends SingleByteCodePage(CodePage1047.ebcdicToAsciiMapping, CodePage1047.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp1047"
 }
 
@@ -55,4 +55,6 @@ object CodePage1047 {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }
@@ -23,7 +23,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
   * It corresponds to code page 037 and only differs from it in position 9F, where the euro sign € is located instead
   * of the international currency symbol ¤.
   */
-class CodePage1140 extends SingleByteCodePage(CodePage1140.ebcdicToAsciiMapping) {
+class CodePage1140 extends SingleByteCodePage(CodePage1140.ebcdicToAsciiMapping, CodePage1140.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp1140"
 }
 
@@ -55,4 +55,6 @@ object CodePage1140 {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }
@@ -23,7 +23,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
   * It corresponds to code page 273 and only differs from it in position 9F, where the euro sign € is located instead
   * of the international currency symbol ¤.
   */
-class CodePage1141 extends SingleByteCodePage(CodePage1141.ebcdicToAsciiMapping) {
+class CodePage1141 extends SingleByteCodePage(CodePage1141.ebcdicToAsciiMapping, CodePage1141.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp1141"
 }
 
@@ -55,4 +55,6 @@ object CodePage1141 {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }
@@ -22,7 +22,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
   * It corresponds to code page 277 and only differs from it in position 5A, where the euro sign € is located instead
   * of the international currency symbol ¤.
   */
-class CodePage1142 extends SingleByteCodePage(CodePage1142.ebcdicToAsciiMapping) {
+class CodePage1142 extends SingleByteCodePage(CodePage1142.ebcdicToAsciiMapping, CodePage1142.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp1142"
 }
 
@@ -54,4 +54,6 @@ object CodePage1142 {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }
@@ -22,7 +22,7 @@ package za.co.absa.cobrix.cobol.parser.encoding.codepage
   * It corresponds to code page 278 and only differs from it in position 5A, where the euro sign € is located instead
   * of the international currency symbol ¤.
   */
-class CodePage1143 extends SingleByteCodePage(CodePage1143.ebcdicToAsciiMapping) {
+class CodePage1143 extends SingleByteCodePage(CodePage1143.ebcdicToAsciiMapping, CodePage1143.asciiToEbcdicMapping) {
   override def codePageShortName: String = "cp1143"
 }
 
@@ -54,4 +54,6 @@ object CodePage1143 {
     }
     ebcdic2ascii
   }
+
+  lazy val asciiToEbcdicMapping: Array[Byte] = SingleByteCodePage.getReverseTable(ebcdicToAsciiMapping)
 }