Commit
#291 Add an ability to generate raw binary debug fields.
yruslan committed May 29, 2020
1 parent bfcde0d commit 257d51c
Showing 18 changed files with 2,549 additions and 36 deletions.
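
The change replaces the boolean `isDebug` flag with a three-valued `DebugFieldsPolicy` (no debug fields, hex-encoded values, raw bytes). A hedged usage sketch follows; the spark-cobol option name `debug` and its accepted values are assumptions inferred from `DebugFieldsPolicy.withNameOpt` shown later in this diff, not something confirmed by the commit page itself.

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical usage: the "debug" option name and its value strings are assumed,
// mirroring the string-to-policy mapping in DebugFieldsPolicy.withNameOpt.
val spark = SparkSession.builder().appName("cobrix-debug-fields").getOrCreate()

val df = spark.read
  .format("cobol")
  .option("copybook", "/path/to/copybook.cpy")
  .option("debug", "raw")   // "false"/"none" -> off, "true"/"hex" -> hex strings, "raw"/"binary" -> raw bytes
  .load("/path/to/records")

// Each non-redefined primitive column gets a companion "<name>_debug" column
// carrying the field's underlying bytes.
```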
@@ -26,10 +26,11 @@ import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.decoders.{DecoderSelector, FloatingPointFormat, StringDecoders}
import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon}
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, Encoding, HEX}
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, Encoding, HEX, RAW}
import za.co.absa.cobrix.cobol.parser.exceptions.SyntaxErrorException
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, StringTrimmingPolicy}
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, DebugFieldsPolicy, StringTrimmingPolicy}

import scala.annotation.tailrec
import scala.collection.immutable.HashMap
@@ -69,7 +70,7 @@ object CopybookParser {
* @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
* @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
* @param nonTerminals A list of non-terminals that should be extracted as strings
* @param isDebug If true, additional debug fields will be added alongside all non-redefined primitives
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return Seq[Group] where a group is a record inside the copybook
*/
def parse(copyBookContents: String,
@@ -85,7 +86,7 @@
floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM,
nonTerminals: Seq[String] = Nil,
occursHandlers: Map[String, Map[String, Int]] = Map(),
isDebug: Boolean = false): Copybook = {
debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug): Copybook = {
parseTree(dataEnncoding,
copyBookContents,
dropGroupFillers,
@@ -99,7 +100,7 @@
floatingPointFormat,
nonTerminals,
occursHandlers,
isDebug)
debugFieldsPolicy)
}

/**
@@ -116,7 +117,7 @@
* @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
* @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
* @param nonTerminals A list of non-terminals that should be extracted as strings
* @param isDebug If true, additional debug fields will be added alongside all non-redefined primitives
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return Seq[Group] where a group is a record inside the copybook
*/
def parseTree(copyBookContents: String,
@@ -131,7 +132,7 @@
floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM,
nonTerminals: Seq[String] = Nil,
occursHandlers: Map[String, Map[String, Int]] = Map(),
isDebug: Boolean = false): Copybook = {
debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug): Copybook = {
parseTree(EBCDIC,
copyBookContents,
dropGroupFillers,
@@ -145,7 +146,7 @@
floatingPointFormat,
nonTerminals,
occursHandlers,
isDebug)
debugFieldsPolicy)
}

/**
@@ -164,7 +165,7 @@
* @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
* @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
* @param nonTerminals A list of non-terminals that should be extracted as strings
* @param isDebug If true, additional debug fields will be added alongside all non-redefined primitives
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return Seq[Group] where a group is a record inside the copybook
*/
@throws(classOf[SyntaxErrorException])
@@ -181,7 +182,7 @@
floatingPointFormat: FloatingPointFormat,
nonTerminals: Seq[String],
occursHandlers: Map[String, Map[String, Int]],
isDebug: Boolean): Copybook = {
debugFieldsPolicy: DebugFieldsPolicy): Copybook = {

val schemaANTLR: CopybookAST = ANTLRParser.parse(copyBookContents, enc, stringTrimmingPolicy, commentPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat)

@@ -205,7 +206,7 @@
occursHandlers
)
), segmentRedefines), correctedFieldParentMap
), isDebug
), debugFieldsPolicy
)
)
} else {
@@ -220,7 +221,7 @@
occursHandlers
)
), segmentRedefines), correctedFieldParentMap
), isDebug
), debugFieldsPolicy
)
)
}
@@ -835,20 +836,32 @@ object CopybookParser {
* Add debugging fields if debug mode is enabled
*
* @param ast An AST as a set of copybook records
* @param addDebuggingFields If true, debugging fields will be added
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return The same AST with debugging fields added
*/
private def addDebugFields(ast: CopybookAST, addDebuggingFields: Boolean): CopybookAST = {
private def addDebugFields(ast: CopybookAST, debugFieldsPolicy: DebugFieldsPolicy): CopybookAST = {
def getDebugField(field: Primitive): Primitive = {
val debugEncoding = debugFieldsPolicy match {
case DebugFieldsPolicy.HexValue => HEX
case DebugFieldsPolicy.RawValue => RAW
case _ => throw new IllegalStateException(s"Unexpected debug fields policy: $debugFieldsPolicy.")
}

val debugDecoder = debugFieldsPolicy match {
case DebugFieldsPolicy.HexValue => StringDecoders.decodeHex _
case DebugFieldsPolicy.RawValue => StringDecoders.decodeRaw _
case _ => throw new IllegalStateException(s"Unexpected debug fields policy: $debugFieldsPolicy.")
}

val size = field.binaryProperties.dataSize
val debugFieldName = field.name + "_debug"
val debugDataType = AlphaNumeric(s"X($size)", size, None, Some(HEX), None)
val debugDataType = AlphaNumeric(s"X($size)", size, None, Some(debugEncoding), None)

val debugField = field.copy(name = debugFieldName,
dataType = debugDataType,
redefines = Some(field.name),
isDependee = false,
decode = StringDecoders.decodeHex) (parent = field.parent)
decode = debugDecoder) (parent = field.parent)

debugField
}
@@ -870,7 +883,7 @@
group.withUpdatedChildren(newChildren)
}

if (addDebuggingFields) {
if (debugFieldsPolicy != DebugFieldsPolicy.NoDebug) {
processGroup(ast)
} else {
ast
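
To make the effect of `getDebugField` above concrete, here is a minimal sketch; the field name, its size and the `AlphaNumeric` import path are illustrative assumptions, while the constructor call matches the one used in the commit.

```scala
// Illustrative only: a 4-byte primitive named "AMOUNT" (import path of AlphaNumeric assumed).
import za.co.absa.cobrix.cobol.parser.ast.datatype.AlphaNumeric
import za.co.absa.cobrix.cobol.parser.encoding.{HEX, RAW}

val size           = 4                     // field.binaryProperties.dataSize
val debugFieldName = "AMOUNT" + "_debug"   // field.name + "_debug"

// HexValue policy -> X(4) with HEX encoding, decoded by StringDecoders.decodeHex
val hexDebugType = AlphaNumeric(s"X($size)", size, None, Some(HEX), None)

// RawValue policy -> X(4) with RAW encoding, decoded by StringDecoders.decodeRaw
val rawDebugType = AlphaNumeric(s"X($size)", size, None, Some(RAW), None)

// Either way the generated field redefines "AMOUNT" (redefines = Some("AMOUNT")),
// so it reads the same bytes and does not change the record length.
```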
@@ -86,6 +86,8 @@ object DecoderSelector {
StringDecoders.decodeUtf16String(_, getStringStrimmingType(stringTrimmingPolicy), isUtf16BigEndian)
case HEX =>
StringDecoders.decodeHex
case RAW =>
StringDecoders.decodeRaw
}
}

@@ -131,6 +131,14 @@ object StringDecoders {
new String(hexChars)
}

/**
* A decoder that does not decode the data, but passes the bytes through unchanged.
*
* @param bytes A byte array that represents the binary data
* @return The same byte array, unchanged (raw binary rather than a string)
*/
def decodeRaw(bytes: Array[Byte]): Array[Byte] = bytes

/**
* A decoder for any EBCDIC uncompressed numbers supporting
* <ul>
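
A minimal sketch of how the two debug decoders differ on the same input; the exact output format of `decodeHex` (only partially visible above) is assumed to be an uppercase hex string.

```scala
import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders

val bytes = Array(0xF1.toByte, 0xC1.toByte)   // arbitrary sample bytes ("1A" in EBCDIC)

StringDecoders.decodeHex(bytes)   // a String of hex characters, e.g. "F1C1"
StringDecoders.decodeRaw(bytes)   // the same Array[Byte], passed through untouched
```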
@@ -27,3 +27,5 @@ case object ASCII extends Encoding
case object UTF16 extends Encoding

case object HEX extends Encoding

case object RAW extends Encoding
@@ -0,0 +1,42 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.policies

object DebugFieldsPolicy extends Enumeration {
type DebugFieldsPolicy = Value

val NoDebug, HexValue, RawValue = Value

def withNameOpt(s: String): Option[Value] = {
val exactNames = values.find(_.toString == s)
if (exactNames.isEmpty) {
val sLowerCase = s.toLowerCase()
if (sLowerCase == "none" || sLowerCase == "false" || sLowerCase.isEmpty) {
Some(NoDebug)
} else if (sLowerCase == "hex" || sLowerCase == "true") {
Some(HexValue)
} else if (sLowerCase == "binary" || sLowerCase == "raw") {
Some(RawValue)
} else {
None
}
} else {
exactNames
}
}

}
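
The `withNameOpt` helper maps user-supplied option strings onto policy values; its behaviour can be read off directly from the code above.

```scala
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy

DebugFieldsPolicy.withNameOpt("false")    // Some(NoDebug)  - also "", "none" and the exact name "NoDebug"
DebugFieldsPolicy.withNameOpt("true")     // Some(HexValue) - also "hex" and the exact name "HexValue"
DebugFieldsPolicy.withNameOpt("binary")   // Some(RawValue) - also "raw" and the exact name "RawValue"
DebugFieldsPolicy.withNameOpt("verbose")  // None           - lets the caller reject an invalid value
```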
@@ -107,7 +107,7 @@ class FixedLenNestedReader[T: ClassTag](
floatingPointFormat,
nonTerminals,
occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
else
Copybook.merge(
copyBookContents.map(
@@ -124,7 +124,7 @@
floatingPointFormat,
nonTerminals,
occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
)
)
new CobolSchema(schema, schemaRetentionPolicy, "",false)
@@ -157,7 +157,7 @@ class VarLenNestedReader[T : ClassTag](copybookContents: Seq[String],
readerProperties.floatingPointFormat,
readerProperties.nonTerminals,
readerProperties.occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
else
Copybook.merge(copyBookContents.map(
CopybookParser.parseTree(encoding,
@@ -173,7 +173,7 @@
readerProperties.floatingPointFormat,
nonTerminals = readerProperties.nonTerminals,
readerProperties.occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
))
val segIdFieldCount = readerProperties.multisegment.map(p => p.segmentLevelIds.size).getOrElse(0)
val segmentIdPrefix = readerProperties.multisegment.map(p => p.segmentIdPrefix).getOrElse("")
@@ -18,6 +18,7 @@ package za.co.absa.cobrix.cobol.reader.parameters

import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.policies.CommentPolicy
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy

@@ -44,7 +45,7 @@
* @param commentPolicy A comment truncation policy
* @param dropGroupFillers If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields
* @param nonTerminals A list of non-terminals (GROUPS) to combine and parse as primitive fields
* @param isDebug If true, additional debugging fields will be added
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @param debugIgnoreFileSize If true the fixed length file reader won't check file size divisibility. Useful for debugging binary file / copybook mismatches.
*/
case class CobolParameters(
@@ -69,6 +70,6 @@ case class CobolParameters(
dropGroupFillers: Boolean,
nonTerminals: Seq[String],
occursMappings: Map[String, Map[String, Int]],
isDebug: Boolean,
debugFieldsPolicy: DebugFieldsPolicy,
debugIgnoreFileSize: Boolean
)
@@ -18,8 +18,9 @@ package za.co.absa.cobrix.cobol.reader.parameters

import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, StringTrimmingPolicy}
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, DebugFieldsPolicy, StringTrimmingPolicy}
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy

@@ -53,7 +54,7 @@ import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy
* @param commentPolicy A comment truncation policy
* @param dropGroupFillers If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields
* @param nonTerminals A list of non-terminals (GROUPS) to combine and parse as primitive fields
* @param isDebug If true, additional debugging fields will be added
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @param recordHeaderParser A parser used to parse data field record headers
* @param rhpAdditionalInfo An optional additional option string passed to a custom record header parser
* @param inputFileNameColumn A column name to add to the dataframe. The column will contain input file name for each record similar to 'input_file_name()' function
@@ -87,7 +88,7 @@ case class ReaderParameters(
dropGroupFillers: Boolean = false,
nonTerminals: Seq[String] = Nil,
occursMappings: Map[String, Map[String, Int]] = Map(),
isDebug: Boolean = false,
debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug,
recordHeaderParser: Option[String] = None,
rhpAdditionalInfo: Option[String] = None,
inputFileNameColumn: String = ""
