Commit
#291 Add an ability to generate raw binary debug fields.
yruslan committed May 29, 2020
1 parent bfcde0d commit 257d51c
Showing 18 changed files with 2,549 additions and 36 deletions.
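
The change replaces the boolean `isDebug` flag with a three-valued `DebugFieldsPolicy` (no debug fields, hex-encoded values, raw bytes). A hedged usage sketch follows; the spark-cobol option name `debug` and its accepted values are assumptions inferred from `DebugFieldsPolicy.withNameOpt` shown later in this diff, not something confirmed by the commit page itself.

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical usage: the "debug" option name and its value strings are assumed,
// mirroring the string-to-policy mapping in DebugFieldsPolicy.withNameOpt.
val spark = SparkSession.builder().appName("cobrix-debug-fields").getOrCreate()

val df = spark.read
  .format("cobol")
  .option("copybook", "/path/to/copybook.cpy")
  .option("debug", "raw")   // "false"/"none" -> off, "true"/"hex" -> hex strings, "raw"/"binary" -> raw bytes
  .load("/path/to/records")

// Each non-redefined primitive column gets a companion "<name>_debug" column
// carrying the field's underlying bytes.
```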
@@ -26,10 +26,11 @@ import za.co.absa.cobrix.cobol.parser.common.Constants
import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.decoders.{DecoderSelector, FloatingPointFormat, StringDecoders}
import za.co.absa.cobrix.cobol.parser.encoding.codepage.{CodePage, CodePageCommon}
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, Encoding, HEX}
import za.co.absa.cobrix.cobol.parser.encoding.{EBCDIC, Encoding, HEX, RAW}
import za.co.absa.cobrix.cobol.parser.exceptions.SyntaxErrorException
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, StringTrimmingPolicy}
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, DebugFieldsPolicy, StringTrimmingPolicy}

import scala.annotation.tailrec
import scala.collection.immutable.HashMap
@@ -69,7 +70,7 @@ object CopybookParser {
* @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
* @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
* @param nonTerminals A list of non-terminals that should be extracted as strings
* @param isDebug If true, additional debug fields will be added alongside all non-redefined primitives
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return Seq[Group] where a group is a record inside the copybook
*/
def parse(copyBookContents: String,
@@ -85,7 +86,7 @@
floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM,
nonTerminals: Seq[String] = Nil,
occursHandlers: Map[String, Map[String, Int]] = Map(),
isDebug: Boolean = false): Copybook = {
debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug): Copybook = {
parseTree(dataEnncoding,
copyBookContents,
dropGroupFillers,
@@ -99,7 +100,7 @@
floatingPointFormat,
nonTerminals,
occursHandlers,
isDebug)
debugFieldsPolicy)
}

/**
@@ -116,7 +117,7 @@
* @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
* @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
* @param nonTerminals A list of non-terminals that should be extracted as strings
* @param isDebug If true, additional debug fields will be added alongside all non-redefined primitives
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return Seq[Group] where a group is a record inside the copybook
*/
def parseTree(copyBookContents: String,
@@ -131,7 +132,7 @@
floatingPointFormat: FloatingPointFormat = FloatingPointFormat.IBM,
nonTerminals: Seq[String] = Nil,
occursHandlers: Map[String, Map[String, Int]] = Map(),
isDebug: Boolean = false): Copybook = {
debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug): Copybook = {
parseTree(EBCDIC,
copyBookContents,
dropGroupFillers,
@@ -145,7 +146,7 @@
floatingPointFormat,
nonTerminals,
occursHandlers,
isDebug)
debugFieldsPolicy)
}

/**
@@ -164,7 +165,7 @@
* @param isUtf16BigEndian If true UTF-16 strings are considered big-endian.
* @param floatingPointFormat A format of floating-point numbers (IBM/IEEE754)
* @param nonTerminals A list of non-terminals that should be extracted as strings
* @param isDebug If true, additional debug fields will be added alongside all non-redefined primitives
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return Seq[Group] where a group is a record inside the copybook
*/
@throws(classOf[SyntaxErrorException])
@@ -181,7 +182,7 @@
floatingPointFormat: FloatingPointFormat,
nonTerminals: Seq[String],
occursHandlers: Map[String, Map[String, Int]],
isDebug: Boolean): Copybook = {
debugFieldsPolicy: DebugFieldsPolicy): Copybook = {

val schemaANTLR: CopybookAST = ANTLRParser.parse(copyBookContents, enc, stringTrimmingPolicy, commentPolicy, ebcdicCodePage, asciiCharset, isUtf16BigEndian, floatingPointFormat)

@@ -205,7 +206,7 @@
occursHandlers
)
), segmentRedefines), correctedFieldParentMap
), isDebug
), debugFieldsPolicy
)
)
} else {
@@ -220,7 +221,7 @@
occursHandlers
)
), segmentRedefines), correctedFieldParentMap
), isDebug
), debugFieldsPolicy
)
)
}
@@ -835,20 +836,32 @@ object CopybookParser {
* Add debugging fields if debug mode is enabled
*
* @param ast An AST as a set of copybook records
* @param addDebuggingFields If true, debugging fields will be added
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @return The same AST with debugging fields added
*/
private def addDebugFields(ast: CopybookAST, addDebuggingFields: Boolean): CopybookAST = {
private def addDebugFields(ast: CopybookAST, debugFieldsPolicy: DebugFieldsPolicy): CopybookAST = {
def getDebugField(field: Primitive): Primitive = {
val debugEncoding = debugFieldsPolicy match {
case DebugFieldsPolicy.HexValue => HEX
case DebugFieldsPolicy.RawValue => RAW
case _ => throw new IllegalStateException(s"Unexpected debug fields policy: $debugFieldsPolicy.")
}

val debugDecoder = debugFieldsPolicy match {
case DebugFieldsPolicy.HexValue => StringDecoders.decodeHex _
case DebugFieldsPolicy.RawValue => StringDecoders.decodeRaw _
case _ => throw new IllegalStateException(s"Unexpected debug fields policy: $debugFieldsPolicy.")
}

val size = field.binaryProperties.dataSize
val debugFieldName = field.name + "_debug"
val debugDataType = AlphaNumeric(s"X($size)", size, None, Some(HEX), None)
val debugDataType = AlphaNumeric(s"X($size)", size, None, Some(debugEncoding), None)

val debugField = field.copy(name = debugFieldName,
dataType = debugDataType,
redefines = Some(field.name),
isDependee = false,
decode = StringDecoders.decodeHex) (parent = field.parent)
decode = debugDecoder) (parent = field.parent)

debugField
}
@@ -870,7 +883,7 @@
group.withUpdatedChildren(newChildren)
}

if (addDebuggingFields) {
if (debugFieldsPolicy != DebugFieldsPolicy.NoDebug) {
processGroup(ast)
} else {
ast
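
To make the effect of `getDebugField` above concrete, here is a minimal sketch; the field name, its size and the `AlphaNumeric` import path are illustrative assumptions, while the constructor call matches the one used in the commit.

```scala
// Illustrative only: a 4-byte primitive named "AMOUNT" (import path of AlphaNumeric assumed).
import za.co.absa.cobrix.cobol.parser.ast.datatype.AlphaNumeric
import za.co.absa.cobrix.cobol.parser.encoding.{HEX, RAW}

val size           = 4                     // field.binaryProperties.dataSize
val debugFieldName = "AMOUNT" + "_debug"   // field.name + "_debug"

// HexValue policy -> X(4) with HEX encoding, decoded by StringDecoders.decodeHex
val hexDebugType = AlphaNumeric(s"X($size)", size, None, Some(HEX), None)

// RawValue policy -> X(4) with RAW encoding, decoded by StringDecoders.decodeRaw
val rawDebugType = AlphaNumeric(s"X($size)", size, None, Some(RAW), None)

// Either way the generated field redefines "AMOUNT" (redefines = Some("AMOUNT")),
// so it reads the same bytes and does not change the record length.
```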
@@ -86,6 +86,8 @@ object DecoderSelector {
StringDecoders.decodeUtf16String(_, getStringStrimmingType(stringTrimmingPolicy), isUtf16BigEndian)
case HEX =>
StringDecoders.decodeHex
case RAW =>
StringDecoders.decodeRaw
}
}

@@ -131,6 +131,14 @@ object StringDecoders {
new String(hexChars)
}

/**
* A decoder that does not decode the data, but passes the bytes through unchanged.
*
* @param bytes A byte array that represents the binary data
* @return The same byte array, unchanged (raw binary rather than a string)
*/
def decodeRaw(bytes: Array[Byte]): Array[Byte] = bytes

/**
* A decoder for any EBCDIC uncompressed numbers supporting
* <ul>
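
A minimal sketch of how the two debug decoders differ on the same input; the exact output format of `decodeHex` (only partially visible above) is assumed to be an uppercase hex string.

```scala
import za.co.absa.cobrix.cobol.parser.decoders.StringDecoders

val bytes = Array(0xF1.toByte, 0xC1.toByte)   // arbitrary sample bytes ("1A" in EBCDIC)

StringDecoders.decodeHex(bytes)   // a String of hex characters, e.g. "F1C1"
StringDecoders.decodeRaw(bytes)   // the same Array[Byte], passed through untouched
```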
@@ -27,3 +27,5 @@ case object ASCII extends Encoding
case object UTF16 extends Encoding

case object HEX extends Encoding

case object RAW extends Encoding
@@ -0,0 +1,42 @@
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package za.co.absa.cobrix.cobol.parser.policies

object DebugFieldsPolicy extends Enumeration {
type DebugFieldsPolicy = Value

val NoDebug, HexValue, RawValue = Value

def withNameOpt(s: String): Option[Value] = {
val exactNames = values.find(_.toString == s)
if (exactNames.isEmpty) {
val sLowerCase = s.toLowerCase()
if (sLowerCase == "none" || sLowerCase == "false" || sLowerCase.isEmpty) {
Some(NoDebug)
} else if (sLowerCase == "hex" || sLowerCase == "true") {
Some(HexValue)
} else if (sLowerCase == "binary" || sLowerCase == "raw") {
Some(RawValue)
} else {
None
}
} else {
exactNames
}
}

}
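
The `withNameOpt` helper maps user-supplied option strings onto policy values; its behaviour can be read off directly from the code above.

```scala
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy

DebugFieldsPolicy.withNameOpt("false")    // Some(NoDebug)  - also "", "none" and the exact name "NoDebug"
DebugFieldsPolicy.withNameOpt("true")     // Some(HexValue) - also "hex" and the exact name "HexValue"
DebugFieldsPolicy.withNameOpt("binary")   // Some(RawValue) - also "raw" and the exact name "RawValue"
DebugFieldsPolicy.withNameOpt("verbose")  // None           - lets the caller reject an invalid value
```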
@@ -107,7 +107,7 @@ class FixedLenNestedReader[T: ClassTag](
floatingPointFormat,
nonTerminals,
occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
else
Copybook.merge(
copyBookContents.map(
@@ -124,7 +124,7 @@
floatingPointFormat,
nonTerminals,
occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
)
)
new CobolSchema(schema, schemaRetentionPolicy, "",false)
@@ -157,7 +157,7 @@ class VarLenNestedReader[T : ClassTag](copybookContents: Seq[String],
readerProperties.floatingPointFormat,
readerProperties.nonTerminals,
readerProperties.occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
else
Copybook.merge(copyBookContents.map(
CopybookParser.parseTree(encoding,
@@ -173,7 +173,7 @@
readerProperties.floatingPointFormat,
nonTerminals = readerProperties.nonTerminals,
readerProperties.occursMappings,
readerProperties.isDebug)
readerProperties.debugFieldsPolicy)
))
val segIdFieldCount = readerProperties.multisegment.map(p => p.segmentLevelIds.size).getOrElse(0)
val segmentIdPrefix = readerProperties.multisegment.map(p => p.segmentIdPrefix).getOrElse("")
@@ -18,6 +18,7 @@ package za.co.absa.cobrix.cobol.reader.parameters

import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.policies.CommentPolicy
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy

@@ -44,7 +45,7 @@
* @param commentPolicy A comment truncation policy
* @param dropGroupFillers If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields
* @param nonTerminals A list of non-terminals (GROUPS) to combine and parse as primitive fields
* @param isDebug If true, additional debugging fields will be added
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @param debugIgnoreFileSize If true the fixed length file reader won't check file size divisibility. Useful for debugging binary file / copybook mismatches.
*/
case class CobolParameters(
@@ -69,6 +70,6 @@ case class CobolParameters(
dropGroupFillers: Boolean,
nonTerminals: Seq[String],
occursMappings: Map[String, Map[String, Int]],
isDebug: Boolean,
debugFieldsPolicy: DebugFieldsPolicy,
debugIgnoreFileSize: Boolean
)
@@ -18,8 +18,9 @@ package za.co.absa.cobrix.cobol.reader.parameters

import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.decoders.FloatingPointFormat.FloatingPointFormat
import za.co.absa.cobrix.cobol.parser.policies.DebugFieldsPolicy.DebugFieldsPolicy
import za.co.absa.cobrix.cobol.parser.policies.StringTrimmingPolicy.StringTrimmingPolicy
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, StringTrimmingPolicy}
import za.co.absa.cobrix.cobol.parser.policies.{CommentPolicy, DebugFieldsPolicy, StringTrimmingPolicy}
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy.SchemaRetentionPolicy
import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy

@@ -53,7 +54,7 @@ import za.co.absa.cobrix.cobol.reader.policies.SchemaRetentionPolicy
* @param commentPolicy A comment truncation policy
* @param dropGroupFillers If true the parser will drop all FILLER fields, even GROUP FILLERS that have non-FILLER nested fields
* @param nonTerminals A list of non-terminals (GROUPS) to combine and parse as primitive fields
* @param isDebug If true, additional debugging fields will be added
* @param debugFieldsPolicy Specifies whether debugging fields should be added and what they should contain (false, hex, raw).
* @param recordHeaderParser A parser used to parse data field record headers
* @param rhpAdditionalInfo An optional additional option string passed to a custom record header parser
* @param inputFileNameColumn A column name to add to the dataframe. The column will contain input file name for each record similar to 'input_file_name()' function
@@ -87,7 +88,7 @@ case class ReaderParameters(
dropGroupFillers: Boolean = false,
nonTerminals: Seq[String] = Nil,
occursMappings: Map[String, Map[String, Int]] = Map(),
isDebug: Boolean = false,
debugFieldsPolicy: DebugFieldsPolicy = DebugFieldsPolicy.NoDebug,
recordHeaderParser: Option[String] = None,
rhpAdditionalInfo: Option[String] = None,
inputFileNameColumn: String = ""
