-
Notifications
You must be signed in to change notification settings - Fork 6
/
transformer.go
318 lines (271 loc) · 9.77 KB
/
transformer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
// Package transform implements code which can use a JSON schema with transform sections to convert a JSON file to
// match the schema format.
package transform
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
"github.com/antchfx/xmlquery"
"github.com/GannettDigital/jsonparser"
"github.com/GannettDigital/jstransform/jsonschema"
)
// inputFormat names the kind of input a Transformer operates on; the supported values are "JSON" and "XML".
type inputFormat string

// Input formats a Transformer can be created for.
const (
	jsonInput inputFormat = "JSON"
	xmlInput  inputFormat = "XML"
)
// JSONTransformer is the interface implemented by the jstransform Transformer.
type JSONTransformer interface {
// Transform takes a raw JSON message and returns a raw JSON message or an error.
Transform(raw json.RawMessage) (json.RawMessage, error)
}
// Transformer uses a JSON schema and the transform sections within it to take a set of input data and transform
// it to match the schema.
// More details on the transform section of the schema are found at
// https://github.com/GannettDigital/jstransform/blob/master/transform.adoc
type Transformer struct {
// schema is the schema the Transformer was built from; transformed output is validated against it.
schema *jsonschema.Schema
transformIdentifier string // Used to select the proper transform Instructions
// root is the top of the instanceTransformer tree whose transform method produces the output.
root instanceTransformer
// format records whether Transform treats its input as JSON or XML.
format inputFormat
}
// NewTransformer builds a Transformer from the given schema for input that is JSON.
// The transform identifier selects which transform section of the schema is used.
func NewTransformer(schema *jsonschema.Schema, tranformIdentifier string) (*Transformer, error) {
	const format = jsonInput
	return newTransformer(schema, tranformIdentifier, format)
}
// NewXMLTransformer builds a Transformer from the given schema for input that is XML.
// The transform identifier selects which transform section of the schema is used.
func NewXMLTransformer(schema *jsonschema.Schema, tranformIdentifier string) (*Transformer, error) {
	const format = xmlInput
	return newTransformer(schema, tranformIdentifier, format)
}
// newTransformer is the shared constructor behind NewTransformer and NewXMLTransformer.
//
// It creates the root transformer (an object transformer when the schema has Properties, otherwise an array
// transformer when it has Items) and then walks the schema with jsonschema.WalkRaw, using tr.walker to add a
// transformer for every path visited.
//
// An error is returned when schema is nil, when the schema has neither Properties nor Items, when the root
// transformer cannot be created, or when walking the schema fails.
func newTransformer(schema *jsonschema.Schema, tranformIdentifier string, format inputFormat) (*Transformer, error) {
	// Guard against a nil schema, which would otherwise panic on the field accesses below.
	if schema == nil {
		return nil, errors.New("schema must not be nil")
	}

	tr := &Transformer{schema: schema, transformIdentifier: tranformIdentifier, format: format}

	// walker passes each path's raw schema value to these constructors; the root is given an empty
	// JSON object instead.
	emptyJSON := []byte(`{}`)

	var err error
	switch {
	case schema.Properties != nil:
		tr.root, err = newObjectTransformer("$", tranformIdentifier, emptyJSON, format)
	case schema.Items != nil:
		tr.root, err = newArrayTransformer("$", tranformIdentifier, emptyJSON, format)
	default:
		return nil, errors.New("no Properties nor Items found for schema")
	}
	if err != nil {
		return nil, fmt.Errorf("failed initializing root transformer: %v", err)
	}

	if err := jsonschema.WalkRaw(schema, tr.walker); err != nil {
		return nil, err
	}
	return tr, nil
}
// Transform converts the provided input so that it matches the Transformer's JSON schema, using the transform
// sections in the schema. The input is handled as JSON or XML depending on how the Transformer was created.
//
// By default fields with no Transform section but with matching path and type are copied verbatim into the new
// JSON structure. Fields which are missing from the input are set to a default value in the output.
//
// Errors are returned for failures to perform operations but are not returned for empty fields which are either
// omitted from the output or set to an empty value.
//
// The output is validated against the schema as the last step. An error is also returned when the Transformer
// carries a format other than JSON or XML.
func (tr *Transformer) Transform(raw json.RawMessage) (json.RawMessage, error) {
	switch tr.format {
	case jsonInput:
		return tr.jsonTransform(raw)
	case xmlInput:
		return tr.xmlTransform(raw)
	default:
		return nil, fmt.Errorf("unknown transform type %s, must be 'JSON' or 'XML'", tr.format)
	}
}
// jsonTransform handles JSON input: it unmarshals raw, runs the result through the root transformer, marshals
// the transformed value back to JSON and validates that JSON against the schema.
// The marshalled JSON is returned only when validation reports it as valid.
func (tr *Transformer) jsonTransform(raw json.RawMessage) (json.RawMessage, error) {
	var parsed interface{}
	err := json.Unmarshal(raw, &parsed)
	if err != nil {
		return nil, fmt.Errorf("failed to parse input JSON: %v", err)
	}

	result, err := tr.root.transform(parsed, nil)
	if err != nil {
		return nil, fmt.Errorf("failed transformation: %v", err)
	}

	encoded, err := json.Marshal(result)
	if err != nil {
		return nil, fmt.Errorf("failed to JSON marsal transformed data: %v", err)
	}

	ok, err := tr.schema.Validate(encoded)
	switch {
	case err != nil:
		return nil, fmt.Errorf("input successfully transformed but did not match schema: %v", err)
	case !ok:
		return nil, errors.New("schema validation of the transformed result reports invalid")
	}
	return encoded, nil
}
// xmlTransform handles XML input: it parses raw with xmlquery, runs the parsed document through the root
// transformer, marshals the transformed value to JSON and validates that JSON against the schema.
// The marshalled JSON is returned only when validation reports it as valid.
func (tr *Transformer) xmlTransform(raw []byte) ([]byte, error) {
	doc, err := xmlquery.Parse(bytes.NewReader(raw))
	if err != nil {
		return nil, fmt.Errorf("failed to parse input XML: %v", err)
	}

	result, err := tr.root.transform(doc, nil)
	if err != nil {
		return nil, fmt.Errorf("failed transformation: %v", err)
	}

	encoded, err := json.Marshal(result)
	if err != nil {
		return nil, fmt.Errorf("failed to JSON marsal transformed data: %v", err)
	}

	ok, err := tr.schema.Validate(encoded)
	switch {
	case err != nil:
		return nil, fmt.Errorf("transformed result validation error: %v", err)
	case !ok:
		return nil, errors.New("schema validation of the transformed result reports invalid")
	}
	return encoded, nil
}
// findParent walks the instanceTransformer tree from tr.root and returns the transformer that is the parent of
// the element named by path.
//
// path must start with "$". Every segment between the root and the final one is followed in order: a "[*]"
// segment steps to parent.child(), any other segment steps to parent.selectChild(segment).
//
// An error is returned when path does not start with "$", when path has no segment after the root (so there is
// no parent to return), or when a step along the way yields no transformer.
func (tr *Transformer) findParent(path string) (instanceTransformer, error) {
	// Put array markers into their own segment so that "$.a[*].b" splits into "$", "a", "[*]", "b".
	segments := strings.Split(strings.Replace(path, "[", ".[", -1), ".")
	if segments[0] != "$" {
		// TODO (original author): this will probably choke on a root level array
		return nil, errors.New("paths must start with '$'")
	}
	if len(segments) < 2 {
		// Slicing segments[1:len-1] below would panic for the bare root path.
		return nil, fmt.Errorf("path %q has no parent", path)
	}

	parent := tr.root
	for _, seg := range segments[1 : len(segments)-1] {
		if parent == nil {
			return nil, fmt.Errorf("no transformer found while resolving the parent of %q", path)
		}
		if seg == "[*]" {
			parent = parent.child()
		} else {
			parent = parent.selectChild(seg)
		}
	}
	if parent == nil {
		// Returning a nil parent with a nil error would make the caller panic on first use.
		return nil, fmt.Errorf("no transformer found while resolving the parent of %q", path)
	}
	return parent, nil
}
// walker is the function handed to jsonschema.WalkRaw when a Transformer is built. For each path it reads the
// "type" of the raw schema value, creates the matching object, array or scalar transformer and adds it as a child
// of the transformer that findParent returns for the path.
// The resulting tree is what later performs the transform of incoming data.
func (tr *Transformer) walker(path string, value json.RawMessage) error {
	kind, err := jsonparser.GetString(value, "type")
	if err != nil {
		return fmt.Errorf("failed to extract instance type: %v", err)
	}

	// Pick the constructor matching the schema type; anything that is not an object or array is a scalar.
	build := func() (instanceTransformer, error) {
		switch kind {
		case "array":
			return newArrayTransformer(path, tr.transformIdentifier, value, tr.format)
		case "object":
			return newObjectTransformer(path, tr.transformIdentifier, value, tr.format)
		}
		return newScalarTransformer(path, tr.transformIdentifier, value, kind, tr.format)
	}
	node, err := build()
	if err != nil {
		return fmt.Errorf("failed to initialize transformer: %v", err)
	}

	parent, err := tr.findParent(path)
	if err != nil {
		return err
	}
	return parent.addChild(node)
}
// saveInTree stores value in tree at the location named by path, creating intermediate maps and slices as needed
// even if the parents do not exist yet.
//
// path is a dot separated list of field names with an optional leading "$" segment; a segment may carry array
// indexes such as "items[2]". A nil value is ignored and nothing is written. The function recurses one segment
// at a time: the final segment is written with saveLeaf, an indexed segment places a map inside the slice at
// that index via saveInSlice, and a plain segment descends into (or creates) a nested map.
//
// An error is returned when the path names no field (for example "$" alone), when an index cannot be parsed, or
// when an entry of tree that the path descends into holds something other than the map or slice required.
func saveInTree(tree map[string]interface{}, path string, value interface{}) error {
	if value == nil {
		return nil
	}

	splits := strings.Split(path, ".")
	if splits[0] == "$" {
		splits = splits[1:]
	}
	switch len(splits) {
	case 0:
		// Only the root marker was given; there is no field name to write to.
		return fmt.Errorf("path %q does not name a field", path)
	case 1:
		return saveLeaf(tree, splits[0], value)
	}
	remainder := strings.Join(splits[1:], ".")

	arraySplits := strings.Split(splits[0], "[")
	if len(arraySplits) != 1 { // the case of an array or nested arrays with an object in them
		name := arraySplits[0]
		var sValue []interface{}
		if rawSlice, ok := tree[name]; ok && rawSlice != nil {
			if sValue, ok = rawSlice.([]interface{}); !ok {
				return fmt.Errorf("value at %q is not a []interface{}", name)
			}
		}
		// The rest of the path is written into a fresh map placed at the indexed slot; saveInSlice carries
		// over the keys of a map that is already stored there.
		newTreeMap := make(map[string]interface{})
		newValue, err := saveInSlice(sValue, arraySplits[1:], newTreeMap)
		if err != nil {
			return err
		}
		tree[name] = newValue
		return saveInTree(newTreeMap, remainder, value)
	}

	var newTreeMap map[string]interface{}
	if existing, ok := tree[splits[0]]; !ok || existing == nil {
		newTreeMap = make(map[string]interface{})
	} else if newTreeMap, ok = existing.(map[string]interface{}); !ok {
		return fmt.Errorf("value at %q is not a map[string]interface{}", splits[0])
	}
	if newTreeMap == nil {
		// A nil map stored under the key cannot be written to; replace it with an empty one.
		newTreeMap = make(map[string]interface{})
	}
	tree[splits[0]] = newTreeMap
	return saveInTree(newTreeMap, remainder, value)
}
// saveLeaf saves a leaf value in tree under the single path segment given. If the segment specifies an array or
// a set of nested arrays (for example "tags[1]" or "grid[0][2]") the array items needed to reach the specified
// index are built by saveInSlice; new array items are created as nil and nested arrays are handled the same way.
//
// An error is returned when an index cannot be parsed or when tree already holds a value under the array name
// that is not a []interface{}.
func saveLeaf(tree map[string]interface{}, path string, value interface{}) error {
	arraySplits := strings.Split(path, "[")
	if len(arraySplits) == 1 {
		// No index in the segment: a plain field assignment.
		tree[path] = value
		return nil
	}

	name := arraySplits[0]
	var sValue []interface{}
	if rawSlice, ok := tree[name]; ok && rawSlice != nil {
		// Checked assertion: an unexpected existing value is reported instead of panicking.
		if sValue, ok = rawSlice.([]interface{}); !ok {
			return fmt.Errorf("value at %q is not a []interface{}", name)
		}
	}

	newValue, err := saveInSlice(sValue, arraySplits[1:], value)
	if err != nil {
		return err
	}
	tree[name] = newValue
	return nil
}
// saveInSlice stores value in current at the (possibly nested) index described by arraySplits and returns the
// updated slice.
//
// Each element of arraySplits is one index segment as left over from splitting a path on "[", for example "2]".
// The first segment indexes current; any further segments index slices nested at that position. current may be
// nil and is grown with nil entries until the index exists. At the final index, when both value and the entry
// already stored there are map[string]interface{}, keys of the stored map that are missing from value are copied
// into value before it is saved. On the way to a nested index, an entry that is not a []interface{} is replaced
// by a new slice.
//
// An error is returned when any index segment is not a non-negative integer.
func saveInSlice(current []interface{}, arraySplits []string, value interface{}) ([]interface{}, error) {
	index, err := strconv.Atoi(strings.Trim(arraySplits[0], "]"))
	if err != nil {
		return nil, fmt.Errorf("failed to determine index of %q", arraySplits[0])
	}
	if index < 0 {
		// A negative index would panic when allocating or indexing the slice.
		return nil, fmt.Errorf("failed to determine index of %q", arraySplits[0])
	}

	if current == nil {
		current = make([]interface{}, 0, index+1)
	}
	// fill up the slice slots with nil if the slice isn't the right size
	if missing := index + 1 - len(current); missing > 0 {
		current = append(current, make([]interface{}, missing)...)
	}

	if len(arraySplits) == 1 {
		// Last index segment: save the value here. As a special case, when both the new and the existing
		// values are maps, existing keys not present in the new value are carried over.
		if newValue, ok := value.(map[string]interface{}); ok {
			if oldValue, ok := current[index].(map[string]interface{}); ok {
				for k, v := range oldValue {
					if _, exists := newValue[k]; !exists {
						newValue[k] = v
					}
				}
			}
		}
		current[index] = value
		return current, nil
	}

	// Recurse into the nested slice; nested stays nil when the entry is not a slice yet.
	nested, _ := current[index].([]interface{})
	newValue, err := saveInSlice(nested, arraySplits[1:], value)
	if err != nil {
		return nil, err
	}
	current[index] = newValue
	return current, nil
}