-
Notifications
You must be signed in to change notification settings - Fork 70
/
GrammarUtils.java
118 lines (107 loc) · 4.33 KB
/
GrammarUtils.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/**
* Copyright (C) 2016-2021 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hotels.bdp.waggledance.mapping.service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
/**
 * Static helpers that relate Hive database name patterns to Waggle Dance federation prefixes.
 */
public final class GrammarUtils {

  private static final String OR_SEPARATOR = "|";
  private static final Splitter OR_SPLITTER = Splitter.on(OR_SEPARATOR);
  private static final Joiner OR_JOINER = Joiner.on(OR_SEPARATOR);

  /** Not instantiable: the class only exposes static methods. */
  private GrammarUtils() {}

  /**
   * Divides {@code pattern} into the leading part that covers {@code prefix} and the part that is left over.
   * <p>
   * If the pattern literally starts with the prefix, the prefix itself is the leading part. Otherwise progressively
   * shorter leading sub-patterns are tried: each candidate has every {@code *} expanded to {@code .*} and is then
   * evaluated as a regular expression against the whole prefix (so {@code .} stands for any single character).
   *
   * @param prefix Federation prefix to be covered
   * @param pattern A single database name pattern (no {@code |} alternatives)
   * @return A two element array {@code { leading sub-pattern, remaining pattern }}, or an empty array when no leading
   *         part of the pattern matches the prefix
   */
  @VisibleForTesting
  static String[] splitPattern(String prefix, String pattern) {
    // Fast path: the pattern spells out the prefix verbatim.
    if (pattern.startsWith(prefix)) {
      return new String[] { prefix, pattern.substring(prefix.length()) };
    }

    // Walk backwards through ever shorter candidates until one of them matches the prefix.
    String candidate = pattern;
    while (true) {
      if (prefix.matches(candidate.replace("*", ".*"))) {
        // A trailing * may also consume characters after the prefix, so it is kept at the head of the remainder.
        // Any other ending (including a single character dot) is fully used up by the prefix.
        int remainderStart = candidate.endsWith("*") ? candidate.length() - 1 : candidate.length();
        return new String[] { candidate, pattern.substring(remainderStart) };
      }

      // The trailing wildcard has just been tried without success; drop it before looking for the next one.
      if (candidate.endsWith("*") || candidate.endsWith(".")) {
        candidate = candidate.substring(0, candidate.length() - 1);
      }

      int starAt = candidate.lastIndexOf('*');
      int dotAt = candidate.lastIndexOf('.');
      if (starAt > dotAt) {
        // The right-most wildcard is a *: cut the candidate immediately after it.
        candidate = candidate.substring(0, starAt + 1);
      } else if (dotAt >= 0) {
        // A dot is still present: shorten the candidate by a single character and retry.
        candidate = candidate.substring(0, candidate.length() - 1);
      } else {
        // No wildcard left, nothing else can match.
        return new String[] {};
      }
    }
  }

  /**
   * Picks the Waggle Dance database prefixes that the given pattern could possibly match and works out, per prefix,
   * which pattern remains once the prefix has been accounted for.
   * <p>
   * Pattern matching follows {@link org.apache.hadoop.hive.metastore.ObjectStore#getDatabases(String)}.
   * <p>
   * Hive DDL patterns are described in the
   * <a href="https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-Show">Language
   * Manual</a>.
   *
   * @param prefixes Federation prefixes
   * @param dbPatterns Database name patterns, alternatives separated by {@code |}; {@code null} and {@code "*"} select
   *          every prefix and are passed through unchanged as the value
   * @return A map of possible database prefixes to be used for interrogation with their pattern
   */
  public static Map<String, String> selectMatchingPrefixes(Set<String> prefixes, String dbPatterns) {
    Map<String, String> selected = new HashMap<>();

    boolean matchesEverything = dbPatterns == null || "*".equals(dbPatterns);
    if (matchesEverything) {
      for (String prefix : prefixes) {
        selected.put(prefix, dbPatterns);
      }
      return selected;
    }

    // Gather, for every prefix, the remainders of all alternatives that can match it.
    Map<String, List<String>> remaindersByPrefix = new HashMap<>();
    for (String alternative : OR_SPLITTER.split(dbPatterns)) {
      for (String prefix : prefixes) {
        String[] parts = splitPattern(prefix, alternative);
        if (parts.length != 0) {
          remaindersByPrefix.computeIfAbsent(prefix, unused -> new ArrayList<>()).add(parts[1]);
        }
      }
    }

    // Collapse each list of remainders back into a single "|" separated pattern.
    for (Entry<String, List<String>> remainders : remaindersByPrefix.entrySet()) {
      String joined = OR_JOINER.join(remainders.getValue());
      selected.put(remainders.getKey(), joined);
    }
    return selected;
  }
}