### [Add Bold Tag in String](https://leetcode.com/problems/add-bold-tag-in-string/)

Given a string `s` and a list of strings `dict`, add a closed pair of bold tags `<b>` and `</b>` to wrap the substrings of `s` that exist in `dict`. If two such substrings overlap, wrap them together with only one pair of bold tags. Also, if two substrings wrapped by bold tags are consecutive, combine them into a single bolded run.

Example 1:
```
Input: 
s = "abcxyz123"
dict = ["abc","123"]

Output:
"<b>abc</b>xyz<b>123</b>"
```

Example 2:
```
Input: 
s = "aaabbcc"
dict = ["aaa","aab","bc"]
Output:
"<b>aaabbc</b>c"
```

Note:
- The given dict won't contain duplicates, and its length won't exceed 100.
- All the strings in input have length in range [1, 1000].


In [1]:
class Solution(object):
    """Two approaches to LeetCode 616, "Add Bold Tag in String"."""

    BOLD_START = "<b>"
    BOLD_END = "</b>"

    def addBoldTag(self, s, word_list):
        """Wrap every part of s covered by a word from word_list in <b>...</b>.

        Overlapping and adjacent matches end up inside a single pair of tags.

        :type s: str
        :type word_list: List[str]
        :rtype: str
        """
        # Approach: rather than generating substrings of s and looking them
        # up in a word set, compare each word against s at every starting
        # position and mark the characters it covers as bold.
        #
        # Complexity:
        #    Space: O(N)
        #    Time:  O(N * W),
        #           where N = length of s, W = sum of length of words in word_list

        # edge cases
        if not s:
            return ""

        if not word_list:
            return s

        N = len(s)
        bold = [False] * N

        # bold_until is the furthest (exclusive) end of any match that starts
        # at or before the current position; position i is bold while it lies
        # before that end.
        bold_until = 0
        for i in range(N):
            for word in word_list:
                # startswith with an offset avoids copying s[i:] each time.
                if s.startswith(word, i):
                    bold_until = max(bold_until, i + len(word))
            bold[i] = i < bold_until

        # Now generate the bold tags from the markers: open a tag where a
        # bold run begins and close it where the run ends.
        pieces = []
        for i in range(N):
            if bold[i] and (i == 0 or not bold[i - 1]):
                pieces.append(self.BOLD_START)

            pieces.append(s[i])

            if bold[i] and (i == N - 1 or not bold[i + 1]):
                pieces.append(self.BOLD_END)

        return "".join(pieces)

    def addBoldTagWrongSolution(self, s, word_list):
        """Interval-merging variant of addBoldTag.

        The method name is kept for backward compatibility. The earlier
        version was wrong because its overlap test mixed inclusive and
        exclusive end indices and therefore merged matches separated by one
        unbolded character; that off-by-one is fixed here.

        :type s: str
        :type word_list: List[str]
        :rtype: str
        """
        # Matches are recorded as half-open intervals [start, end):
        #
        #   s = "abcxyz123", words = ["abc", "123"]
        #     abc -> [0, 3)
        #     123 -> [6, 9)
        #
        #   s = "aaabbcc", words = ["aaa", "aab", "bc"]
        #     aaa -> [0, 3)
        #     aab -> [1, 4)
        #     bc  -> [4, 6)
        #     merged -> [0, 6)
        #
        # Intervals are merged using the given conditions:
        #   - merge overlapping substrings
        #   - merge adjacent substrings if both need to be tagged as bold
        #
        # After merging, the text between consecutive intervals (and the
        # tail after the last interval) is copied through unchanged.

        # edge cases
        #   empty string
        #   empty word list
        #   word list with no matching substring (handled further below)
        if not s:
            return ""

        if not word_list:
            return s

        # A set gives O(1) membership checks for candidate substrings.
        wordset = set(word_list)
        # No match can be longer than the longest word, so bound the search.
        max_len = max(len(word) for word in wordset)

        # Merged intervals, kept sorted by start because i only increases.
        intervals = []
        N = len(s)

        for i in range(N):
            for j in range(i + 1, min(N, i + max_len) + 1):
                if s[i:j] not in wordset:
                    continue
                # With exclusive ends, [i, j) overlaps or touches the last
                # interval exactly when i <= its end. A gap of even one
                # character (i == end + 1) must start a new interval.
                if intervals and i <= intervals[-1][1]:
                    intervals[-1][1] = max(intervals[-1][1], j)
                else:
                    intervals.append([i, j])

        if not intervals:
            # no valid substring found in wordset
            # return the string s as is.
            return s

        pieces = []
        prev_end = 0
        for cur_start, cur_end in intervals:
            # Unbolded gap before this interval, then the bolded interval.
            pieces.append(s[prev_end:cur_start])
            pieces.append(self.BOLD_START + s[cur_start:cur_end] + self.BOLD_END)
            prev_end = cur_end

        # make sure to capture the tail end.
        pieces.append(s[prev_end:])

        return "".join(pieces)

In [5]:
# Test fixtures: each case pairs the addBoldTag arguments ("input") with the
# string the solution is expected to return ("output").
tests = {
    "test": [
        {"input": {"s": text, "word_list": words}, "output": expected}
        for text, words, expected in (
            # isolated single-character matches, plus a word that never occurs
            ("abcdef", ["a", "c", "e", "g"], "<b>a</b>b<b>c</b>d<b>e</b>f"),
            # two separate matches
            ("abcxyz123", ["abc", "123"], "<b>abc</b>xyz<b>123</b>"),
            # overlapping and adjacent matches collapse into one bold run
            ("aaabbcc", ["aaa", "aab", "bc"], "<b>aaabbc</b>c"),
            # one word covers the whole string
            ("aaabbcc", ["aaa", "aab", "bc", "aaabbcc"], "<b>aaabbcc</b>"),
        )
    ]
}

In [6]:
# Run addBoldTag over every recorded case and compare with the expected output.
solution = Solution()
for test in tests["test"]:
    # The "input" keys ("s", "word_list") match addBoldTag's parameter names,
    # so the dict can be passed straight through as keyword arguments.
    actual = solution.addBoldTag(**test["input"])
    assert actual == test["output"]