In [9]:
# Two tokenized example sentences that are compared word by word.
words1 = "great acting life skills".split()
words2 = "fine drama talent health".split()

# Each entry lists two words that count as synonyms of each other.
pairs = [
    ["great", "good"], ["fine", "good"],
    ["acting", "drama"], ["skills", "talent"],
]

# Expected similarity for the example above: 0.75 (3 matched words out of 8 in total: 2 * 3 / 8)

In [10]:
class UnionFind:
    """Disjoint-set forest over hashable items.

    ``father`` maps every registered item to its parent. An item that maps
    to itself is the representative (root) of its set.
    """

    def __init__(self):
        self.father = {}

    def add(self, x):
        """Register ``x`` as a singleton set; a no-op if ``x`` is already known."""
        self.father.setdefault(x, x)

    def find(self, x):
        """Return the representative of the set that contains ``x``.

        Every item passed on the way up is re-pointed directly at the
        representative (path compression). Raises ``KeyError`` if ``x`` was
        never added.
        """
        # Climb to the root, remembering every non-root item on the path.
        visited = []
        node = x
        parent = self.father[node]
        while parent != node:
            visited.append(node)
            node = parent
            parent = self.father[node]

        # Flatten the path so later lookups reach the root in one step.
        for item in visited:
            self.father[item] = node
        return node

    def merge(self, x, y):
        """Union the sets of ``x`` and ``y``; y's root is attached under x's root."""
        keep, absorb = self.find(x), self.find(y)
        if keep == absorb:
            return
        self.father[absorb] = keep

    def is_connected(self, x, y):
        """Return True when ``x`` and ``y`` currently share a representative."""
        root_of_x = self.find(x)
        root_of_y = self.find(y)
        return root_of_x == root_of_y



In [11]:
def get_LCS(words1, words2, uf):
    """Return the length of the longest common subsequence of two word lists.

    Two words count as a match when ``uf.is_connected(a, b)`` is true.
    Only two DP rows are kept; they are swapped after each word of
    ``words1`` instead of storing the full table.
    """
    width = len(words2) + 1
    previous = [0] * width
    current = [0] * width

    for left in words1:
        for col, right in enumerate(words2, start=1):
            if uf.is_connected(left, right):
                current[col] = previous[col - 1] + 1
            else:
                current[col] = max(previous[col], current[col - 1])
        # The row just filled becomes the reference row for the next word;
        # index 0 is never written, so it stays 0 in both buffers.
        previous, current = current, previous

    return previous[-1]

def get_similarity(words1, words2, pairs):
    """Score how similar two sentences are, treating synonyms as equal words.

    Words are grouped with a union-find, so synonymy is transitive: if
    ``a ~ b`` and ``b ~ c`` are both listed in ``pairs``, then ``a`` matches
    ``c``. The score is ``2 * LCS / (len(words1) + len(words2))``, where LCS
    is the longest common subsequence length under that matching.

    Args:
        words1: First sentence as a sequence of words.
        words2: Second sentence as a sequence of words.
        pairs: Iterable of two-item sequences, each naming two synonyms.

    Returns:
        A float between 0.0 and 1.0. Two empty sentences score 1.0 (nothing
        differs), the same convention ``difflib.SequenceMatcher.ratio()``
        uses for this formula, rather than raising ``ZeroDivisionError``.
    """
    total_words = len(words1) + len(words2)
    if total_words == 0:
        # The denominator would be zero; two empty sentences are identical.
        return 1.0

    uf = UnionFind()

    # Every sentence word must be registered, even if it has no synonym,
    # because the LCS step looks each word up in the union-find.
    for word in words1:
        uf.add(word)
    for word in words2:
        uf.add(word)

    # Synonyms may be words that appear in neither sentence (bridge words),
    # so register both sides before merging them.
    for pair in pairs:
        first, second = pair[0], pair[1]
        uf.add(first)
        uf.add(second)
        uf.merge(first, second)

    matched = get_LCS(words1, words2, uf)
    return matched * 2 / total_words

In [12]:
# Complexity below uses n = len(words1), m = len(words2), p = len(pairs), and
# assumes each union-find operation is effectively constant time.
get_similarity(words1, words2, pairs) # time: O(n * m + p), space: O(n + m + p)

0.75