diff --git a/docs/assets/images/QuickFind.png b/docs/assets/images/QuickFind.png new file mode 100644 index 00000000..faf50096 Binary files /dev/null and b/docs/assets/images/QuickFind.png differ diff --git a/docs/assets/images/WeightedUnion.png b/docs/assets/images/WeightedUnion.png new file mode 100644 index 00000000..a2b1a79c Binary files /dev/null and b/docs/assets/images/WeightedUnion.png differ diff --git a/docs/assets/images/WeightedUnionLeetCode.png b/docs/assets/images/WeightedUnionLeetCode.png new file mode 100644 index 00000000..e3c49766 Binary files /dev/null and b/docs/assets/images/WeightedUnionLeetCode.png differ diff --git a/src/main/java/dataStructures/disjointSet/README.md b/src/main/java/dataStructures/disjointSet/README.md index 37548571..581f3709 100644 --- a/src/main/java/dataStructures/disjointSet/README.md +++ b/src/main/java/dataStructures/disjointSet/README.md @@ -3,11 +3,14 @@ ## Background A disjoint-set structure also known as a union-find or merge-find set, is a data structure -keeps track of a partition of a set into disjoint (non-overlapping) subsets. In CS2040s, this +keeps track of a partition of a set into disjoint (non-overlapping) subsets. + +In CS2040s, this is introduced in the context of checking for dynamic connectivity. For instance, Kruskal's algorithm -in graph theory to find minimum spanning tree of the graph utilizes disjoint set to efficiently -query if there exists a path between 2 nodes.
-It supports 2 main operations: +in graph theory to find the minimum spanning tree of a graph utilizes a disjoint set to efficiently +query if there already exists a path between 2 nodes. + +Generally, there are 2 main operations: 1. Union: Join two subsets into a single subset 2. Find: Determine which subset a particular element is in. In practice, this is often done to check @@ -17,12 +20,26 @@ The Disjoint Set structure is often introduced in 3 parts, with each iteration b previous either in time or space complexity (or both). More details can be found in the respective folders. Below is a brief overview: -1. Quick Find - Elements are assigned a component identity. +1. **Quick Find** - Elements are assigned a component identity. Querying for connectivity and updating usually tracked with an internal array. -2. Quick Union - Component an element belongs to is now tracked with a tree structure. Nothing to enforce +2. **Quick Union** - Component an element belongs to is now tracked with a tree structure. Nothing to enforce a balanced tree and hence complexity does not necessarily improve - Note, this is not implemented but details can be found under weighted union folder. -3. Weighted Union - Same idea of using a tree, but constructed in a way that the tree is balanced, leading to improved +3. **Weighted Union** - Same idea of using a tree, but constructed in a way that the tree is balanced, leading to improved complexities. Can be further augmented with path compression. + +## Applications +Because of its efficiency and ease of implementation, Disjoint Set structures are widely used in practice: +1. As mentioned, it is often used as a helper structure for Kruskal's MST algorithm +2. It can be used in the context of network connectivity + - Managing a network of computers + - Or even analysing social networks, finding communities and determining if two users are connected through a chain +3.
Can be part of clustering algorithms to group data points based on similarity - useful for ML +4. It can be used to detect cycles in dependency graphs, e.g, software dependency management systems +5. It can be used for image processing, in labelling different connected components of an image + +## Notes +Disjoint Set is a data structure designed to keep track of a set of elements partitioned into a number of +non-overlapping subsets. It is not suited for handling duplicates and so our implementation ignores duplicates. diff --git a/src/main/java/dataStructures/disjointSet/quickFind/DisjointSet.java b/src/main/java/dataStructures/disjointSet/quickFind/DisjointSet.java new file mode 100644 index 00000000..f5a5dce8 --- /dev/null +++ b/src/main/java/dataStructures/disjointSet/quickFind/DisjointSet.java @@ -0,0 +1,99 @@ +package dataStructures.disjointSet.quickFind; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Implementation of quick-find structure; Turns a list of objects into a data structure that supports union operations. + * Every stored object is mapped to an integer component identity, and two objects are in the same component + * exactly when their identities are equal. + * + * @param <T> generic type of object to be stored + */ +public class DisjointSet { + private final Map identifier; + + /** + * Basic constructor to create an empty Disjoint Set data structure. + */ + public DisjointSet() { + identifier = new HashMap<>(); + } + + /** + * Constructor to initialize Disjoint Set with a known list of objects; each object is given the map size at the + * time it is inserted as its component identity. + * NOTE(review): if the list repeats an object, the repeat overwrites that object's identity with the unchanged + * map size, which the next distinct object then also receives, so the two end up in the same component. + * Confirm callers always pass distinct objects. + * @param objects the objects to store initially + */ + public DisjointSet(List objects) { + identifier = new HashMap<>(); + int size = objects.size(); + for (int i = 0; i < size; i++) { + // internally, component identity is tracked with integers + identifier.put(objects.get(i), identifier.size()); // each obj initialize with a unique identity using size; + } + } + + public int size() { + return identifier.size(); + } + + /** + * Adds an object into the structure, using the current map size as its component identity. + * NOTE(review): adding an object that is already stored overwrites its identity without growing the map, so the + * next newly added object receives the same identity. Confirm duplicates are never added.
+ * @param obj + */ + public void add(T obj) { + identifier.put(obj, identifier.size()); + } + + /** + * Checks if object a and object b are in the same component. + * @param a the first object + * @param b the second object + * @return true if both objects are stored and have equal component identities; false otherwise, + * including when either object is not stored + */ + public boolean find(T a, T b) { + if (!identifier.containsKey(a) || !identifier.containsKey(b)) { // key(s) does not even exist + return false; + } + return identifier.get(a).equals(identifier.get(b)); + } + + /** + * Merge the components of object a and object b by re-labelling every object that carries a's component + * identity with b's component identity. Does nothing if either object is not stored or if both already + * share an identity. + * @param a an object in the component that gets re-labelled + * @param b an object in the component whose identity is kept + */ + public void union(T a, T b) { + if (!identifier.containsKey(a) || !identifier.containsKey(b)) { // key(s) does not even exist; do nothing + return; + } + + if (identifier.get(a).equals(identifier.get(b))) { // already same; do nothing + return; + } + + int compOfA = identifier.get(a); + int compOfB = identifier.get(b); + for (T obj : identifier.keySet()) { + if (identifier.get(obj).equals(compOfA)) { + identifier.put(obj, compOfB); + } + } + } + + /** + * Retrieves all elements that are in the same component as the specified object. Not a typical operation + * but here to illustrate other use case. + * @param a the object whose component is retrieved + * @return a new list of every stored object in the same component as a (including a itself); + * empty if a is not stored + */ + public List retrieveFromSameComponent(T a) { + List ret = new ArrayList<>(); + for (T obj : identifier.keySet()) { + if (find(a, obj)) { + ret.add(obj); + } + } + return ret; + } +} diff --git a/src/main/java/dataStructures/disjointSet/quickFind/README.md b/src/main/java/dataStructures/disjointSet/quickFind/README.md index df11348b..cf1b6a7f 100644 --- a/src/main/java/dataStructures/disjointSet/quickFind/README.md +++ b/src/main/java/dataStructures/disjointSet/quickFind/README.md @@ -2,11 +2,18 @@ ## Background Every object will be assigned a component identity. The implementation of Quick Find often involves -an underlying array that tracks the component identity of each object. +an underlying array or hash map that tracks the component identity of each object.
+Our implementation uses a hash map (to easily handle the case when objects aren't integers). + +
+ +
+ Credits: CS2040s Lecture Slides +
### Union Between the two components, decide on the component d, to represent the combined set. Let the other -component's identity be d'. Simply iterate over the component identifier array, and for any element with +component's identity be d'. Simply iterate over the component identifier array / map, and for any element with identity d', assign it to d. ### Find diff --git a/src/main/java/dataStructures/disjointSet/quickFind/generalised/QuickFind.java b/src/main/java/dataStructures/disjointSet/quickFind/generalised/QuickFind.java deleted file mode 100644 index cd75ab6c..00000000 --- a/src/main/java/dataStructures/disjointSet/quickFind/generalised/QuickFind.java +++ /dev/null @@ -1,87 +0,0 @@ -package dataStructures.disjointSet.quickFind.generalised; - -import java.util.ArrayList; -import java.util.List; - -/** - * Implementation of quick-find structure; Turns a list of objects into a data structure that supports union operations - * - * @param generic type of object to be stored - */ -public class QuickFind { - private final List objects; - private final List identity; - - /** - * TODO documentation - */ - public QuickFind() { - this.objects = new ArrayList<>(); - this.identity = new ArrayList<>(); - } - - /** - * TODO documentation - * - * @param input - */ - public QuickFind(List input) { - this.objects = input; - this.identity = new ArrayList<>(); - for (int i = 0; i < input.size(); i++) { - this.identity.add(i); - } - } - - /** - * Adds a new item into the existing list. - * - * @param item to be added - */ - public void add(T item) { - objects.add(item); - identity.add(identity.size()); // identity of the new item - } - - /** - * Merges the objects in two different components identified by a member from each. 
- * - * @param objOne object in one of the components - * @param objTwo object in another component - */ - public void union(T objOne, T objTwo) { - if (!objects.contains(objOne) || !objects.contains(objTwo)) { - System.out.println("One or more of the objects do not exist!"); - return; - } - int idxOne = objects.indexOf(objOne); - int compOne = identity.get(idxOne); - int idxTwo = objects.indexOf(objTwo); - int compTwo = identity.get(idxTwo); - - int size = this.objects.size(); - for (int i = 0; i < size; i++) { - if (identity.get(i) == compOne) { - identity.set(i, compTwo); - } - } - } - - /** - * Retrieves all elements in the same component as the given object. - * - * @param objOne - * @return list of elements. - */ - public List retrieveComponent(T objOne) { - int idx = objects.indexOf(objOne); - int comp = identity.get(idx); - List ret = new ArrayList<>(); - for (int i = 0; i < objects.size(); i++) { - if (identity.get(i) == comp) { - ret.add(objects.get(i)); - } - } - return ret; - } -} diff --git a/src/main/java/dataStructures/disjointSet/quickFind/simplified/QuickFind.java b/src/main/java/dataStructures/disjointSet/quickFind/simplified/QuickFind.java deleted file mode 100644 index bb58478c..00000000 --- a/src/main/java/dataStructures/disjointSet/quickFind/simplified/QuickFind.java +++ /dev/null @@ -1,59 +0,0 @@ -package dataStructures.disjointSet.quickFind.simplified; - -import java.util.ArrayList; -import java.util.List; - -/** - * Simplified implementation of quick-find where the objects are integers range from 1-n. - */ -public class QuickFind { - private final int[] identity; - private final int size; - - /** - * TODO documentation - * - * @param size - */ - public QuickFind(int size) { - // we will ignore index 0. So index 1 corresponds to element 1, index 2 corresponds with element 2 and so on. 
- this.identity = new int[size + 1]; - this.size = size; - for (int i = 0; i < size + 1; i++) { - this.identity[i] = i; - } - } - - /** - * Forms a union between elements of two different groups merging all the elements with the same identity as - * the former element to that of the latter element. - * - * @param fr identity of the first element - * @param to identity of the second element - */ - public void union(int fr, int to) { - int updateComp = identity[fr]; - for (int i = 1; i < size + 1; i++) { - if (identity[i] == updateComp) { - identity[i] = identity[to]; // updates element i's identity to that of - } - } - } - - /** - * Retrieves all the elements in the component whose identity is the same as that of the given element. - * - * @param element whose component we would lie to find. - * @return all elements in the component - */ - public List retrieveComponent(int element) { - int id = identity[element]; - List ret = new ArrayList<>(); - for (int i = 1; i < this.size + 1; i++) { - if (identity[i] == id) { - ret.add(i); - } - } - return ret; - } -} diff --git a/src/main/java/dataStructures/disjointSet/weightedUnion/DisjointSet.java b/src/main/java/dataStructures/disjointSet/weightedUnion/DisjointSet.java new file mode 100644 index 00000000..929028da --- /dev/null +++ b/src/main/java/dataStructures/disjointSet/weightedUnion/DisjointSet.java @@ -0,0 +1,123 @@ +package dataStructures.disjointSet.weightedUnion; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.HashMap; + +/** + * Implementation of weighted-union structure; + * Turns a list of objects into a data structure that supports union operations. + * Each object is mapped to its parent (a root is its own parent) and each root to the size of its tree. + * 

+ * Note that implementation below includes path compression. Refer to README for more details + * + * @param generic type of object to be stored + */ +public class DisjointSet { + private final Map parents; + private final Map size; + + /** + * Basic constructor to initialize an empty Disjoint Set structure using weighted union concept. + */ + public DisjointSet() { + parents = new HashMap<>(); + size = new HashMap<>(); + } + + /** + * Constructor to initialize Disjoint Set structure with a known list of objects. + * @param objects the objects to store initially; each starts as the root of its own tree of size 1 + */ + public DisjointSet(List objects) { + parents = new HashMap<>(); + size = new HashMap<>(); + for (int i = 0; i < objects.size(); i++) { + T obj = objects.get(i); + parents.put(obj, obj); // initially, every object forms a tree, with itself as the root + size.put(obj, 1); // each tree has size 1 at the start + } + } + + /** + * Internal helper method to find the root (identifier) of an object. Note that path compression has been included: + * every object visited on the way up is re-pointed to its grandparent. + * A point of concern might be that performing path compression would require updating the sizes tracked by each node + * for consistency's sake. But skipping that update does not affect correctness of the algorithm, + * because union only reads the size recorded at the two roots + * and shifting descendants around does not change how many objects hang under a root. + * NOTE(review): obj is not checked for membership. For an object that was never added, parents.get returns null, + * the loop stores a null parent for it and the next loop test dereferences null. This looks like a + * NullPointerException for unknown objects; confirm and guard if unintended. + * @param obj the object whose root is wanted + * @return the root of the tree containing obj + */ + private T findRoot(T obj) { + while (!obj.equals(parents.get(obj))) { + T parent = parents.get(obj); + // START OF PATH COMPRESSION + T grandParent = parents.get(parent); + parents.put(obj, grandParent); // powerful one-liner to reduce the height of trees every traversal + // END + obj = parent; + } + return obj; + } + + public int size() { + return parents.size(); + } + + /** + * Adds an object into the structure as the root of its own tree of size 1. + * NOTE(review): adding an object that is already stored resets its parent to itself and its recorded size to 1, + * regardless of the tree it currently belongs to. Confirm duplicates are never added. + * @param obj the object to add + */ + public void add(T obj) { + parents.put(obj, obj); + size.put(obj, 1); + } + + /** + * Checks if object a and object b are in the same component, i.e. whether findRoot yields equal roots for both.
+ * @param a + * @param b + * @return + */ + public boolean find(T a, T b) { + T rootOfA = findRoot(a); + T rootOfB = findRoot(b); + return rootOfA.equals(rootOfB); + } + + /** + * Merge the components of object a and object b: the root of the smaller tree becomes a child of the root of the + * larger tree (on a tie, b's root is placed under a's root) and the surviving root's size is updated. + * NOTE(review): there is no check for a and b already sharing a root. In that case the else branch adds the + * root's size to itself, doubling the recorded size. Confirm and guard if unintended. + * @param a an object in the first component + * @param b an object in the second component + */ + public void union(T a, T b) { + T rootOfA = findRoot(a); + T rootOfB = findRoot(b); + int sizeA = size.get(rootOfA); + int sizeB = size.get(rootOfB); + + if (sizeA < sizeB) { + parents.put(rootOfA, rootOfB); // update root A to be child of root B + size.put(rootOfB, size.get(rootOfB) + size.get(rootOfA)); // update size of bigger tree + } else { + parents.put(rootOfB, rootOfA); // update root B to be child of root A + size.put(rootOfA, size.get(rootOfA) + size.get(rootOfB)); // update size of bigger tree + } + } + + /** + * Retrieves all elements that are in the same component as the specified object. Not a typical operation + * but here to illustrate other use case. + * @param a the object whose component is retrieved + * @return a new list of every stored object that has the same root as a (including a itself) + */ + public List retrieveFromSameComponent(T a) { + List ret = new ArrayList<>(); + for (T obj : parents.keySet()) { + if (find(a, obj)) { + ret.add(obj); + } + } + return ret; + } +} diff --git a/src/main/java/dataStructures/disjointSet/weightedUnion/README.md b/src/main/java/dataStructures/disjointSet/weightedUnion/README.md index cc9be4b4..42e50a54 100644 --- a/src/main/java/dataStructures/disjointSet/weightedUnion/README.md +++ b/src/main/java/dataStructures/disjointSet/weightedUnion/README.md @@ -4,15 +4,15 @@ Here, we consider a completely different approach. We consider the use of trees. Every element can be thought of as a tree node and starts off in its own component. Under this representation, it is likely that at any given point, we might have a forest of trees, and that's perfectly fine. The root node of each tree -simply represents the component / set of all elements in the same set.
+simply represents the identity / is a representative of all elements in the same component.
Note that the trees here are not necessarily binary trees. In fact, more often than not, we will have nodes with multiple children nodes. ### Union Between the two components, decide on the component to represent the combined set as before. Now, union is simply assigning the root node of one tree to be the child of the root node of another. Hence, its name. -One thing to note is that to identify the component of the object involves traversing to the root node of the -tree. +Identifying the component of the object involves traversing to the root node of the tree. Also, note that +union operations **can result in a forest**. ### Find For each of the node, we traverse up the tree from the current node until the root. Check if the @@ -26,13 +26,39 @@ are balanced. **Space**: O(n), implementation still involves wrapping the n elements with some structure / wrapper (e.g. Node class). # Weighted Union + ## Background Now, we improve upon the Quick Union structure by ensuring trees constructed are 'balanced'. Balanced trees have a nice property that the height of the tree will be upper-bounded by O(log(n)). This considerably speeds up Union operations.
-We additionally track the size of each tree and ensure that whenever there is a union between 2 elements, the smaller -tree will be the child of a larger tree. -It can be mathematically (induction) shown the height of the tree is bounded by O(log(n)). +We additionally track the size of each tree and ensure that whenever there is a union between 2 elements, **the smaller +tree becomes a child of the larger tree.** +It can be mathematically shown the height of the tree is bounded by O(log(n)). + +

+ +
+ Credits: CS2040s Lecture Slides +
+ +### Intuition - Why It Works +First, it is crucial to know that Weighted Union's efficiency relies on careful **construction** of the trees.
+Every element / object starts off in its own tree (i.e. its own component). When two components are merged, the +objects of the smaller tree become part of the larger tree (by setting the root node of the smaller tree as a child of the larger tree's root). +
Note that if the trees are of the same size, it does not matter which root becomes the child. + +Notice that an element only moves one level deeper when its tree is merged into one at least as large, i.e. when the size of the tree containing it at least doubles. Working on this intuition, one can show +(by induction) that a tree of height h has at least 2^h elements. Consequently, +**a tree of size n has height at most log n**. + +### Implementation Details +The concept introduces the idea of constructing trees and forests and certainly, one can similarly implement a +Node wrapper class to represent objects as nodes in a tree.
+But notice that the operations only need knowledge of the parent node and the size of the tree +(which is tracked by the root). In other words, using internal lists and arrays to track is sufficient to +simulate the construction of trees. + +Our implementation does this. ## Complexity Analysis **Time**: O(log(n)) for Union and Find operations. @@ -52,3 +78,9 @@ Interested readers can find out more [here](https://dl.acm.org/doi/pdf/10.1145/3 **Space**: O(n) ## Notes +### Sample Demo - LeetCode 684: Redundant Connections +The 'objects' in the question are given to be integers. Using int arrays instead of HashMap mapping in our +implementation would suffice. But below uses the code exactly from our implementation to show its versatility. + + + diff --git a/src/test/java/dataStructures/disjointSet/quickFind/DisjointSetTest.java b/src/test/java/dataStructures/disjointSet/quickFind/DisjointSetTest.java new file mode 100644 index 00000000..7d0c1695 --- /dev/null +++ b/src/test/java/dataStructures/disjointSet/quickFind/DisjointSetTest.java @@ -0,0 +1,78 @@ +package dataStructures.disjointSet.quickFind; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; + +public class DisjointSetTest { + @Test + public void construct_shouldCorrectlyInitializeEmpty() { + DisjointSet ds = new DisjointSet<>(); + Assert.assertEquals(ds.size(), 0); + } + + @Test + public void construct_shouldCorrectlyInitializeNonEmpty() { + List lst = Arrays.asList("andre", "chang xian", "jun neng", "kai ting", "shu heng"); + + DisjointSet ds = new DisjointSet<>(lst); + Assert.assertEquals(ds.size(), 5); + + Assert.assertFalse(ds.find("andre", "kai ting")); + } + + @Test + public void find_shouldCorrectlyFindItself() { + List lst = Arrays.asList("andre", "chang xian", "jun neng"); + + DisjointSet ds = new DisjointSet<>(lst); + Assert.assertTrue(ds.find("chang xian", "chang xian")); + } + + @Test + public void 
union_shouldCorrectlyUpdate() { + List lst = Arrays.asList("andre", "chang xian", "jun neng", "kai ting", "shu heng"); + + DisjointSet ds = new DisjointSet<>(lst); + + Assert.assertFalse(ds.find("andre", "kai ting")); + + ds.union("andre", "kai ting"); + Assert.assertTrue(ds.find("andre", "kai ting")); + Assert.assertFalse(ds.find("andre", "chang xian")); + Assert.assertFalse(ds.find("andre", "shu heng")); + Assert.assertFalse(ds.find("jun neng", "kai ting")); + } + + @Test + public void retrieve_shouldCorrectlyRetrieveComponents() { + List lst = Arrays.asList("andre", "chang xian", "jun neng", "kai ting", "shu heng", "seth", "gilbert"); + + DisjointSet ds = new DisjointSet<>(lst); + ds.union("andre", "kai ting"); + ds.union("chang xian", "jun neng"); + ds.union("jun neng", "gilbert"); + ds.union("chang xian", "seth"); + + List resultA = ds.retrieveFromSameComponent("kai ting"); + Collections.sort(resultA); + List expectedA = Arrays.asList("andre", "kai ting"); + Collections.sort(expectedA); + Assert.assertEquals(expectedA, resultA); + + List resultB = ds.retrieveFromSameComponent("gilbert"); + Collections.sort(resultB); + List expectedB = Arrays.asList("chang xian", "jun neng", "seth", "gilbert"); + Collections.sort(expectedB); + Assert.assertEquals(expectedB, resultB); + + List resultC = ds.retrieveFromSameComponent("shu heng"); + Collections.sort(resultC); + List expectedC = Arrays.asList("shu heng"); + Collections.sort(expectedC); + Assert.assertEquals(expectedC, resultC); + } +} diff --git a/src/test/java/dataStructures/disjointSet/weightedUnion/DisjointSetTest.java b/src/test/java/dataStructures/disjointSet/weightedUnion/DisjointSetTest.java new file mode 100644 index 00000000..4e923091 --- /dev/null +++ b/src/test/java/dataStructures/disjointSet/weightedUnion/DisjointSetTest.java @@ -0,0 +1,78 @@ +package dataStructures.disjointSet.weightedUnion; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.junit.Assert; 
+import org.junit.Test; + +public class DisjointSetTest { + @Test + public void construct_shouldCorrectlyInitializeEmpty() { + DisjointSet ds = new DisjointSet<>(); + Assert.assertEquals(ds.size(), 0); + } + + @Test + public void construct_shouldCorrectlyInitializeNonEmpty() { + List lst = Arrays.asList("andre", "chang xian", "jun neng", "kai ting", "shu heng"); + + DisjointSet ds = new DisjointSet<>(lst); + Assert.assertEquals(ds.size(), 5); + + Assert.assertFalse(ds.find("andre", "kai ting")); + } + + @Test + public void find_shouldCorrectlyFindItself() { + List lst = Arrays.asList("andre", "chang xian", "jun neng"); + + DisjointSet ds = new DisjointSet<>(lst); + Assert.assertTrue(ds.find("chang xian", "chang xian")); + } + + @Test + public void union_shouldCorrectlyUpdate() { + List lst = Arrays.asList("andre", "chang xian", "jun neng", "kai ting", "shu heng"); + + DisjointSet ds = new DisjointSet<>(lst); + + Assert.assertFalse(ds.find("andre", "kai ting")); + + ds.union("andre", "kai ting"); + Assert.assertTrue(ds.find("andre", "kai ting")); + Assert.assertFalse(ds.find("andre", "chang xian")); + Assert.assertFalse(ds.find("andre", "shu heng")); + Assert.assertFalse(ds.find("jun neng", "kai ting")); + } + + @Test + public void retrieve_shouldCorrectlyRetrieveComponents() { + List lst = Arrays.asList("andre", "chang xian", "jun neng", "kai ting", "shu heng", "seth", "gilbert"); + + DisjointSet ds = new DisjointSet<>(lst); + ds.union("andre", "kai ting"); + ds.union("chang xian", "jun neng"); + ds.union("jun neng", "gilbert"); + ds.union("chang xian", "seth"); + + List resultA = ds.retrieveFromSameComponent("kai ting"); + Collections.sort(resultA); + List expectedA = Arrays.asList("andre", "kai ting"); + Collections.sort(expectedA); + Assert.assertEquals(expectedA, resultA); + + List resultB = ds.retrieveFromSameComponent("gilbert"); + Collections.sort(resultB); + List expectedB = Arrays.asList("chang xian", "jun neng", "seth", "gilbert"); + 
Collections.sort(expectedB); + Assert.assertEquals(expectedB, resultB); + + List resultC = ds.retrieveFromSameComponent("shu heng"); + Collections.sort(resultC); + List expectedC = Arrays.asList("shu heng"); + Collections.sort(expectedC); + Assert.assertEquals(expectedC, resultC); + } +}