| 
 | 1 | +package com.thealgorithms.compression;  | 
 | 2 | + | 
 | 3 | +import java.util.Arrays;  | 
 | 4 | +import java.util.HashMap;  | 
 | 5 | +import java.util.Map;  | 
 | 6 | + | 
 | 7 | +/**  | 
 | 8 | + * Implementation of the Burrows-Wheeler Transform (BWT) and its inverse.  | 
 | 9 | + * <p>  | 
 | 10 | + * BWT is a reversible data transformation algorithm that rearranges a string into runs of  | 
 | 11 | + * similar characters. While not a compression algorithm itself, it significantly improves  | 
 | 12 | + * the compressibility of data for subsequent algorithms like Move-to-Front encoding and  | 
 | 13 | + * Run-Length Encoding.  | 
 | 14 | + * </p>  | 
 | 15 | + *  | 
 | 16 | + * <p>The transform works by:  | 
 | 17 | + * <ol>  | 
 | 18 | + *   <li>Generating all rotations of the input string</li>  | 
 | 19 | + *   <li>Sorting these rotations lexicographically</li>  | 
 | 20 | + *   <li>Taking the last column of the sorted matrix as output</li>  | 
 | 21 | + *   <li>Recording the index of the original string in the sorted matrix</li>  | 
 | 22 | + * </ol>  | 
 | 23 | + * </p>  | 
 | 24 | + *  | 
 | 25 | + * <p><b>Important:</b> The input string should end with a unique end-of-string marker  | 
 | 26 | + * (typically '$') that:  | 
 | 27 | + * <ul>  | 
 | 28 | + *   <li>Does not appear anywhere else in the text</li>  | 
 | 29 | + *   <li>Is lexicographically smaller than all other characters</li>  | 
 | 30 | + *   <li>Ensures unique rotations and enables correct inverse transformation</li>  | 
 | 31 | + * </ul>  | 
 | 32 | + * Without this marker, the inverse transform may not correctly reconstruct the original string.  | 
 | 33 | + * </p>  | 
 | 34 | + *  | 
 | 35 | + * <p><b>Time Complexity:</b>  | 
 | 36 | + * <ul>  | 
 | 37 | + *   <li>Forward transform: O(n² log n) where n is the string length</li>  | 
 | 38 | + *   <li>Inverse transform: O(n) using the LF-mapping technique</li>  | 
 | 39 | + * </ul>  | 
 | 40 | + * </p>  | 
 | 41 | + *  | 
 | 42 | + * <p><b>Example:</b></p>  | 
 | 43 | + * <pre>  | 
 | 44 | + * Input:  "banana$"  | 
 | 45 | + * Output: BWTResult("annb$aa", 4)  | 
 | 46 | + *         - "annb$aa" is the transformed string (groups similar characters)  | 
 | 47 | + *         - 4 is the index of the original string in the sorted rotations  | 
 | 48 | + * </pre>  | 
 | 49 | + *  | 
 | 50 | + * @see <a href="https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform">Burrows–Wheeler transform (Wikipedia)</a>  | 
 | 51 | + */  | 
 | 52 | +public final class BurrowsWheelerTransform {  | 
 | 53 | + | 
 | 54 | +    private BurrowsWheelerTransform() {  | 
 | 55 | +    }  | 
 | 56 | + | 
 | 57 | +    /**  | 
 | 58 | +     * A container for the result of the forward BWT.  | 
 | 59 | +     * <p>  | 
 | 60 | +     * Contains the transformed string and the index of the original string  | 
 | 61 | +     * in the sorted rotations matrix, both of which are required for the  | 
 | 62 | +     * inverse transformation.  | 
 | 63 | +     * </p>  | 
 | 64 | +     */  | 
 | 65 | +    public static class BWTResult {  | 
 | 66 | +        /** The transformed string (last column of the sorted rotation matrix) */  | 
 | 67 | +        public final String transformed;  | 
 | 68 | + | 
 | 69 | +        /** The index of the original string in the sorted rotations matrix */  | 
 | 70 | +        public final int originalIndex;  | 
 | 71 | + | 
 | 72 | +        /**  | 
 | 73 | +         * Constructs a BWTResult with the transformed string and original index.  | 
 | 74 | +         *  | 
 | 75 | +         * @param transformed the transformed string (L-column)  | 
 | 76 | +         * @param originalIndex the index of the original string in sorted rotations  | 
 | 77 | +         */  | 
 | 78 | +        public BWTResult(String transformed, int originalIndex) {  | 
 | 79 | +            this.transformed = transformed;  | 
 | 80 | +            this.originalIndex = originalIndex;  | 
 | 81 | +        }  | 
 | 82 | + | 
 | 83 | +        @Override  | 
 | 84 | +        public boolean equals(Object obj) {  | 
 | 85 | +            if (this == obj) {  | 
 | 86 | +                return true;  | 
 | 87 | +            }  | 
 | 88 | +            if (obj == null || getClass() != obj.getClass()) {  | 
 | 89 | +                return false;  | 
 | 90 | +            }  | 
 | 91 | +            BWTResult bwtResult = (BWTResult) obj;  | 
 | 92 | +            return originalIndex == bwtResult.originalIndex && transformed.equals(bwtResult.transformed);  | 
 | 93 | +        }  | 
 | 94 | + | 
 | 95 | +        @Override  | 
 | 96 | +        public int hashCode() {  | 
 | 97 | +            return 31 * transformed.hashCode() + originalIndex;  | 
 | 98 | +        }  | 
 | 99 | + | 
 | 100 | +        @Override  | 
 | 101 | +        public String toString() {  | 
 | 102 | +            return "BWTResult[transformed=" + transformed + ", originalIndex=" + originalIndex + "]";  | 
 | 103 | +        }  | 
 | 104 | +    }  | 
 | 105 | + | 
 | 106 | +    /**  | 
 | 107 | +     * Performs the forward Burrows-Wheeler Transform on the input string.  | 
 | 108 | +     * <p>  | 
 | 109 | +     * The algorithm generates all cyclic rotations of the input, sorts them  | 
 | 110 | +     * lexicographically, and returns the last column of this sorted matrix  | 
 | 111 | +     * along with the position of the original string.  | 
 | 112 | +     * </p>  | 
 | 113 | +     *  | 
 | 114 | +     * <p><b>Note:</b> It is strongly recommended that the input string ends with  | 
 | 115 | +     * a unique end-of-string marker (e.g., '$') that is lexicographically smaller  | 
 | 116 | +     * than any other character in the string. This ensures correct inversion.</p>  | 
 | 117 | +     *  | 
 | 118 | +     * @param text the input string to transform; must not be {@code null}  | 
 | 119 | +     * @return a {@link BWTResult} object containing the transformed string (L-column)  | 
 | 120 | +     *         and the index of the original string in the sorted rotations matrix;  | 
 | 121 | +     *         returns {@code BWTResult("", -1)} for empty input  | 
 | 122 | +     * @throws NullPointerException if {@code text} is {@code null}  | 
 | 123 | +     */  | 
 | 124 | +    public static BWTResult transform(String text) {  | 
 | 125 | +        if (text == null || text.isEmpty()) {  | 
 | 126 | +            return new BWTResult("", -1);  | 
 | 127 | +        }  | 
 | 128 | + | 
 | 129 | +        int n = text.length();  | 
 | 130 | + | 
 | 131 | +        // Generate all rotations of the input string  | 
 | 132 | +        String[] rotations = new String[n];  | 
 | 133 | +        for (int i = 0; i < n; i++) {  | 
 | 134 | +            rotations[i] = text.substring(i) + text.substring(0, i);  | 
 | 135 | +        }  | 
 | 136 | + | 
 | 137 | +        // Sort rotations lexicographically  | 
 | 138 | +        Arrays.sort(rotations);  | 
 | 139 | +        int originalIndex = Arrays.binarySearch(rotations, text);  | 
 | 140 | +        StringBuilder lastColumn = new StringBuilder(n);  | 
 | 141 | +        for (int i = 0; i < n; i++) {  | 
 | 142 | +            lastColumn.append(rotations[i].charAt(n - 1));  | 
 | 143 | +        }  | 
 | 144 | + | 
 | 145 | +        return new BWTResult(lastColumn.toString(), originalIndex);  | 
 | 146 | +    }  | 
 | 147 | + | 
 | 148 | +    /**  | 
 | 149 | +     * Performs the inverse Burrows-Wheeler Transform using the LF-mapping technique.  | 
 | 150 | +     * <p>  | 
 | 151 | +     * The LF-mapping (Last-First mapping) is an efficient method to reconstruct  | 
 | 152 | +     * the original string from the BWT output without explicitly reconstructing  | 
 | 153 | +     * the entire sorted rotations matrix.  | 
 | 154 | +     * </p>  | 
 | 155 | +     *  | 
 | 156 | +     * <p>The algorithm works by:  | 
 | 157 | +     * <ol>  | 
 | 158 | +     *   <li>Creating the first column by sorting the BWT string</li>  | 
 | 159 | +     *   <li>Building a mapping from first column indices to last column indices</li>  | 
 | 160 | +     *   <li>Following this mapping starting from the original index to reconstruct the string</li>  | 
 | 161 | +     * </ol>  | 
 | 162 | +     * </p>  | 
 | 163 | +     *  | 
 | 164 | +     * @param bwtString the transformed string (L-column) from the forward transform; must not be {@code null}  | 
 | 165 | +     * @param originalIndex the index of the original string row from the forward transform;  | 
 | 166 | +     *                      use -1 for empty strings  | 
 | 167 | +     * @return the original, untransformed string; returns empty string if input is empty or {@code originalIndex} is -1  | 
 | 168 | +     * @throws NullPointerException if {@code bwtString} is {@code null}  | 
 | 169 | +     * @throws IllegalArgumentException if {@code originalIndex} is out of valid range (except -1)  | 
 | 170 | +     */  | 
 | 171 | +    public static String inverseTransform(String bwtString, int originalIndex) {  | 
 | 172 | +        if (bwtString == null || bwtString.isEmpty() || originalIndex == -1) {  | 
 | 173 | +            return "";  | 
 | 174 | +        }  | 
 | 175 | + | 
 | 176 | +        int n = bwtString.length();  | 
 | 177 | +        if (originalIndex < 0 || originalIndex >= n) {  | 
 | 178 | +            throw new IllegalArgumentException("Original index must be between 0 and " + (n - 1) + ", got: " + originalIndex);  | 
 | 179 | +        }  | 
 | 180 | + | 
 | 181 | +        char[] lastColumn = bwtString.toCharArray();  | 
 | 182 | +        char[] firstColumn = bwtString.toCharArray();  | 
 | 183 | +        Arrays.sort(firstColumn);  | 
 | 184 | + | 
 | 185 | +        // Create the "next" array for LF-mapping.  | 
 | 186 | +        // next[i] stores the row index in the last column that corresponds to firstColumn[i]  | 
 | 187 | +        int[] next = new int[n];  | 
 | 188 | + | 
 | 189 | +        // Track the count of each character seen so far in the last column  | 
 | 190 | +        Map<Character, Integer> countMap = new HashMap<>();  | 
 | 191 | + | 
 | 192 | +        // Store the first occurrence index of each character in the first column  | 
 | 193 | +        Map<Character, Integer> firstOccurrence = new HashMap<>();  | 
 | 194 | + | 
 | 195 | +        for (int i = 0; i < n; i++) {  | 
 | 196 | +            if (!firstOccurrence.containsKey(firstColumn[i])) {  | 
 | 197 | +                firstOccurrence.put(firstColumn[i], i);  | 
 | 198 | +            }  | 
 | 199 | +        }  | 
 | 200 | + | 
 | 201 | +        // Build the LF-mapping  | 
 | 202 | +        for (int i = 0; i < n; i++) {  | 
 | 203 | +            char c = lastColumn[i];  | 
 | 204 | +            int count = countMap.getOrDefault(c, 0);  | 
 | 205 | +            int firstIndex = firstOccurrence.get(c);  | 
 | 206 | +            next[firstIndex + count] = i;  | 
 | 207 | +            countMap.put(c, count + 1);  | 
 | 208 | +        }  | 
 | 209 | + | 
 | 210 | +        // Reconstruct the original string by following the LF-mapping  | 
 | 211 | +        StringBuilder originalString = new StringBuilder(n);  | 
 | 212 | +        int currentRow = originalIndex;  | 
 | 213 | +        for (int i = 0; i < n; i++) {  | 
 | 214 | +            originalString.append(firstColumn[currentRow]);  | 
 | 215 | +            currentRow = next[currentRow];  | 
 | 216 | +        }  | 
 | 217 | + | 
 | 218 | +        return originalString.toString();  | 
 | 219 | +    }  | 
 | 220 | +}  | 
0 commit comments