Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions src/main/java/com/thealgorithms/compression/ArithmeticCoding.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package com.thealgorithms.compression;

import java.math.BigDecimal;
import java.math.MathContext;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * An implementation of the Arithmetic Coding algorithm.
 *
 * <p>
 * Arithmetic coding is a form of entropy encoding used in lossless data
 * compression. It encodes an entire message into a single number, a fraction n
 * where (0.0 &lt;= n &lt; 1.0). Unlike Huffman coding, which assigns a specific
 * bit sequence to each symbol, arithmetic coding represents the message as a
 * sub-interval of the [0, 1) interval.
 * </p>
 *
 * <p>
 * This implementation uses BigDecimal for precision to handle the shrinking
 * intervals, making it suitable for educational purposes to demonstrate the
 * core logic. Interval updates use exact (unrounded) BigDecimal arithmetic, so
 * the number of digits carried grows with every encoded symbol.
 * </p>
 *
 * <p>
 * Complexity, counted in BigDecimal operations, where n is the length of the
 * input and m is the number of unique symbols: compression performs n interval
 * updates after building the probability table; decompression performs up to
 * n*m range checks because every position scans the symbol table.
 * </p>
 *
 * <p>
 * References:
 * </p>
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Arithmetic_coding">Wikipedia:
 * Arithmetic coding</a></li>
 * </ul>
 */
public final class ArithmeticCoding {

    private ArithmeticCoding() {
    }

    /**
     * Compresses a string using the Arithmetic Coding algorithm.
     *
     * <p>
     * The probability table is derived from the input itself via
     * {@link #calculateProbabilities(String)}; the same table (and the input
     * length) must be supplied to {@link #decompress} to recover the text.
     * </p>
     *
     * @param uncompressed The string to be compressed.
     * @return The compressed representation as a BigDecimal number: the lower
     *         bound of the final interval.
     * @throws IllegalArgumentException if the input string is null or empty.
     */
    public static BigDecimal compress(String uncompressed) {
        if (uncompressed == null || uncompressed.isEmpty()) {
            throw new IllegalArgumentException("Input string cannot be null or empty.");
        }

        Map<Character, Symbol> probabilityTable = calculateProbabilities(uncompressed);

        BigDecimal low = BigDecimal.ZERO;
        BigDecimal high = BigDecimal.ONE;

        for (char symbol : uncompressed.toCharArray()) {
            BigDecimal range = high.subtract(low);
            Symbol sym = probabilityTable.get(symbol);

            // Both new bounds are offsets from the *previous* low, so high must be
            // updated before low is overwritten.
            high = low.add(range.multiply(sym.high()));
            low = low.add(range.multiply(sym.low()));
        }

        return low; // Return the lower bound of the final interval
    }

    /**
     * Decompresses a BigDecimal number back into the original string.
     *
     * @param compressed       The compressed BigDecimal number.
     * @param length           The length of the original uncompressed string. A
     *                         value of zero or less yields an empty string.
     * @param probabilityTable The probability table used during compression.
     * @return The original, uncompressed string.
     * @throws IllegalArgumentException if, at some position, the compressed value
     *                                  does not fall inside the range of any
     *                                  symbol in the table (for example a value
     *                                  outside [0, 1) or a table that does not
     *                                  belong to this value).
     */
    public static String decompress(BigDecimal compressed, int length, Map<Character, Symbol> probabilityTable) {
        StringBuilder decompressed = new StringBuilder();

        // Scan the symbols in a deterministic (sorted) order, matching the order
        // used in calculateProbabilities.
        List<Map.Entry<Character, Symbol>> sortedSymbols = new ArrayList<>(probabilityTable.entrySet());
        sortedSymbols.sort(Map.Entry.comparingByKey());

        BigDecimal low = BigDecimal.ZERO;
        BigDecimal high = BigDecimal.ONE;

        for (int i = 0; i < length; i++) {
            BigDecimal range = high.subtract(low);
            boolean symbolFound = false;

            // Find which symbol's sub-interval contains the compressed value.
            for (Map.Entry<Character, Symbol> entry : sortedSymbols) {
                Symbol sym = entry.getValue();

                // Project the symbol's [low, high) range onto the current interval.
                BigDecimal symLow = low.add(range.multiply(sym.low()));
                BigDecimal symHigh = low.add(range.multiply(sym.high()));

                if (compressed.compareTo(symLow) >= 0 && compressed.compareTo(symHigh) < 0) {
                    decompressed.append(entry.getKey());

                    // Narrow the interval for the next position.
                    low = symLow;
                    high = symHigh;
                    symbolFound = true;
                    break;
                }
            }

            // Fail loudly instead of returning a silently truncated string.
            if (!symbolFound) {
                throw new IllegalArgumentException("Compressed value does not fall within any symbol range at position " + i + ".");
            }
        }

        return decompressed.toString();
    }

    /**
     * Calculates the frequency and probability range for each character in the
     * input string in a deterministic order.
     *
     * <p>
     * Characters are processed in ascending order and each range boundary is
     * computed from the cumulative character count divided by the text length
     * (rounded once with {@link MathContext#DECIMAL128}). Consecutive ranges are
     * therefore contiguous, the first range starts at 0 and the last range ends
     * at exactly 1, independent of rounding of the individual probabilities.
     * </p>
     *
     * @param text The input string.
     * @return A map from each character to a Symbol object containing its
     *         probability range. The map is empty for an empty input.
     */
    public static Map<Character, Symbol> calculateProbabilities(String text) {
        Map<Character, Integer> frequencies = new HashMap<>();
        for (char c : text.toCharArray()) {
            frequencies.merge(c, 1, Integer::sum);
        }

        // Sort the characters to ensure a deterministic order for the probability table
        List<Character> sortedKeys = new ArrayList<>(frequencies.keySet());
        Collections.sort(sortedKeys);

        Map<Character, Symbol> probabilityTable = new HashMap<>();
        BigDecimal total = BigDecimal.valueOf(text.length());
        BigDecimal currentLow = BigDecimal.ZERO;
        long cumulativeCount = 0;

        for (Character symbol : sortedKeys) {
            cumulativeCount += frequencies.get(symbol);
            // Dividing the running count (rather than adding rounded probabilities)
            // keeps rounding error from accumulating across symbols.
            BigDecimal high = BigDecimal.valueOf(cumulativeCount).divide(total, MathContext.DECIMAL128);
            probabilityTable.put(symbol, new Symbol(currentLow, high));
            currentLow = high;
        }

        return probabilityTable;
    }

    /**
     * Helper class to store the probability range [low, high) for a symbol.
     *
     * @param low  The inclusive lower bound of the range.
     * @param high The exclusive upper bound of the range.
     */
    public record Symbol(BigDecimal low, BigDecimal high) {
    }
}
136 changes: 136 additions & 0 deletions src/main/java/com/thealgorithms/compression/LZW.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package com.thealgorithms.compression;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * An implementation of the Lempel-Ziv-Welch (LZW) algorithm.
 *
 * <p>
 * LZW is a universal lossless data compression algorithm created by Abraham
 * Lempel, Jacob Ziv, and Terry Welch. It works by building a dictionary of
 * strings encountered during compression and replacing occurrences of those
 * strings with a shorter code.
 * </p>
 *
 * <p>
 * This implementation seeds its dictionary with the 256 single characters whose
 * code is 0-255 (extended ASCII) and provides methods for both compression and
 * decompression.
 * </p>
 * <ul>
 * <li>Compressing "TOBEORNOTTOBEORTOBEORNOT" results in a list of integer
 * codes.</li>
 * <li>Decompressing that list of codes results back in the original
 * string.</li>
 * </ul>
 *
 * <p>
 * Complexity: both compression and decompression make a single pass over their
 * input, performing one dictionary lookup/insert per element.
 * </p>
 *
 * <p>
 * References:
 * </p>
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch">Wikipedia:
 * Lempel–Ziv–Welch</a></li>
 * </ul>
 */
public final class LZW {

    /** Number of single-character entries (codes 0-255) that seed every dictionary. */
    private static final int INITIAL_DICTIONARY_SIZE = 256;

    /**
     * Private constructor to prevent instantiation of this utility class.
     */
    private LZW() {
    }

    /**
     * Compresses a string using the LZW algorithm.
     *
     * @param uncompressed The string to be compressed. Can be null.
     * @return A list of integers representing the compressed data. Returns an empty
     *         list if the input is null or empty.
     * @throws IllegalArgumentException if the input contains a character whose
     *                                  code is greater than 255, which the initial
     *                                  dictionary cannot represent.
     */
    public static List<Integer> compress(String uncompressed) {
        if (uncompressed == null || uncompressed.isEmpty()) {
            return new ArrayList<>();
        }

        // Initialize dictionary with single characters (codes 0-255)
        Map<String, Integer> dictionary = new HashMap<>();
        for (int i = 0; i < INITIAL_DICTIONARY_SIZE; i++) {
            dictionary.put(String.valueOf((char) i), i);
        }
        int nextCode = INITIAL_DICTIONARY_SIZE;

        String current = "";
        List<Integer> result = new ArrayList<>();
        for (char c : uncompressed.toCharArray()) {
            // A character outside the seeded range would otherwise make the lookup
            // of the (empty) current string emit a null code.
            if (c >= INITIAL_DICTIONARY_SIZE) {
                throw new IllegalArgumentException("Unsupported character (code " + (int) c + "); only characters 0-255 can be compressed.");
            }

            String extended = current + c;
            if (dictionary.containsKey(extended)) {
                // The longer string is already known: keep extending it
                current = extended;
            } else {
                // Emit the code of the longest known prefix
                result.add(dictionary.get(current));
                // Register the new string under the next free code
                dictionary.put(extended, nextCode++);
                // Restart matching from the current character
                current = String.valueOf(c);
            }
        }

        // Output the code for the last remaining string
        result.add(dictionary.get(current));
        return result;
    }

    /**
     * Decompresses a list of integers back into a string using the LZW algorithm.
     *
     * <p>
     * The supplied list is only read; it is never modified, so immutable lists
     * are accepted.
     * </p>
     *
     * @param compressed A list of integers representing the compressed data. Can be
     *                   null.
     * @return The original, uncompressed string. Returns an empty string if the
     *         input is null or empty.
     * @throws IllegalArgumentException if a code is neither in the dictionary built
     *                                  so far nor the next code about to be
     *                                  assigned (this includes a first code
     *                                  outside 0-255).
     */
    public static String decompress(List<Integer> compressed) {
        if (compressed == null || compressed.isEmpty()) {
            return "";
        }

        // Dictionary indexed by code; starts with the single characters 0-255
        List<String> dictionary = new ArrayList<>();
        for (int i = 0; i < INITIAL_DICTIONARY_SIZE; i++) {
            dictionary.add(String.valueOf((char) i));
        }

        StringBuilder result = new StringBuilder();
        String previous = null; // null until the first code has been decoded

        for (int code : compressed) {
            String entry;
            if (code >= 0 && code < dictionary.size()) {
                // The code is in the dictionary
                entry = dictionary.get(code);
            } else if (previous != null && code == dictionary.size()) {
                // Special case for sequences like "ababab": the code refers to the
                // entry that is being defined by this very step
                entry = previous + previous.charAt(0);
            } else {
                throw new IllegalArgumentException("Bad compressed k: " + code);
            }

            result.append(entry);

            // Every code after the first defines one new dictionary entry
            if (previous != null) {
                dictionary.add(previous + entry.charAt(0));
            }

            previous = entry;
        }
        return result.toString();
    }
}
Loading