From 2a7e484f22b5d332743287cadcfe81e919a81874 Mon Sep 17 00:00:00 2001 From: ddaniel27 Date: Fri, 14 Jun 2024 14:50:48 -0500 Subject: [PATCH 1/3] [NEW IMPLEMENTATION] RLE compression algorithm --- compression/rlecoding.go | 73 +++++++++++++++ compression/rlecoding_test.go | 161 ++++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+) create mode 100644 compression/rlecoding.go create mode 100644 compression/rlecoding_test.go diff --git a/compression/rlecoding.go b/compression/rlecoding.go new file mode 100644 index 000000000..c3995acc4 --- /dev/null +++ b/compression/rlecoding.go @@ -0,0 +1,73 @@ +/* +rlecoding.go +descrption: run length encoding and decoding +details: +Run-length encoding (RLE) is a simple form of data compression in which runs of data are stored as a single data value and count, rather than as the original run. This is useful when the data contains many repeated values. For example, the data "WWWWWWWWWWWWBWWWWWWWWWWWWBBB" can be compressed to "12W1B12W3B". The algorithm is simple and can be implemented in a few lines of code. +author(s) [ddaniel27](https://github.com/ddaniel27) +*/ +package compression + +import ( + "bytes" + "fmt" + "regexp" + "strconv" + "strings" +) + +// RLEncode takes a string and returns its run-length encoding +func RLEncode(data string) string { + var result string + count := 1 + for i := 0; i < len(data); i++ { + if i+1 < len(data) && data[i] == data[i+1] { + count++ + continue + } + result += fmt.Sprintf("%d%c", count, data[i]) + count = 1 + } + return result +} + +// RLEdecode takes a run-length encoded string and returns the original string +func RLEdecode(data string) string { + var result string + regex := regexp.MustCompile(`(\d+)(\w)`) + + for _, match := range regex.FindAllStringSubmatch(data, -1) { + num, _ := strconv.Atoi(match[1]) + result += strings.Repeat(match[2], num) + } + + return result +} + +// RLEncodebytes takes a byte slice and returns its run-length encoding as a byte slice +func RLEncodebytes(data []byte) []byte { + var result []byte + var count byte = 1 + + for i := 0; i < len(data); i++ { + if i+1 < len(data) && data[i] == data[i+1] { + count++ + continue + } + result = append(result, count, data[i]) + count = 1 + } + + return result +} + +// RLEdecodebytes takes a run-length encoded byte slice and returns the original byte slice +func RLEdecodebytes(data []byte) []byte { + var result []byte + + for i := 0; i < len(data); i += 2 { + count := int(data[i]) + result = append(result, bytes.Repeat([]byte{data[i+1]}, count)...) + } + + return result +} diff --git a/compression/rlecoding_test.go b/compression/rlecoding_test.go new file mode 100644 index 000000000..0dd286f3b --- /dev/null +++ b/compression/rlecoding_test.go @@ -0,0 +1,161 @@ +package compression_test + +import ( + "bytes" + "testing" + + "github.com/TheAlgorithms/Go/compression" +) + +func TestCompression_RLEncode(t *testing.T) { + tests := []struct { + name string + data string + want string + }{ + { + name: "test 1", + data: "WWWWWWWWWWWWBWWWWWWWWWWWWBBB", + want: "12W1B12W3B", + }, + { + name: "test 2", + data: "AABCCCDEEEE", + want: "2A1B3C1D4E", + }, + { + name: "test 3", + data: "AAAABBBCCDA", + want: "4A3B2C1D1A", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := compression.RLEncode(tt.data); got != tt.want { + t.Errorf("RLEncode() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestCompression_RLEdecode(t *testing.T) { + tests := []struct { + name string + data string + want string + }{ + { + name: "test 1", + data: "12W1B12W3B", + want: "WWWWWWWWWWWWBWWWWWWWWWWWWBBB", + }, + { + name: "test 2", + data: "2A1B3C1D4E", + want: "AABCCCDEEEE", + }, + { + name: "test 3", + data: "4A3B2C1D1A", + want: "AAAABBBCCDA", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := compression.RLEdecode(tt.data); got != tt.want { + t.Errorf("RLEdecode() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestCompression_RLEncodebytes(t *testing.T) { + tests := []struct { + name string + data []byte + want []byte + }{ + { + name: "test 1", + data: []byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB"), + want: []byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'}, + }, + { + name: "test 2", + data: []byte("AABCCCDEEEE"), + want: []byte{2, 'A', 1, 'B', 3, 'C', 1, 'D', 4, 'E'}, + }, + { + name: "test 3", + data: []byte("AAAABBBCCDA"), + want: []byte{4, 'A', 3, 'B', 2, 'C', 1, 'D', 1, 'A'}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := compression.RLEncodebytes(tt.data); !bytes.Equal(got, tt.want) { + t.Errorf("RLEncodebytes() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestCompression_RLEdecodebytes(t *testing.T) { + tests := []struct { + name string + data []byte + want []byte + }{ + { + name: "test 1", + data: []byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'}, + want: []byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB"), + }, + { + name: "test 2", + data: []byte{2, 'A', 1, 'B', 3, 'C', 1, 'D', 4, 'E'}, + want: []byte("AABCCCDEEEE"), + }, + { + name: "test 3", + data: []byte{4, 'A', 3, 'B', 2, 'C', 1, 'D', 1, 'A'}, + want: []byte("AAAABBBCCDA"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := compression.RLEdecodebytes(tt.data); !bytes.Equal(got, tt.want) { + t.Errorf("RLEdecodebytes() = %v, want %v", got, tt.want) + } + }) + } +} + +/* --- BENCHMARKS --- */ +func BenchmarkRLEncode(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = compression.RLEncode("WWWWWWWWWWWWBWWWWWWWWWWWWBBB") + } +} + +func BenchmarkRLEdecode(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = compression.RLEdecode("12W1B12W3B") + } +} + +func BenchmarkRLEncodebytes(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = compression.RLEncodebytes([]byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB")) + } +} + +func BenchmarkRLEdecodebytes(b *testing.B) { + for i := 0; i < b.N; i++ { + _ = compression.RLEdecodebytes([]byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'}) + } +} From d5a5f88bcdf4da150f5b351e78f7123dd95c5c03 Mon Sep 17 00:00:00 2001 From: ddaniel27 Date: Sun, 23 Jun 2024 09:47:28 -0500 Subject: [PATCH 2/3] [FIX] Fix typo --- compression/rlecoding.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compression/rlecoding.go b/compression/rlecoding.go index c3995acc4..c08d3e4bd 100644 --- a/compression/rlecoding.go +++ b/compression/rlecoding.go @@ -1,6 +1,6 @@ /* rlecoding.go -descrption: run length encoding and decoding +description: run length encoding and decoding details: Run-length encoding (RLE) is a simple form of data compression in which runs of data are stored as a single data value and count, rather than as the original run. This is useful when the data contains many repeated values. For example, the data "WWWWWWWWWWWWBWWWWWWWWWWWWBBB" can be compressed to "12W1B12W3B". The algorithm is simple and can be implemented in a few lines of code. author(s) [ddaniel27](https://github.com/ddaniel27) From db4b3df8e7022432ac27daaeda6d364f4b739687 Mon Sep 17 00:00:00 2001 From: ddaniel27 Date: Mon, 24 Jun 2024 23:08:49 -0500 Subject: [PATCH 3/3] [FIX] Suggestion added --- compression/rlecoding_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compression/rlecoding_test.go b/compression/rlecoding_test.go index 0dd286f3b..bf9af6bf6 100644 --- a/compression/rlecoding_test.go +++ b/compression/rlecoding_test.go @@ -7,7 +7,7 @@ import ( "github.com/TheAlgorithms/Go/compression" ) -func TestCompression_RLEncode(t *testing.T) { +func TestCompressionRLEncode(t *testing.T) { tests := []struct { name string data string @@ -39,7 +39,7 @@ func TestCompression_RLEncode(t *testing.T) { } } -func TestCompression_RLEdecode(t *testing.T) { +func TestCompressionRLEDecode(t *testing.T) { tests := []struct { name string data string @@ -71,7 +71,7 @@ func TestCompression_RLEdecode(t *testing.T) { } } -func TestCompression_RLEncodebytes(t *testing.T) { +func TestCompressionRLEncodeBytes(t *testing.T) { tests := []struct { name string data []byte @@ -103,7 +103,7 @@ func TestCompression_RLEncodebytes(t *testing.T) { } } -func TestCompression_RLEdecodebytes(t *testing.T) { +func TestCompressionRLEDecodeBytes(t *testing.T) { tests := []struct { name string data []byte @@ -142,19 +142,19 @@ func BenchmarkRLEncode(b *testing.B) { } } -func BenchmarkRLEdecode(b *testing.B) { +func BenchmarkRLEDecode(b *testing.B) { for i := 0; i < b.N; i++ { _ = compression.RLEdecode("12W1B12W3B") } } -func BenchmarkRLEncodebytes(b *testing.B) { +func BenchmarkRLEncodeBytes(b *testing.B) { for i := 0; i < b.N; i++ { _ = compression.RLEncodebytes([]byte("WWWWWWWWWWWWBWWWWWWWWWWWWBBB")) } } -func BenchmarkRLEdecodebytes(b *testing.B) { +func BenchmarkRLEDecodeBytes(b *testing.B) { for i := 0; i < b.N; i++ { _ = compression.RLEdecodebytes([]byte{12, 'W', 1, 'B', 12, 'W', 3, 'B'}) }