Skip to content

Commit d34d5c2

Browse files
1fisedisiriak
authored and committed
Refresh shannon-fano algorithm and add tests (#68)
* Refresh shannon-fano algorithm and add tests * Reduce shannon-fano complexity
1 parent bd21eac commit d34d5c2

File tree

2 files changed

+339
-0
lines changed

2 files changed

+339
-0
lines changed
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
using Algorithms.DataCompression;
2+
using NUnit.Framework;
3+
4+
namespace Algorithms.Tests.DataCompression
5+
{
6+
public static class ShannonFanoTests
7+
{
8+
[Test]
9+
[Parallelizable]
10+
public static void ThatAlgorithWorks()
11+
{
12+
// Arrange
13+
var shannonFano = new ShannonFano();
14+
15+
const string phrase = "This is a string";
16+
17+
// Act
18+
var result = shannonFano.Compress(phrase);
19+
20+
// Assert
21+
Assert.IsNotEmpty(result);
22+
Assert.AreEqual("1001111000110100001101011100100111001101001100101", result);
23+
}
24+
25+
[Test]
26+
[Parallelizable]
27+
public static void ThatAlgorithWorksWord()
28+
{
29+
// Arrange
30+
var shannonFano = new ShannonFano();
31+
32+
const string word = "Hello";
33+
34+
// Act
35+
var result = shannonFano.Compress(word);
36+
37+
// Assert
38+
Assert.IsNotEmpty(result);
39+
Assert.AreEqual("1111100010", result);
40+
}
41+
}
42+
}
Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
6+
namespace Algorithms.DataCompression
7+
{
8+
public class ShannonFano
9+
{
10+
private class FanoNode
11+
{
12+
public float Probability;
13+
public readonly int[] Arr = new int[20];
14+
public int Top;
15+
}
16+
17+
/// <summary>
18+
/// The input text length.
19+
/// </summary>
20+
private int Len { get; set; }
21+
22+
private int Pos { get; set; }
23+
24+
public string Compress(string inputText)
25+
{
26+
var str = inputText.ToLowerInvariant().Replace(" ", "#");
27+
Len = str.Length;
28+
29+
var textAsCharArray = str.ToCharArray();
30+
var initCharCountArray = GetCharFrequencies(textAsCharArray);
31+
32+
var (fFreqArray, fCharArray) =
33+
FlagCharsByFrequency(initCharCountArray, textAsCharArray);
34+
35+
SortArrays(fFreqArray, fCharArray);
36+
37+
var pArr = GetProbabilities(fFreqArray);
38+
39+
Shannon(0, Pos - 1, pArr);
40+
41+
var code = GetShannonCode(pArr);
42+
43+
var finalStr = GetShannonString(str, fCharArray, code);
44+
return finalStr;
45+
}
46+
47+
/// <summary>
48+
/// Find the frequency(# of incidences) for each caracter on the text.
49+
/// </summary>
50+
/// <returns>Updateds frequency Array</returns>
51+
private int[] GetCharFrequencies(IReadOnlyList<char> text)
52+
{
53+
var temp = new int[Len];
54+
var count = 1;
55+
56+
for (var i = 0; i < Len; i++)
57+
{
58+
for (var j = i + 1; j < Len; j++)
59+
{
60+
if (text[i] == text[j])
61+
{
62+
count++;
63+
}
64+
}
65+
66+
temp[i] = count;
67+
count = 1;
68+
}
69+
70+
return temp;
71+
}
72+
73+
/// <summary>
74+
/// Segregated chars by frequency.
75+
/// </summary>
76+
/// <param name="countArray">initial count per char</param>
77+
/// <param name="textAsCharArray">text message as array of chars.</param>
78+
/// <returns>Filtered arrays</returns>
79+
private Tuple<int[], char[]> FlagCharsByFrequency(
80+
IReadOnlyList<int> countArray, IReadOnlyList<char> textAsCharArray)
81+
{
82+
var filteredFreqArray = new int[Len];
83+
var filteredCharArray = new char[Len];
84+
85+
var flag = false;
86+
87+
for (var i = 0; i < Len; i++)
88+
{
89+
for (var j = 0; j < Len; j++)
90+
{
91+
if (textAsCharArray[i] == filteredCharArray[j])
92+
{
93+
flag = true;
94+
}
95+
}
96+
97+
if (!flag)
98+
{
99+
filteredCharArray[Pos] = textAsCharArray[i];
100+
filteredFreqArray[Pos] = countArray[i];
101+
Pos++;
102+
}
103+
104+
flag = false;
105+
}
106+
107+
return new Tuple<int[], char[]>(filteredFreqArray, filteredCharArray);
108+
}
109+
110+
/// <summary>
111+
/// Sorts arrays symbols based on frequency.
112+
/// </summary>
113+
private void SortArrays(IList<int> fFreqArray, IList<char> fCharArray)
114+
{
115+
for (var i = 0; i < Pos; i++)
116+
{
117+
for (var j = i + 1; j < Pos; j++)
118+
{
119+
if (fFreqArray[i] <= fFreqArray[j])
120+
{
121+
continue;
122+
}
123+
124+
var temp = fFreqArray[i];
125+
var ch = fCharArray[i];
126+
fFreqArray[i] = fFreqArray[j];
127+
fCharArray[i] = fCharArray[j];
128+
fFreqArray[j] = temp;
129+
fCharArray[j] = ch;
130+
}
131+
}
132+
}
133+
134+
/// <summary>
135+
/// Given the frequency array of the input text
136+
/// gets the arithmetic mean for each character.
137+
/// </summary>
138+
/// <param name="fFreqArray">Char freq array.</param>
139+
/// <returns></returns>
140+
private List<FanoNode> GetProbabilities(IReadOnlyList<int> fFreqArray)
141+
{
142+
var f = new List<FanoNode>();
143+
144+
for (var i = 0; i < Pos; i++)
145+
{
146+
var prob = fFreqArray[i] / (double)Len;
147+
f.Add(new FanoNode
148+
{
149+
Probability = (float)prob,
150+
Top = -1
151+
});
152+
}
153+
return f;
154+
}
155+
156+
/// <summary>
157+
/// Joins the encoded string
158+
/// </summary>
159+
/// <param name="text">Text message</param>
160+
/// <param name="fCharArray">Frequency char array</param>
161+
/// <param name="code">encoding</param>
162+
/// <returns>Shannon String</returns>
163+
private static string GetShannonString(string text, char[] fCharArray, string[] code)
164+
{
165+
var sbl = new StringBuilder(" ");
166+
167+
foreach (var item in text)
168+
{
169+
var index = Array.IndexOf(fCharArray, item);
170+
sbl.Append(code.ElementAt(index));
171+
}
172+
173+
sbl.Replace(" ", "");
174+
175+
return sbl.ToString();
176+
}
177+
178+
/// <summary>
179+
/// Creates code.
180+
/// </summary>
181+
/// <param name="pArr">Probability array</param>
182+
/// <returns></returns>
183+
private string[] GetShannonCode(IReadOnlyList<FanoNode> pArr)
184+
{
185+
var code = new string[Pos];
186+
187+
for (var i = Pos - 1; i >= 0; i--)
188+
{
189+
var sbl = new StringBuilder(" ");
190+
191+
for (var j = 0; j <= pArr[i].Top; j++)
192+
{
193+
sbl.Append(pArr[i].Arr[j]);
194+
}
195+
196+
code[i] = sbl.ToString();
197+
}
198+
199+
return code;
200+
}
201+
202+
private static void UpdateTopValue(IReadOnlyList<FanoNode> f, int h, int tempIndex)
203+
{
204+
f[h].Arr[++f[h].Top] = 0;
205+
f[tempIndex].Arr[++f[tempIndex].Top] = 1;
206+
}
207+
208+
private static void UpdateTopValue(IReadOnlyList<FanoNode> f, int h, int tempIndex, int k)
209+
{
210+
int i;
211+
for (i = tempIndex; i <= k; i++)
212+
{
213+
f[i].Arr[++f[i].Top] = 1;
214+
}
215+
216+
for (i = k + 1; i <= h; i++)
217+
{
218+
f[i].Arr[++f[i].Top] = 0;
219+
}
220+
}
221+
222+
private static float GetInitDifference(IReadOnlyList<FanoNode> f, int h)
223+
{
224+
var set1 = f.AsEnumerable().Take(h).Sum(x => x.Probability);
225+
var set2 = f[h].Probability;
226+
227+
var diff1 = set1 - set2;
228+
if (diff1 < 0)
229+
{
230+
diff1 *= -1;
231+
}
232+
233+
return diff1;
234+
}
235+
236+
private static void Shannon(int l, int h, IReadOnlyList<FanoNode> f)
237+
{
238+
while (true)
239+
{
240+
var tempIndex = l;
241+
if (tempIndex == h || tempIndex > h)
242+
{
243+
return;
244+
}
245+
246+
if (tempIndex + 1 == h)
247+
{
248+
UpdateTopValue(f, h, tempIndex);
249+
}
250+
else
251+
{
252+
var diff1 = GetInitDifference(f, h);
253+
254+
var j = 2;
255+
var k = 0;
256+
257+
while (j != h - tempIndex + 1)
258+
{
259+
k = h - j;
260+
float set2 = 0;
261+
262+
var set1 = f.AsEnumerable().Take(k + 1).Sum(x => x.Probability);
263+
264+
int i;
265+
for (i = h; i > k; i--)
266+
{
267+
set2 += f[i].Probability;
268+
}
269+
270+
var diff2 = set1 - set2;
271+
if (diff2 < 0)
272+
{
273+
diff2 *= -1;
274+
}
275+
276+
if (diff2 >= diff1)
277+
{
278+
break;
279+
}
280+
281+
diff1 = diff2;
282+
j++;
283+
}
284+
285+
k++;
286+
UpdateTopValue(f, h, tempIndex, k);
287+
288+
Shannon(tempIndex, k, f);
289+
l = k + 1;
290+
continue;
291+
}
292+
293+
break;
294+
}
295+
}
296+
}
297+
}

0 commit comments

Comments
 (0)