Skip to content

Commit 03d21df

Browse files
Implement huffman encoding in ocaml (#750)
* implement huffman encoding in ocaml * Cleaning up some code and comments * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * Update contents/huffman_encoding/code/ocaml/huffman.ml Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com> * replace infix operator [@] with [List.append] Co-authored-by: Dimitri Belopopsky <ShadowMitia@users.noreply.github.com>
1 parent 83675ef commit 03d21df

File tree

1 file changed

+161
-0
lines changed

1 file changed

+161
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
2+
(* Takes in a string and a character that needs to be removed from
3+
the string that is input
4+
For eg. in a string : "Chennai" I want to remove char : 'n'
5+
such that we get the old value and the new value as a tuple
6+
7+
TL;DR : "Chennai" -> ("Cheai", "Chennai")
8+
*)
9+
let char_diff str ch =
10+
let res = String.concat "" (String.split_on_char ch str) in
11+
(res, str)
12+
13+
(* Converts "ABCD" -> ['A'; 'B'; 'C'; 'D'] *)
14+
let str_to_charlist s = List.init (String.length s) (String.get s)
15+
16+
(* Takes a word like "bibbity_bob" and converts to a tuple list of
17+
unique characters with their frequency
18+
19+
TL;DR :
20+
"bibbity_bobbity" ->
21+
[('b', 6); ('i', 3); ('t', 2); ('y', 2); ('_', 1); ('o', 1)]
22+
*)
23+
let counter str =
24+
let char_lst = str_to_charlist str in
25+
let rec loop acc str char_lst =
26+
match char_lst with
27+
| [] -> List.filter (fun (_,y) -> y != 0) (List.rev acc)
28+
|> List.map (fun (x, y) -> (Printf.sprintf "%c" x, y))
29+
| hd :: tl ->
30+
let (new_str, old_str) = char_diff str hd in
31+
loop
32+
((hd, (String.length old_str - String.length new_str)) :: acc)
33+
new_str tl in
34+
loop [] str char_lst
35+
36+
(* References -> https://ocaml.org/learn/tutorials/99problems.html *)
37+
38+
module Pq = struct
39+
type 'a t = {
40+
data: 'a list array;
41+
mutable first: int;
42+
}
43+
44+
let make size = {
45+
data = Array.make size [];
46+
first = size;
47+
}
48+
49+
let add q p x =
50+
q.data.(p) <- x :: q.data.(p);
51+
q.first <- min p q.first
52+
53+
let get_min q =
54+
if q.first = Array.length (q.data) then None
55+
else
56+
match q.data.(q.first) with
57+
| [] -> assert false
58+
| hd :: tl ->
59+
let p = q.first in
60+
q.data.(q.first) <- tl;
61+
while q.first < (Array.length (q.data)) && q.data.(q.first) = [] do
62+
q.first <- q.first + 1
63+
done;
64+
Some(p,hd)
65+
end
66+
67+
type tree = Leaf of string | Node of tree * tree
68+
69+
let rec create_huffman_tree q =
70+
match Pq.get_min q, Pq.get_min q with
71+
| Some(p1, t1), Some(p2, t2) ->
72+
Pq.add q (p1 + p2) (Node(t1, t2));
73+
create_huffman_tree q
74+
| Some(_, t), None | None, Some(_, t) -> t
75+
| None, None -> assert false
76+
77+
let rec prefixes_of_tree prefix trees = match trees with
78+
| Leaf s -> [(s, prefix)]
79+
| Node (t0, t1) ->
80+
List.append (prefixes_of_tree (prefix ^ "0") t0) (prefixes_of_tree (prefix ^ "1") t1)
81+
82+
let huffman huffman_tree = prefixes_of_tree "" huffman_tree
83+
84+
(* Helper functions *)
85+
let char_to_str = Printf.sprintf "%c"
86+
87+
let str_list msg =
88+
List.map char_to_str (str_to_charlist msg)
89+
90+
let list_to_string lst =
91+
String.concat "" lst
92+
93+
(* Encoding and decoding functions *)
94+
let encode codebook x =
95+
List.filter (fun (ch, _) -> ch = x) codebook |> fun x ->
96+
List.hd x |> snd
97+
98+
let encode_msg codebook msg =
99+
List.map (fun x -> encode codebook x) (str_list msg) |>
100+
list_to_string (List.map (fun x -> encode codebook x) (str_list msg))
101+
102+
let decode codebook key =
103+
List.find_opt (fun (_,code) -> key = code) codebook
104+
105+
106+
let decode_msg codebook msg =
107+
let decoded_message = ref "" in
108+
let code = ref "" in
109+
let msg_list = str_list msg in
110+
List.iter (fun bit ->
111+
code := !code ^ bit;
112+
match (decode codebook !code) with
113+
| None -> ()
114+
| Some v ->
115+
decoded_message := !decoded_message ^ (fst v);
116+
code := "";
117+
) msg_list;
118+
!decoded_message
119+
120+
(* Printing functions below *)
121+
let print_codebook codebook =
122+
let _ = Printf.printf "[\n" in
123+
let fmt_tup hd = Printf.sprintf "\t(%s, %s)" (fst hd) (snd hd) in
124+
let rec loop codebook = match codebook with
125+
| [] -> ()
126+
| hd :: [] ->
127+
let tup = fmt_tup hd in
128+
Printf.printf "%s\n]\n" tup
129+
| hd :: tl ->
130+
let tup = fmt_tup hd in
131+
Printf.printf "%s,\n" tup;
132+
loop tl in
133+
loop codebook
134+
135+
let rec print_huffman_tree huffman_tree =
136+
match huffman_tree with
137+
| Leaf a -> Printf.sprintf "%s" a
138+
| Node (l, r) ->
139+
let fmt_l = print_huffman_tree l in
140+
let fmt_r = print_huffman_tree r in
141+
Printf.sprintf "[%s,%s]" fmt_l fmt_r
142+
143+
144+
(* Main Function *)
145+
let _ =
146+
let message = "bibbity_bobbity" in
147+
let freq_ch_list = counter message in
148+
let size = List.fold_left (fun sum (_,p) -> sum + p) 0 freq_ch_list in
149+
let queue = Pq.make (size + 2) in
150+
let _ = List.iter (fun (s,f) -> Pq.add queue f (Leaf s)) freq_ch_list in
151+
let huffman_tree = create_huffman_tree queue in
152+
let codebook = huffman huffman_tree in
153+
let encoded_message = encode_msg codebook message in
154+
let decoded_message = decode_msg codebook encoded_message in
155+
let _ = Printf.printf "Message : %s\n" message in
156+
let _ = print_huffman_tree huffman_tree |> fun x ->
157+
Printf.printf "Huffman Tree : %s\n" x in
158+
let _ = Printf.printf "Codebook : ";print_codebook codebook in
159+
let _ = Printf.printf "Encoded Message : %s\n" encoded_message in
160+
let _ = Printf.printf "Decoded Message : %s\n" decoded_message in
161+
()

0 commit comments

Comments
 (0)