### Utils

In [1]:
import math
from pprint import pprint
from collections import Counter

class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

def sort_set(s):
    return sorted(s, key=lambda x: (len(x), x))

In [2]:
def find_codings(code: set, input_string: str,):
    """
    Find all valid sequences of codewords that can be used to encode the input string.

    Args:
        code: set of codewords
        input_string: input string
        current_coding: current partial coding

    Returns:
        list of all valid codings
    """

    def run(code: set, input_string: str, current_coding: list = None):
        results = []
        if not input_string:
            results.append(" ".join(current_coding))
            return results

        for word in code:
            if input_string.startswith(word):
                current_coding.append(word)
                results.extend(run(code, input_string[len(word) :], current_coding))
                current_coding.pop()

        return results
    
    return run(code, input_string, current_coding=[])

### Write a program in Python that applies the Sardinas-Patterson algorithm and returns the type of code received as input. You should distinguish the 3 different halt conditions.

In [3]:
def get_suffixes(code: set, s: dict):
    """Starting from two sets "code" and "s", representing S0 and Si, return the suffixes found"""
    suffixes = {}
    for s_word, s_seq in s.items():
        # Filter prefix/suffix in alphabet code
        prefix_codewords = [c_word for c_word in code if (c_word.startswith(s_word) or s_word.startswith(c_word) and c_word != s_word)]

        # Set c_word as the codewords in S0 and s_word as the codewords in Si
        for c_word in prefix_codewords:
            # Check if c is a prefix of s
            if c_word.startswith(s_word):
                suffix = c_word[len(s_word) :]
                if suffix:
                    suffixes[suffix] = sorted([c_word] + s_seq, key=lambda x: (x))
            # And vice versa
            elif s_word.startswith(c_word):
                suffix = s_word[len(c_word) :]
                if suffix:
                    suffixes[suffix] = sorted([c_word] + s_seq, key=lambda x: (x))

    return dict(sorted(suffixes.items()))


def is_uniquely_decodable(code: set, debug: bool = False):
    """Function that returns:
        - a bool if code is uniquely decodable
        - a sequence that prove the code is not UD
        - the witness of interection between S0 and Si
    """
    S0 = {word: [word] for word in code}
    all_S = [S0]

    i = 0
    max_tries = 1000
    for i in range(max_tries):
        Si = get_suffixes(code, all_S[i])
        print(f"S{i+1}: {sort_set(Si.keys()) if len(Si) > 0 else '[]'}")

        if debug:
            print(f"paths: {Si}")

        for suffix, seq in Si.items():
            if suffix in S0:
                print("Halt 1")
                ambiguous_sequence = seq[-1] + suffix
                return False, ambiguous_sequence, S0[suffix]  # Not uniquely decodable

        if not Si:
            print("Halt 2")
            return True, all_S, Si  # Uniquely decodable

        if any(Si.keys() == prev_suffixes.keys() for prev_suffixes in all_S):
            print("Halt 3")
            return True, all_S, Si  # Uniquely decodable

        all_S.append(Si)

    raise RuntimeError("Error, max tries reached")

### Tests

In [4]:
code = {"01", "0011", "2", "102", "0120"}
name = "E"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

-------------------------------------
E: ['2', '01', '102', '0011', '0120']
S1: ['20']
S2: ['0']
S3: ['1', '011', '120']
S4: ['1', '02']
S5: ['02']
S6: []
Halt 2
[92m Code E is Uniquely Decodable. [0m


In [5]:
code = {"00", "01", "10", "11"}
name = "A0"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")


----------------------------
A0: ['00', '01', '10', '11']
S1: []
Halt 2
[92m Code A0 is Uniquely Decodable. [0m


In [6]:
code = {"0", "10", "110", "1110"}
name = "B0"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")


------------------------------
B0: ['0', '10', '110', '1110']
S1: []
Halt 2
[92m Code B0 is Uniquely Decodable. [0m


In [16]:
code = {"0", "01", "10", "1"}
name = "D0"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code, True)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

--------------------------
D0: ['0', '1', '01', '10']
S1: ['0', '1']
paths: {'0': ['1', '10'], '1': ['0', '01']}
Halt 1
[91m Code D0 is Not Uniquely Decodable.
  Witness: ['0']
Ambiguous sequence that proves not uniqueness: 100
  ['1 0 0', '10 0'] [0m


In [8]:
code = {"0", "01", "11"}
name = "UD"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

---------------------
UD: ['0', '01', '11']
S1: ['1']
S2: ['1']
Halt 3
[92m Code UD is Uniquely Decodable. [0m


### Apply the algorithm to test whether C={012, 0123, 4, 310, 1024, 2402, 2401, 4013} is UD.

In [9]:
code = {"012", "0123", "4", "310", "1024", "2402", "2401", "4013"}
name = "C1"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

---------------------------------------------------------------
C1: ['4', '012', '310', '0123', '1024', '2401', '2402', '4013']
S1: ['3', '013']
S2: ['10']
S3: ['24']
S4: ['01', '02']
S5: ['2', '23']
S6: ['401', '402']
S7: ['3', '01', '02']
S8: ['2', '10', '23']
S9: ['24', '401', '402']
S10: ['3', '01', '02']
Halt 3
[92m Code C1 is Uniquely Decodable. [0m


### Apply the algorithm to verify which of the following codes are UD

In [15]:
code = {"10", "010", "1", "1110"}
name = "C1"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {code}")
is_ud, seq1, intersection = is_uniquely_decodable(code, True)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

------------------------------
C1: {'1', '10', '010', '1110'}
S1: ['0', '110']
paths: {'0': ['1', '10'], '110': ['1', '1110']}
S2: ['10']
paths: {'10': ['1', '1', '1110']}
Halt 1
[91m Code C1 is Not Uniquely Decodable.
  Witness: ['10']
Ambiguous sequence that proves not uniqueness: 111010
  ['1 1 1 010', '1 1 10 10', '1110 10'] [0m


In [11]:
code = {"0", "001", "101", "11"}
name = "C2"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {code}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

-----------------------------
C2: {'11', '0', '001', '101'}
S1: ['01']
S2: ['1']
S3: ['1', '01']
S4: ['1', '01']
Halt 3
[92m Code C2 is Uniquely Decodable. [0m


In [12]:
code = {"0", "2", "03", "011", "104", "341", "11234"}
name = "C3"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

--------------------------------------------------
C3: ['0', '2', '03', '011', '104', '341', '11234']
S1: ['3', '11']
S2: ['41', '234']
S3: ['34']
S4: ['1']
S5: ['04', '1234']
S6: ['4']
S7: []
Halt 2
[92m Code C3 is Uniquely Decodable. [0m


In [13]:
code = {"01", "10", "001", "100", "000", "111"}
name = "C4"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")

--------------------------------------------
C4: ['01', '10', '000', '001', '100', '111']
S1: ['0']
S2: ['1', '00', '01']
Halt 1
[91m Code C4 is Not Uniquely Decodable.
  Witness: ['01']
Ambiguous sequence that proves not uniqueness: 10001
  ['100 01', '10 001'] [0m


In [14]:
code = {"0", "01", "011", "0111"}
name = "C5"
print("-" * len(f"{name}: {code}"))
print(f"{name}: {sort_set(code)}")
is_ud, seq1, intersection = is_uniquely_decodable(code)

if not is_ud:
    print(f"{bcolors.FAIL} Code {name} is Not Uniquely Decodable.")
    print(f"  Witness: {intersection}")
    print(f"Ambiguous sequence that proves not uniqueness: {seq1}")

    print(f"  {find_codings(code, seq1)} {bcolors.ENDC}")
else:
    print(f"{bcolors.OKGREEN} Code {name} is Uniquely Decodable. {bcolors.ENDC}")


------------------------------
C5: ['0', '01', '011', '0111']
S1: ['1', '11', '111']
S2: []
Halt 2
[92m Code C5 is Uniquely Decodable. [0m
