## String normalization

In [1]:
from nimbro_utils.lazy import escape
# Sample string reused by the normalization cells that follow. It mixes emoji, an
# accented character ("ô"), color codes looked up in `escape` ('red' / 'end'),
# a newline, a run of three spaces, and punctuation.
text = f"🚧 🚜 This prôject page is a {escape['red']}work{escape['end']}\nin progress before   [final] publication!! 🚜 🚧"
print(text)

🚧 🚜 This prôject page is a [91mwork[0m
in progress before   [final] publication!! 🚜 🚧


### normalize_string()

In [2]:
from nimbro_utils.lazy import normalize_string
# In the captured output the emoji and color codes are gone and "ô" has become "o";
# the newline and the run of spaces are kept.
print(normalize_string(string=text))

This project page is a work
in progress before   [final] publication!!


### remove_unicode()

In [3]:
from nimbro_utils.lazy import remove_unicode
# In the captured output the emoji are gone and "ô" has become "o", while the
# color codes around "work" and all whitespace are still present.
print(remove_unicode(string=text))

This project page is a [91mwork[0m
in progress before   [final] publication!!


### remove_whitespace()

In [4]:
from nimbro_utils.lazy import remove_whitespace
# First call: the captured output contains no whitespace at all (spaces and the newline are gone).
print(remove_whitespace(string=text))
# Second call: with reduce_to_single_space=True every whitespace run, including the
# newline, shows up as a single space in the captured output.
print(remove_whitespace(string=text, reduce_to_single_space=True))

🚧🚜Thisprôjectpageisa[91mwork[0minprogressbefore[final]publication!!🚜🚧
🚧 🚜 This prôject page is a [91mwork[0m in progress before [final] publication!! 🚜 🚧


### remove_non_alpha_numeric()

In [5]:
from nimbro_utils.lazy import remove_non_alpha_numeric
# In the captured output only plain letters and digits survive: emoji, whitespace,
# punctuation and brackets are gone, "ô" is dropped entirely ("prject"), and the
# letters/digits of the color codes remain ("91m", "0m").
print(remove_non_alpha_numeric(string=text))

Thisprjectpageisa91mwork0minprogressbeforefinalpublication


### remove_emoji()

In [6]:
from nimbro_utils.lazy import remove_emoji
# In the captured output only the emoji are gone; "ô", the color codes and the
# original whitespace are unchanged.
print(remove_emoji(string=text))

This prôject page is a [91mwork[0m
in progress before   [final] publication!!


### remove_ansi_escape()

In [7]:
from nimbro_utils.lazy import remove_ansi_escape
# In the captured output only the color codes around "work" are gone; emoji, "ô"
# and whitespace are unchanged.
print(remove_ansi_escape(string=text))

🚧 🚜 This prôject page is a work
in progress before   [final] publication!! 🚜 🚧


## String analysis

### levenshtein()

In [8]:
from nimbro_utils.lazy import levenshtein
# Distance between "test" and "Testing": the captured output reports 4 without
# normalization and 3 with normalization=True.
# NOTE(review): presumably normalization makes the leading "t"/"T" compare equal —
# confirm against the implementation.
print(levenshtein(string_a="test", string_b="Testing"))
print(levenshtein(string_a="test", string_b="Testing", normalization=True))

4
3


### levenshtein_match()

In [9]:
from nimbro_utils.lazy import levenshtein_match

# Candidate labels used for every query in this cell.
match_labels = ["apple", "toy", "toy_car", "coat_stand"]

# One (word, threshold, normalization) triple per demonstrated call, listed in the
# order their results are printed.
match_queries = [
    ("Toy Car", 100, False),
    ("Toy Car", 3, False),
    ("Toy Car", 2, False),
    ("Toy Car", 0, True),
    ("Apples", 0, True),
]

for query_word, query_threshold, query_normalization in match_queries:
    # A fresh copy of the label list is passed on every call, as in a literal list argument.
    match = levenshtein_match(
        word=query_word,
        labels=list(match_labels),
        threshold=query_threshold,
        normalization=query_normalization,
    )
    print(match)

toy_car
toy_car
None
toy_car
None


### is_url()

In [10]:
from nimbro_utils.lazy import is_url

# Strings to classify; each one is printed next to the result of is_url().
url_candidates = [
    "http://vlm-gist.github.io/",
    "http://vlm-gist.github.io",
    "http:/vlm-gist.github.io/",
    "https://vlm-gist.github.io/",
    "vlm-gist.github.io",
]

for url_candidate in url_candidates:
    print(url_candidate, is_url(url_candidate))

http://vlm-gist.github.io/ True
http://vlm-gist.github.io True
http:/vlm-gist.github.io/ False
https://vlm-gist.github.io/ True
vlm-gist.github.io False


### is_attribute_name()

In [11]:
from nimbro_utils.lazy import is_attribute_name
# Each line prints a candidate name followed by the result of is_attribute_name().
print("var", is_attribute_name("var"))
print("vâr", is_attribute_name("vâr")) # special character
print("my var", is_attribute_name("my var")) # whitespace
print("str", is_attribute_name("str")) # shadowing a built-in type is allowed
print("def", is_attribute_name("def")) # Python keyword
print("raise", is_attribute_name("raise")) # Python keyword

var True
vâr True
my var False
str True
def False
raise False


### count_tokens()

In [12]:
from nimbro_utils.lazy import count_tokens
# Token count of the sentence under the "cl100k_base" encoding; the captured output reports 6.
# NOTE(review): "cl100k_base" looks like a tiktoken encoding name — confirm which
# tokenizer backs count_tokens.
print(count_tokens(string="This is a test sentence.", encoding_name="cl100k_base"))

6


### split_sentences()

In [13]:
from nimbro_utils.lazy import split_sentences
# The input holds three sentences ending in ".", "?" and " !!"; the captured output
# lists them as three separate items with their end punctuation attached.
sentences = split_sentences("This is the first sentence. Is this the second one? Yes, it was !!")
# Print each sentence prefixed with its zero-based index.
for i, sentence in enumerate(sentences):
    print(f"{i}:", sentence)

0: This is the first sentence.
1: Is this the second one?
2: Yes, it was !!


### extract_json()

In [14]:
from nimbro_utils.lazy import extract_json
# Sample input: a fenced ```json block holding two JSON values back to back, an empty
# object followed by a list of two objects. One string value contains literal braces
# ("st{u}dent").
# Note: this rebinds `text`, replacing the sample string from the normalization section.
text = """
```json
{}
[
    {
      "test": ""
    },
    {
      "joke": "Why did the st{u}dent eat his homework? Because the teacher said it was a piece of cake!"
    }
]
```
"""
# first_over_longest=True: the captured output is the first value, the empty object.
print(extract_json(string=text, first_over_longest=True))
# first_over_longest=False: the captured output is the longer value, the two-element list.
print(extract_json(string=text, first_over_longest=False))

{}
[{'test': ''}, {'joke': 'Why did the st{u}dent eat his homework? Because the teacher said it was a piece of cake!'}]
