# Removing emoji using Regex

In [1]:
import re

In [2]:
def remove_emojis(text):
    """Return ``text`` with emoji characters removed.

    Only code points in well-known emoji blocks are stripped. An earlier
    version used the single range U+24C2..U+1F251, which spans most of the
    Basic Multilingual Plane above U+24C2 and therefore also deleted CJK
    ideographs, kana, Hangul and other ordinary text. That catch-all is
    replaced here by explicit ranges, and the newer emoji blocks
    (U+1F900..U+1F9FF, U+1FA70..U+1FAFF) are added.

    Args:
        text: The string to clean.

    Returns:
        A new string with every run of matched emoji code points deleted.
        Surrounding whitespace is left untouched, so removing an emoji that
        sat between two spaces leaves both spaces in place.
    """
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # Emoticons
        "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
        "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
        "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
        "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
        "\U0001F000-\U0001F251"  # Tiles/cards, enclosed alphanumerics (incl. flag letters), enclosed ideographs
        "\U00002600-\U000026FF"  # Miscellaneous Symbols
        "\U00002700-\U000027BF"  # Dingbats
        "\U00002B00-\U00002BFF"  # Miscellaneous Symbols and Arrows (stars, squares)
        "\U000024C2"             # Circled Latin capital M
        "\U0000FE0F"             # Variation Selector-16 (emoji presentation), would otherwise be left behind
        "]+"
    )

    # Delete each run of emoji code points.
    return emoji_pattern.sub("", text)

In [3]:
# Sample input for the removal demo: an English sentence containing three emoji.
text_with_emojis = "Hello 👋, I am excited 😄 about this! Let's code 🖥️ together."


In [4]:
# Strip the emoji from the sample sentence; the input string itself is not modified.
text_without_emojis = remove_emojis(text_with_emojis)


In [5]:
# Bare expression so the notebook displays the cleaned string as the cell output.
text_without_emojis

"Hello , I am excited  about this! Let's code  together."

# Replacing emoji with their UTF-8 encoded byte codes

In [6]:
import re

In [7]:
def emoji_to_utf(emoji):
    """Render the UTF-8 encoding of ``emoji`` as space-separated hex tokens.

    Each byte of the UTF-8 encoding becomes one token of the form ``U+XX``
    (two upper-case hex digits). Note that the ``U+`` prefix here labels
    individual encoded bytes, not Unicode code points.

    Args:
        emoji: The string to encode; may hold one or several characters.

    Returns:
        The tokens joined by single spaces, or an empty string for empty input.
    """
    hex_tokens = []
    for octet in emoji.encode('utf-8'):
        # One token per encoded byte, zero-padded to two hex digits.
        hex_tokens.append('U+{:02X}'.format(octet))
    return ' '.join(hex_tokens)

In [8]:
def replace_emojis_with_utf(text):
    """Replace every run of emoji in ``text`` with its UTF-8 byte codes.

    Each maximal run of adjacent emoji code points is passed as one string to
    ``emoji_to_utf`` and substituted by that function's return value.

    Only code points in well-known emoji blocks are matched. An earlier
    version used the single range U+24C2..U+1F251, which spans most of the
    Basic Multilingual Plane above U+24C2 and therefore also converted CJK
    ideographs, kana, Hangul and other ordinary text into byte codes. That
    catch-all is replaced here by explicit ranges, and the newer emoji blocks
    (U+1F900..U+1F9FF, U+1FA70..U+1FAFF) are added.

    Args:
        text: The string to transform.

    Returns:
        A new string in which each emoji run has been replaced; all other
        characters are kept as they are.
    """
    emoji_pattern = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # Emoticons
        "\U0001F300-\U0001F5FF"  # Miscellaneous Symbols and Pictographs
        "\U0001F680-\U0001F6FF"  # Transport and Map Symbols
        "\U0001F900-\U0001F9FF"  # Supplemental Symbols and Pictographs
        "\U0001FA70-\U0001FAFF"  # Symbols and Pictographs Extended-A
        "\U0001F000-\U0001F251"  # Tiles/cards, enclosed alphanumerics (incl. flag letters), enclosed ideographs
        "\U00002600-\U000026FF"  # Miscellaneous Symbols
        "\U00002700-\U000027BF"  # Dingbats
        "\U00002B00-\U00002BFF"  # Miscellaneous Symbols and Arrows (stars, squares)
        "\U000024C2"             # Circled Latin capital M
        "\U0000FE0F"             # Variation Selector-16, kept in the run so it is encoded with its emoji
        "]+"
    )

    # The whole matched run (e.g. both letters of a flag) is converted at once.
    return emoji_pattern.sub(lambda match: emoji_to_utf(match.group(0)), text)

In [9]:
# Sample input for the replacement demo; rebinds the name used in the removal
# demo and adds a flag emoji at the end of the sentence.
text_with_emojis = "Hello 👋, I am excited 😄 about this! Let's code 🖥️ together. Go 🇺🇸!"

In [10]:
# Swap each emoji run in the sample sentence for its UTF-8 byte codes.
text_with_utf = replace_emojis_with_utf(text_with_emojis)

In [11]:
# Bare expression so the notebook displays the converted string as the cell output.
text_with_utf

"Hello U+F0 U+9F U+91 U+8B, I am excited U+F0 U+9F U+98 U+84 about this! Let's code U+F0 U+9F U+96 U+A5 U+EF U+B8 U+8F together. Go U+F0 U+9F U+87 U+BA U+F0 U+9F U+87 U+B8!"