In [None]:
from emoji import EMOJI_DATA

# Unpacking a mapping yields its keys, so this collects every emoji string
# that EMOJI_DATA has an entry for.
emoji_list = [*EMOJI_DATA]


In [4]:
# Sanity check: number of emoji entries collected from EMOJI_DATA above.
len(emoji_list)

5042

In [6]:
import os
import random
import re

import emoji
from bs4 import BeautifulSoup
from tqdm import tqdm

class EmojiInjectionGenerator:
    """Build HTML/CSS variants of a seed page with emojis injected into its text.

    Text-bearing elements are located with the CSS selectors in
    ``self.text_selectors``. Each variant rewrites the text of those elements
    with one or more emojis inserted, tags the element with a marker class and
    appends an ``@font-face`` rule pointing at ``self.font_path`` to the CSS.

    Three flavours exist: ``single`` (one emoji per element, one category and
    one position style per variant), ``multi`` (2-4 emojis per element) and
    ``chaos`` (1-6 emojis per element drawn from the random pool).
    """

    # Defaults preserved from the original hard-coded configuration.
    DEFAULT_FONT_PATH = '/Users/promachowdhury/fonts/NotoColorEmoji.ttf'
    DEFAULT_TEXT_SELECTORS = (
        '.time', '.headline', '.label-text', '.title', '.date',
        '.label-text11', '.label-text12',
    )

    def __init__(self, font_path=None, text_selectors=None):
        """Configure the generator and build the emoji pools.

        Args:
            font_path: Path written into the generated ``@font-face`` rule.
                Defaults to ``DEFAULT_FONT_PATH``.
            text_selectors: Iterable of CSS selectors identifying the elements
                whose text is rewritten. Defaults to ``DEFAULT_TEXT_SELECTORS``.
        """
        self.font_path = self.DEFAULT_FONT_PATH if font_path is None else font_path
        if text_selectors is None:
            text_selectors = self.DEFAULT_TEXT_SELECTORS
        self.text_selectors = list(text_selectors)
        self.setup_emojis()

    def setup_emojis(self):
        """Populate ``self.emoji_cats``: category name -> list of emoji strings.

        Keyword categories are filled by substring-matching the lower-cased
        ``emoji.demojize`` name of each emoji and truncated to a fixed size;
        ``'random'`` is a random sample of up to 100 emojis.
        """
        all_emojis = list(emoji.EMOJI_DATA.keys())
        # Demojize each emoji once instead of once per category.
        named = [(e, emoji.demojize(e).lower()) for e in all_emojis]

        def matching(keywords, limit):
            # First `limit` emojis whose name contains any of the keywords.
            return [e for e, name in named if any(word in name for word in keywords)][:limit]

        self.emoji_cats = {
            'faces': matching(['face', 'smile', 'grin', 'joy', 'laugh', 'wink', 'cry', 'angry'], 50),
            'animals': matching(['dog', 'cat', 'bird', 'fish', 'bear', 'lion', 'tiger', 'monkey'], 30),
            'objects': matching(['ball', 'car', 'phone', 'computer', 'book', 'music', 'game'], 30),
            'food': matching(['food', 'pizza', 'burger', 'cake', 'coffee', 'drink', 'fruit'], 30),
            'hearts': matching(['heart', 'love'], 20),
            'random': random.sample(all_emojis, min(100, len(all_emojis)))
        }

    def _pick_category(self):
        """Return a random category name, preferring categories that have emojis."""
        non_empty = [name for name, pool in self.emoji_cats.items() if pool]
        # Fall back to all names so the failure mode with no emojis at all is
        # unchanged (random.choice raises on the empty pool later).
        return random.choice(non_empty or list(self.emoji_cats.keys()))

    def _select_text_elements(self, soup):
        """Collect non-empty elements of ``soup`` matched by the text selectors.

        Returns one dict per (selector, match) pair with keys ``element``,
        ``text``, ``selector`` and ``index`` (position within that selector's
        matches). An element matched by several selectors appears several times.
        """
        text_elements = []
        for selector in self.text_selectors:
            for idx, element in enumerate(soup.select(selector)):
                text_content = element.get_text(strip=True)
                if text_content:
                    text_elements.append({
                        'element': element,
                        'text': text_content,
                        'selector': selector,
                        'index': idx
                    })
        return text_elements

    def find_text_elements(self, html_content):
        """Parse ``html_content`` and return its text elements.

        See ``_select_text_elements`` for the shape of the returned dicts. The
        elements belong to a parse tree created inside this call.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        return self._select_text_elements(soup)

    def _unique_targets(self, soup):
        """Return each matched element of ``soup`` once, in first-match order.

        Deduplicating by identity keeps an element that matches several
        selectors from being rewritten (and class-tagged) more than once.
        """
        seen = set()
        targets = []
        for info in self._select_text_elements(soup):
            element = info['element']
            if id(element) not in seen:
                seen.add(id(element))
                targets.append(element)
        return targets

    @staticmethod
    def _inject_unit(units, emoji_char, position):
        """Insert ``emoji_char`` into ``units`` (a list of strings) in place.

        Every list item is treated as indivisible, so an emoji inserted by an
        earlier call is never split by a later one. Position semantics match
        ``inject_emoji``. Returns the same list for convenience.
        """
        if len(units) <= 1 or position == 'end':
            units.append(emoji_char)
        elif position == 'start':
            units.insert(0, emoji_char)
        elif position == 'middle':
            units.insert(len(units) // 2, emoji_char)
        elif position == 'random':
            # Never position 0; len(units) means "append".
            units.insert(random.randint(1, len(units)), emoji_char)
        elif position == 'between_words':
            space_positions = [i for i, unit in enumerate(units) if unit == ' ']
            if space_positions:
                # Words are separated by single spaces: n spaces -> n + 1 words.
                word_pos = random.randint(1, len(space_positions) + 1)
                if word_pos > len(space_positions):
                    units.extend([' ', emoji_char])
                else:
                    at = space_positions[word_pos - 1] + 1
                    units[at:at] = [emoji_char, ' ']
            else:
                units.append(emoji_char)
        else:
            # Unknown position styles fall back to appending.
            units.append(emoji_char)
        return units

    def inject_emoji(self, text, emoji_char, position='random'):
        """Return ``text`` with ``emoji_char`` inserted once.

        Args:
            text: Source text. Texts of length 0 or 1 always get the emoji
                appended regardless of ``position``.
            emoji_char: String to insert.
            position: ``'start'``, ``'end'``, ``'middle'`` (at ``len // 2``),
                ``'random'`` (any index from 1 to the end), or
                ``'between_words'`` (as an extra space-separated word after the
                first word or later). Anything else appends.

        Returns:
            The new string.
        """
        return ''.join(self._inject_unit(list(text), emoji_char, position))

    def _inject_many(self, text, injections):
        """Apply several ``(emoji_char, position)`` injections to ``text``.

        Works on a unit list rather than re-slicing the growing string, so a
        multi-code-point emoji inserted earlier stays intact.
        """
        units = list(text)
        for emoji_char, position in injections:
            self._inject_unit(units, emoji_char, position)
        return ''.join(units)

    def _emoji_css(self, class_name):
        """Return the CSS snippet declaring the emoji font and ``.class_name`` rule."""
        return f"""
@font-face {{
    font-family: 'NotoColorEmoji';
    src: url('{self.font_path}') format('truetype');
    font-display: swap;
}}

.{class_name} {{
    font-family: 'NotoColorEmoji', sans-serif !important;
}}
"""

    def _build_variant(self, html_content, css_content, class_name, rewrite_text):
        """Rewrite every target element's text and return ``(html, css)``.

        ``rewrite_text`` maps an element's stripped text to its replacement.
        When no target element exists the inputs are returned unchanged.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        targets = self._unique_targets(soup)
        if not targets:
            return html_content, css_content

        for element in targets:
            original_text = element.get_text(strip=True)
            # Assigning .string replaces the element's children with one text node.
            element.string = rewrite_text(original_text)
            existing_classes = element.get('class', [])
            if class_name not in existing_classes:
                element['class'] = existing_classes + [class_name]

        updated_css = css_content + "\n\n" + self._emoji_css(class_name)
        return str(soup), updated_css

    def make_single_variant(self, html_content, css_content, variant_id):
        """Inject exactly one emoji into each text element.

        One category and one position style are drawn per variant; the emoji
        itself is drawn per element. ``variant_id`` is accepted for interface
        symmetry and is not used.
        """
        category = self._pick_category()
        position_style = random.choice(['random', 'middle', 'between_words', 'start', 'end'])
        pool = self.emoji_cats[category]

        def rewrite(text):
            return self.inject_emoji(text, random.choice(pool), position_style)

        return self._build_variant(html_content, css_content, 'emoji-text', rewrite)

    def make_multi_variant(self, html_content, css_content, variant_id):
        """Inject 2-4 emojis, each from a random category, into each text element.

        ``variant_id`` is accepted for interface symmetry and is not used.
        """
        positions = ['random', 'middle', 'between_words']

        def rewrite(text):
            num_emojis = random.randint(2, 4)
            selected = [random.choice(self.emoji_cats[self._pick_category()])
                        for _ in range(num_emojis)]
            return self._inject_many(
                text, [(emoji_char, random.choice(positions)) for emoji_char in selected])

        return self._build_variant(html_content, css_content, 'multi-emoji-text', rewrite)

    def make_chaos_variant(self, html_content, css_content, variant_id):
        """Inject 1-6 emojis from the ``'random'`` pool into each text element.

        ``variant_id`` is accepted for interface symmetry and is not used.
        """
        pool = self.emoji_cats['random']
        positions = ['random', 'start', 'middle', 'end', 'between_words']

        def rewrite(text):
            num_injections = random.randint(1, 6)
            return self._inject_many(
                text,
                [(random.choice(pool), random.choice(positions)) for _ in range(num_injections)])

        return self._build_variant(html_content, css_content, 'chaos-emoji-text', rewrite)

    def create_variants(self, html_content, css_content, num_variants=200):
        """Generate ``num_variants`` variants, cycling single / multi / chaos.

        Returns:
            A list of dicts. Successful entries carry ``id``, ``type``,
            ``html``, ``css`` and ``status='success'``; failed entries carry
            ``id``, ``type``, ``status='failed'`` and ``error``.
        """
        builders = (
            (self.make_single_variant, 'single_emoji'),
            (self.make_multi_variant, 'multi_emoji'),
            (self.make_chaos_variant, 'emoji_chaos'),
        )
        variants = []

        for variant_id in tqdm(range(num_variants), desc="Creating variants"):
            build, variant_type = builders[variant_id % 3]
            try:
                variant_html, variant_css = build(html_content, css_content, variant_id)
            except Exception as e:
                # Best effort: one bad variant must not abort the whole batch,
                # but the error is recorded so callers can report it.
                variants.append({
                    'id': variant_id,
                    'type': variant_type,
                    'status': 'failed',
                    'error': str(e)
                })
                continue

            variants.append({
                'id': variant_id,
                'type': variant_type,
                'html': variant_html,
                'css': variant_css,
                'status': 'success'
            })

        return variants

    @staticmethod
    def _body_inner_html(html):
        """Return the markup between ``<body ...>`` and ``</body>``.

        Handles a body tag with attributes. Without an opening tag the whole
        input is returned; without a closing tag everything after the opening
        tag is returned.
        """
        opening = re.search(r'<body\b[^>]*>', html, re.IGNORECASE)
        if opening is None:
            return html
        start = opening.end()
        closing = re.compile(r'</body\s*>', re.IGNORECASE).search(html, start)
        end = closing.start() if closing else len(html)
        return html[start:end]

    def save_files(self, variants, output_dir="emoji_variants"):
        """Write each successful variant as a standalone HTML file.

        The variant CSS is inlined in a ``<style>`` element and only the body
        content of the variant HTML is embedded. Files are named
        ``emoji_<id>_<type>.html`` inside ``output_dir`` (created if missing).
        Failed variants are skipped.
        """
        os.makedirs(output_dir, exist_ok=True)

        for variant in tqdm(variants, desc="Saving files"):
            if variant['status'] != 'success':
                continue

            body_html = self._body_inner_html(variant['html'])
            html_template = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="initial-scale=1, width=device-width">
    <title>Emoji Variant {variant['id']} ({variant['type']})</title>
    <style>
{variant['css']}
    </style>
</head>
<body>
{body_html}
</body>
</html>"""

            filename = os.path.join(output_dir, f"emoji_{variant['id']:03d}_{variant['type']}.html")
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(html_template)

def main(
    html_path='/Users/promachowdhury/whatBreaksIt/m3-dataset/seeds/variants_1/index.html',
    css_path='/Users/promachowdhury/whatBreaksIt/m3-dataset/seeds/variants_1/index.css',
    num_variants=200,
    output_dir="emoji_variants",
):
    """Read the seed HTML/CSS, generate emoji variants and write them to disk.

    Args:
        html_path: Seed HTML file to read.
        css_path: Seed CSS file to read.
        num_variants: Number of variants to generate.
        output_dir: Directory that receives the generated HTML files.

    Prints a message and returns early if either seed file is missing.
    Afterwards prints how many variants succeeded and, if any failed, a short
    summary of the recorded errors.
    """
    try:
        with open(html_path, 'r', encoding='utf-8') as f:
            html_content = f.read()

        with open(css_path, 'r', encoding='utf-8') as f:
            css_content = f.read()
    except FileNotFoundError as e:
        print(f"File not found: {e}")
        return

    generator = EmojiInjectionGenerator()

    variants = generator.create_variants(html_content, css_content, num_variants=num_variants)
    generator.save_files(variants, output_dir=output_dir)

    successful = [v for v in variants if v['status'] == 'success']
    failed = [v for v in variants if v['status'] != 'success']
    print(f"\nDone: {len(successful)} variants created")

    # create_variants records per-variant errors instead of raising; surface
    # them here so failures are not silently dropped.
    if failed:
        print(f"{len(failed)} variants failed; first errors:")
        for v in failed[:5]:
            print(f"  variant {v['id']}: {v.get('error')}")

# Entry-point guard: run the pipeline when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Creating variants: 100%|██████████| 200/200 [00:09<00:00, 21.86it/s]
Saving files: 100%|██████████| 200/200 [00:00<00:00, 4990.90it/s]


Done: 200 variants created



