In [2]:
import os
import random
import re

from bs4 import BeautifulSoup
from tqdm import tqdm

class HTMLComponentOverlapGenerator:
    """Generate HTML/CSS variants in which UI components visually overlap.

    Each variant is produced by locating known components (by CSS class
    selector), wrapping them in a relatively positioned container and
    inserting absolutely positioned copies next to them. The extra CSS
    needed to position the copies is appended to the supplied stylesheet.
    """

    # Ordered mapping of CSS selector -> coarse component type. The key order
    # is the order in which components are discovered; unknown selectors map
    # to 'component' (see get_type).
    COMPONENT_TYPES = {
        '.filter-chip-01': 'chip',
        '.filter-chip-02': 'selected-chip',
        '.card-01': 'card',
        '.button-1': 'button',
        '.button-2': 'icon-button',
        '.nav-item-1': 'nav-item',
        '.nav-item-2': 'nav-item',
        '.icon': 'icon',
        '.image-icon': 'image',
        '.selected-icon': 'small-icon',
        '.top-app-bar': 'header',
        '.status-bar': 'status',
        '.navigation-bar': 'navbar',
        '.gesture-bar': 'gesture',
    }

    def __init__(self):
        # Candidate pixel offsets for overlapping copies (normal/mixed variants).
        self.offsets = [
            {'x': 5, 'y': 5}, {'x': 10, 'y': 0}, {'x': 0, 'y': 10},
            {'x': -5, 'y': 5}, {'x': 5, 'y': -5}, {'x': 15, 'y': 5},
            {'x': 0, 'y': 15}, {'x': 10, 'y': 10}, {'x': -10, 'y': 0},
            {'x': 20, 'y': 0}, {'x': 0, 'y': 20}, {'x': -15, 'y': -5}
        ]

        # Candidate opacity and scale factors for overlapping copies.
        self.opacity_vals = [0.5, 0.6, 0.7, 0.8, 0.9]
        self.scale_vals = [0.8, 0.9, 1.0, 1.1, 1.2]

    # ------------------------------------------------------------------
    # Component discovery
    # ------------------------------------------------------------------
    def _collect_components(self, soup):
        """Return component records for every known selector found in *soup*.

        The records hold the live ``Tag`` objects of *soup* and a snapshot of
        their markup, so callers can keep using them after the tree has been
        mutated without re-running selectors (whose results would then also
        include inserted copies).
        """
        components = []
        for selector in self.COMPONENT_TYPES:
            for idx, element in enumerate(soup.select(selector)):
                components.append({
                    'element': element,
                    'selector': selector,
                    'index': idx,
                    'tag': element.name,
                    'classes': element.get('class', []),
                    'html_content': str(element),
                    'component_type': self.get_type(selector)
                })
        return components

    def find_components(self, html_content):
        """Parse *html_content* and return a list of component records.

        Each record is a dict with the keys ``element``, ``selector``,
        ``index`` (position among matches of that selector), ``tag``,
        ``classes``, ``html_content`` and ``component_type``.
        """
        return self._collect_components(BeautifulSoup(html_content, 'html.parser'))

    def get_type(self, selector):
        """Return the component type for *selector*, or 'component' if unknown."""
        return self.COMPONENT_TYPES.get(selector, 'component')

    # ------------------------------------------------------------------
    # Tree-manipulation helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _wrap_in_container(soup, element, container_id):
        """Wrap *element* in a relatively positioned inline-block ``div``."""
        container = soup.new_tag('div', id=container_id)
        container['style'] = "position: relative; display: inline-block;"
        element.wrap(container)

    @staticmethod
    def _insert_clone(soup, anchor, html, clone_id):
        """Insert a ``div#clone_id`` holding a parsed copy of *html* after *anchor*."""
        clone = soup.new_tag('div', id=clone_id)
        clone.append(BeautifulSoup(html, 'html.parser'))
        anchor.insert_after(clone)

    @staticmethod
    def _extract_body(html):
        """Return the markup between the opening and closing body tags.

        Handles body tags that carry attributes or differ in case. If no
        opening body tag exists the markup is returned unchanged; if the
        closing tag is missing everything after the opening tag is returned.
        """
        opening = re.search(r'<body\b[^>]*>', html, re.IGNORECASE)
        if opening is None:
            return html
        closing = re.compile(r'</body\s*>', re.IGNORECASE).search(html, opening.end())
        end = closing.start() if closing is not None else len(html)
        return html[opening.end():end]

    # ------------------------------------------------------------------
    # Variant builders
    # ------------------------------------------------------------------
    def make_overlap_variant(self, html_content, css_content, variant_id):
        """Build a 'normal' variant: 1-4 offset copies on top of every component.

        Returns a ``(html, css)`` tuple. When no component is found the
        inputs are returned unchanged.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        # Resolve all targets BEFORE mutating the tree: the inserted copies
        # carry the same classes and would otherwise shift selector indices.
        components = self._collect_components(soup)

        if not components:
            return html_content, css_content

        css_rules = []

        for comp_idx, component_info in enumerate(components):
            target_component = component_info['element']

            target_component['id'] = f"comp-original-{variant_id}-{comp_idx}"
            self._wrap_in_container(
                soup, target_component, f"comp-container-{variant_id}-{comp_idx}"
            )

            num_overlaps = random.randint(1, 4)

            for overlap_idx in range(num_overlaps):
                overlap_id = f"comp-overlap-{variant_id}-{comp_idx}-{overlap_idx}"
                offset = random.choice(self.offsets)
                opacity = random.choice(self.opacity_vals)
                scale = random.choice(self.scale_vals)

                self._insert_clone(
                    soup, target_component, component_info['html_content'], overlap_id
                )

                # Later copies are nudged 2px further, stacked higher and
                # blurred slightly more than earlier ones.
                css_rules.append(f"""
#{overlap_id} {{
    position: absolute !important;
    top: {offset['y'] + overlap_idx * 2}px !important;
    left: {offset['x'] + overlap_idx * 2}px !important;
    z-index: {300 + overlap_idx} !important;
    opacity: {opacity} !important;
    transform: scale({scale}) !important;
    pointer-events: none !important;
    filter: blur({overlap_idx * 0.5}px) !important;
}}
""")

        css_rules.append(f"""
[id^="comp-container-{variant_id}-"] {{
    position: relative !important;
    display: inline-block !important;
}}

[id^="comp-original-{variant_id}-"] {{
    position: relative !important;
    z-index: 1 !important;
}}
""")

        updated_css = css_content + "\n\n" + "".join(css_rules)

        return str(soup), updated_css

    def make_extreme_variant(self, html_content, css_content, variant_id):
        """Build an 'extreme' variant: 3-6 randomly placed, rotated copies per component.

        Offsets, opacity, scale and rotation are drawn from continuous or
        integer ranges rather than the preset lists; blur and sepia grow with
        the copy index. Returns a ``(html, css)`` tuple, or the inputs
        unchanged when no component is found.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        # Resolve targets up front so inserted copies cannot be mistaken
        # for original components later in the loop.
        components = self._collect_components(soup)

        if not components:
            return html_content, css_content

        css_rules = []

        for comp_idx, component_info in enumerate(components):
            target_component = component_info['element']

            target_component['id'] = f"extreme-comp-original-{variant_id}-{comp_idx}"
            self._wrap_in_container(
                soup, target_component, f"extreme-comp-container-{variant_id}-{comp_idx}"
            )

            num_overlaps = random.randint(3, 6)

            for overlap_idx in range(num_overlaps):
                overlap_id = f"extreme-comp-overlap-{variant_id}-{comp_idx}-{overlap_idx}"

                offset_x = random.randint(-20, 20)
                offset_y = random.randint(-20, 20)
                opacity = random.uniform(0.3, 0.8)
                scale = random.uniform(0.7, 1.3)
                rotation = random.randint(-10, 10)

                self._insert_clone(
                    soup, target_component, component_info['html_content'], overlap_id
                )

                css_rules.append(f"""
#{overlap_id} {{
    position: absolute !important;
    top: {offset_y}px !important;
    left: {offset_x}px !important;
    z-index: {500 + overlap_idx} !important;
    opacity: {opacity} !important;
    transform: scale({scale}) rotate({rotation}deg) !important;
    pointer-events: none !important;
    filter: blur({overlap_idx * 0.8}px) sepia({overlap_idx * 20}%) !important;
}}
""")

        css_rules.append(f"""
[id^="extreme-comp-container-{variant_id}-"] {{
    position: relative !important;
    display: inline-block !important;
}}

[id^="extreme-comp-original-{variant_id}-"] {{
    position: relative !important;
    z-index: 1 !important;
}}
""")

        updated_css = css_content + "\n\n" + "".join(css_rules)

        return str(soup), updated_css

    def make_mixed_variant(self, html_content, css_content, variant_id):
        """Build a 'mixed' variant: copies of one component laid over another.

        Up to five source components are sampled; a copy of each is placed
        over a randomly chosen different component. Returns a ``(html, css)``
        tuple. The inputs are returned unchanged when fewer than two
        components exist, since there is then nothing to mix.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        components = self._collect_components(soup)

        # With a single component there is no "other" component to overlay;
        # random.choice on the empty candidate list would raise IndexError.
        if len(components) < 2:
            return html_content, css_content

        css_rules = []

        num_mixed = min(5, len(components))
        selected_components = random.sample(components, num_mixed)

        for mix_idx, source_comp in enumerate(selected_components):
            target_comp = random.choice([c for c in components if c is not source_comp])
            target_element = target_comp['element']

            mixed_id = f"mixed-overlap-{variant_id}-{mix_idx}"
            container_id = f"mixed-container-{variant_id}-{mix_idx}"

            # Only add a positioning container if no ancestor already is one.
            existing_container = target_element.find_parent(
                attrs={'id': lambda x: x and 'mixed-container' in x}
            )
            if existing_container is None:
                self._wrap_in_container(soup, target_element, container_id)

            self._insert_clone(soup, target_element, source_comp['html_content'], mixed_id)

            offset = random.choice(self.offsets)
            opacity = random.choice(self.opacity_vals)

            css_rules.append(f"""
#{mixed_id} {{
    position: absolute !important;
    top: {offset['y']}px !important;
    left: {offset['x']}px !important;
    z-index: 800 !important;
    opacity: {opacity} !important;
    pointer-events: none !important;
    transform: scale(0.8) !important;
}}
""")

        updated_css = css_content + "\n\n" + "".join(css_rules)

        return str(soup), updated_css

    # ------------------------------------------------------------------
    # Batch generation and output
    # ------------------------------------------------------------------
    def create_variants(self, html_content, css_content, num_variants=200):
        """Create *num_variants* variants, cycling through the three kinds.

        Of every five consecutive ids, the first is 'extreme', the second
        'mixed' and the remaining three 'normal'. Returns a list of dicts;
        successful entries have ``id``, ``type``, ``html``, ``css`` and
        ``status='success'``, failed ones have ``id``, ``status='failed'``
        and ``error``.
        """
        variants = []

        for variant_id in tqdm(range(num_variants), desc="Creating variants"):
            try:
                if variant_id % 5 == 0:
                    variant_html, variant_css = self.make_extreme_variant(html_content, css_content, variant_id)
                    overlap_type = 'extreme'
                elif variant_id % 5 == 1:
                    variant_html, variant_css = self.make_mixed_variant(html_content, css_content, variant_id)
                    overlap_type = 'mixed'
                else:
                    variant_html, variant_css = self.make_overlap_variant(html_content, css_content, variant_id)
                    overlap_type = 'normal'

                variants.append({
                    'id': variant_id,
                    'type': overlap_type,
                    'html': variant_html,
                    'css': variant_css,
                    'status': 'success'
                })

            except Exception as e:
                # Best effort: one broken variant must not abort the batch,
                # so the failure is recorded instead of propagated.
                variants.append({
                    'id': variant_id,
                    'status': 'failed',
                    'error': str(e)
                })

        return variants

    def save_files(self, variants, output_dir="component_overlap_variants"):
        """Write every successful variant to ``output_dir`` as a standalone HTML file.

        The variant CSS is inlined in a ``<style>`` element and only the body
        content of the variant HTML is embedded. Failed variants are skipped.
        """
        os.makedirs(output_dir, exist_ok=True)

        for variant in tqdm(variants, desc="Saving files"):
            if variant['status'] != 'success':
                continue

            # Robust against <body ...attributes> and missing body tags,
            # unlike a fixed-offset search for the literal '<body>'.
            body_html = self._extract_body(variant['html'])

            html_template = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <meta name="viewport" content="initial-scale=1, width=device-width">
    <title>Component Overlap Variant {variant['id']} ({variant['type']})</title>
    <style>
{variant['css']}
    </style>
</head>
<body>
{body_html}
</body>
</html>"""

            filename = os.path.join(output_dir, f"comp_overlap_{variant['id']:03d}_{variant['type']}.html")
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(html_template)

def main():
    """Load the seed HTML/CSS, generate 200 overlap variants and save them.

    Prints a message and returns early if either seed file is missing;
    otherwise prints how many variants were created successfully.
    """
    seed_paths = (
        '/Users/promachowdhury/whatBreaksIt/m3-dataset/seeds/variants_1/index.html',
        '/Users/promachowdhury/whatBreaksIt/m3-dataset/seeds/variants_1/index.css',
    )

    loaded = []
    try:
        # Read the HTML first, then the CSS; stop at the first missing file.
        for seed_path in seed_paths:
            with open(seed_path, 'r', encoding='utf-8') as handle:
                loaded.append(handle.read())
    except FileNotFoundError as e:
        print(f"File not found: {e}")
        return

    html_content, css_content = loaded

    generator = HTMLComponentOverlapGenerator()
    variants = generator.create_variants(html_content, css_content, num_variants=200)
    generator.save_files(variants)

    success_count = sum(1 for v in variants if v['status'] == 'success')
    print(f"\nDone: {success_count} variants created")

# Entry point: run main() only when this module is executed directly
# (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

Creating variants: 100%|██████████| 200/200 [00:28<00:00,  6.95it/s]
Saving files: 100%|██████████| 200/200 [00:00<00:00, 3429.52it/s]


Done: 200 variants created



