In [2]:
import os
import random
import re

from bs4 import BeautifulSoup
from tqdm import tqdm

class FontVariantGenerator:
    """Create HTML/CSS page variants whose text elements use fonts from local folders.

    Every immediate sub-folder of ``base_path`` that holds at least one font
    file (.woff2, .woff, .ttf, .otf) counts as one font. Each font file in a
    folder gets its own ``@font-face`` family named ``<folder>-family-<n>``.
    Text elements are the non-empty matches of the CSS selectors in
    ``text_selectors``.
    """

    DEFAULT_BASE_PATH = '/Users/promachowdhury/emotive_fonts/'
    DEFAULT_TEXT_SELECTORS = (
        '.time', '.headline', '.label-text', '.title', '.date',
        '.label-text11', '.label-text12',
    )

    # (file extension, CSS ``format()`` keyword) pairs for recognised fonts.
    FONT_FORMATS = (
        ('.woff2', 'woff2'),
        ('.woff', 'woff'),
        ('.ttf', 'truetype'),
        ('.otf', 'opentype'),
    )

    _BODY_OPEN_RE = re.compile(r'<body\b[^>]*>', re.IGNORECASE)
    _BODY_CLOSE_RE = re.compile(r'</body\s*>', re.IGNORECASE)

    def __init__(self, base_path=None, text_selectors=None):
        """Set up the generator and scan ``base_path`` for font folders.

        Args:
            base_path: Directory whose sub-folders hold font files. Defaults to
                ``DEFAULT_BASE_PATH``.
            text_selectors: CSS selectors identifying text elements. Defaults
                to ``DEFAULT_TEXT_SELECTORS``.
        """
        self.base_path = self.DEFAULT_BASE_PATH if base_path is None else base_path
        self.font_folders = []
        self.font_names = []
        self.text_selectors = list(
            self.DEFAULT_TEXT_SELECTORS if text_selectors is None else text_selectors
        )
        self.discover_fonts()

    def discover_fonts(self):
        """Rebuild ``font_folders`` / ``font_names`` from the contents of ``base_path``.

        Both lists are reset first, so calling this again never duplicates
        entries. Entries are visited in sorted order so font indices are stable
        between runs. A missing or unreadable ``base_path`` leaves both lists
        empty.
        """
        self.font_folders = []
        self.font_names = []

        try:
            entries = sorted(os.listdir(self.base_path))
        except OSError:
            return

        for entry in entries:
            # Folders are stored with a trailing slash, as the rest of the
            # class has always exposed them.
            folder = os.path.join(self.base_path, entry) + '/'
            if os.path.isdir(folder) and self.get_font_files(folder):
                self.font_folders.append(folder)
                self.font_names.append(entry)

    def _iter_text_elements(self, soup):
        """Yield ``(selector, index, element, text)`` for each non-empty selector match.

        ``index`` is the element's position among all matches of ``selector``,
        including matches whose text is empty.
        """
        for selector in self.text_selectors:
            for idx, element in enumerate(soup.select(selector)):
                text = element.get_text(strip=True)
                if text:
                    yield selector, idx, element, text

    def _text_targets(self, soup):
        """Return the elements of ``soup`` that should receive a font rule."""
        return [element for _, _, element, _ in self._iter_text_elements(soup)]

    def find_text_elements(self, html_content):
        """Parse ``html_content`` and describe every non-empty text element.

        Returns:
            A list of dicts with keys ``'element'`` (the parsed tag),
            ``'text'`` (its stripped text), ``'selector'`` (the selector that
            matched) and ``'index'`` (position among that selector's matches).
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        return [
            {'element': element, 'text': text, 'selector': selector, 'index': idx}
            for selector, idx, element, text in self._iter_text_elements(soup)
        ]

    def get_font_files(self, font_folder):
        """Return the sorted font file names found directly in ``font_folder``.

        Extensions are matched case-insensitively. A missing or unreadable
        folder yields an empty list.
        """
        extensions = tuple(extension for extension, _ in self.FONT_FORMATS)
        try:
            names = os.listdir(font_folder)
        except OSError:
            return []
        return sorted(name for name in names if name.lower().endswith(extensions))

    @classmethod
    def _css_format(cls, font_file):
        """Map a font file name to its CSS ``format()`` keyword, ignoring case."""
        lowered = font_file.lower()
        for extension, css_format in cls.FONT_FORMATS:
            if lowered.endswith(extension):
                return css_format
        return 'truetype'

    @staticmethod
    def _family_names(font_name, count):
        """Return the quoted family names ``'<font_name>-family-1'`` .. ``-<count>``."""
        return [f"'{font_name}-family-{n}'" for n in range(1, count + 1)]

    def _font_face_css(self, font_folder, font_name, font_files):
        """Build one ``@font-face`` block per entry of ``font_files``."""
        blocks = []
        for position, font_file in enumerate(font_files, start=1):
            font_path = os.path.join(font_folder, font_file)
            css_format = self._css_format(font_file)
            blocks.append(
                "\n"
                "@font-face {\n"
                f"    font-family: '{font_name}-family-{position}';\n"
                f"    src: url('{font_path}') format('{css_format}');\n"
                "    font-display: swap;\n"
                "}\n"
            )
        return "".join(blocks)

    @staticmethod
    def _font_rule(element_id, font_families):
        """Build the CSS rule that applies ``font_families`` to ``#element_id``."""
        family_list = ', '.join(font_families)
        return (
            "\n"
            f"#{element_id} {{\n"
            f"    font-family: {family_list}, sans-serif !important;\n"
            "    font-size: inherit !important;\n"
            "    font-weight: inherit !important;\n"
            "}\n"
        )

    def make_font_css(self, font_folder, font_name):
        """Return ``@font-face`` CSS for every font file in ``font_folder``.

        Returns an empty string when the folder holds no font files.
        """
        return self._font_face_css(
            font_folder, font_name, self.get_font_files(font_folder)
        )

    def make_single_font_variant(self, html_content, css_content, font_index, variant_id):
        """Apply the font at ``font_index`` to every text element.

        Returns:
            ``(html, css)``. The inputs are returned unchanged when the page
            has no text elements or the font folder has no font files.

        Raises:
            IndexError: if ``font_index`` is outside ``font_folders``.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        targets = self._text_targets(soup)
        if not targets:
            return html_content, css_content

        font_folder = self.font_folders[font_index]
        font_name = self.font_names[font_index]
        font_files = self.get_font_files(font_folder)
        if not font_files:
            return html_content, css_content

        families = self._family_names(font_name, len(font_files))
        css_parts = [self._font_face_css(font_folder, font_name, font_files)]

        for elem_idx, element in enumerate(targets):
            element_id = f"font-text-{variant_id}-{elem_idx}"
            # NOTE(review): this replaces any id the element already had, so
            # stylesheet rules keyed on the old id stop matching - confirm the
            # seed pages do not rely on element ids.
            element['id'] = element_id
            css_parts.append(self._font_rule(element_id, families))

        return str(soup), css_content + "\n\n" + "".join(css_parts)

    def make_mixed_variant(self, html_content, css_content, variant_id):
        """Give each text element one randomly chosen font.

        ``@font-face`` blocks for all discovered fonts are emitted. Elements
        whose chosen folder has no font files are left untouched.

        Returns:
            ``(html, css)``. The inputs are returned unchanged when there are
            no text elements or no fonts.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        targets = self._text_targets(soup)
        if not targets or not self.font_folders:
            return html_content, css_content

        # List every folder once per variant instead of once per element.
        files_per_font = [self.get_font_files(folder) for folder in self.font_folders]
        css_parts = [
            self._font_face_css(folder, name, files)
            for folder, name, files in zip(self.font_folders, self.font_names, files_per_font)
        ]

        for elem_idx, element in enumerate(targets):
            font_index = random.randint(0, len(self.font_folders) - 1)
            font_files = files_per_font[font_index]
            if not font_files:
                continue

            element_id = f"mixed-font-text-{variant_id}-{elem_idx}"
            # NOTE(review): overwrites any existing id on the element.
            element['id'] = element_id
            families = self._family_names(self.font_names[font_index], len(font_files))
            css_parts.append(self._font_rule(element_id, families))

        return str(soup), css_content + "\n\n" + "".join(css_parts)

    def make_random_variant(self, html_content, css_content, variant_id):
        """Give each text element a fallback stack built from 1-3 random fonts.

        The number of fonts drawn is capped at the number of discovered fonts,
        so this also works when fewer than three fonts are available.

        Returns:
            ``(html, css)``. The inputs are returned unchanged when there are
            no text elements or no fonts.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        targets = self._text_targets(soup)
        font_count = len(self.font_folders)
        if not targets or font_count == 0:
            return html_content, css_content

        files_per_font = [self.get_font_files(folder) for folder in self.font_folders]
        css_parts = [
            self._font_face_css(folder, name, files)
            for folder, name, files in zip(self.font_folders, self.font_names, files_per_font)
        ]

        for elem_idx, element in enumerate(targets):
            # random.sample raises ValueError if asked for more items than the
            # population holds, hence the cap.
            num_fonts = min(random.randint(1, 3), font_count)
            chosen = random.sample(range(font_count), num_fonts)

            element_id = f"random-font-text-{variant_id}-{elem_idx}"
            # NOTE(review): overwrites any existing id on the element.
            element['id'] = element_id

            families = []
            for font_idx in chosen:
                families.extend(
                    self._family_names(self.font_names[font_idx], len(files_per_font[font_idx]))
                )

            if families:
                css_parts.append(self._font_rule(element_id, families))

        return str(soup), css_content + "\n\n" + "".join(css_parts)

    def create_variants(self, html_content, css_content, num_variants=200):
        """Generate ``num_variants`` variants, cycling through the three strategies.

        Variant ids divisible by 7 use a single font (rotating through the
        discovered fonts), ids with remainder 1 use the mixed strategy, and
        all others use the random strategy.

        Returns:
            A list of dicts. Successful entries carry ``'id'``, ``'type'``,
            ``'html'``, ``'css'`` and ``'status': 'success'``. Entries whose
            generation raised carry ``'id'``, ``'status': 'failed'`` and
            ``'error'``. The list is empty when no fonts were discovered.
        """
        if not self.font_folders:
            return []

        variants = []

        for variant_id in tqdm(range(num_variants), desc="Creating variants"):
            try:
                strategy = variant_id % 7
                if strategy == 0:
                    font_index = (variant_id // 7) % len(self.font_folders)
                    variant_html, variant_css = self.make_single_font_variant(
                        html_content, css_content, font_index, variant_id
                    )
                    variant_type = f'single_{self.font_names[font_index]}'
                elif strategy == 1:
                    variant_html, variant_css = self.make_mixed_variant(
                        html_content, css_content, variant_id
                    )
                    variant_type = 'mixed'
                else:
                    variant_html, variant_css = self.make_random_variant(
                        html_content, css_content, variant_id
                    )
                    variant_type = 'random'
            except Exception as e:
                # Best effort: one broken variant must not abort the batch, so
                # the failure is recorded and generation continues.
                variants.append({
                    'id': variant_id,
                    'status': 'failed',
                    'error': str(e),
                })
                continue

            variants.append({
                'id': variant_id,
                'type': variant_type,
                'html': variant_html,
                'css': variant_css,
                'status': 'success',
            })

        return variants

    @classmethod
    def _extract_body(cls, html):
        """Return the markup between the ``<body ...>`` and ``</body>`` tags.

        The opening tag may carry attributes and either tag may be in any
        case. If the opening tag is missing the result starts at the beginning
        of ``html``; if the closing tag is missing it runs to the end.
        """
        opening = cls._BODY_OPEN_RE.search(html)
        start = opening.end() if opening else 0
        closing = cls._BODY_CLOSE_RE.search(html, start)
        end = closing.start() if closing else len(html)
        return html[start:end]

    def save_files(self, variants, output_dir="font_variants"):
        """Write every successful variant to ``output_dir`` as a standalone HTML file.

        The variant's CSS is inlined in a ``<style>`` element and its body
        markup is re-wrapped in a minimal document. Files are named
        ``font_<id>_<type>.html``. Failed variants are skipped.
        """
        os.makedirs(output_dir, exist_ok=True)

        for variant in tqdm(variants, desc="Saving files"):
            if variant['status'] != 'success':
                continue

            variant_id = variant['id']
            variant_type = variant['type']
            body_markup = self._extract_body(variant['html'])
            variant_css = variant['css']

            page = (
                '<!DOCTYPE html>\n'
                '<html>\n'
                '<head>\n'
                '    <meta charset="utf-8">\n'
                '    <meta name="viewport" content="initial-scale=1, width=device-width">\n'
                f'    <title>Font Variant {variant_id} ({variant_type})</title>\n'
                '    <style>\n'
                f'{variant_css}\n'
                '    </style>\n'
                '</head>\n'
                '<body>\n'
                f'{body_markup}\n'
                '</body>\n'
                '</html>'
            )

            filename = os.path.join(output_dir, f"font_{variant_id:03d}_{variant_type}.html")
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(page)

def main(
    html_path='/Users/promachowdhury/whatBreaksIt/m3-dataset/seeds/variants_1/index.html',
    css_path='/Users/promachowdhury/whatBreaksIt/m3-dataset/seeds/variants_1/index.css',
    output_dir='font_variants',
    num_variants=200,
):
    """Read a seed page, generate font variants of it and write them to disk.

    Args:
        html_path: Seed HTML file to read.
        css_path: Seed stylesheet to read.
        output_dir: Directory the variant files are written to.
        num_variants: Number of variants to generate.

    Returns:
        None. Prints a message and returns early if either seed file is missing.
    """
    try:
        with open(html_path, 'r', encoding='utf-8') as f:
            html_content = f.read()

        with open(css_path, 'r', encoding='utf-8') as f:
            css_content = f.read()
    except FileNotFoundError as e:
        print(f"File not found: {e}")
        return

    generator = FontVariantGenerator()
    if not generator.font_folders:
        # create_variants returns an empty list in this case; say why.
        print(f"No font folders found under {generator.base_path}")

    variants = generator.create_variants(html_content, css_content, num_variants=num_variants)
    generator.save_files(variants, output_dir)

    successful = [v for v in variants if v['status'] == 'success']
    print(f"\nDone: {len(successful)} variants created")

    # Failed variants are recorded by create_variants but were never surfaced.
    failed = [v for v in variants if v['status'] != 'success']
    if failed:
        print(f"{len(failed)} variants failed (first error: {failed[0].get('error')})")

# Run the generator only when this file is executed as a script, so that
# importing the module does not read the seed files or write any output.
if __name__ == "__main__":
    main()

Creating variants: 100%|██████████| 200/200 [00:09<00:00, 20.78it/s]
Saving files: 100%|██████████| 200/200 [00:00<00:00, 7447.47it/s]


Done: 200 variants created



