In [64]:
import numpy as np

class DistribucionEmpirica:
    """Empirical distribution described by a few known percentiles.

    Samples are drawn by inverse-transform sampling: a uniform number in
    [0, 1) is mapped through the piecewise-linear quantile function obtained
    by interpolating linearly between the known percentiles.
    """

    def __init__(self, percentiles_definidos, rng=None):
        """
        percentiles_definidos: dict with percentile ranks (0-100) as keys and the
            value at that percentile as values (KB in the original example).
            Ex: {10: 600, 25: 1385, 50: 2827, 75: 5252, 90: 9686}
        rng: optional NumPy random generator exposing ``random(size)``
            (e.g. ``np.random.default_rng(seed)``). When omitted, the global
            ``np.random`` state is used.

        Raises ValueError if the dict is empty or a percentile rank lies
        outside [0, 100].
        """
        self._rng = rng
        self._construir_interpolador(percentiles_definidos)

    def _construir_interpolador(self, percentiles_definidos):
        """Build the sorted (percentile, value) tables used by ``np.interp``."""
        if not percentiles_definidos:
            raise ValueError("Se requiere al menos un percentil definido.")

        # Sort by percentile rank so the x-coordinates are increasing.
        p_ordenados = sorted(percentiles_definidos.items())
        ps, vs = zip(*p_ordenados)

        # Ranks outside 0-100 would end up out of order once the 0/100
        # endpoints are added, and np.interp does not check for that.
        if ps[0] < 0 or ps[-1] > 100:
            raise ValueError("Los percentiles deben estar entre 0 y 100.")

        # Make sure both ends exist: extrapolate the 0th percentile as half of
        # the lowest known value and the 100th as 1.5x the highest known value.
        if 0 not in ps:
            ps = (0,) + ps
            vs = (vs[0] * 0.5,) + vs
        if 100 not in ps:
            ps = ps + (100,)
            vs = vs + (vs[-1] * 1.5,)

        # Store ranks as fractions in [0, 1] to match the uniform draws.
        self.percentiles = [p / 100 for p in ps]
        self.valores = list(vs)

    def _uniforme(self, n=None):
        """Return one uniform draw in [0, 1) (``n is None``) or an array of ``n``."""
        if self._rng is None:
            return np.random.rand() if n is None else np.random.rand(n)
        return self._rng.random(n)

    def muestra(self):
        """Draw a single sample from the distribution."""
        u = self._uniforme()
        return np.interp(u, self.percentiles, self.valores)

    def muestras(self, n):
        """Draw ``n`` samples from the distribution as a NumPy array."""
        u = self._uniforme(n)
        return np.interp(u, self.percentiles, self.valores)


In [None]:
# Percentile tables (percentile rank -> value) for the three sampled quantities.

# Total page weight, in KB.
total_kbytes = DistribucionEmpirica({10: 600, 25: 1385, 50: 2827, 75: 5252, 90: 9686})

# Number of images per page.
n_images = DistribucionEmpirica({10: 6, 25: 21, 50: 46, 75: 102, 90: 223})

# Size of the HTML document alone, in KB.
html_kbytes = DistribucionEmpirica({10: 8, 25: 22, 50: 64, 75: 148, 90: 292})

In [149]:
# Draw one value from each distribution (same order of draws: total, images, HTML).
total_size = total_kbytes.muestra()
num_images = int(n_images.muestra())  # truncate the interpolated value to a whole count
html_size = html_kbytes.muestra()

# Show the three sampled values, one per line.
print(
    f"Total size: {total_size} KB",
    f"Num images: {num_images}",
    f"HTML size: {html_size} KB",
    sep="\n",
)

Total size: 4679.092756990574 KB
Num images: 233
HTML size: 198.42895745808886 KB


In [None]:
import os
import random
import struct
import numpy as np

def create_bmp(filename, target_size):
    """Write a 1-pixel-high, 24-bit uncompressed BMP of about ``target_size`` bytes.

    The file consists of the 54-byte header (14-byte file header plus 40-byte
    info header) followed by random payload bytes. The payload length is
    ``target_size - 54`` rounded down to a multiple of 3, so the file is at
    most 2 bytes smaller than requested.

    BMP rows are stored padded to a multiple of 4 bytes, so the declared width
    is the widest row whose padded length fits in the payload. Up to 3 payload
    bytes may remain after that row; they are written after the pixel array
    and are not part of the image the header describes. For targets below
    58 bytes no padded row fits and the declared width is 0.

    Args:
        filename: path of the file to create (overwritten if it exists).
        target_size: desired file size in bytes (any integer-like value).

    Returns:
        The number of bytes actually written.

    Raises:
        ValueError: if ``target_size`` is smaller than the 54-byte header.
    """
    height = 1
    if target_size < 54:
        raise ValueError(f"El tamaño deseado ({target_size}) es menor que el tamaño mínimo de un BMP (54 bytes)")

    # Accept NumPy integers and similar; everything below works on plain ints.
    target_size = int(target_size)

    # Payload rounded down to a multiple of 3 (same file size as before).
    payload_bytes = (target_size - 54) // 3 * 3
    file_size = 54 + payload_bytes

    # Largest 4-byte-aligned row that fits in the payload, and the widest
    # 24-bit pixel row whose padded length equals that stride.
    row_stride = payload_bytes // 4 * 4
    width = row_stride // 3

    header = struct.pack(
        '<2sIHHIIiiHHIIiiII',
        b'BM',           # signature
        file_size,       # total file size
        0, 0,            # reserved
        54,              # offset of the pixel array
        40,              # info-header size
        width,
        height,
        1,               # colour planes
        24,              # bits per pixel
        0,               # compression: none
        row_stride * height,  # size of the pixel array
        0, 0,            # horizontal / vertical resolution
        0, 0,            # palette colours used / important
    )

    # One big random integer turned into bytes: same `random` module as before,
    # but without a Python-level loop over every byte.
    if payload_bytes:
        payload = random.getrandbits(8 * payload_bytes).to_bytes(payload_bytes, 'little')
    else:
        payload = b''

    with open(filename, 'wb') as f:
        f.write(header)
        f.write(payload)
    return file_size

def generate_image_sizes(n, total_size, min_size=54):
    """Split ``total_size`` bytes into ``n`` random image sizes.

    Every size is ``min_size`` plus a multiple of 3 (one 24-bit pixel). The
    sizes always add up to the largest value not exceeding ``total_size`` that
    is reachable in 3-byte steps, i.e.
    ``total_size - (total_size - n * min_size) % 3`` for integer inputs.

    The relative spread follows a normal distribution with a standard
    deviation of half the mean, clipped at zero, and is then rescaled so the
    shares use the whole budget.

    Args:
        n: number of images.
        total_size: byte budget for all images together.
        min_size: smallest allowed size for a single image (default 54).

    Returns:
        NumPy integer array of length ``n``.

    Raises:
        ValueError: if ``n`` is negative or the budget cannot give every image
            at least ``min_size`` bytes.
    """
    if n < 0:
        raise ValueError("El número de imágenes no puede ser negativo.")
    if total_size < n * min_size:
        raise ValueError("Tamaño total insuficiente para generar imágenes con tamaño mínimo.")
    if n == 0:
        # Nothing to split; avoids dividing by zero below.
        return np.zeros(0, dtype=int)

    # Budget left after reserving the minimum for each image, in 3-byte units.
    usable_size = total_size - n * min_size
    total_units = int(usable_size // 3)

    # Random relative weights: normal around 1 with std 0.5, never negative.
    weights = np.clip(np.random.normal(1.0, 0.5, n), 0, None)
    weight_sum = weights.sum()
    if weight_sum <= 0:
        # Every draw was clipped to zero: fall back to an even split.
        weights = np.ones(n)
        weight_sum = float(n)

    # Proportional share of the units, rounded down...
    exact_units = weights / weight_sum * total_units
    units = np.floor(exact_units).astype(np.int64)

    # ...then hand the units lost to rounding to the images with the largest
    # fractional parts, so the sum is exactly total_units.
    shortfall = total_units - int(units.sum())
    if shortfall > 0:
        lucky = np.argsort(exact_units - units)[::-1][:shortfall]
        units[lucky] += 1

    return (units * 3 + min_size).astype(int)

def create_html(image_files, html_size, folder, num_images):
    """Write ``{folder}/index.html`` referencing every image, sized to ``html_size`` bytes.

    The page is made of a fixed head, one ``<img>`` tag per entry of
    ``image_files``, random uppercase filler text between the tags, a run of
    'A' characters as final padding, and a single ``</body></html>``.

    Filler and padding are only added while they fit, so the file is exactly
    ``html_size`` bytes whenever the head, tags and closing markup fit in that
    budget. If they do not fit, all image tags are still written (without
    filler) and the file is larger than ``html_size``.

    Args:
        image_files: image file names, used as ``src`` of the ``<img>`` tags.
        html_size: target size of the HTML file in bytes.
        folder: existing directory where ``index.html`` is written.
        num_images: image count shown in the page title.

    Returns:
        The number of bytes written to ``index.html``.
    """
    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    closing = '</body></html>'

    head = '<html>\n\t<head>\n\t\t<link rel=\'shortcut icon\' href=\'data:image/x-icon;,\' type=\'image/x-icon\'> \n'
    head += f'\t\t<title>htmlSize: {html_size} numImgs: {num_images}</title>\n'
    head += '\t\t<meta charset=\'utf-8\'/>\n\t</head>\n\t<body>'

    img_tags = [f'<img src="{img}" />\n' for img in image_files]

    # Bytes that must be present regardless of the target size.
    mandatory = len(head.encode('utf-8')) + len(closing.encode('utf-8'))
    mandatory += sum(len(tag.encode('utf-8')) for tag in img_tags)

    # Whatever is left can be spent on filler text (ASCII, one byte per char).
    budget = max(int(html_size) - mandatory, 0)

    def filler(low, high):
        # Random text of low..high characters, capped by the remaining budget.
        nonlocal budget
        length = min(random.randint(low, high), budget)
        budget -= length
        return ''.join(random.choices(alphabet, k=length))

    parts = [head, filler(10, 50)]
    for tag in img_tags:
        parts.append(tag)
        parts.append(filler(20, 100))

    # Spend the rest of the budget before the closing tags so the total
    # (closing markup included) matches the target.
    parts.append('A' * budget)
    parts.append(closing)

    # Write the encoded bytes directly so the size on disk is the size
    # returned, independent of platform newline or encoding defaults.
    encoded = ''.join(parts).encode('utf-8')
    with open(f'{folder}/index.html', 'wb') as f:
        f.write(encoded)
    return len(encoded)

def generate_webpage(total_size_bytes, num_images, html_size, base_dir="poblation"):
    """Generate one synthetic web page (index.html plus BMP images) on disk.

    The page is written to ``{base_dir}/{total_size_bytes}/{num_images}/NNNN``,
    where ``NNNN`` is the first zero-padded index not yet present in that
    directory. The bytes left after the HTML (``total_size_bytes - html_size``)
    are split among ``num_images`` BMP files named
    ``{position:04d}_{requested_size:09d}.bmp``.

    Args:
        total_size_bytes: target size of the whole page in bytes.
        num_images: number of images to create.
        html_size: target size of index.html in bytes.
        base_dir: root directory for the generated pages (default "poblation").

    Returns:
        The path of the folder that was created for this page.

    Raises:
        ValueError: if ``html_size`` is below ``200 + 50 * num_images`` or the
            total cannot hold the HTML plus a 54-byte minimum per image.
    """
    min_html_size = 200 + num_images * 50
    if html_size < min_html_size:
        raise ValueError(f"El tamaño del HTML ({html_size}) es insuficiente para {num_images} imágenes.")
    if total_size_bytes < html_size + num_images * 54:
        raise ValueError("El tamaño total es insuficiente para generar las imágenes y el HTML.")

    # Everything that is not HTML is image payload.
    images_total_size = total_size_bytes - html_size
    image_sizes = generate_image_sizes(num_images, images_total_size)

    base_folder = f"{base_dir}/{total_size_bytes}/{num_images}"
    os.makedirs(base_folder, exist_ok=True)

    # Pick the first unused numbered sub-folder so repeated runs never overwrite.
    index = 0
    while os.path.exists(f"{base_folder}/{index:04d}"):
        index += 1
    folder_name = f"{base_folder}/{index:04d}"
    os.makedirs(folder_name, exist_ok=True)

    image_files = []
    for i, size in enumerate(image_sizes):
        # The file name records the requested size.
        image_name = f"{i+1:04d}_{size:09d}.bmp"
        create_bmp(f"{folder_name}/{image_name}", size)
        image_files.append(image_name)

    create_html(image_files, html_size, folder_name, num_images)
    return folder_name
            
if __name__ == "__main__":
    # Batch driver: 10 sampled total sizes, each combined with 30 image counts
    # (5, 7, ..., 63) and a freshly sampled HTML size.
    for i in range(10):
        total_size = int(total_kbytes.muestra() * 1024)  # KB -> bytes
        print(f"Total size: {total_size} bytes")
        for j in range(30):
            num_images = j*2 + 5
            html_size = int(html_kbytes.muestra() * 1024)  # KB -> bytes

            print(f"Num images: {num_images}")
            print(f"HTML size: {html_size} bytes")

            # The sizes are sampled independently, so a combination can be
            # infeasible (e.g. HTML larger than the whole page). Report it and
            # move on instead of aborting the rest of the batch.
            try:
                generate_webpage(total_size, num_images, html_size)
            except ValueError as error:
                print(f"Combinación omitida: {error}")

Total size: 2414154 bytes
Num images: 5
HTML size: 57376 bytes
Num images: 7
HTML size: 6355 bytes
Num images: 9
HTML size: 40551 bytes
Num images: 11
HTML size: 29599 bytes
Num images: 13
HTML size: 39598 bytes
Num images: 15
HTML size: 48636 bytes
Num images: 17
HTML size: 245974 bytes
Num images: 19
HTML size: 52387 bytes
Num images: 21
HTML size: 255931 bytes
Num images: 23
HTML size: 25859 bytes
Num images: 25
HTML size: 124002 bytes
Num images: 27
HTML size: 13311 bytes
Num images: 29
HTML size: 17404 bytes
Num images: 31
HTML size: 70183 bytes
Num images: 33
HTML size: 37781 bytes
Num images: 35
HTML size: 101824 bytes
Num images: 37
HTML size: 5855 bytes
Num images: 39
HTML size: 48623 bytes
Num images: 41
HTML size: 168541 bytes
Num images: 43
HTML size: 427038 bytes
Num images: 45
HTML size: 202824 bytes
Num images: 47
HTML size: 64178 bytes
Num images: 49
HTML size: 165119 bytes
Num images: 51
HTML size: 282247 bytes
Num images: 53
HTML size: 145497 bytes
Num images: 55
HTML