In [1]:
import os
from PIL import Image
from transformers import pipeline, BitsAndBytesConfig

# -----------------------
# MODEL + QUANTIZATION
# -----------------------
model_id = "Qwen/Qwen2.5-VL-7B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

pipe = pipeline(
    task="text-generation",
    model=model_id,
    tokenizer=model_id,
    model_kwargs={"quantization_config": bnb_config},
    device_map="auto"
)

# -----------------------
# FOLDERS
# -----------------------
input_folder = "Images"
output_folder = "html_outputs_qwen2.5-7b"
os.makedirs(output_folder, exist_ok=True)

# -----------------------
# LOOP OVER IMAGES
# -----------------------
for fname in os.listdir(input_folder):

    if not fname.lower().endswith((".png", ".jpg", ".jpeg", ".webp")):
        continue

    img_path = os.path.join(input_folder, fname)
    img = Image.open(img_path).convert("RGB")

    prompt = """
Convert this wireframe into production-quality HTML + CSS.
Use semantic HTML. Output ONLY the HTML starting with <!DOCTYPE html>.
"""

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": img},
                {"type": "text", "text": prompt},
            ],
        }
    ]

    # -----------------------
    # GENERATE
    # -----------------------
    output = pipe(messages, max_new_tokens=1800)[0]
    raw = output["generated_text"]

    # Fix: Qwen returns list of strings
    if isinstance(raw, list):
        result = "".join(raw)   # or raw[-1] if you only want the final part
    else:
        result = raw

    # -----------------------
    # SAVE
    # -----------------------
    base = os.path.splitext(fname)[0]
    save_path = os.path.join(output_folder, base + ".html")

    with open(save_path, "w", encoding="utf-8") as f:
        f.write(result)

    print("✔ Saved:", save_path)


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 5/5 [00:10<00:00,  2.19s/it]
Device set to use cuda:0
The model 'Qwen2_5_VLForConditionalGeneration' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'ApertusForCausalLM', 'ArceeForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BitNetForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'DogeForCausalLM', 'Dots1ForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'Ernie4_5ForCausalLM', 'Ernie4_5_Moe

TypeError: sequence item 0: expected str instance, dict found

In [None]:
import os
import base64
import openai

# === SETUP ===
openai.api_key = ""  # Replace with your API key
image_folder = r"C:\Users\fa076154\Desktop\CAP6411\Images"           # Folder containing PNG or JPG wireframe images
output_folder = "./html_outputs_gpt4.1"        # Folder to save the HTML files

# === UTILITIES ===

def encode_image_to_base64(image_path):
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")

def generate_html_from_image(base64_image):
    messages = [
        {
            "role": "system",
            "content": (
                "You are an expert web developer. Convert UI wireframe images into clean, "
                "Convert this wireframe into production-quality HTML + CSS."
                "Use semantic HTML. Output ONLY the HTML starting with <!DOCTYPE html>."
                "Only return valid HTML5 code. No separate CSS, include everything in one."
            )
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{base64_image}"
                    }
                },
                {
                    "type": "text",
                    "text": "Generate semantic HTML for this UI wireframe. Output only valid HTML code."
                }
            ]
        }
    ]

    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=messages,
        max_tokens=2000,
        temperature=0.2,
    )

    return response.choices[0].message.content

# === MAIN PROCESS ===

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

supported_extensions = (".png", ".jpg", ".jpeg", ".webp")

for filename in os.listdir(image_folder):
    if filename.lower().endswith(supported_extensions):
        image_path = os.path.join(image_folder, filename)
        print(f"Processing: {filename} ...")

        try:
            base64_img = encode_image_to_base64(image_path)
            html_output = generate_html_from_image(base64_img)

            html_filename = os.path.splitext(filename)[0] + ".html"
            html_path = os.path.join(output_folder, html_filename)

            with open(html_path, "w", encoding="utf-8") as f:
                f.write(html_output)

            print(f"✔ Saved: {html_path}")
        except Exception as e:
            print(f"❌ Error processing {filename}: {e}")


Processing: 1 (1).png ...
✔ Saved: ./html_outputs_gpt4.1\1 (1).html
Processing: 2 (1).png ...
✔ Saved: ./html_outputs_gpt4.1\2 (1).html
Processing: 3 (1).png ...
✔ Saved: ./html_outputs_gpt4.1\3 (1).html
Processing: 4.png ...
✔ Saved: ./html_outputs_gpt4.1\4.html
Processing: 5.png ...
✔ Saved: ./html_outputs_gpt4.1\5.html
Processing: 6.png ...
✔ Saved: ./html_outputs_gpt4.1\6.html
