In [21]:
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText, AutoTokenizer, pipeline, Trainer

processor = AutoProcessor.from_pretrained("bczhou/tiny-llava-v1-hf")
model = AutoModelForImageTextToText.from_pretrained("bczhou/tiny-llava-v1-hf")
tokenizer = AutoTokenizer.from_pretrained("bczhou/tiny-llava-v1-hf")

trainer = Trainer(
    model = model,
    tokenizer = tokenizer,
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  8.31it/s]
  trainer = Trainer(


In [22]:
from transformers import pipeline
from PIL import Image
model_id = "bczhou/tiny-llava-v1-hf"
pipe = pipeline("image-to-text", model=model_id)

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  8.34it/s]
Device set to use cpu


In [23]:
image = Image.open("./model/images/test.png")
prompt = "USER: <image>\nWhat is on this image\nASSISTANT:"
outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
print(outputs[0])


Passing `prompt` to the `image-to-text` pipeline is deprecated and will be removed in version 4.48 of 🤗 Transformers. Use the `image-text-to-text` pipeline instead
Expanding inputs for image tokens in LLaVa should be done in processing. Please add `patch_size` and `vision_feature_select_strategy` to the model's processing config or set directly with `processor.patch_size = {{patch_size}}` and processor.vision_feature_select_strategy = {{vision_feature_select_strategy}}`. Using processors without these attributes in the config is deprecated and will throw an error in v4.50.


{'generated_text': "USER:  \nWhat is on this image\nASSISTANT: The image is a screenshot of a website page displaying information about the game Borderlands 1. The page is in French and features a large image of a character, possibly a zombie, with a description of the game. The page also includes a list of the game's features, such as the number of players, the number of levels, and the number of weapons. The website is designed in a modern and visually appealing style, with a focus on the game's content and features."}


In [24]:
from datasets import load_dataset
from PIL import Image
from io import BytesIO
import requests
import os
import json
import uuid

In [25]:
os.listdir("dataset")
names = os.listdir("dataset/html")
print(names)


['samsungapps_com_desktop.html', 'instructure_com_mobile.html', 'hotmail_com_desktop.html', 'imgsmail_ru_mobile.html', 'google_it_desktop.html', 'me_com_desktop.html', 'si_com_mobile.html', 'cloudflare-dns_com_mobile.html', 'mediatek_com_desktop.html', 'a-mo_net_desktop.html', 'inmobi_com_mobile.html', 'ip-api_com_desktop.html', 'eset_com_mobile.html', 'rapid7_com_desktop.html', 'yelp_com_mobile.html', 'amazon_com_au_desktop.html', 'force_com_mobile.html', 'f5_com_mobile.html', 'seznam_cz_mobile.html', 'lenta_com_desktop.html', 'applovin_com_mobile.html', 'viber_com_mobile.html', 'vkuseraudio_net_mobile.html', 'foxnews_com_mobile.html', 'zemanta_com_desktop.html', 'vivoglobal_com_desktop.html', 'googlevideo_com_mobile.html', 'themeforest_net_desktop.html', 'bugsnag_com_desktop.html', 'ampproject_org_mobile.html', 'adsafeprotected_com_desktop.html', 'elpais_com_mobile.html', '1c_ru_desktop.html', 'onetag-sys_com_mobile.html', 'google_fr_mobile.html', 'digicert_com_desktop.html', 'amazon

In [26]:
import shutil
import os

def reorganize_dataset():
    """Rename files and copy them into a specific code folder.\n
        Tree should look like this:\n
        dataset
        ├── code
        │   ├── desktop
        │   │   ├── css
        │   │   └── html
        │   └── mobile
        │       ├── css
        │       └── html
        └── screens
            ├── desktop
            └── mobile
    """

    target_dirs = {
        "desktop_css": os.path.join("dataset", "code", "desktop", "css"),
        "desktop_html": os.path.join("dataset", "code", "desktop", "html"),
        "mobile_css": os.path.join("dataset", "code", "mobile", "css"),
        "mobile_html": os.path.join("dataset", "code", "mobile", "html"),
    }

    # Create the target directories if they don't exist
    for path in target_dirs.values():
        os.makedirs(path, exist_ok=True)

    # List of file names to be processed
    html_names = os.listdir("dataset/html")

    # Copy and rename files
    for name in html_names:
        if "_desktop.html" in name:
            new_name = name.replace("_desktop.html", ".html")
            shutil.copyfile(
                os.path.join("dataset", "html", name),
                os.path.join(target_dirs["desktop_html"], new_name),
            )
        elif "_mobile.html" in name:
            new_name = name.replace("_mobile.html", ".html")
            shutil.copyfile(
                os.path.join("dataset", "html", name),
                os.path.join(target_dirs["mobile_html"], new_name),
            )

    css_names = os.listdir("dataset/css")

    for name in css_names:
        if "_desktop.css" in name:
            new_name = name.replace("_desktop.css", ".css")
            shutil.copyfile(
                os.path.join("dataset", "css", name),
                os.path.join(target_dirs["desktop_css"], new_name),
            )
        elif "_mobile.css" in name:
            new_name = name.replace("_mobile.css", ".css")
            shutil.copyfile(
                os.path.join("dataset", "css", name),
                os.path.join(target_dirs["mobile_css"], new_name),
            )

    # Put mobile and desktop image folders inside the screens folder

    os.makedirs(os.path.join("dataset", "screens"), exist_ok=True)
    shutil.move(os.path.join("dataset", "desktop"), os.path.join("dataset", "screens"))
    shutil.move(os.path.join("dataset", "mobile"), os.path.join("dataset", "screens"))

### (Un)Comment this if you don't want to reorganize the dataset

In [27]:
# (Un)Comment this if you don't want to reorganize the dataset

# reorganize_dataset()

In [28]:
def format_code_answer(site_name):
    with open(f"dataset/code/mobile/html/{site_name}.html", "r") as f:
        html = f.read()
    with open(f"dataset/code/mobile/css/{site_name}.css", "r") as f:
        css = f.read()
    return f"HTML:\n{html}\n\nCSS:\n{css}"

In [30]:
def generate_dataset_json(output_folder, site_names):
    """Formats and saves the dataset as a JSON file.

    Args:
        output_folder (str): name of the folder to save the JSON file in
        site_names (list[str]): list of site names to generate JSON data for
    """
    # Initialize list to hold all JSON data
    json_data_list = []

    for name in site_names:
        if (name+'.html') not in os.listdir("dataset/code/mobile/html"):
            print(f'Missing {name}.html in dataset/code/mobile/html')
        if (name+'.css') not in os.listdir("dataset/code/mobile/css"):
            print(f'Missing {name}.css in dataset/code/mobile/css')
            continue

        formatted_answer = format_code_answer(name)

        # Structure for LLaVA JSON
        json_data = {
            "id": name,
            "image": f"{name}.jpg",
            "conversations": [
                {
                    "from": "human",
                    "value": "You're an expert in web development. From the given desktop screenshot, please generate the HTML and CSS code for the mobile version of the website."
                },
                {
                    "from": "gpt",
                    "value": formatted_answer
                }
            ]
        }

        # Append to list
        json_data_list.append(json_data)

    print(len(json_data_list))

    # Save the JSON data list to a file
    json_output_path = os.path.join(output_folder, 'dataset.json')
    with open(json_output_path, 'w') as json_file:
        json.dump(json_data_list, json_file, indent=4)


In [31]:
site_names = os.listdir("dataset/screens/desktop")
site_names = [n.replace('.png', '') for n in site_names]
print(site_names[:5])

generate_dataset_json("dataset", site_names)

['delfi_lt', 'entrust_net', 'businesswire_com', 'synology_com', 'doubleverify_com']
Missing edgecdn_ru.html in dataset/code/mobile/html
Missing edgecdn_ru.css in dataset/code/mobile/css
Missing gcdn_co.html in dataset/code/mobile/html
Missing gcdn_co.css in dataset/code/mobile/css
Missing 360safe_com.css in dataset/code/mobile/css
749


### S'inspirer de la suite /!\ pas fini ! /!\ 

In [None]:
def process_and_save(dataset, output_folder, subset_name):
    # Define image subfolder within output folder
    subset_folder = os.path.join(output_folder, subset_name)
    image_subfolder = os.path.join(output_folder, 'images')

    if not os.path.exists(image_subfolder):
        os.makedirs(image_subfolder)

    if not os.path.exists(subset_folder):
        os.makedirs(subset_folder)

    # Initialize list to hold all JSON data
    json_data_list = []

    # Process and save images and labels
    for item in dataset:
        # Load image if it's a URL or a file path
        if isinstance(item['image'], str):
            response = requests.get(item['image'])
            image = Image.open(BytesIO(response.content))
        else:
            image = item['image']  # Assuming it's a PIL.Image object

        # Create a unique ID for each image
        unique_id = str(uuid.uuid4())

        # Define image path
        image_path = os.path.join(image_subfolder, f"{unique_id}.jpg")

        # Save image
        image.save(image_path)

        # Remove duplicates and format answers
        answers = item['answers']
        unique_answers = list(set(answers))
        formatted_answers = ", ".join(unique_answers)

        # Structure for LLaVA JSON
        json_data = {
            "id": unique_id,
            "image": f"{unique_id}.jpg",
            "conversations": [
                {
                    "from": "human",
                    "value": item['question']
                },
                {
                    "from": "gpt",
                    "value": formatted_answers
                }
            ]
        }


        # Append to list
        json_data_list.append(json_data)

    # Save the JSON data list to a file
    json_output_path = os.path.join(output_folder, subset_name, 'dataset.json')
    with open(json_output_path, 'w') as json_file:
        json.dump(json_data_list, json_file, indent=4)

In [None]:
def save_dataset(dataset_name, output_folder, class_name, subset_name, val_samples=None):
    # Load the dataset from Hugging Face
    dataset = load_dataset(dataset_name, split=subset_name)


    # Filter for images with the specified class in 'question_type'
    filtered_dataset = [item for item in dataset if item['question_type'] == class_name]


    # Determine the split for training and validation
    if val_samples is not None and subset_name == 'train':
        train_dataset = filtered_dataset[val_samples:]
        val_dataset = filtered_dataset[:val_samples]
    else:
        train_dataset = filtered_dataset
        val_dataset = []


    # Process and save the datasets
    for subset, data in [('train', train_dataset), ('validation', val_dataset)]:
        if data:
            process_and_save(data, output_folder, subset)


In [None]:
# Usage example
output_folder = 'dataset'
class_name = 'other'
val_samples = 300
save_dataset('Multimodal-Fatima/OK-VQA_train', output_folder, class_name, 'train', val_samples)
save_dataset('Multimodal-Fatima/OK-VQA_test', output_folder, class_name, 'test')