In [None]:
%%capture
! pip install datasets
! pip install diffusers["torch"] transformers
! pip install -U instructor

Collecting instructor
  Downloading instructor-1.5.2-py3-none-any.whl.metadata (15 kB)
Collecting jiter<0.6.0,>=0.5.0 (from instructor)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting openai<2.0.0,>=1.45.0 (from instructor)
  Downloading openai-1.51.2-py3-none-any.whl.metadata (24 kB)
Collecting tenacity<9.0.0,>=8.4.1 (from instructor)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting httpx<1,>=0.23.0 (from openai<2.0.0,>=1.45.0->instructor)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai<2.0.0,>=1.45.0->instructor)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.45.0->instructor)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading instructor-1.5.2-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from getpass import getpass
# Ask for the OpenAI API key interactively so the secret is never written into the notebook source.
openai_token = getpass("Enter your OpenAI API token: ")

Enter your OpenAI API token: ··········


In [None]:
import openai
import instructor
# Wrap the OpenAI client with instructor. This patched client is the one that
# get_relavant_description_from_tags later calls with `response_model=` and `max_retries=`.
patched_openai_client = instructor.patch(openai.OpenAI(api_key=openai_token))

In [None]:
from pydantic import BaseModel, Field

In [None]:
class DescriptionPrediction(BaseModel):
    """Structured response requested from the LLM for a set of UI text tags.

    Attributes:
        chain_of_thought: The model's step-by-step reasoning.
        description: The final description derived from the tags; this is the
            only field read by the rest of the notebook.
    """

    # The reasoning field is declared BEFORE the answer on purpose: the response schema
    # follows declaration order, so the model writes its reasoning first and the
    # description can build on it. Declared after the answer, the reasoning would only
    # be a post-hoc justification.
    chain_of_thought: str = Field(..., description="Think Step by Step and provide your reasoning for the description")
    description: str = Field(..., description="Please provide the description based on the tags provided")

def get_relavant_description_from_tags(tags: list[str], model: str = "gpt-4o"):
    """Ask the LLM for a natural-language description of a design from its text tags.

    Args:
        tags: Text snippets extracted from one UI design.
        model: Name of the chat model to query. Defaults to "gpt-4o", the value
            that was previously hard-coded.

    Returns:
        The `description` field of the structured `DescriptionPrediction`
        response, or an empty string when `tags` is empty.

    Raises:
        Whatever the patched OpenAI client raises (network, authentication or
        validation errors once the retries are exhausted); callers in this
        notebook catch these and fall back to the raw tags.
    """
    if not tags:
        # Nothing to describe: skip the API call, which could only return an
        # invented description for an empty prompt.
        return ""

    tags_str = "\n\n".join(tags)
    llm_response = patched_openai_client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": "Please provide the description based on the tags provided"
            },
            {
                "role": "user",
                "content": f"""Here are the tags delimited by ```{tags_str}```"""
            }
        ],
        model=model,
        # `response_model` and `max_retries` are handled by the instructor-patched client.
        response_model=DescriptionPrediction,
        max_retries=2,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        # Low temperature keeps the descriptions close to deterministic.
        temperature=0.1
    )

    return llm_response.description


In [None]:
import os
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from diffusers import StableDiffusionPipeline
import torch

In [None]:
# Models loaded by setup_models() are kept here so that repeated calls (one per
# query in the interactive loop) reuse them instead of reloading all weights.
_MODEL_CACHE = {}


def setup_models():
    """Load (once) and return the text and image generation models.

    Returns:
        A tuple `(tokenizer, llm, image_model)`: the GPT-2 tokenizer, the GPT-2
        causal language model, and the Stable Diffusion v1.5 pipeline placed on
        CUDA when available, otherwise on CPU.
    """
    if "models" not in _MODEL_CACHE:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Half precision is only used on GPU. On CPU the pipeline is loaded in
        # float32, since float16 inference there is unsupported or very slow.
        dtype = torch.float16 if device == "cuda" else torch.float32

        tokenizer = AutoTokenizer.from_pretrained("gpt2")
        # The language model is left on its default device, as before.
        llm = AutoModelForCausalLM.from_pretrained("gpt2")
        image_model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=dtype)
        image_model.to(device)

        _MODEL_CACHE["models"] = (tokenizer, llm, image_model)

    return _MODEL_CACHE["models"]

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def retrieve_relevant_designs(query, dataset, top_n=5):
    """Return the designs whose on-screen text is most similar to `query`.

    Each design in `dataset['train']` that has an `objects` dict with a `text`
    list is turned into one description (its non-None texts joined by spaces).
    The query and the descriptions are embedded with TF-IDF and ranked by
    cosine similarity.

    Args:
        query: Free-text description of the desired UI.
        dataset: Mapping with a `'train'` split that supports iteration and
            integer indexing.
        top_n: Maximum number of designs to return.

    Returns:
        Up to `top_n` dataset items, most similar first. An empty list when
        `top_n` is not positive or no usable description could be collected.
    """
    if top_n <= 0:
        # Guard needed because `[-0:]` would select every element instead of none.
        return []

    descriptions = []
    # Dataset index of each collected description. Items without usable text are
    # skipped, so positions in `descriptions` do not match dataset indices.
    source_indices = []
    try:
        for index, item in enumerate(dataset['train']):
            if 'objects' in item and isinstance(item["objects"], dict) and "text" in item["objects"]:
                texts = [text for text in item["objects"]['text'] if text is not None]

                descriptions.append(" ".join(texts))
                source_indices.append(index)
    except Exception as e:
        # Best effort: keep whatever was collected before the failure.
        print(f"Error retrieving descriptions: {e}")

    else:
        print("Descriptions retrieved successfully.")

    if not descriptions:
        # Nothing to compare against; cosine_similarity would fail on an empty matrix.
        return []

    vectorizer = TfidfVectorizer()
    # Fit on the query and the descriptions together so they share one vocabulary.
    tfidf_matrix = vectorizer.fit_transform([query] + descriptions)

    query_vector = tfidf_matrix[0]
    description_vectors = tfidf_matrix[1:]

    similarities = cosine_similarity(query_vector, description_vectors).flatten()
    # argsort is ascending: take the last `top_n` and reverse for best-first order.
    top_positions = similarities.argsort()[-top_n:][::-1]

    # Map positions in `descriptions` back to the dataset rows they came from.
    relevant_designs = [dataset['train'][source_indices[int(pos)]] for pos in top_positions]
    return relevant_designs



In [None]:
def generate_design_description(query, relevant_designs, tokenizer, llm):
    """Build a prompt from the query and retrieved designs, then extend it with the LLM.

    For every design that has an `objects` dict with a `text` list, the non-None
    texts are summarised by `get_relavant_description_from_tags`. If that call
    fails, the raw texts joined by spaces are used instead.

    Args:
        query: The user's UI design request.
        relevant_designs: Dataset items returned by the retrieval step.
        tokenizer: Tokenizer matching `llm`; called on the prompt and used to decode.
        llm: Causal language model exposing `generate`.

    Returns:
        The decoded first output sequence. In the recorded run of this notebook
        that text starts with the prompt itself, followed by the continuation.
    """
    relevant_texts = []
    for design in relevant_designs:
        if 'objects' in design and isinstance(design["objects"], dict) and "text" in design["objects"]:
            texts = [text for text in design["objects"]['text'] if text is not None]
            try:
                relevant_context = get_relavant_description_from_tags(texts)
            except Exception as e:
                # Best effort: an LLM/API failure must not abort the whole generation.
                print(f"Error generating description: {e}")
                relevant_context = " ".join(texts)
            relevant_texts.append(relevant_context)

    prompt = f"Generate a mobile UI design based on: {query}. Relevant designs: {' '.join(relevant_texts)}"
    # Cap the prompt at 512 tokens, leaving room for the 100 newly generated ones.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    outputs = llm.generate(
        **inputs,
        max_new_tokens=100,
        num_return_sequences=1,
        # Pass explicitly the value generate() would otherwise fall back to (see the
        # "Setting `pad_token_id` to `eos_token_id`" warning), so the warning is not
        # emitted on every call.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
def generate_design_image(design_description, image_model):
    """Run the image pipeline on the description and return its first image.

    Args:
        design_description: Text prompt passed to the pipeline.
        image_model: Callable pipeline whose result exposes an `images` sequence.

    Returns:
        The first element of the pipeline result's `images`.
    """
    pipeline_output = image_model(design_description)
    first_image, *_ = pipeline_output.images[:1] or [pipeline_output.images[0]]
    return first_image

In [None]:
def generate_ui_design(query):
    """Run the full pipeline for one query: retrieve, describe, then render.

    Args:
        query: The user's UI design request.

    Returns:
        A tuple `(design_image, design_description)`.
    """
    ui_dataset = load_dataset("mrtoy/mobile-ui-design")
    text_tokenizer, language_model, diffusion_pipeline = setup_models()

    # Retrieval feeds the text generation, whose output is the image prompt.
    matching_designs = retrieve_relevant_designs(query, ui_dataset)
    description = generate_design_description(
        query,
        matching_designs,
        text_tokenizer,
        language_model,
    )
    rendered = generate_design_image(description, diffusion_pipeline)
    return rendered, description

In [None]:
def main():
    """Interactive loop: read a query, generate a design, save it, and report.

    Repeats until the user enters 'quit' (case-insensitive, surrounding
    whitespace ignored). Blank input is ignored and the prompt is shown again.
    Each image is saved in the current working directory under a name derived
    from a SHA-256 digest of the query, so the same query always maps to the
    same file name.
    """
    import hashlib  # local import: only needed for naming the output file

    while True:
        query = input("Enter your UI design query (or 'quit' to exit): ").strip()
        if query.lower() == 'quit':
            break
        if not query:
            # Do not run the expensive pipeline for an empty query.
            continue
        design_image, design_description = generate_ui_design(query)
        # The built-in hash() of a str is salted per process and can be negative,
        # so it is not a stable file name; use a content digest instead.
        query_digest = hashlib.sha256(query.encode("utf-8")).hexdigest()[:16]
        filename = f"generated_design_{query_digest}.png"
        design_image.save(filename)
        print(f"Design generated and saved as {filename}")
        print(f"Design description: {design_description}")

In [None]:

# Start the interactive loop only when this code runs as the main module (not when imported).
if __name__ == "__main__":
    main()

Enter your UI design query (or 'quit' to exit): mobile calculator




Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Descriptions retrieved successfully.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Token indices sequence length is longer than the specified maximum sequence length for this model (464 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: ["process or requirement for verifying a mobile device or phone number. this typically involves a step where a user is asked to confirm their mobile number, often by entering a code sent via sms or a similar method. the numbers'8'and'6'could represent steps in a sequence, a code, or placeholders for further information. the tags provided seem to be related to options for signing up or logging into a service or application. they suggest that users can either use their mobile number or email address to continue, or they can choose to log in using their google or facebook accounts. additionally, there is a mention of 't ownhouse,' which co

  0%|          | 0/50 [00:00<?, ?it/s]

Design generated and saved as generated_design_7330250882345084111.png
Design description: Generate a mobile UI design based on: mobile calculator. Relevant designs: This application is designed to help users manage their finances efficiently. It includes features such as finding nearby ATMs, calculating interest on savings or loans, setting and tracking budget goals, inviting friends to join the app for shared financial planning, and customizing user settings for a personalized experience. The tags suggest a process or requirement for verifying a mobile device or phone number. This typically involves a step where a user is asked to confirm their mobile number, often by entering a code sent via SMS or a similar method. The numbers '8' and '6' could represent steps in a sequence, a code, or placeholders for further information. The tags provided seem to be related to options for signing up or logging into a service or application. They suggest that users can either use their mobile numb

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

KeyboardInterrupt: 