In [None]:
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image

# Local import: conditioning generation on an existing picture needs the
# image-to-image pipeline; the text-to-image StableDiffusionPipeline has no image input.
from diffusers import StableDiffusionImg2ImgPipeline

# Free-text description of the change to apply to the house
expansion_description = "adding a second floor and a garden"

# Read the image and force 3-channel RGB (PNG files may carry alpha or a palette).
# convert() returns an independent copy, so the file handle can be closed right away.
image_path = "data/testimage.png"
with Image.open(image_path) as source_file:
    original_image = source_file.convert('RGB')

# Load the Stable Diffusion model
model_id = "CompVis/stable-diffusion-v1-4"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Model loading downloads weights on first use, so report failures explicitly.
# No token is hard-coded here: a placeholder string would be sent as a real
# credential. If authentication is needed, log in with the Hugging Face CLI first.
try:
    pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(model_id).to(device)
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# Create the text prompt for the desired change
prompt = f"An expanded version of a house, {expansion_description}"

# Generate the modified image. `strength` controls how far the result may drift
# from the source picture (0 keeps it, 1 ignores it).
try:
    with torch.no_grad():
        generated_image = pipeline(prompt=prompt, image=original_image, strength=0.8).images[0]
except Exception as e:
    print(f"Error generating image: {e}")
    raise

# Save the generated image
output_path = "modified_house.png"
generated_image.save(output_path)

print(f"Generated image saved to {output_path}")


In [None]:
from diffusers import StableDiffusionPipeline

# Imported here so this cell does not depend on an earlier cell having run.
import torch

# Use the GPU when one is present instead of assuming CUDA, so the cell also
# runs (slowly) on CPU-only machines rather than failing in .to('cuda').
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = StableDiffusionPipeline.from_pretrained('CompVis/stable-diffusion-v1-4').to(device)

# Initialize a prompt
prompt = "a dog wearing hat"
# Pass the prompt in the pipeline; as the cell's last expression the image is rendered inline
pipe(prompt).images[0]

In [None]:
from PIL import Image
import torch
from diffusers import StableDiffusionPipeline

# Load the pre-trained stable diffusion model and place it on the GPU when available.
# `use_auth_token=True` was dropped: it is deprecated and makes loading fail when no
# login is stored locally; a stored login is still used automatically if present.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to(device)

# Load and preprocess the house image
def load_preprocess_image(image_path):
    """Load an image from disk as an RGB PIL image.

    Args:
        image_path: Path to the image file.

    Returns:
        A PIL image in RGB mode, fully loaded into memory.

    Raises:
        FileNotFoundError / OSError: if the file is missing or is not a readable image.
    """
    # Image.open is lazy and keeps the file open; convert() forces the pixel data
    # to load and returns an independent copy, so the handle can be closed here.
    # RGB also normalises alpha/palette images to three channels.
    with Image.open(image_path) as source_file:
        return source_file.convert("RGB")

# Modify the house image based on the query
def modify_house_image(image, query):
    """Generate a modified version of `image` guided by a free-text `query`.

    Args:
        image: PIL image of the house to modify.
        query: Natural-language description of the desired change.

    Returns:
        The first image produced by the image-to-image pipeline.
    """
    # Local import keeps this function self-contained within the cell.
    from diffusers import StableDiffusionImg2ImgPipeline

    # Translating an arbitrary query into a good prompt is non-trivial; this
    # template is a simplistic placeholder.
    prompt = f"A house expanded based on: {query}"

    # The module-level `model` is a text-to-image pipeline and has no image input.
    # Build an image-to-image pipeline around the same already-loaded components
    # (no weights are reloaded) so the source picture actually conditions the result.
    img2img = StableDiffusionImg2ImgPipeline(**model.components)
    modified_image = img2img(prompt=prompt, image=image).images[0]
    return modified_image

# Main function to handle the workflow
def main(image_path, query):
    """Load the picture at `image_path`, modify it according to `query`, and show it.

    Returns nothing; the result is only displayed.
    """
    source_picture = load_preprocess_image(image_path)
    # .show() hands the result to the image viewer rather than returning it
    modify_house_image(source_picture, query).show()

# Example usage: run the workflow on "house.jpg" with a sample expansion request
main("house.jpg", "adding a second floor and a swimming pool")


In [4]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
from dalle_pytorch import DALLE

def load_models(clip_name="openai/clip-vit-base-patch32", vae_path='path_to_vae', dalle_path='path_to_dalle'):
    """Load the CLIP model, its processor, and the DALL-E model.

    Args:
        clip_name: Hugging Face checkpoint id used for both the CLIP model and processor.
        vae_path: Value passed to DALLE as `vae_path` (default is a placeholder).
        dalle_path: Value passed to DALLE as `dalle_path` (default is a placeholder).

    Returns:
        Tuple `(clip_model, clip_processor, dalle_model)`.
    """
    # Load the CLIP model for text-image embeddings; model and processor must
    # come from the same checkpoint.
    clip_model = CLIPModel.from_pretrained(clip_name)
    clip_processor = CLIPProcessor.from_pretrained(clip_name)

    # Load the DALL-E model for image generation.
    # NOTE(review): confirm that dalle_pytorch's DALLE constructor accepts
    # `vae_path`/`dalle_path`; the defaults are placeholders, not real files.
    dalle_model = DALLE(vae_path=vae_path, dalle_path=dalle_path)

    return clip_model, clip_processor, dalle_model

def preprocess_image(image_path):
    """Load an image from disk as an RGB PIL image.

    Args:
        image_path: Path to the image file.

    Returns:
        A PIL image in RGB mode, fully loaded into memory.

    Raises:
        FileNotFoundError / OSError: if the file is missing or is not a readable image.
    """
    # Image.open is lazy and leaves the file open; convert() loads the pixels
    # and returns a detached copy, so the handle is released on exit.
    with Image.open(image_path) as source_file:
        return source_file.convert("RGB")

def generate_image_embedding(image, clip_model, clip_processor):
    """Compute the CLIP embedding of an image.

    Args:
        image: Image accepted by `clip_processor` (e.g. a PIL image).
        clip_model: Object exposing `get_image_features(**inputs)`.
        clip_processor: Callable turning `images=` into model inputs.

    Returns:
        The tensor returned by `clip_model.get_image_features`, detached from
        autograd (inference only).
    """
    inputs = clip_processor(images=image, return_tensors="pt")
    # Inference only: skip building the autograd graph to save memory and time.
    with torch.no_grad():
        image_embedding = clip_model.get_image_features(**inputs)
    return image_embedding

def generate_modified_image(image_embedding, query, dalle_model, clip_model, clip_processor):
    """Generate images from an image embedding combined with a text query.

    Args:
        image_embedding: Tensor from `generate_image_embedding`; concatenated
            with the text features along dim 1.
        query: Text describing the desired modification.
        dalle_model: Object exposing `generate_images(features)`.
        clip_model: Object exposing `get_text_features(**inputs)`.
        clip_processor: Callable turning `text=` into model inputs.

    Returns:
        Whatever `dalle_model.generate_images` returns.
    """
    # Keep the full processor output (a mapping of input_ids, attention_mask, ...).
    # Taking `.input_ids` first yields a bare tensor, which cannot be **-unpacked.
    text_inputs = clip_processor(text=[query], return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        text_features = clip_model.get_text_features(**text_inputs)

    # Combine image and text features along the feature axis
    combined_features = torch.cat((image_embedding, text_features), dim=1)

    # NOTE(review): confirm that the DALL-E model's `generate_images` accepts a
    # feature tensor like this rather than tokenized text.
    generated_images = dalle_model.generate_images(combined_features)

    return generated_images

def main():
    """Run the CLIP + DALL-E workflow on the sample image and save the result.

    NOTE(review): this redefines `main`, shadowing the two-argument `main`
    defined in an earlier cell of this notebook.
    """
    clip_model, clip_processor, dalle_model = load_models()

    # Inputs for this run: source picture and the user's modification request
    source_path = 'data/testimage.png'
    house_picture = preprocess_image(source_path)
    house_embedding = generate_image_embedding(house_picture, clip_model, clip_processor)

    user_request = "expand the house with a modern extension"
    result = generate_modified_image(
        house_embedding, user_request, dalle_model, clip_model, clip_processor
    )

    # NOTE(review): this assumes the generator's output has a PIL-style `.save`;
    # if it returns a tensor batch this line will fail — confirm.
    result.save("modified_house.jpg")

# Entry point: run the workflow when this code is executed directly (not imported)
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'pytorch_lightning.utilities.distributed'

In [17]:
import openai
from IPython.display import display
import clip
from PIL import Image as PILImage
import torch
import os
from io import BytesIO
from IPython.display import Image as DisplayImage


# The OpenAI API key must be supplied through the OPENAI_API_KEY environment
# variable (set it before starting Jupyter). Secrets must never be written into
# the notebook: the key that used to be hard-coded here should be treated as
# leaked and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before running this cell"
    )

# Initialize the OpenAI client with the API key
client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Load the model
def load_model():
    """Load the CLIP "ViT-B/32" model on the best available device.

    Returns:
        Tuple `(model, preprocess, device)` where `device` is "cuda" or "cpu".
    """
    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"
    clip_model, clip_preprocess = clip.load("ViT-B/32", device=target_device)
    return clip_model, clip_preprocess, target_device

# Generate and display an image based on the input and description
def generate_image(input_image_path, description):
    """Generate an image from `description` with the OpenAI API and display it.

    Args:
        input_image_path: Path to the reference picture. It is only checked for
            existence: the generation request below is driven by `description` alone.
        description: Text prompt sent to the image-generation endpoint.

    Returns:
        None. The generated image is displayed inline from its URL, or a message
        is printed when the response carries no image data.

    Raises:
        FileNotFoundError: if `input_image_path` does not exist.
    """
    # Fail fast on a bad path. The earlier version loaded CLIP and encoded this
    # image on every call but never used the features, which only cost time and
    # memory, so that step was removed.
    # NOTE(review): if the output should depend on the input picture, an
    # image-conditioned endpoint is needed instead of a text-only request.
    if not os.path.exists(input_image_path):
        raise FileNotFoundError(input_image_path)

    # Call OpenAI API to generate the image
    response = client.images.generate(
        prompt=description,
        n=1,
        size="512x512"
    )

    # The response lists generated images; show the first one via its URL
    if response.data:
        image_url = response.data[0].url
        display(DisplayImage(url=image_url))
    else:
        print("No image data found in response")

# Example usage: sample inputs for one run of generate_image (kept as module-level names)
input_image_path = "data/testimage.png"
description = "Imagine an expanded version of this house with a larger second floor."
generate_image(input_image_path, description)


ImagesResponse(created=1712495274, data=[Image(b64_json=None, revised_prompt=None, url='https://oaidalleapiprodscus.blob.core.windows.net/private/org-jxjfLJNEtiuVMehofT38qYHY/user-uDBM4fcTeieNQikLtMqdsQeC/img-xJIKtQee2z7S1Vr3Jm514cSZ.png?st=2024-04-07T12%3A07%3A54Z&se=2024-04-07T14%3A07%3A54Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2024-04-06T23%3A49%3A21Z&ske=2024-04-07T23%3A49%3A21Z&sks=b&skv=2021-08-06&sig=HEhM1EJRkpvoUyeRYdZzcx6xrULSqzSm/zsuVd10baQ%3D')])
