In [None]:
import os
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline
from diffusers import UNet2DConditionModel, AutoencoderKL, DDPMScheduler
from transformers import CLIPTextModel, CLIPTokenizer
from huggingface_hub import login

# Step 1: Log in to Hugging Face
# SECURITY: never hard-code an access token in source; any token that has been
# committed or shared this way must be considered leaked and revoked.
# The token is read from the HF_TOKEN environment variable instead. When the
# variable is unset, None is passed and `login` asks for the token itself.
huggingface_token = os.environ.get("HF_TOKEN")
login(huggingface_token)

# Step 2: Dataset Preparation
class PairedImageDataset(Dataset):
    """Dataset of (input, target) image pairs read from two directories.

    Pairs are formed by position: the file names of each directory are
    sorted and the i-th input file is matched with the i-th target file.

    Args:
        input_dir: Directory holding the input images.
        target_dir: Directory holding the target images.
        transform: Optional callable applied to both images of a pair.

    Raises:
        ValueError: If the two directories do not contain the same number
            of files, since positional pairing would then be wrong.
    """

    def __init__(self, input_dir, target_dir, transform=None):
        self.input_dir = input_dir
        self.target_dir = target_dir
        self.transform = transform
        self.input_images = self._list_files(input_dir)
        self.target_images = self._list_files(target_dir)

        # Fail early: a count mismatch would otherwise surface as a late
        # IndexError or, worse, as silently shifted pairs.
        if len(self.input_images) != len(self.target_images):
            raise ValueError(
                f"Number of input images ({len(self.input_images)}) does not "
                f"match number of target images ({len(self.target_images)})"
            )

    @staticmethod
    def _list_files(directory):
        """Return the sorted names of the regular files inside *directory*."""
        # Sub-directories are skipped because they cannot be opened as images.
        return sorted(
            name
            for name in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, name))
        )

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.input_images)

    def __getitem__(self, idx):
        """Return the ``(input_image, target_image)`` pair at position *idx*.

        Both images are converted to RGB and, when a transform was given,
        passed through it.
        """
        input_path = os.path.join(self.input_dir, self.input_images[idx])
        target_path = os.path.join(self.target_dir, self.target_images[idx])

        # The context managers close the underlying files as soon as the
        # RGB copies have been made.
        with Image.open(input_path) as raw_input:
            input_image = raw_input.convert("RGB")
        with Image.open(target_path) as raw_target:
            target_image = raw_target.convert("RGB")

        if self.transform:
            input_image = self.transform(input_image)
            target_image = self.transform(target_image)

        return input_image, target_image

# Preprocessing shared by the input and the target image of every pair:
# resize to 512x512, convert to a tensor, then normalise with mean 0.5 and
# standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(512, 512)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# Locations of the paired training images.
input_dir = "/content/drive/MyDrive/Fine Tune DataSet/Image generate/input_image"  # Replace with the path to input images
target_dir = "/content/drive/MyDrive/Fine Tune DataSet/Image generate/target_image"  # Replace with the path to target images

# Shuffled mini-batches of four image pairs.
dataset = PairedImageDataset(
    input_dir=input_dir,
    target_dir=target_dir,
    transform=transform,
)
train_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=4)

# Step 3: Load Pretrained Stable Diffusion Model
# The weights are loaded in float32; the whole pipeline is then moved to the
# GPU when CUDA is available and to the CPU otherwise.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32
)
if torch.cuda.is_available():
    pipe = pipe.to("cuda")
else:
    pipe = pipe.to("cpu")

# Step 4: Fine-Tune the Model
# Only the UNet's parameters are handed to the optimizer.
optimizer = torch.optim.AdamW(pipe.unet.parameters(), lr=1e-5)
unet = pipe.unet

def train_step(batch, prompt=""):
    """Run one optimisation step of the UNet on a batch and return its loss.

    The target images are encoded to VAE latents, noise is added at randomly
    drawn timesteps, and the UNet is trained with an MSE loss to predict that
    noise, conditioned on the text-encoder embedding of ``prompt``.

    Args:
        batch: ``(input_images, target_images)`` pair of image tensors, as
            yielded by the training data loader.
        prompt: Text used as conditioning for every sample of the batch.

    Returns:
        The loss of this step as a Python float.
    """
    # NOTE(review): the input images are not used as conditioning here. The
    # pretrained UNet only receives the noisy latents and a text embedding;
    # conditioning on the input image would presumably need an architecture
    # with extra input channels (InstructPix2Pix-style) - confirm the intent.
    _input_images, target_images = batch
    device = pipe.device
    target_images = target_images.to(device)
    batch_size = target_images.size(0)

    # The VAE and the text encoder are not being optimised, so their forward
    # passes do not need gradients.
    with torch.no_grad():
        # The UNet works on VAE latents, not on pixel-space images.
        latents = pipe.vae.encode(target_images).latent_dist.sample()
        latents = latents * pipe.vae.config.scaling_factor

        # The text encoder consumes token ids, never image tensors.
        token_ids = pipe.tokenizer(
            [prompt] * batch_size,
            padding="max_length",
            max_length=pipe.tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        ).input_ids.to(device)
        encoder_hidden_states = pipe.text_encoder(token_ids)[0]

    # Generate noise and draw one timestep per sample from the scheduler's
    # configured training range.
    noise = torch.randn_like(latents)
    timesteps = torch.randint(
        0,
        pipe.scheduler.config.num_train_timesteps,
        (batch_size,),
        device=device,
    )

    # Add noise to the target latents based on the timesteps
    noisy_latents = pipe.scheduler.add_noise(latents, noise, timesteps)

    # Forward pass
    optimizer.zero_grad()
    model_output = unet(
        noisy_latents,
        timesteps,
        encoder_hidden_states=encoder_hidden_states,
    ).sample

    # Compute the loss
    loss = F.mse_loss(model_output, noise)

    # Backward pass
    loss.backward()
    optimizer.step()

    return loss.item()


# Run the fine-tuning loop. Without it the weights saved in Step 5 would be
# the unchanged pretrained ones.
num_epochs = 1  # Increase for a longer fine-tuning run
unet.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_steps = 0
    for batch in train_loader:
        epoch_loss += train_step(batch)
        num_steps += 1
    if num_steps:
        print(f"Epoch {epoch + 1}/{num_epochs} - mean loss: {epoch_loss / num_steps:.4f}")
unet.eval()

# Step 5: Save Fine-Tuned Model
output_dir = "/path/to/output"
pipe.save_pretrained(output_dir)

# Step 6: Test the Model
def generate_image(input_image_path, prompt="A beautiful landscape"):
    """Generate an image from a starting image and a text prompt using ``pipe``.

    Args:
        input_image_path: Path of the image used as the starting point.
        prompt: Text prompt guiding the generation.

    Returns:
        The first entry of the pipeline output's ``images``.
    """
    # Load the input image; the context manager closes the file handle as
    # soon as the RGB copy has been made.
    with Image.open(input_image_path) as source_image:
        input_image = source_image.convert("RGB")

    # Pass a resized PIL image and let the pipeline do its own tensor
    # conversion and normalisation. A tensor pre-normalised to [-1, 1] does
    # not match the [0, 1] range the pipeline documents for tensor inputs.
    input_image = transforms.Resize((512, 512))(input_image)

    # Generate the output image
    with torch.no_grad():
        result = pipe(prompt=prompt, image=input_image).images[0]

    return result


# Try the pipeline on one image from the input folder and open the result
# in an image viewer.
prompt = "A futuristic cityscape at sunset"  # Replace with your desired description
test_image_path = "/content/drive/MyDrive/Fine Tune DataSet/Image generate/input_image/input1.jpg.JPG"
generated_image = generate_image(input_image_path=test_image_path, prompt=prompt)
generated_image.show()


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]



  0%|          | 0/40 [00:00<?, ?it/s]