In [None]:
# pip install accelerate
# pip install --upgrade torch
# !pip install insightface
# !pip install onnxruntime
# pip install --upgrade diffusers
# !jupyter nbextension enable --py widgetsnbextension
# !pip install diffusers[torch] --upgrade

In [None]:
from PIL import Image
import warnings
# Suppress every Python warning for the rest of the session.
# Note: this is a blanket filter, so deprecation notices are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
from diffusers import AutoPipelineForInpainting, AutoencoderKL
from diffusers.utils import load_image
import torch

In [None]:
# Load the standalone VAE checkpoint in half precision; it is handed to the
# inpainting pipeline through its `vae=` argument.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=torch.float16,
)

In [None]:
# Build the SDXL inpainting pipeline from the fp16 safetensors weights,
# using the `vae` object instead of the checkpoint's bundled VAE.
pipeline = AutoPipelineForInpainting.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    vae=vae,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)

# Move the whole pipeline onto the GPU.
pipeline = pipeline.to("cuda")


In [None]:
# Attach the SDXL IP-Adapter weights to the inpainting pipeline.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name="ip-adapter_sdxl.bin",
    low_cpu_mem_usage=True,
)

In [None]:
# Read the garment and model photos and stretch each to 1024x1024
# (the aspect ratio is not preserved).
shirtImage = Image.open('/content/Cloth Image.jpg').resize((1024, 1024))
modelImage = Image.open('/content/Model Image.png').resize((1024, 1024))

# RGB copies of both pictures; these are the versions passed to the
# segmentation step and to the inpainting pipeline.
shirtImage_rgb, modelImage_rgb = (
    picture.convert("RGB") for picture in (shirtImage, modelImage)
)

In [None]:
# final_image = pipeline(
#     prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin,man with red shirt",
#     negative_prompt="ugly, bad quality, bad anatomy, deformed body, deformed hands, deformed feet, deformed face, deformed clothing, deformed skin, bad skin, leggings, tights, stockings",
#     image=modelImage_rgb,
#     mask_image=mask_image,
#     ip_adapter_image=shirtImage_rgb,
#     strength=0.99,
#     guidance_scale=7.5,
#     num_inference_steps=100,
# ).images[0]


In [None]:
# NOTE: the transformers factory is imported under an alias on purpose.
# A plain `from transformers import pipeline` would rebind the notebook-level
# name `pipeline`, which already holds the diffusers inpainting pipeline, and
# every later `pipeline(...)` / `pipeline.set_ip_adapter_scale(...)` call
# would then hit the transformers function instead.
from transformers import pipeline as hf_pipeline
import numpy as np
import cv2
import insightface
from insightface.app import FaceAnalysis
from PIL import Image, ImageDraw


# Initialize face detection (tries the CUDA provider first, then CPU)
app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

# Initialize segmentation pipeline (clothes/body-part segmentation model)
segmenter = hf_pipeline(model="mattmdjaga/segformer_b2_clothes")


def remove_face(img, mask):
    """Black out a circular region around the first detected face in ``mask``.

    Args:
        img: PIL image in which to look for a face.
        mask: PIL mask image; the face circle is painted onto it with fill 0.
            The mask is modified in place.

    Returns:
        The same ``mask`` object. It is returned unchanged (with a warning)
        when no face is detected, and unchanged when the computed circle
        would have a non-positive radius.
    """
    import warnings

    # Convert image to numpy array
    # NOTE(review): the array is passed to the detector in the image's own
    # channel order; confirm whether insightface expects BGR here.
    img_arr = np.asarray(img)

    # Run face detection
    detections = app.get(img_arr)
    if not detections:
        # Nothing to cut out, e.g. a torso-only photo. Keep the mask as is
        # instead of failing with an IndexError on an empty result list.
        warnings.warn("remove_face: no face detected; mask left unchanged")
        return mask

    # Use the first face. The coordinates are copied into plain floats so the
    # detector's own bbox array is not modified.
    x_left, y_top, x_right, y_bottom = (float(v) for v in detections[0]['bbox'])

    # Width and height of face
    w = x_right - x_left
    h = y_bottom - y_top

    # Make face locations bigger
    x_left -= w * 0.4
    x_right += w * 0.4
    y_top -= h * 0.5
    y_bottom += h * 0.2

    # Calculate center and radius for the circular mask
    center_x = (x_left + x_right) / 2
    center_y = (y_top + y_bottom) / 2
    radius = max((x_right - x_left) / 2, (y_bottom - y_top) / 2)

    # Shrink the circle by a fixed pixel margin. The value is an empirical
    # constant (1 cm at 96 DPI would be about 37.8 px, not 30.8); it is kept
    # unchanged so existing masks stay the same size.
    decrease_radius_pixels = 30.8
    radius -= decrease_radius_pixels

    if radius <= 0:
        # Small faces make the shrunken radius non-positive, which would give
        # ImageDraw an inverted bounding box. There is nothing to draw.
        return mask

    # Draw circular mask onto the mask image
    draw = ImageDraw.Draw(mask)
    draw.ellipse([(center_x - radius, center_y - radius),
                  (center_x + radius, center_y + radius)],
                 fill=0)

    return mask

def segment_torso(original_img):
    """Build an upper-body mask for ``original_img`` with the face cut out.

    The image is run through ``segmenter``; the masks of the selected labels
    are merged into one mask, the face region is blacked out with
    ``remove_face``, and the result is applied to a copy of the image as its
    alpha channel.

    Args:
        original_img: PIL image to segment. It is not modified; all work is
            done on a copy.

    Returns:
        A tuple ``(img, final_mask)``: the copied image with the mask as
        alpha channel, and the mask itself as a PIL image.

    Raises:
        ValueError: if the segmenter returns none of the selected labels.
    """
    # Make a copy
    img = original_img.copy()

    # Segment image
    segments = segmenter(img)

    # Keep only the masks of the labels that make up the torso region
    segment_include = ["Upper-clothes", "Dress", "Belt", "Face", 'T-shirt']
    mask_list = [s['mask'] for s in segments if s['label'] in segment_include]

    if not mask_list:
        raise ValueError(
            "segment_torso: no segment matched any of %s" % (segment_include,)
        )

    # Merge all masks with an element-wise maximum. Starting from the first
    # mask and then *adding* every mask (the first one included) counted the
    # first mask twice, so uint8 values wrapped around (255 + 255 -> 254).
    final_mask = np.array(mask_list[0])
    for mask in mask_list[1:]:
        final_mask = np.maximum(final_mask, np.array(mask))

    # Convert final mask from np array to PIL image
    final_mask = Image.fromarray(final_mask)

    # Remove face
    final_mask = remove_face(img.convert('RGB'), final_mask)

    # Apply mask to the copied image as its alpha channel
    img.putalpha(final_mask)

    return img, final_mask

In [None]:
# Build the inpainting mask from the model photo. `seg_image` is a copy of the
# photo with that mask applied as its alpha channel; `mask_image` is the mask.
seg_image, mask_image = segment_torso(modelImage_rgb)

In [None]:
# Display the masked model photo to check the segmentation visually.
seg_image

In [None]:
# Display the mask that will be passed to the inpainting pipeline.
mask_image

In [None]:
# Set the IP-Adapter scale to 1.0 before generating.
# NOTE(review): presumably this is the weight given to the `ip_adapter_image`
# reference relative to the text prompt; confirm against the diffusers docs.
pipeline.set_ip_adapter_scale(1.0)

In [None]:
# import os
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [None]:
# Inpaint the masked region of the model photo, using the garment picture as
# the IP-Adapter reference. Ten candidates are generated per call; the whole
# batch is kept in `final_images` so the other nine are not thrown away.
final_images = pipeline(
    prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin",
    negative_prompt="ugly, bad quality, bad anatomy, deformed body, deformed hands, deformed feet, deformed face, deformed clothing, deformed skin, bad skin, leggings, tights, stockings",
    image=modelImage_rgb,
    mask_image=mask_image,
    ip_adapter_image=shirtImage_rgb,
    strength=1,
    num_images_per_prompt=10,
    guidance_scale=7.5,
    num_inference_steps=100,
).images

# The first candidate stays available under the original name.
final_image = final_images[0]


In [None]:
# Display the generated try-on result.
final_image

In [None]:
# Second run with a prompt and negative prompt that mention the shirt
# explicitly. All ten candidates are kept in `final_images_shirt_prompt`
# rather than discarding nine of them.
final_images_shirt_prompt = pipeline(
    prompt="photorealistic, perfect body, beautiful skin, realistic skin, natural skin, shirt as shown in the input image",
    negative_prompt="ugly, bad quality, bad anatomy, deformed body, deformed hands, deformed feet, deformed face, deformed clothing, deformed skin, bad skin, leggings, tights, stockings, shirt distortion",
    image=modelImage_rgb,
    mask_image=mask_image,
    ip_adapter_image=shirtImage_rgb,
    strength=1,  # Adjust if needed
    num_images_per_prompt=10,
    guidance_scale=7.5,
    num_inference_steps=100,
).images

# As before, `final_image` is rebound to the first candidate of this run.
final_image = final_images_shirt_prompt[0]