-
Notifications
You must be signed in to change notification settings - Fork 1
/
model_invoke.py
134 lines (112 loc) · 4.73 KB
/
model_invoke.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import json
import requests
from PIL import Image, ImageFilter
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from diffusers import StableDiffusionInpaintPipeline, DPMSolverMultistepScheduler, AutoencoderKL
from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
# Allow TF32 matmuls on Ampere+ GPUs: large speedup, negligible quality impact here.
torch.backends.cuda.matmul.allow_tf32 = True

# --- Stable Diffusion inpainting pipeline (fp16), pinned to a fixed revision ---
# BUG FIX: the from_pretrained keyword is `torch_dtype`, not `dtype`. With `dtype=`
# the argument was silently ignored and the pipeline loaded in fp32, mismatching
# the fp16 VAE attached below (the VAE load already used `torch_dtype` correctly).
pipe = StableDiffusionInpaintPipeline.from_pretrained(
    pretrained_model_name_or_path="runwayml/stable-diffusion-inpainting",
    torch_dtype=torch.float16,
    revision="51388a731f57604945fddd703ecb5c50e8e7b49d",
)

# Swap in the ft-MSE fine-tuned VAE for better image reconstruction quality.
vae = AutoencoderKL.from_pretrained(
    "stabilityai/sd-vae-ft-mse",
    use_safetensors=True,
    torch_dtype=torch.float16,
    revision="31f26fdeee1355a5c34592e401dd41e45d25a493",
)
pipe.vae = vae
# pipe.enable_xformers_memory_efficient_attention()
# pipe.vae.enable_tiling()

# DPM++ SDE scheduler with Karras sigmas: better quality at low step counts.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)
pipe.scheduler.config.algorithm_type = 'sde-dpmsolver++'
pipe.safety_checker = None  # NOTE: disables the NSFW filter entirely
pipe.to("cuda")

# --- Clothes/face segmentation (SegFormer-B2 fine-tuned on a clothes dataset) ---
processor = SegformerImageProcessor.from_pretrained(
    "mattmdjaga/segformer_b2_clothes",
    revision="48531bf3850b68f1fa8b88ae73bfec09f696a82f",
)
model = AutoModelForSemanticSegmentation.from_pretrained(
    "mattmdjaga/segformer_b2_clothes",
    revision="48531bf3850b68f1fa8b88ae73bfec09f696a82f",
).to("cuda")
def scale_image(img, max_size=1500):
    """Downscale *img* so neither side exceeds ``max_size``, snapping both
    dimensions down to multiples of 8 (the SD UNet/VAE requires this).

    Images already within bounds are never upscaled, but are still snapped
    to multiples of 8. Returns a new PIL image; the input is untouched.
    """
    width, height = img.size
    # Shrink-only scaling: factor 1 when the image already fits.
    scale_factor = 1 if width <= max_size and height <= max_size else min(max_size / width, max_size / height)
    # Snap each dimension down to a multiple of 8, but clamp to at least 8 so
    # a degenerate tiny input can't produce a zero-sized resize target.
    new_width = max(8, (int(width * scale_factor) // 8) * 8)
    new_height = max(8, (int(height * scale_factor) // 8) * 8)
    return img.resize((new_width, new_height), Image.Resampling.LANCZOS)
def face_segmentation(image):
    """Return a binarized face mask (PIL image, mode "F") for *image*.

    Runs the SegFormer clothes-segmentation model, keeps only the "Face"
    class (label 11), then Gaussian-blurs and re-thresholds the mask so the
    inpainted region extends slightly past the exact face boundary.
    """
    inputs = processor(images=image, return_tensors="pt").to("cuda")
    # Inference only: disable autograd so no graph/activation buffers are kept.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.cpu()
    # Upsample the low-res logits back to the input resolution.
    upsampled_logits = nn.functional.interpolate(
        logits,
        size=image.size[::-1],  # PIL size is (w, h); interpolate wants (h, w)
        mode="bilinear",
        align_corners=False,
    )
    '''
    Labels: 0: "Background", 1: "Hat", 2: "Hair", 3: "Sunglasses", 4: "Upper-clothes",
    5: "Skirt", 6: "Pants", 7: "Dress", 8: "Belt", 9: "Left-shoe", 10: "Right-shoe",
    11: "Face", 12: "Left-leg", 13: "Right-leg", 14: "Left-arm", 15: "Right-arm",
    16: "Bag", 17: "Scarf"
    '''
    pred_seg = upsampled_logits.argmax(dim=1)[0]
    # Boolean face mask -> float tensor -> PIL image (mode "F").
    mask = transforms.ToPILImage()((pred_seg == 11) * 1.0)
    # Feather the edge, then re-binarize with the original 110 threshold.
    mask = mask.filter(ImageFilter.GaussianBlur(radius=15))
    # point() applies the threshold in one C-level pass, replacing the original
    # O(w*h) Python double loop over load()'d pixels (same 0/255 output values).
    mask = mask.point(lambda p: 0 if p < 110 else 255)
    return mask
def invoke(input_text):
    """Inpaint the face region of a remote image according to a JSON request.

    ``input_text`` is a JSON string with keys:
      - ``image_url`` (required): URL of the source image
      - ``prompt`` (required): base prompt; portrait quality tags are appended
      - ``negative_prompt``, ``strength``, ``steps``, ``guidance_scale`` (optional)

    Saves the result to ``generated_image.png`` and returns that filename.
    """
    input_json = json.loads(input_text)
    image_url = input_json['image_url']
    # Fail fast on HTTP errors instead of handing an error page to PIL, and
    # force RGB so palette/RGBA/grayscale inputs don't break the fp16 pipeline.
    response = requests.get(image_url, stream=True)
    response.raise_for_status()
    image = Image.open(response.raw).convert("RGB")
    image = scale_image(image)
    mask_image = face_segmentation(image)
    prompt = input_json['prompt']
    prompt = f'{prompt}, (portrait), (best quality:1.4), (ultra highres:1.2), (photorealistic:1.4), (soft focus:1.4)'
    if 'negative_prompt' in input_json:
        negative_prompt = input_json['negative_prompt']
    else:
        negative_prompt = '''poorly rendered face, poorly rendered eyes, deformed face, deformed eyelid,
collapsed eyeshadow, multiple eyebrows,inaccurate eyes, showing teeth, long neck,
(cross eye), bad proportions, gross proportions,(ugly face),mask,out of frame, closed eyes
'''
    # Generation knobs, with the service's historical defaults.
    strength = float(input_json['strength']) if 'strength' in input_json else 1.0
    num_inference_steps = int(input_json['steps']) if 'steps' in input_json else 30
    guidance_scale = float(input_json['guidance_scale']) if 'guidance_scale' in input_json else 6.5
    width, height = image.size
    result = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        image=image,
        mask_image=mask_image,
        height=height,
        width=width,
        strength=strength,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).images[0]
    result.save("generated_image.png")
    return "generated_image.png"