diff --git a/MaraScott_Nodes.py b/MaraScott_Nodes.py
index 9572bae..3c55e93 100644
--- a/MaraScott_Nodes.py
+++ b/MaraScott_Nodes.py
@@ -13,6 +13,7 @@ from .py.nodes.UpscalerRefiner.McBoaty_v2 import UpscalerRefiner_McBoaty_v2
 from .py.nodes.UpscalerRefiner.McBoaty_v3 import UpscalerRefiner_McBoaty_v3
 from .py.nodes.KSampler.InpaintingTileByMask_v1 import KSampler_setInpaintingTileByMask_v1, KSampler_pasteInpaintingTileByMask_v1
+from .py.nodes.Prompt.PromptFromImage_v1 import PromptFromImage_v1
 from .py.vendor.ComfyUI_JNodes.blob.main.py.prompting_nodes import TokenCounter as TokenCounter_v1
 
 WEB_DIRECTORY = "./web/assets/js"
@@ -24,6 +25,7 @@
     "MaraScottUpscalerRefinerNode_v3": UpscalerRefiner_McBoaty_v3,
     "MaraScottSetInpaintingByMask_v1": KSampler_setInpaintingTileByMask_v1,
     "MaraScottPasteInpaintingByMask_v1": KSampler_pasteInpaintingTileByMask_v1,
+    "MaraScottPromptFromImage_v1": PromptFromImage_v1,
     "MaraScott_Kijai_TokenCounter_v1": TokenCounter_v1,
@@ -41,6 +43,7 @@
     "MaraScottUpscalerRefinerNode_v3": "\ud83d\udc30 Large Refiner - McBoaty v3 /u",
     "MaraScottSetInpaintingByMask_v1": "\ud83d\udc30 Set Inpainting Tile by mask - McInpainty [1/2] v1 /m",
     "MaraScottPasteInpaintingByMask_v1": "\ud83d\udc30 Paste Inpainting Tile by mask - McInpainty [2/2] v1 /m",
+    "MaraScottPromptFromImage_v1": "\ud83d\udc30 Prompt From Image - McPrompty v1 /p",
     "MaraScott_Kijai_TokenCounter_v1": "\ud83d\udc30 TokenCounter (from kijai/ComfyUI-KJNodes) /v",
@@ -49,4 +52,4 @@
     "MaraScottUpscalerRefinerNode_v2": "\u274C Large Refiner - McBoaty v2 /u",
 }
 
-print('\033[34m[Maras IT] \033[92mLoaded\033[0m')
+print('\033[34m[MaraScott] \033[92mLoaded\033[0m')
diff --git a/README.md b/README.md
index 9f2b2b6..ae1bc11 100644
--- a/README.md
+++ b/README.md
@@ -60,9 +60,16 @@ This AnyBus is dyslexia friendly :D
 
 The Upscaler Refiner Node (AKA McBoaty Node) is an upscaler coupled with a refiner to achieve higher rendering results.
 The output image is a slightly modified image.
 
+To use `Tile Prompting`, we recommend [setting up your Groq API key on your computer](https://console.groq.com/docs/quickstart) to improve tile prompting accuracy.
+
 **Not Supported**:
 
 - ControlNet : your conditioning needs to be ControlNet Free
 
+# Prompt From Image Node AKA McPrompty Node
+
+The Prompt From Image Node (AKA McPrompty Node) is a prompt generator node that takes an image as input and, coupled with an LLM engine (Groq), generates a text description of it.
+The output text can then be used as a prompt.
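+
+For reference, here is a rough, hypothetical sketch of driving the underlying `MS_Llm` helper (added in `py/inc/lib/llm.py`) directly from Python. It assumes ComfyUI's Python environment (so the `custom_nodes` import path used by `prestartup_script.py` and ComfyUI's `folder_paths` module resolve) and uses a random tensor as a stand-in for a real image; the Groq API is only called when the `GROQ_API_KEY` environment variable is set.
+
+```python
+import torch
+from custom_nodes.ComfyUI_MaraScott_Nodes.py.inc.lib.llm import MS_Llm
+
+# Pick one entry from MS_Llm.VISION_LLM_MODELS and MS_Llm.LLM_MODELS.
+llm = MS_Llm(
+    vision_llm_name="microsoft/kosmos-2-patch14-224",
+    llm_name="llama3-70b-8192",
+)
+
+# Stand-in for a real ComfyUI image tensor (batch, height, width, channels).
+image = torch.rand(1, 512, 512, 3)
+
+# Caption the full image, then derive a tile prompt from it; the Groq call is
+# skipped when GROQ_API_KEY is not set.
+context = llm.vision_llm.generate_prompt(image)
+tile_prompt = llm.generate_tile_prompt(image, context, seed=4)
+print(tile_prompt)
+```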
+ # Thanks ## Special thanks diff --git a/prestartup_script.py b/prestartup_script.py index be27680..0908735 100644 --- a/prestartup_script.py +++ b/prestartup_script.py @@ -7,4 +7,8 @@ # ### -print('\033[34m[Maras IT] \033[92mInitialization\033[0m') \ No newline at end of file +from custom_nodes.ComfyUI_MaraScott_Nodes.py.inc.lib.llm import MS_Llm + +print('\033[34m[MaraScott] \033[92mInitialization\033[0m') + +MS_Llm.prestartup_script() diff --git a/py/inc/lib/image.py b/py/inc/lib/image.py index 5a783d7..be31252 100644 --- a/py/inc/lib/image.py +++ b/py/inc/lib/image.py @@ -86,7 +86,7 @@ def get_dynamic_grid_specs(self, width, height, rows_qty = 3, cols_qty = 3, size tiles = [] for row_index, row in enumerate(tile_order_rows): for col_index, col in enumerate(tile_order_cols): - index = (row * len(tile_order_rows)) + col + order = (row_index * len(tile_order_rows)) + col_index _tile_width = (tile_width_units_qty + 2) * size_unit _tile_height = (tile_height_units_qty + 2) * size_unit @@ -112,7 +112,7 @@ def get_dynamic_grid_specs(self, width, height, rows_qty = 3, cols_qty = 3, size tiles.append([ row_index, col_index, - index, + order, x, # x y, # y _tile_width, # width @@ -136,7 +136,7 @@ def get_grid_images(self, image, grid_specs): return grids @classmethod - def rebuild_image_from_parts(self, iteration, output_images, image, grid_specs, feather_mask, upscale_scale): + def rebuild_image_from_parts(self, iteration, output_images, image, grid_specs, feather_mask, upscale_scale, grid_prompts): width_feather_seam = feather_mask height_feather_seam = feather_mask @@ -186,7 +186,8 @@ def rebuild_image_from_parts(self, iteration, output_images, image, grid_specs, for index, grid_spec in enumerate(grid_specs): log(f"Rebuilding tile {index + 1}/{total}", None, None, f"Refining {iteration}") row, col, order, x_start, y_start, width_inc, height_inc = grid_spec - tiles_order.append((order, output_images[index])) + prompt = grid_prompts[index] if 0 <= index < len(grid_prompts) else "" + tiles_order.append((order, output_images[index], prompt)) if col == 0: outputRow = nodes.ImagePadForOutpaint().expand_image(output_images[index], 0, 0, (image.shape[2]*upscale_scale) - tile_width, 0, 0)[0] elif col == last_tile_col_index: @@ -238,7 +239,7 @@ def get_dynamic_grid_specs(self, width, height, tile_rows = 3, tile_cols =3): tiles.append([ (col * len(tile_order)) + row, (row * tile_width) - (row * width_unit), # x - (col * tile_height) - (col * height_unit), # x + (col * tile_height) - (col * height_unit), # y tile_width, # width tile_height, # height ]) diff --git a/py/inc/lib/llm.py b/py/inc/lib/llm.py new file mode 100644 index 0000000..537e5ab --- /dev/null +++ b/py/inc/lib/llm.py @@ -0,0 +1,174 @@ +import os +import requests +import torch +import folder_paths +from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer +from transformers import AutoProcessor, AutoModelForVision2Seq +from transformers import BlipProcessor, BlipForConditionalGeneration +from groq import Groq +from .image import MS_Image_v2 as MS_Image + +from ...utils.log import log + +class MS_Llm_Microsoft(): + + @classmethod + def __init__(self, model_name = "microsoft/kosmos-2-patch14-224"): + self.name = model_name + self.model = AutoModelForVision2Seq.from_pretrained(self.name) + self.processor = AutoProcessor.from_pretrained(self.name) + + @classmethod + def generate_prompt(self, image): + + # prompt_prefix = "An image of" + prompt_prefix = "" + + _image = MS_Image.tensor2pil(image) + + inputs = 
self.processor(text=prompt_prefix, images=_image, return_tensors="pt") + + # Generate the caption + generated_ids = self.model.generate( + pixel_values=inputs["pixel_values"], + input_ids=inputs["input_ids"], + attention_mask=inputs["attention_mask"], + image_embeds=None, + image_embeds_position_mask=inputs["image_embeds_position_mask"], + use_cache=True, + max_new_tokens=128, + ) + generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=True)[0] + caption, _ = self.processor.post_process_generation(generated_text) + + return caption + + +class MS_Llm_Salesforce(): + + @classmethod + def __init__(self, model_name = "Salesforce/blip-image-captioning-large"): + self.name = model_name + self.model = BlipForConditionalGeneration.from_pretrained(self.name) + self.processor = BlipProcessor.from_pretrained(self.name) + + @classmethod + def generate_prompt(self, image): + + # prompt_prefix = "An image of" + prompt_prefix = "" + + _image = MS_Image.tensor2pil(image) + + inputs = self.processor(text=prompt_prefix, images=_image, return_tensors="pt") + + # Generate the caption + generated_ids = self.model.generate(**inputs) + caption = self.processor.decode(generated_ids[0], skip_special_tokens=True) + + return caption + +class MS_Llm_Nlpconnect(): + + @classmethod + def __init__(self, model_name = "nlpconnect/vit-gpt2-image-captioning"): + self.name = model_name + self.model = VisionEncoderDecoderModel.from_pretrained(self.name) + self.processor = ViTImageProcessor.from_pretrained(self.name) + self.tokenizer = AutoTokenizer.from_pretrained(self.name) + + @classmethod + def generate_prompt(self, image): + + _image = MS_Image.tensor2pil(image) + inputs = self.processor(images=_image, return_tensors="pt") + # Generate the caption + generated_ids = self.model.generate( + inputs.pixel_values, + max_length=16, + num_beams=4, + num_return_sequences=1 + ) + caption = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True) + + return caption + +class MS_Llm(): + + LLM_MODELS = [ + # 'gemma-7b-it', + 'llama3-70b-8192', + # 'llama3-8b-8192', + # 'mixtral-8x7b-32768', + ] + + # list of model https://huggingface.co/models?pipeline_tag=image-to-text&sort=downloads + VISION_LLM_MODELS = [ + # 'nlpconnect/vit-gpt2-image-captioning', + 'microsoft/kosmos-2-patch14-224', + # 'Salesforce/blip-image-captioning-large', + ] + + @staticmethod + def prestartup_script(): + folder_paths.add_model_folder_path("nlpconnect", os.path.join(folder_paths.models_dir, "nlpconnect")) + + @classmethod + def __init__(self, vision_llm_name = "nlpconnect/vit-gpt2-image-captioning", llm_name = "llama3-8b-8192"): + + if vision_llm_name == 'microsoft/kosmos-2-patch14-224': + self.vision_llm = MS_Llm_Microsoft() + elif vision_llm_name == 'Salesforce/blip-image-captioning-large': + self.vision_llm = MS_Llm_Salesforce() + else: + self.vision_llm = MS_Llm_Nlpconnect() + + self._groq_key = os.getenv("GROQ_API_KEY", "") + self.llm = llm_name + + @classmethod + def generate_tile_prompt(self, image, prompt_context, seed=None): + prompt_tile = self.vision_llm.generate_prompt(image) + if self.vision_llm.name == 'microsoft/kosmos-2-patch14-224': + _prompt = self.get_grok_prompt(prompt_context, prompt_tile) + else: + _prompt = self.get_grok_prompt(prompt_context, prompt_tile) + if self._groq_key != "": + prompt = self.call_grok_api(_prompt, seed) + else: + prompt = _prompt + log(prompt, None, None, self.vision_llm.name) + return prompt + + + @classmethod + def get_grok_prompt(self, prompt_context, prompt_tile): + prompt 
= [ + f"tile_prompt: \"{prompt_tile}\".", + f"full_image_prompt: \"{prompt_context}\".", + "tile_prompt is part of full_image_prompt.", + "If tile_prompt is describing something different than the full image, correct tile_prompt to match full_image_prompt.", + "if you don't need to change the tile_prompt return the tile_prompt.", + "your answer will strictly and only return the tile_prompt string without any decoration like markdown syntax." + ] + return " ".join(prompt) + + @classmethod + def call_grok_api(self, prompt, seed=None): + + client = Groq(api_key=self._groq_key) # Assuming the Groq client accepts an api_key parameter + completion = client.chat.completions.create( + model=self.llm, + messages=[{ + "role": "user", + "content": prompt + }], + temperature=1, + max_tokens=1024, + top_p=1, + stream=False, + stop=None, + seed=seed, + ) + + return completion.choices[0].message.content diff --git a/py/nodes/Prompt/PromptFromImage_v1.py b/py/nodes/Prompt/PromptFromImage_v1.py new file mode 100644 index 0000000..a83adf7 --- /dev/null +++ b/py/nodes/Prompt/PromptFromImage_v1.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# +### +# +# Display Info or any string +# +# Largely inspired by PYSSSSS - ShowText +# +### + +from types import SimpleNamespace + +from ...inc.lib.llm import MS_Llm + +from ...utils.log import * + +class PromptFromImage_v1: + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE", {"label": "image"}), + "vision_llm_model": (MS_Llm.VISION_LLM_MODELS, { "label": "Vision LLM Model", "default": "microsoft/kosmos-2-patch14-224" }), + "llm_model": (MS_Llm.LLM_MODELS, { "label": "LLM Model", "default": "llama3-70b-8192" }), + }, + "hidden": { + "unique_id": "UNIQUE_ID", + "extra_pnginfo": "EXTRA_PNGINFO", + }, + } + + INPUT_IS_LIST = False + FUNCTION = "fn" + OUTPUT_NODE = True + OUTPUT_IS_LIST = (False,) + CATEGORY = "MaraScott/Prompt" + + RETURN_TYPES = ( + "STRING", + ) + RETURN_NAMES = ( + "Prompt", + ) + + @classmethod + def fn(self, **kwargs): + + self.INPUTS = SimpleNamespace( + image = kwargs.get('image', None) + ) + self.LLM = SimpleNamespace( + vision_model_name = kwargs.get('vision_llm_model', None), + model_name = kwargs.get('llm_model', None), + model = None, + ) + self.LLM.model = MS_Llm(self.LLM.vision_model_name, self.LLM.model_name) + + self.OUPUTS = SimpleNamespace( + prompt = self.LLM.model.vision_llm.generate_prompt(self.INPUTS.image) + ) + + return {"ui": {"text": self.OUPUTS.prompt}, "result": (self.OUPUTS.prompt,)} \ No newline at end of file diff --git a/py/nodes/UpscalerRefiner/McBoaty_v3.py b/py/nodes/UpscalerRefiner/McBoaty_v3.py index 0a72449..04f36b9 100644 --- a/py/nodes/UpscalerRefiner/McBoaty_v3.py +++ b/py/nodes/UpscalerRefiner/McBoaty_v3.py @@ -19,7 +19,7 @@ from ...utils.version import VERSION from ...inc.lib.image import MS_Image_v2 as MS_Image -from ...inc.lib.sampler import MS_Sampler +from ...inc.lib.llm import MS_Llm from ...vendor.ComfyUI_KJNodes.nodes.image_nodes import ColorMatch as ColorMatch from ...utils.log import * @@ -27,10 +27,19 @@ import time class UpscalerRefiner_McBoaty_v3(): + UPSCALE_METHODS = [ + "area", + "bicubic", + "bilinear", + "bislerp", + "lanczos", + "nearest-exact" + ] + SIGMAS_TYPES = [ - 'BasicScheduler' - , 'SDTurboScheduler' - , 'AlignYourStepsScheduler' + 'BasicScheduler', + 'SDTurboScheduler', + 'AlignYourStepsScheduler' ] AYS_MODEL_TYPE_SIZES = { 'SD1': 512, @@ -54,6 +63,7 @@ class UpscalerRefiner_McBoaty_v3(): OUTPUTS = {} PARAMS = {} KSAMPLERS = {} + LLM 
= {} @classmethod def INPUT_TYPES(self): @@ -65,24 +75,30 @@ def INPUT_TYPES(self): "image": ("IMAGE", {"label": "Image" }), "model": ("MODEL", { "label": "Model" }), + "clip": ("CLIP", { "label": "Clip" }), "vae": ("VAE", { "label": "VAE" }), "positive": ("CONDITIONING", { "label": "Positive" }), "negative": ("CONDITIONING", { "label": "Negative" }), "seed": ("INT", { "label": "Seed", "default": 4, "min": 0, "max": 0xffffffffffffffff}), - "output_size": ("BOOLEAN", { "label": "Output Size", "default": True, "label_on": "Upscale size", "label_off": "Input size"}), "upscale_model": (folder_paths.get_filename_list("upscale_models"), { "label": "Upscale Model" }), + "output_size_type": ("BOOLEAN", { "label": "Output Size Type", "default": True, "label_on": "Upscale size", "label_off": "Custom size"}), + "output_size": ("FLOAT", { "label": "Custom Output Size", "default": 1.00, "min": 1.00, "max": 16.00, "step":0.01, "round": 0.01}), + "output_upscale_method": (self.UPSCALE_METHODS, { "label": "Custom Output Upscale Method", "default": "bicubic"}), "steps": ("INT", { "label": "Steps", "default": 10, "min": 1, "max": 10000}), "cfg": ("FLOAT", { "label": "CFG", "default": 2.5, "min": 0.0, "max": 100.0, "step":0.1, "round": 0.01}), "sigmas_type": (self.SIGMAS_TYPES, { "label": "Sigmas Type" }), "sampler_name": (comfy.samplers.KSampler.SAMPLERS, { "label": "Sampler Name" }), "basic_scheduler": (comfy.samplers.KSampler.SCHEDULERS, { "label": "Basic Scheduler" }), - "denoise": ("FLOAT", { "label": "Denoise", "default": 0.35, "min": 0.0, "max": 1.0, "step": 0.01}), - "ays_model_type": (self.AYS_MODEL_TYPES, { "label": "Model Type" }), - "tile_size": ("INT", { "label": "Tile Size", "default": 512, "min": 320, "max": 4096, "step": 64}), + "denoise": ("FLOAT", { "label": "Denoise", "default": 0.27, "min": 0.0, "max": 1.0, "step": 0.01}), + "ays_model_type": (self.AYS_MODEL_TYPES, { "label": "Model Type", "default": "SDXL" }), + "tile_size": ("INT", { "label": "Tile Size", "default": 1024, "min": 320, "max": 4096, "step": 64}), "vae_encode": ("BOOLEAN", { "label": "VAE Encode type", "default": True, "label_on": "tiled", "label_off": "standard"}), - "feather_mask": ("INT", { "label": "Feather Mask", "default": 64, "min": 32, "max": nodes.MAX_RESOLUTION, "step": 32}), + "feather_mask": ("INT", { "label": "Feather Mask", "default": 128, "min": 32, "max": nodes.MAX_RESOLUTION, "step": 32}), "color_match_method": (self.COLOR_MATCH_METHODS, { "label": "Color Match Method", "default": 'none'}), + "tile_prompting_active": ("BOOLEAN", { "label": "Tile prompting (experimental)", "default": False, "label_on": "Active", "label_off": "Inactive"}), + "vision_llm_model": (MS_Llm.VISION_LLM_MODELS, { "label": "Vision LLM Model", "default": "microsoft/kosmos-2-patch14-224" }), + "llm_model": (MS_Llm.LLM_MODELS, { "label": "LLM Model", "default": "llama3-70b-8192" }), }, "optional": { @@ -110,6 +126,8 @@ def INPUT_TYPES(self): @classmethod def fn(self, **kwargs): + + start_time = time.time() self.init(**kwargs) @@ -125,15 +143,25 @@ def fn(self, **kwargs): current_image = self.OUTPUTS.image for index in range(self.PARAMS.max_iterations): - output_image, output_tiles = self.upscale_refine(current_image, f"{index + 1}/{self.PARAMS.max_iterations}") - if not self.PARAMS.upscale_size: - output_image = nodes.ImageScale().upscale(output_image, "nearest-exact", image_width, image_height, False)[0] + output_image, output_tiles, output_prompts = self.upscale_refine(current_image, f"{index + 1}/{self.PARAMS.max_iterations}") + if 
not self.PARAMS.upscale_size_type: + output_image = nodes.ImageScale().upscale(output_image, self.PARAMS.upscale_method, int(image_width * self.PARAMS.upscale_size), int(image_height * self.PARAMS.upscale_size), False)[0] current_image = output_image output_image_width = output_image.shape[2] output_image_height = output_image.shape[1] - output_info = self._get_info(image_width, image_height, image_divisible_by_8, output_image_width, output_image_height) + end_time = time.time() + + output_info = self._get_info( + image_width, + image_height, + image_divisible_by_8, + output_image_width, + output_image_height, + output_prompts, + int(end_time - start_time) + ) log(f"McBoaty is done with its magic") @@ -156,12 +184,14 @@ def init(self, **kwargs): image = kwargs.get('image', None), ) self.PARAMS = SimpleNamespace( + upscale_size_type = kwargs.get('output_size_type', None), upscale_size = kwargs.get('output_size', None), upscale_model_name = kwargs.get('upscale_model', None), - upscale_method = "lanczos", + upscale_method = kwargs.get('output_upscale_method', "lanczos"), feather_mask = kwargs.get('feather_mask', None), color_match_method = kwargs.get('color_match_method', 'none'), max_iterations = kwargs.get('running_count', 1), + tile_prompting_active = kwargs.get('tile_prompting_active', False), ) self.PARAMS.upscale_model = comfy_extras.nodes_upscale_model.UpscaleModelLoader().load_model(self.PARAMS.upscale_model_name)[0] @@ -169,6 +199,7 @@ def init(self, **kwargs): tiled = kwargs.get('vae_encode', None), tile_size = kwargs.get('tile_size', None), model = kwargs.get('model', None), + clip = kwargs.get('clip', None), vae = kwargs.get('vae', None), noise_seed = kwargs.get('seed', None), sampler_name = kwargs.get('sampler_name', None), @@ -184,10 +215,16 @@ def init(self, **kwargs): ) self.KSAMPLER.sampler = comfy_extras.nodes_custom_sampler.KSamplerSelect().get_sampler(self.KSAMPLER.sampler_name)[0] + # isinstance(self.KSAMPLER.model, comfy.model_base.SDXL) self.KSAMPLER.tile_size_sampler = self.AYS_MODEL_TYPE_SIZES[self.KSAMPLER.ays_model_type] self.KSAMPLER.sigmas = self._get_sigmas(self.KSAMPLER.sigmas_type, self.KSAMPLER.model, self.KSAMPLER.steps, self.KSAMPLER.denoise, self.KSAMPLER.scheduler, self.KSAMPLER.ays_model_type) self.KSAMPLER.outpaint_sigmas = self._get_sigmas(self.KSAMPLER.sigmas_type, self.KSAMPLER.model, self.KSAMPLER.steps, 1, self.KSAMPLER.scheduler, self.KSAMPLER.ays_model_type) + self.LLM = SimpleNamespace( + vision_model = kwargs.get('vision_llm_model', None), + model = kwargs.get('llm_model', None), + ) + # TODO : make the feather_mask proportional to tile size ? 
# self.PARAMS.feather_mask = self.KSAMPLER.tile_size // 16 @@ -197,7 +234,9 @@ def init(self, **kwargs): @classmethod - def _get_info(self, image_width, image_height, image_divisible_by_8, output_image_width, output_image_height): + def _get_info(self, image_width, image_height, image_divisible_by_8, output_image_width, output_image_height, output_prompts, execution_duration): + formatted_prompts = "\n".join(f" [{index+1}] {prompt}" for index, prompt in enumerate(output_prompts)) + return [f""" IMAGE (INPUT) @@ -210,7 +249,17 @@ def _get_info(self, image_width, image_height, image_divisible_by_8, output_imag IMAGE (OUTPUT) width : {output_image_width} height : {output_image_height} + + ------------------------------ + + TILES PROMPTS +{formatted_prompts} + ------------------------------ + + EXECUTION + DURATION : {execution_duration} seconds + NODE INFO version : {VERSION} @@ -242,13 +291,25 @@ def upscale_refine(self, image, iteration): grid_specs = MS_Image().get_dynamic_grid_specs(image.shape[2], image.shape[1], rows_qty, cols_qty, feather_mask)[0] grid_images = MS_Image().get_grid_images(image, grid_specs) + grid_prompts = ["No tile prompting"] grid_upscales = [] grid_latents = [] grid_latent_outputs = [] output_images = [] total = len(grid_images) - for index, grid_image in enumerate(grid_images): + if self.PARAMS.tile_prompting_active: + grid_prompts = [] + llm = MS_Llm(self.LLM.vision_model, self.LLM.model) + prompt_context = llm.vision_llm.generate_prompt(image) + + for index, grid_image in enumerate(grid_images): + log(f"tile {index + 1}/{total}", None, None, f"Prompting {iteration}") + prompt_tile = llm.generate_tile_prompt(grid_image, prompt_context, self.KSAMPLER.noise_seed) + log(prompt_tile, None, None, f"Model {llm.vision_llm.name}") + grid_prompts.append(prompt_tile) + + for index, grid_image in enumerate(grid_images): log(f"tile {index + 1}/{total}", None, None, f"Upscaling {iteration}") # _image_grid = nodes.ImageScaleBy().upscale(_image_grid, self.PARAMS.upscale_method, (_image_grid.shape[2] / self.KSAMPLER.tile_size_sampler))[0] upscaled_image_grid = comfy_extras.nodes_upscale_model.ImageUpscaleWithModel().upscale(self.PARAMS.upscale_model, grid_image)[0] @@ -263,15 +324,19 @@ def upscale_refine(self, image, iteration): log(f"tile {index + 1}/{total}", None, None, f"VAEEncoding {iteration}") latent_image = nodes.VAEEncode().encode(self.KSAMPLER.vae, upscaled_image_grid)[0] grid_latents.append(latent_image) - - for index, latent_image in enumerate(grid_latents): + + for index, latent_image in enumerate(grid_latents): + positive = self.KSAMPLER.positive + if self.PARAMS.tile_prompting_active: + log(f"tile {index + 1}/{total} : {grid_prompts[index]}", None, None, f"ClipTextEncoding {iteration}") + positive = nodes.CLIPTextEncode().encode(self.KSAMPLER.clip, grid_prompts[index])[0] log(f"tile {index + 1}/{total}", None, None, f"Refining {iteration}") latent_output = comfy_extras.nodes_custom_sampler.SamplerCustom().sample( self.KSAMPLER.model, self.KSAMPLER.add_noise, self.KSAMPLER.noise_seed, self.KSAMPLER.cfg, - self.KSAMPLER.positive, + positive, self.KSAMPLER.negative, self.KSAMPLER.sampler, self.KSAMPLER.sigmas, @@ -292,13 +357,14 @@ def upscale_refine(self, image, iteration): feather_mask = int(self.PARAMS.feather_mask * self.PARAMS.upscale_model.scale) upscaled_grid_specs = MS_Image().get_dynamic_grid_specs((image.shape[2]*self.PARAMS.upscale_model.scale), (image.shape[1]*self.PARAMS.upscale_model.scale), rows_qty, cols_qty, feather_mask)[0] - output_image, 
tiles_order = MS_Image().rebuild_image_from_parts(iteration, output_images, image, upscaled_grid_specs, feather_mask, self.PARAMS.upscale_model.scale) + output_image, tiles_order = MS_Image().rebuild_image_from_parts(iteration, output_images, image, upscaled_grid_specs, feather_mask, self.PARAMS.upscale_model.scale, grid_prompts) if self.PARAMS.color_match_method != 'none': output_image = ColorMatch().colormatch(image, output_image, self.PARAMS.color_match_method)[0] tiles_order.sort(key=lambda x: x[0]) - output_tiles = tuple(output for _, output in tiles_order) + output_tiles = tuple(output for _, output, _ in tiles_order) output_tiles = torch.cat(output_tiles) + output_prompts = tuple(prompt for _, _, prompt in tiles_order) - return output_image, output_tiles + return output_image, output_tiles, output_prompts diff --git a/py/utils/version.py b/py/utils/version.py index 8575989..cf445c1 100644 --- a/py/utils/version.py +++ b/py/utils/version.py @@ -3,4 +3,4 @@ # ### -VERSION = "4.2.9" \ No newline at end of file +VERSION = "4.3.0" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 6ba59bc..18abe80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "comfyui_marascott_nodes" description = "A set of nodes including a universal bus, an Inpainting By Mask and a large Upscaler/Refiner" -version = "4.2.9" +version = "4.3.0" license = "LICENSE" -dependencies = ["blend_modes", "numba", "color-matcher"] +dependencies = ["blend_modes", "numba", "color-matcher", "groq"] [project.urls] Repository = "https://github.com/MaraScott/ComfyUI_MaraScott_Nodes" diff --git a/requirements.txt b/requirements.txt index 929c84e..51ed7e4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ blend_modes numba -color-matcher \ No newline at end of file +color-matcher +groq \ No newline at end of file diff --git a/web/assets/js/DisplayInfo_v1.js b/web/assets/js/DisplayInfo_v1.js index f56636b..491c976 100644 --- a/web/assets/js/DisplayInfo_v1.js +++ b/web/assets/js/DisplayInfo_v1.js @@ -5,7 +5,7 @@ import { ComfyWidgets } from "../../scripts/widgets.js"; app.registerExtension({ name: "ComfyUI.MaraScott.DisplayInfo_v1", async beforeRegisterNodeDef(nodeType, nodeData, app) { - if (nodeData.name === "DisplayInfoNode_v1") { + if (nodeData.name === "MaraScottDisplayInfo_v1") { function populate(text) { if (this.widgets) { for (let i = 1; i < this.widgets.length; i++) { @@ -19,6 +19,7 @@ app.registerExtension({ v.shift(); } for (const list of v) { + const w = ComfyWidgets["STRING"](this, "text", ["STRING", { multiline: true }], app).widget; w.inputEl.readOnly = true; w.inputEl.style.opacity = 0.6;