diff --git a/.dockerignore b/.dockerignore index bae2ec223..9af684e5d 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,3 @@ -models/custom/ outputs/ src/ -gfpgan/ +configs/webui/userconfig_streamlit.yaml \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 1d669282e..4fcdc6d19 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,61 +1,25 @@ -# This file is part of stable-diffusion-webui (https://github.com/sd-webui/stable-diffusion-webui/). +ARG IMAGE=hlky/sd-webui:base -# Copyright 2022 sd-webui team. -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. +FROM ${IMAGE} -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# Assumes host environment is AMD64 architecture -# ARG TARGETPLATFORM - -# This is used to allow building against AMD GPUs -# Annoyingly, you can't IF branch off of, say, TARGETGPU and set -# the Dockerfile's FROM based on that, so we have to have the user -# pass in the entire image path for us. -ARG PYTORCH_IMAGE=pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime -# To build against AMD, use -# --build-arg PYTORCH_IMAGE=rocm/pytorch:rocm5.2.3_ubuntu20.04_py3.7_pytorch_1.12.1 - -FROM ${PYTORCH_IMAGE} - -WORKDIR /install +WORKDIR /workdir SHELL ["/bin/bash", "-c"] -RUN apt-get update && \ - apt-get install -y wget git build-essential && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -COPY ./requirements.txt /install/ - -RUN pip install -r /install/requirements.txt -# From base image. 
We need opencv-python-headless so we uninstall here -RUN pip uninstall -y opencv-python && pip install opencv-python-headless==4.6.0.66 - -# Install font for prompt matrix -COPY /data/DejaVuSans.ttf /usr/share/fonts/truetype/ - ENV PYTHONPATH=/sd -COPY ./models /sd/models -COPY ./configs /sd/configs -COPY ./frontend /sd/frontend -COPY ./ldm /sd/ldm -# COPY ./gfpgan/ /sd/ -COPY ./optimizedSD /sd/optimizedSD -COPY ./scripts /sd/scripts - -EXPOSE 7860 8501 - +EXPOSE 8501 +COPY ./data/DejaVuSans.ttf /usr/share/fonts/truetype/ +COPY ./data/ /sd/data/ +COPY ./images/ /sd/images/ +COPY ./scripts/ /sd/scripts/ +COPY ./ldm/ /sd/ldm/ +COPY ./frontend/ /sd/frontend/ +COPY ./configs/ /sd/configs/ +COPY ./.streamlit/ /sd/.streamlit/ COPY ./entrypoint.sh /sd/ ENTRYPOINT /sd/entrypoint.sh +RUN mkdir -p ~/.streamlit/ +RUN echo "[general]" > ~/.streamlit/credentials.toml +RUN echo "email = \"\"" >> ~/.streamlit/credentials.toml diff --git a/Dockerfile_base b/Dockerfile_base new file mode 100644 index 000000000..21ff93206 --- /dev/null +++ b/Dockerfile_base @@ -0,0 +1,17 @@ +ARG PYTORCH_IMAGE=hlky/pytorch:1.12.1-runtime + +FROM ${PYTORCH_IMAGE} +SHELL ["/bin/bash", "-c"] + +WORKDIR /install + +RUN apt-get update && \ + apt-get install -y wget curl git build-essential zip unzip nano openssh-server libgl1 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +COPY ./requirements.txt /install/ + +RUN /opt/conda/bin/python -m pip install -r /install/requirements.txt + +RUN /opt/conda/bin/conda clean -ya diff --git a/Dockerfile_runpod b/Dockerfile_runpod new file mode 100644 index 000000000..fbd314417 --- /dev/null +++ b/Dockerfile_runpod @@ -0,0 +1,27 @@ +ARG IMAGE=hlky/sd-webui:base + +FROM ${IMAGE} + +WORKDIR /workdir + +SHELL ["/bin/bash", "-c"] + +ENV PYTHONPATH=/sd + +EXPOSE 8501 +COPY ./data/DejaVuSans.ttf /usr/share/fonts/truetype/ +COPY ./configs/ /sd/configs/ +COPY ./data/ /sd/data/ +COPY ./frontend/ /sd/frontend/ +COPY ./gfpgan/ /sd/gfpgan/ +COPY ./images/ /sd/images/ +COPY ./ldm/ /sd/ldm/ +COPY ./models/ /sd/models/ +COPY ./scripts/ /sd/scripts/ +COPY ./.streamlit/ /sd/.streamlit/ +COPY ./runpod_entrypoint.sh /sd/entrypoint.sh +ENTRYPOINT /sd/entrypoint.sh + +RUN mkdir -p ~/.streamlit/ +RUN echo "[general]" > ~/.streamlit/credentials.toml +RUN echo "email = \"\"" >> ~/.streamlit/credentials.toml diff --git a/README.md b/README.md index 354c7f332..ccb49e2a0 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ Lets you improve faces in pictures using the GFPGAN model. There is a checkbox i If you want to use GFPGAN to improve generated faces, you need to install it separately. Download [GFPGANv1.3.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth) and put it -into the `/stable-diffusion-webui/src/gfpgan/experiments/pretrained_models` directory. +into the `/stable-diffusion-webui/models/gfpgan` directory. ### RealESRGAN @@ -117,13 +117,13 @@ Lets you double the resolution of generated images. There is a checkbox in every There is also a separate tab for using RealESRGAN on any picture. Download [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth) and [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth). -Put them into the `stable-diffusion-webui/src/realesrgan/experiments/pretrained_models` directory. +Put them into the `stable-diffusion-webui/models/realesrgan` directory.
### GoBig, LSDR, and GoLatent *(Currently Gradio Only)* More powerful upscalers that uses a seperate Latent Diffusion model to more cleanly upscale images. -Download **LDSR** [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [ model last.cpkt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename last.ckpt to model.ckpt and place both under stable-diffusion-webui/src/latent-diffusion/experiments/pretrained_models/ +Download **LDSR** [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [model last.ckpt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename last.ckpt to model.ckpt and place both under stable-diffusion-webui/models/ldsr/ Please see the [Image Enhancers Documentation](docs/5.image_enhancers.md) to learn more. diff --git a/build_docker.sh b/build_docker.sh deleted file mode 100755 index 50ad8e067..000000000 --- a/build_docker.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh -# This file is part of stable-diffusion-webui (https://github.com/sd-webui/stable-diffusion-webui/). - -# Copyright 2022 sd-webui team. -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# Functionally equivalent to docker compose build -docker build .
-t stable-diffusion-webui:dev diff --git a/configs/webui/webui_streamlit.yaml b/configs/webui/webui_streamlit.yaml index 38dcfbff5..a0870af15 100644 --- a/configs/webui/webui_streamlit.yaml +++ b/configs/webui/webui_streamlit.yaml @@ -29,11 +29,11 @@ general: default_model_path: "models/ldm/stable-diffusion-v1/model.ckpt" use_sd_concepts_library: True sd_concepts_library_folder: "models/custom/sd-concepts-library" - GFPGAN_dir: "./src/gfpgan" + GFPGAN_dir: "./models/gfpgan" GFPGAN_model: "GFPGANv1.4" LDSR_dir: "./models/ldsr" LDSR_model: "model" - RealESRGAN_dir: "./src/realesrgan" + RealESRGAN_dir: "./models/realesrgan" RealESRGAN_model: "RealESRGAN_x4plus" upscaling_method: "RealESRGAN" outdir_txt2img: outputs/txt2img @@ -85,21 +85,21 @@ txt2img: seed: "" batch_count: value: 1 - min_value: 1 - max_value: 100 - step: 1 batch_size: value: 1 - min_value: 1 - max_value: 5 - step: 1 sampling_steps: value: 30 min_value: 10 max_value: 250 step: 10 + + LDSR_config: + sampling_steps: 50 + preDownScale: 1 + postDownScale: 1 + downsample_method: "Lanczos" default_sampler: "k_euler" separate_prompts: False @@ -114,6 +114,7 @@ txt2img: use_RealESRGAN: False use_LDSR: False RealESRGAN_model: "RealESRGAN_x4plus" + use_upscaling: False variant_amount: value: 0.0 @@ -126,7 +127,7 @@ txt2img: txt2vid: default_model: "CompVis/stable-diffusion-v1-4" - custom_models_list: ["CompVis/stable-diffusion-v1-4", "hakurei/waifu-diffusion"] + custom_models_list: ["CompVis/stable-diffusion-v1-4"] prompt: width: value: 512 @@ -148,15 +149,9 @@ txt2vid: batch_count: value: 1 - min_value: 1 - max_value: 100 - step: 1 batch_size: value: 1 - min_value: 1 - max_value: 5 - step: 1 sampling_steps: value: 30 @@ -187,6 +182,7 @@ txt2vid: use_GFPGAN: False use_RealESRGAN: False RealESRGAN_model: "RealESRGAN_x4plus" + use_upscaling: False variant_amount: value: 0.0 min_value: 0.0 @@ -209,9 +205,15 @@ txt2vid: step: 0.0001 format: "%.5f" - beta_scheduler_type: "linear" + beta_scheduler_type: "scaled_linear" max_frames: 100 + LDSR_config: + sampling_steps: 50 + preDownScale: 1 + postDownScale: 1 + downsample_method: "Lanczos" + img2img: prompt: sampler_name: "k_euler" @@ -251,15 +253,9 @@ img2img: batch_count: value: 1 - min_value: 1 - max_value: 100 - step: 1 batch_size: value: 1 - min_value: 1 - max_value: 5 - step: 1 sampling_steps: value: 30 @@ -279,6 +275,12 @@ img2img: max_value: 500 step: 10 + LDSR_config: + sampling_steps: 50 + preDownScale: 1 + postDownScale: 1 + downsample_method: "Lanczos" + loopback: True random_seed_loopback: True separate_prompts: False @@ -292,6 +294,7 @@ img2img: use_GFPGAN: False use_RealESRGAN: False RealESRGAN_model: "RealESRGAN_x4plus" + use_upscaling: False variant_amount: 0.0 variant_seed: "" write_info_files: True @@ -299,6 +302,7 @@ img2img: img2txt: batch_size: 420 blip_image_eval_size: 512 + keep_all_models_loaded: False concepts_library: concepts_per_page: 12 @@ -307,10 +311,94 @@ gfpgan: strength: 100 textual_inversion: - pretrained_model_name_or_path: "models/ldm/stable-diffusion-v1-4" - tokenizer_name: "" + pretrained_model_name_or_path: "models/diffusers/stable-diffusion-v1-4" + tokenizer_name: "models/clip-vit-large-patch14" daisi_app: running_on_daisi_io: False - \ No newline at end of file + +model_manager: + models: + stable_diffusion: + model_name: "Stable Diffusion v1.4" + save_location: "./models/ldm/stable-diffusion-v1" + files: + model_ckpt: + file_name: "model.ckpt" + download_link: "https://www.googleapis.com/storage/v1/b/aai-blog-files/o/sd-v1-4.ckpt?alt=media" + + gfpgan: 
+ model_name: "GFPGAN" + save_location: "./models/gfpgan" + files: + gfpgan_1_4: + file_name: "GFPGANv1.4.pth" + download_link: "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth" + resnet_50: + file_name: "detection_Resnet50_Final.pth" + save_location: "./gfpgan/weights" + download_link: "https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth" + parsing_parsenet: + file_name: "parsing_parsenet.pth" + save_location: "./gfpgan/weights" + download_link: "https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth" + + + realesrgan: + model_name: "RealESRGAN" + save_location: "./models/realesrgan" + files: + x4plus: + file_name: "RealESRGAN_x4plus.pth" + download_link: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth" + x4plus_anime_6b: + file_name: "RealESRGAN_x4plus_anime_6B.pth" + download_link: "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth" + + + waifu_diffusion: + model_name: "Waifu Diffusion v1.2" + save_location: "./models/custom" + files: + waifu_diffusion: + file_name: "waifu-diffusion.ckpt" + download_link: "https://huggingface.co/crumb/pruned-waifu-diffusion/resolve/main/model-pruned.ckpt" + + + trinart_stable_diffusion: + model_name: "TrinArt Stable Diffusion v2" + save_location: "./models/custom" + files: + trinart: + file_name: "trinart.ckpt" + download_link: "https://huggingface.co/naclbit/trinart_stable_diffusion_v2/resolve/main/trinart2_step95000.ckpt" + + stable_diffusion_concept_library: + model_name: "Stable Diffusion Concept Library" + save_location: "./models/custom/sd-concepts-library/" + files: + concept_library: + file_name: "" + download_link: "https://github.com/sd-webui/sd-concepts-library" + + blip_model: + model_name: "Blip Model" + save_location: "./models/blip" + files: + blip: + file_name: "model__base_caption.pth" + download_link: "https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth" + + ldsr: + model_name: "Latent Diffusion Super Resolution (LDSR)" + save_location: "./models/ldsr" + files: + project_yaml: + file_name: "project.yaml" + download_link: "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1" + + ldsr_model: + file_name: "model.ckpt" + download_link: "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1" + \ No newline at end of file diff --git a/data/img2txt/flavors.txt b/data/img2txt/flavors.txt index 549cfa826..f5ede9c99 100644 --- a/data/img2txt/flavors.txt +++ b/data/img2txt/flavors.txt @@ -1,8 +1,3 @@ -#film -#myportfolio -#pixelart -#screenshotsaturday -#vfxfriday 1920s 1970s 1990s @@ -394,4 +389,4 @@ windows xp woodcut xbox 360 graphics y2k aesthetic -zbrush +zbrush \ No newline at end of file diff --git a/data/img2txt/mediums.txt b/data/img2txt/mediums.txt index 6e49a2cb1..87338c8aa 100644 --- a/data/img2txt/mediums.txt +++ b/data/img2txt/mediums.txt @@ -92,4 +92,4 @@ graffiti art lineart pixel art poster art -vector art +vector art \ No newline at end of file diff --git a/data/img2txt/movements.txt b/data/img2txt/movements.txt index 0a6c86206..2778487f0 100644 --- a/data/img2txt/movements.txt +++ b/data/img2txt/movements.txt @@ -197,4 +197,4 @@ verdadism video art viennese actionism visual art -vorticism +vorticism \ No newline at end of file diff --git a/docker-compose.amd.yml b/docker-compose.amd.yml deleted file mode 100644 index e46afde79..000000000 --- a/docker-compose.amd.yml +++ /dev/null @@ -1,8 
+0,0 @@ -services: - stable-diffusion: - build: - args: - PYTORCH_IMAGE: rocm/pytorch:rocm5.2.3_ubuntu20.04_py3.7_pytorch_1.12.1 - devices: - - /dev/dri - - /dev/kfd diff --git a/docker-compose.override.yml b/docker-compose.override.yml deleted file mode 100644 index be3483106..000000000 --- a/docker-compose.override.yml +++ /dev/null @@ -1,10 +0,0 @@ -# Nvidia specific config -version: '3.3' - -services: - stable-diffusion: - deploy: - resources: - reservations: - devices: - - capabilities: [ gpu ] diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index af247e06e..000000000 --- a/docker-compose.yml +++ /dev/null @@ -1,43 +0,0 @@ -# This file is part of stable-diffusion-webui (https://github.com/sd-webui/stable-diffusion-webui/). - -# Copyright 2022 sd-webui team. -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -version: '3.3' - -services: - stable-diffusion: - container_name: sd-webui - image: stable-diffusion-webui:dev - build: - context: . - dockerfile: Dockerfile - env_file: .env_docker - volumes: - - .:/sd - - ./outputs:/sd/outputs - - ./model_cache:/sd/model_cache - - ~/.huggingface/token:/root/.huggingface/token - - root_profile:/root - ports: - - '7860:7860' - - '8501:8501' - healthcheck: - test: curl --fail http://localhost:8501 --head || curl --fail http://localhost:7860 --head || echo 1 - interval: 30s - timeout: 1s - retries: 10 - -volumes: - root_profile: diff --git a/docker-reset.sh b/docker-reset.sh deleted file mode 100755 index dafacd900..000000000 --- a/docker-reset.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash -# This file is part of stable-diffusion-webui (https://github.com/sd-webui/stable-diffusion-webui/). - -# Copyright 2022 sd-webui team. -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . -# Use this script to reset your Docker-based Stable Diffusion environment -# This script will remove all cached files/models that are downloaded during your first startup - - -declare -a deletion_paths=("src" - "gfpgan" - "sd_webui.egg-info" - ".env_updated" # Check if still needed - ) - - -# TODO This should be improved to be safer -install_dir=$(pwd) - -echo $install_dir -read -p "Do you want to reset the above directory? 
(y/n) " -n 1 DIRCONFIRM -echo "" - -if [[ $DIRCONFIRM =~ ^[Yy]$ ]]; then - docker compose down - docker image rm stable-diffusion-webui:dev - docker volume rm stable-diffusion-webui_root_profile - - for path in "${deletion_paths[@]}" - do - echo "Removing files located at path: $install_dir/$path" - rm -rf $path - done -else - echo "Exited without reset" -fi diff --git a/docs/1.windows-installation.md b/docs/1.windows-installation.md index 05a980d38..20259a43c 100644 --- a/docs/1.windows-installation.md +++ b/docs/1.windows-installation.md @@ -107,20 +107,20 @@ There are three more models that we need to download in order to get the most ou ### GFPGAN 1. If you want to use GFPGAN to improve generated faces, you need to install it separately. -1. Download [GFPGANv1.3.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth) and [GFPGANv1.4.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth) and put it -into the `/stable-diffusion-webui/src/gfpgan/experiments/pretrained_models` directory. +1. Download [GFPGANv1.3.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth) and [GFPGANv1.4.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth) and put it into the `/stable-diffusion-webui/models/gfpgan` directory. + ### RealESRGAN 1. Download [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth) and [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth). -1. Put them into the `stable-diffusion-webui/src/realesrgan/experiments/pretrained_models` directory. +1. Put them into the `stable-diffusion-webui/models/realesrgan` directory. ### LDSR 1. Detailed instructions [here](https://github.com/Hafiidz/latent-diffusion). Brief instruction as follows. 1. Git clone [Hafiidz/latent-diffusion](https://github.com/Hafiidz/latent-diffusion) into your `/stable-diffusion-webui/src/` folder. -1. Run `/stable-diffusion-webui/src/latent-diffusion/download_model.bat` to automatically download and rename the models. -1. Wait until it is done and you can confirm by confirming two new files in `stable-diffusion-webui/src/latent-diffusion/experiments/pretrained_models/` +1. Run `/stable-diffusion-webui/models/ldsr/download_model.bat` to automatically download and rename the models. +1. Wait until it is done and you can confirm by confirming two new files in `stable-diffusion-webui/models/ldsr/` 1. _(Optional)_ If there are no files there, you can manually download **LDSR** [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [model last.cpkt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). -1. Rename last.ckpt to model.ckpt and place both under `stable-diffusion-webui/src/latent-diffusion/experiments/pretrained_models/`. +1. Rename last.ckpt to model.ckpt and place both under `stable-diffusion-webui/models/ldsr/`. 1. Refer to [here](https://github.com/sd-webui/stable-diffusion-webui/issues/488) for any issue. diff --git a/docs/5.gradio-interface.md b/docs/5.gradio-interface.md index 3bfdb6fca..6026ed325 100644 --- a/docs/5.gradio-interface.md +++ b/docs/5.gradio-interface.md @@ -92,6 +92,58 @@ The Gradio Image Lab is a central location to access image enhancers and upscale Please see the [Image Enhancers](6.image_enhancers.md) section to learn more about how to use these tools. 
+## Scene2Image +--- + +![](../images/gradio/gradio-s2i.png) + +Gradio Scene2Image allows you to define layers of images in a markdown-like syntax. + +> Would it be possible to have a layers system where we could do have +foreground, mid, and background objects which relate to one another and +share the style? So we could say generate a landscape, one another layer +generate a castle, and on another layer generate a crowd of people. + +You write a multi-line prompt that looks like markdown, where each section declares one layer. +It is hierarchical, so each layer can have its own child layers. +In the frontend you can find brief documentation for the syntax, examples, and a reference for the various arguments. +Here is a summary: + +Markdown headings, e.g. '# layer0', define layers. +The content of each section defines the arguments for image generation. +Arguments are defined by lines of the form 'arg:value' or 'arg=value'. + +Layers are hierarchical, i.e. each layer can contain more layers. +The number of '#' increases in the headings of child layers. +Child layers are blended together by their image masks, like layers in image editors. +By default alpha composition is used for blending. +Other blend modes from [ImageChops](https://pillow.readthedocs.io/en/stable/reference/ImageChops.html) can also be used. + +Sections with "prompt" and child layers invoke Image2Image; without child layers they invoke Text2Image. +The result of blending child layers will be the input for Image2Image. + +Without "prompt" they are just images, useful for mask selection, image composition, etc. +Images can be initialized with "color", resized with "resize", and their position specified with "pos". +Rotation and rotation center are "rotation" and "center". + +Masks can automatically be selected by color, by the color at pixels of the image, or by estimated depth. + +You can choose between two different depth estimation models; see the frontend reference for the argument names. +[Monocular depth estimation](https://huggingface.co/spaces/atsantiago/Monocular_Depth_Filter) can be selected as depth model `0`. +[MiDaS depth estimation](https://huggingface.co/spaces/pytorch/MiDaS), used by default, can be selected as depth model `1`. + +Depth estimation can be used for traditional 3d reconstruction. +Using `transform3d=True`, the pixels of an image can be rendered from another perspective or with a different field of view. +For this you specify the pose and field of view that correspond to the input image, and a desired output pose and field of view. +The poses describe the camera position and orientation as an x,y,z,rotate_x,rotate_y,rotate_z tuple, with angles describing rotations around the axes in degrees. +The camera coordinate system is the pinhole camera as described and pictured in the [OpenCV "Camera Calibration and 3D Reconstruction" documentation](https://docs.opencv.org/4.x/d9/d0c/group__calib3d.html). + +When the camera pose `transform3d_from_pose` at which the input image was taken is not specified, the camera pose `transform3d_to_pose` to which the image is to be transformed is given in terms of the input camera coordinate system: +Walking forwards one depth unit in the input image corresponds to a position `0,0,1`. +Walking to the right corresponds to `1,0,0`. +Going downwards is then `0,1,0`.
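+
+As a purely illustrative sketch (the layer names and argument values below are hypothetical, using only the arguments described above; see the in-frontend reference for the exact syntax your version supports), a prompt might look like:
+
+```
+# scene
+prompt: oil painting of a misty mountain valley at sunrise
+## castle
+prompt: a ruined castle on a rocky cliff
+pos: 128, 64
+## sky
+color: #30405a
+resize: 512, 512
+```
+
+Here `# scene` has a prompt and child layers, so the blended `## castle` and `## sky` layers become its Image2Image input; `## castle` has a prompt but no children, so it is generated with Text2Image; `## sky` has no prompt, so it is simply a solid-color image used for composition.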
+ + ## Gradio Optional Customizations --- diff --git a/docs/6.image_enhancers.md b/docs/6.image_enhancers.md index e92982e76..4563eb441 100644 --- a/docs/6.image_enhancers.md +++ b/docs/6.image_enhancers.md @@ -32,7 +32,7 @@ GFPGAN is designed to help restore faces in Stable Diffusion outputs. If you hav If you want to use GFPGAN to improve generated faces, you need to download the models for it seperately if you are on Windows or doing so manually on Linux. Download [GFPGANv1.3.pth](https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth) and put it -into the `/stable-diffusion-webui/src/gfpgan/experiments/pretrained_models` directory after you have setup the conda environment for the first time. +into the `/stable-diffusion-webui/models/gfpgan` directory after you have setup the conda environment for the first time. ## RealESRGAN --- @@ -42,7 +42,7 @@ RealESRGAN is a 4x upscaler built into both versions of the Web UI interface. It If you want to use RealESRGAN to upscale your images, you need to download the models for it seperately if you are on Windows or doing so manually on Linux. Download [RealESRGAN_x4plus.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth) and [RealESRGAN_x4plus_anime_6B.pth](https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth). -Put them into the `stable-diffusion-webui/src/realesrgan/experiments/pretrained_models` directory after you have setup the conda environment for the first time. +Put them into the `stable-diffusion-webui/models/realesrgan` directory after you have setup the conda environment for the first time. ## GoBig (Gradio only currently) --- @@ -57,7 +57,7 @@ To use GoBig, you will need to download the RealESRGAN models as directed above. LSDR is a 4X upscaler with high VRAM usage that uses a Latent Diffusion model to upscale the image. This will accentuate the details of an image, but won't change the composition. This might introduce sharpening, but it is great for textures or compositions with plenty of details. However, it is slower and will use more VRAM. If you want to use LSDR to upscale your images, you need to download the models for it seperately if you are on Windows or doing so manually on Linux. -Download the LDSR [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [ model last.cpkt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename `last.ckpt` to `model.ckpt` and place both in the `stable-diffusion-webui/src/latent-diffusion/experiments/pretrained_models` directory after you have setup the conda environment for the first time. +Download the LDSR [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [model last.ckpt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename `last.ckpt` to `model.ckpt` and place both in the `stable-diffusion-webui/models/ldsr` directory after you have setup the conda environment for the first time.
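+
+On Linux, for example, the two files could be fetched directly under their final names with something like the following (same URLs as above; paths are relative to the repository root):
+
+```bash
+mkdir -p models/ldsr
+wget -O models/ldsr/project.yaml "https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1"
+wget -O models/ldsr/model.ckpt "https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1"
+```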
## GoLatent (Gradio only currently) --- diff --git a/entrypoint.sh b/entrypoint.sh old mode 100755 new mode 100644 index c1d9043fb..61d5db872 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -45,85 +45,80 @@ mkdir -p $MODEL_DIR # download URL # sha256sum MODEL_FILES=( - 'model.ckpt models/ldm/stable-diffusion-v1 https://www.googleapis.com/storage/v1/b/aai-blog-files/o/sd-v1-4.ckpt?alt=media fe4efff1e174c627256e44ec2991ba279b3816e364b49f9be2abc0b3ff3f8556' - 'GFPGANv1.3.pth src/gfpgan/experiments/pretrained_models https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth c953a88f2727c85c3d9ae72e2bd4846bbaf59fe6972ad94130e23e7017524a70' - 'RealESRGAN_x4plus.pth src/realesrgan/experiments/pretrained_models https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth 4fa0d38905f75ac06eb49a7951b426670021be3018265fd191d2125df9d682f1' - 'RealESRGAN_x4plus_anime_6B.pth src/realesrgan/experiments/pretrained_models https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth f872d837d3c90ed2e05227bed711af5671a6fd1c9f7d7e91c911a61f155e99da' - 'project.yaml src/latent-diffusion/experiments/pretrained_models https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1 9d6ad53c5dafeb07200fb712db14b813b527edd262bc80ea136777bdb41be2ba' - 'model.ckpt src/latent-diffusion/experiments/pretrained_models https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1 c209caecac2f97b4bb8f4d726b70ac2ac9b35904b7fc99801e1f5e61f9210c13' - 'waifu-diffusion.ckpt models/custom https://huggingface.co/crumb/pruned-waifu-diffusion/resolve/main/model-pruned.ckpt 9b31355f90fea9933847175d4731a033f49f861395addc7e153f480551a24c25' - 'trinart.ckpt models/custom https://huggingface.co/naclbit/trinart_stable_diffusion_v2/resolve/main/trinart2_step95000.ckpt c1799d22a355ba25c9ceeb6e3c91fc61788c8e274b73508ae8a15877c5dbcf63' - 'model__base_caption.pth models/blip https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth 96ac8749bd0a568c274ebe302b3a3748ab9be614c737f3d8c529697139174086' - 'pytorch_model.bin models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin f1a17cdbe0f36fec524f5cafb1c261ea3bbbc13e346e0f74fc9eb0460dedd0d3' - 'config.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/config.json 8a09b467700c58138c29d53c605b34ebc69beaadd13274a8a2af8ad2c2f4032a' - 'merges.txt models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/merges.txt 9fd691f7c8039210e0fced15865466c65820d09b63988b0174bfe25de299051a' - 'preprocessor_config.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/preprocessor_config.json 910e70b3956ac9879ebc90b22fb3bc8a75b6a0677814500101a4c072bd7857bd' - 'special_tokens_map.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/special_tokens_map.json f8c0d6c39aee3f8431078ef6646567b0aba7f2246e9c54b8b99d55c22b707cbf' - 'tokenizer.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/tokenizer.json a83e0809aa4c3af7208b2df632a7a69668c6d48775b3c3fe4e1b1199d1f8b8f4' - 'tokenizer_config.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/tokenizer_config.json deef455e52fa5e8151e339add0582e4235f066009601360999d3a9cda83b1129' - 'vocab.json models/clip-vit-large-patch14 
https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/vocab.json 3f0c4f7d2086b61b38487075278ea9ed04edb53a03cbb045b86c27190fa8fb69' + 'model.ckpt models/ldm/stable-diffusion-v1 https://www.googleapis.com/storage/v1/b/aai-blog-files/o/sd-v1-4.ckpt?alt=media' + 'GFPGANv1.4.pth models/gfpgan https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth' + 'detection_Resnet50_Final.pth gfpgan/weights https://github.com/xinntao/facexlib/releases/download/v0.1.0/detection_Resnet50_Final.pth' + 'parsing_parsenet.pth gfpgan/weights https://github.com/xinntao/facexlib/releases/download/v0.2.2/parsing_parsenet.pth' + 'RealESRGAN_x4plus.pth models/realesrgan https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth' + 'RealESRGAN_x4plus_anime_6B.pth models/realesrgan https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth' + 'project.yaml models/ldsr https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1' + 'model.ckpt models/ldsr https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1' + 'waifu-diffusion.ckpt models/custom https://huggingface.co/crumb/pruned-waifu-diffusion/resolve/main/model-pruned.ckpt' + 'trinart.ckpt models/custom https://huggingface.co/naclbit/trinart_stable_diffusion_v2/resolve/main/trinart2_step95000.ckpt' + 'model__base_caption.pth models/blip https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth' + 'pytorch_model.bin models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/pytorch_model.bin' + 'config.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/config.json' + 'merges.txt models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/merges.txt' + 'preprocessor_config.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/preprocessor_config.json' + 'special_tokens_map.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/special_tokens_map.json' + 'tokenizer.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/tokenizer.json' + 'tokenizer_config.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/tokenizer_config.json' + 'vocab.json models/clip-vit-large-patch14 https://huggingface.co/openai/clip-vit-large-patch14/resolve/main/vocab.json' ) - -# Function to checks for valid hash for model files and download/replaces if invalid or does not exist -validateDownloadModel() { +downloadModel() { local file=$1 local path="${SCRIPT_DIR}/${2}" + local path_dir="${MODEL_DIR}/$2" local url=$3 - local hash=$4 - echo "checking ${file}..." - sha256sum --check --status <<< "${hash} ${MODEL_DIR}/${file}.${hash}" - if [[ $? == "1" ]]; then + if [[ ! -e "${MODEL_DIR}/$2/${file}" ]]; then echo "Downloading: ${url} please wait..." + mkdir -p ${MODEL_DIR}/$2 mkdir -p ${path} - wget --output-document=${MODEL_DIR}/${file}.${hash} --no-verbose --show-progress --progress=dot:giga ${url} - ln -sf ${MODEL_DIR}/${file}.${hash} ${path}/${file} + wget --output-document=${MODEL_DIR}/$2/${file} --no-verbose --show-progress --progress=dot:giga ${url} + ln -sf ${MODEL_DIR}/$2/${file} ${path}/${file} if [[ -e "${path}/${file}" ]]; then echo "saved ${file}" else - echo "error saving ${path}/${file}!" + echo "error saving ${MODEL_DIR}/$2/${file}!" 
exit 1 fi - else - if [[ ! -e ${path}/${file} || ! -L ${path}/${file} ]]; then - mkdir -p ${path} - ln -sf ${MODEL_DIR}/${file}.${hash} ${path}/${file} - echo -e "linked valid ${file}\n" - else - echo -e "${file} is valid!\n" - fi fi } +echo "Downloading model files..." +for models in "${MODEL_FILES[@]}"; do + model=($models) + if [[ ! -e ${model[1]}/${model[0]} || ! -L ${model[1]}/${model[0]} ]]; then + downloadModel ${model[0]} ${model[1]} ${model[2]} + fi +done -# Validate model files -if [ $VALIDATE_MODELS == "false" ]; then - echo "Skipping model file validation..." -else - echo "Validating model files..." - for models in "${MODEL_FILES[@]}"; do - model=($models) - if [[ ! -e ${model[1]}/${model[0]} || ! -L ${model[1]}/${model[0]} || -z $VALIDATE_MODELS || $VALIDATE_MODELS == "true" ]]; then - validateDownloadModel ${model[0]} ${model[1]} ${model[2]} ${model[3]} - fi - done - mkdir -p ${MODEL_DIR}/stable-diffusion-v1-4 - mkdir -p ${MODEL_DIR}/waifu-diffusion - - ln -fs ${SCRIPT_DIR}/models/clip-vit-large-patch14/ ${MODEL_DIR}/stable-diffusion-v1-4/tokenizer - ln -fs ${SCRIPT_DIR}/models/clip-vit-large-patch14/ ${MODEL_DIR}/waifu-diffusion/tokenizer -fi +# Create directory for diffusers models +mkdir -p ${MODEL_DIR}/diffusers/stable-diffusion-v1-4 +mkdir -p ${MODEL_DIR}/diffusers/waifu-diffusion +mkdir -p ${SCRIPT_DIR}/diffusers/stable-diffusion-v1-4 +mkdir -p ${SCRIPT_DIR}/diffusers/waifu-diffusion +# Link tokenizer to diffusers models +ln -fs ${SCRIPT_DIR}/models/clip-vit-large-patch14/ ${SCRIPT_DIR}/diffusers/stable-diffusion-v1-4/tokenizer +ln -fs ${SCRIPT_DIR}/models/clip-vit-large-patch14/ ${SCRIPT_DIR}/diffusers/waifu-diffusion/tokenizer if [[ -e "${MODEL_DIR}/sd-concepts-library" ]]; then + # concept library exists, update cd ${MODEL_DIR}/sd-concepts-library git pull else + # concept library does not exist, clone cd ${MODEL_DIR} - git clone https://github.com/sd-webui/sd-concepts-library + git clone https://github.com/sd-webui/sd-concepts-library.git fi +# create directory and link concepts library mkdir -p ${SCRIPT_DIR}/models/custom -ln -fs ${MODEL_DIR}/sd-concepts-library/sd-concepts-library ${SCRIPT_DIR}/models/custom +ln -fs ${MODEL_DIR}/sd-concepts-library/sd-concepts-library/ ${SCRIPT_DIR}/models/custom/sd-concepts-library + +mkdir -p ${SCRIPT_DIR}/user_data/outputs +ln -fs ${SCRIPT_DIR}/user_data/outputs/ ${SCRIPT_DIR}/outputs echo "export HF_HOME=${MODEL_DIR}" >> ~/.bashrc echo "export XDG_CACHE_HOME=${MODEL_DIR}" >> ~/.bashrc diff --git a/environment.yaml b/environment.yaml index ecae4c215..b966e4a37 100644 --- a/environment.yaml +++ b/environment.yaml @@ -28,12 +28,10 @@ dependencies: - pytorch=1.11.0 - scikit-image=0.19.2 - torchvision=0.12.0 - - loguru - pip: - -e . 
- -e git+https://github.com/CompVis/taming-transformers#egg=taming-transformers - -e git+https://github.com/openai/CLIP#egg=clip - - -e git+https://github.com/TencentARC/GFPGAN#egg=GFPGAN - -e git+https://github.com/hlky/k-diffusion-sd#egg=k_diffusion - -e git+https://github.com/devilismyfriend/latent-diffusion#egg=latent-diffusion - accelerate==0.12.0 @@ -45,18 +43,18 @@ dependencies: - ftfy==6.1.1 - fairscale==0.4.4 - gradio==3.1.6 - - hydralit==1.0.14 + - gfpgan==1.3.8 - hydralit_components==1.0.10 + - hydralit==1.0.14 - imageio-ffmpeg==0.4.2 - imageio==2.9.0 - kornia==0.6 + - loguru - omegaconf==2.1.1 - opencv-python-headless==4.6.0.66 - open-clip-torch==2.0.2 - pandas==1.4.3 - piexif==1.1.3 - - pycocotools==2.0.5 - - pycocoevalcap==1.2 - pudb==2019.2 - pynvml==11.4.1 - python-slugify>=6.1.2 @@ -78,4 +76,6 @@ dependencies: - transformers==4.19.2 - tensorflow==2.10.0 - tqdm==4.64.0 + - stqdm==0.0.4 + - wget diff --git a/horde_bridge.cmd b/horde_bridge.cmd new file mode 100644 index 000000000..d5923934b --- /dev/null +++ b/horde_bridge.cmd @@ -0,0 +1,106 @@ +@echo off +:: This file is part of stable-diffusion-webui (https://github.com/sd-webui/stable-diffusion-webui/). + +:: Copyright 2022 sd-webui team. +:: This program is free software: you can redistribute it and/or modify +:: it under the terms of the GNU Affero General Public License as published by +:: the Free Software Foundation, either version 3 of the License, or +:: (at your option) any later version. + +:: This program is distributed in the hope that it will be useful, +:: but WITHOUT ANY WARRANTY; without even the implied warranty of +:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +:: GNU Affero General Public License for more details. + +:: You should have received a copy of the GNU Affero General Public License +:: along with this program. If not, see . +:: Run all commands using this script's directory as the working directory +cd %~dp0 + +:: copy over the first line from environment.yaml, e.g. name: ldm, and take the second word after splitting by ":" delimiter +for /F "tokens=2 delims=: " %%i in (environment.yaml) DO ( + set v_conda_env_name=%%i + goto EOL +) +:EOL + +echo Environment name is set as %v_conda_env_name% as per environment.yaml + +:: Put the path to conda directory in a file called "custom-conda-path.txt" if it's installed at non-standard path +IF EXIST custom-conda-path.txt ( + FOR /F %%i IN (custom-conda-path.txt) DO set v_custom_path=%%i +) + +set v_paths=%ProgramData%\miniconda3 +set v_paths=%v_paths%;%USERPROFILE%\miniconda3 +set v_paths=%v_paths%;%ProgramData%\anaconda3 +set v_paths=%v_paths%;%USERPROFILE%\anaconda3 + +for %%a in (%v_paths%) do ( + IF NOT "%v_custom_path%"=="" ( + set v_paths=%v_custom_path%;%v_paths% + ) +) + +for %%a in (%v_paths%) do ( + if EXIST "%%a\Scripts\activate.bat" ( + SET v_conda_path=%%a + echo anaconda3/miniconda3 detected in %%a + goto :CONDA_FOUND + ) +) + +IF "%v_conda_path%"=="" ( + echo anaconda3/miniconda3 not found. Install from here https://docs.conda.io/en/latest/miniconda.html + pause + exit /b 1 +) + +:CONDA_FOUND +echo Stashing local changes and pulling latest update... +call git stash +call git pull +set /P restore="Do you want to restore changes you made before updating? (Y/N): " +IF /I "%restore%" == "N" ( + echo Removing changes please wait... + call git stash drop + echo Changes removed, press any key to continue... + pause >nul +) ELSE IF /I "%restore%" == "Y" ( + echo Restoring changes, please wait... 
+ call git stash pop --quiet + echo Changes restored, press any key to continue... + pause >nul +) +call "%v_conda_path%\Scripts\activate.bat" + +for /f "delims=" %%a in ('git log -1 --format^="%%H" -- environment.yaml') DO set v_cur_hash=%%a +set /p "v_last_hash="<"z_version_env.tmp" +echo %v_cur_hash%>z_version_env.tmp + +echo Current environment.yaml hash: %v_cur_hash% +echo Previous environment.yaml hash: %v_last_hash% + +if "%v_last_hash%" == "%v_cur_hash%" ( + echo environment.yaml unchanged. dependencies should be up to date. + echo if you still have unresolved dependencies, delete "z_version_env.tmp" +) else ( + echo environment.yaml changed. updating dependencies + call conda env create --name "%v_conda_env_name%" -f environment.yaml + call conda env update --name "%v_conda_env_name%" -f environment.yaml +) + + +call "%v_conda_path%\Scripts\activate.bat" "%v_conda_env_name%" + +:PROMPT +set SETUPTOOLS_USE_DISTUTILS=stdlib +IF EXIST "models\ldm\stable-diffusion-v1\model.ckpt" ( + set "PYTHONPATH=%~dp0" + python scripts\relauncher.py --bridge -v %* +) ELSE ( + echo Your model file does not exist! Place it in 'models\ldm\stable-diffusion-v1' with the name 'model.ckpt'. + pause +) + +::cmd /k diff --git a/horde_bridge.sh b/horde_bridge.sh new file mode 100644 index 000000000..e4c2bbdc0 --- /dev/null +++ b/horde_bridge.sh @@ -0,0 +1,169 @@ +#!/bin/bash -i +# This file is part of stable-diffusion-webui (https://github.com/sd-webui/stable-diffusion-webui/). + +# Copyright 2022 sd-webui team. +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# Start the Stable Diffusion WebUI for Linux Users + +DIRECTORY="." +ENV_FILE="environment.yaml" +ENV_NAME="ldm" +ENV_MODIFIED=$(date -r $ENV_FILE "+%s") +ENV_MODIFED_FILE=".env_updated" +ENV_UPDATED=0 + +# Models used for upscaling +GFPGAN_MODEL="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth" +LATENT_DIFFUSION_REPO="https://github.com/devilismyfriend/latent-diffusion.git" +LSDR_CONFIG="https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1" +LSDR_MODEL="https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1" +REALESRGAN_MODEL="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth" +REALESRGAN_ANIME_MODEL="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth" +SD_CONCEPT_REPO="https://github.com/sd-webui/sd-concepts-library/archive/refs/heads/main.zip" + + +if [[ -f $ENV_MODIFED_FILE ]]; then + ENV_MODIFIED_CACHED=$(<${ENV_MODIFED_FILE}) +else + ENV_MODIFIED_CACHED=0 +fi + +# Setup the Conda env for the project. This will also handle updating the env as needed too. 
+conda_env_setup () { + # Set conda path if it is not already in default environment + CUSTOM_CONDA_PATH= + + # Allow setting custom path via file to allow updates of this script without undoing custom path + if [ -f custom-conda-path.txt ]; then + CUSTOM_CONDA_PATH=$(cat custom-conda-path.txt) + fi + + # If custom path is set above, try to setup conda environment + if [ -f "${CUSTOM_CONDA_PATH}/etc/profile.d/conda.sh" ]; then + . "${CUSTOM_CONDA_PATH}/etc/profile.d/conda.sh" + elif [ -n "${CUSTOM_CONDA_PATH}" ] && [ -f "${CUSTOM_CONDA_PATH}/bin" ]; then + export PATH="${CUSTOM_CONDA_PATH}/bin:$PATH" + fi + + if ! command -v conda >/dev/null; then + printf "Anaconda3 not found. Install from here https://www.anaconda.com/products/distribution\n" + exit 1 + fi + + # Create/update conda env if needed + if ! conda env list | grep ".*${ENV_NAME}.*" >/dev/null 2>&1; then + printf "Could not find conda env: ${ENV_NAME} ... creating ... \n\n" + conda env create -f $ENV_FILE + ENV_UPDATED=1 + elif [[ ! -z $CONDA_FORCE_UPDATE && $CONDA_FORCE_UPDATE == "true" ]] || (( $ENV_MODIFIED > $ENV_MODIFIED_CACHED )); then + printf "Updating conda env: ${ENV_NAME} ...\n\n" + PIP_EXISTS_ACTION=w conda env update --file $ENV_FILE --prune + ENV_UPDATED=1 + fi + + # Clear artifacts from conda after create/update + if (( $ENV_UPDATED > 0 )); then + conda clean --all + echo -n $ENV_MODIFIED > $ENV_MODIFED_FILE + fi +} + +# Activate conda environment +conda_env_activation () { + conda activate $ENV_NAME + conda info | grep active +} + +# Check to see if the SD model already exists, if not then it creates it and prompts the user to add the SD AI models to the repo directory +sd_model_loading () { + if [ -f "$DIRECTORY/models/ldm/stable-diffusion-v1/model.ckpt" ]; then + printf "AI Model already in place. Continuing...\n\n" + else + printf "\n\n########## MOVE MODEL FILE ##########\n\n" + printf "Please download the 1.4 AI Model from Huggingface (or another source) and place it inside of the stable-diffusion-webui folder\n\n" + read -p "Once you have sd-v1-4.ckpt in the project root, Press Enter...\n\n" + + # Check to make sure checksum of models is the original one from HuggingFace and not a fake model set + printf "fe4efff1e174c627256e44ec2991ba279b3816e364b49f9be2abc0b3ff3f8556 sd-v1-4.ckpt" | sha256sum --check || exit 1 + mv sd-v1-4.ckpt $DIRECTORY/models/ldm/stable-diffusion-v1/model.ckpt + rm -r ./Models + fi +} + +# Checks to see if the upscaling models exist in their correct locations. If they do not they will be downloaded as required +post_processor_model_loading () { + # Check to see if GFPGAN has been added yet, if not it will download it and place it in the proper directory + if [ -f "$DIRECTORY/models/gfpgan/GFPGANv1.3.pth" ]; then + printf "GFPGAN already exists. Continuing...\n\n" + else + printf "Downloading GFPGAN model. Please wait...\n" + wget $GFPGAN_MODEL -P $DIRECTORY/models/gfpgan + fi + + # Check to see if realESRGAN has been added yet, if not it will download it and place it in the proper directory + if [ -f "$DIRECTORY/models/realesrgan/RealESRGAN_x4plus.pth" ]; then + printf "realESRGAN already exists. Continuing...\n\n" + else + printf "Downloading realESRGAN model. 
Please wait...\n" + wget $REALESRGAN_MODEL -P $DIRECTORY/models/realesrgan + wget $REALESRGAN_ANIME_MODEL -P $DIRECTORY/models/realesrgan + fi + + # Check to see if LDSR has been added yet, if not it will be cloned and its models downloaded to the correct directory + if [ -f "$DIRECTORY/models/ldsr/model.ckpt" ]; then + printf "LDSR already exists. Continuing...\n\n" + else + printf "Cloning LDSR and downloading model. Please wait...\n" + git clone $LATENT_DIFFUSION_REPO + mv latent-diffusion $DIRECTORY/models/ldsr + mkdir $DIRECTORY/models/ldsr/experiments + mkdir $DIRECTORY/models/ldsr + wget $LSDR_CONFIG -P $DIRECTORY/models/ldsr + mv $DIRECTORY/models/ldsr/index.html?dl=1 $DIRECTORY/models/ldsr/project.yaml + wget $LSDR_MODEL -P $DIRECTORY/models/ldsr + mv $DIRECTORY/models/ldsr/index.html?dl=1 $DIRECTORY/models/ldsr/model.ckpt + fi + + # Check to see if SD Concepts has been added yet, if not it will download it and place it in the proper directory + if [ -d "$DIRECTORY/models/custom/sd-concepts-library" ]; then + printf "SD Concepts Library already exists. Continuing...\n\n" + else + printf "Downloading and Extracting SD Concepts Library model. Please wait...\n" + mkdir $DIRECTORY/models/custom + wget $SD_CONCEPT_REPO + if ! command -v unzip &> /dev/null + then + printf "Warning: unzip could not be found. \nPlease install 'unzip' from your package manager and rerun this program.\n" + exit 1 + fi + unzip main.zip + mv sd-concepts-library-main/sd-concepts-library $DIRECTORY/models/custom + fi +} + +# Function to initialize the other functions +start_initialization () { + conda_env_setup + sd_model_loading + post_processor_model_loading + conda_env_activation + if [ ! -e "models/ldm/stable-diffusion-v1/model.ckpt" ]; then + echo "Your model file does not exist! Place it in 'models/ldm/stable-diffusion-v1' with the name 'model.ckpt'." 
+ exit 1 + fi + printf "\nStarting Stable Horde Bridg: Please Wait...\n"; python scripts/relauncher.py --bridge -v "$@"; break; + +} + +start_initialization "$@" \ No newline at end of file diff --git a/images/gradio/gradio-s2i.png b/images/gradio/gradio-s2i.png new file mode 100644 index 000000000..84dbab0d8 Binary files /dev/null and b/images/gradio/gradio-s2i.png differ diff --git a/ldm/data/utils.py b/ldm/data/utils.py deleted file mode 100644 index 628894844..000000000 --- a/ldm/data/utils.py +++ /dev/null @@ -1,112 +0,0 @@ -import re -import json -import os - -import torch -import torch.distributed as dist - -import utils - -def pre_caption(caption,max_words=50): - caption = re.sub( - r"([.!\"()*#:;~])", - ' ', - caption.lower(), - ) - caption = re.sub( - r"\s{2,}", - ' ', - caption, - ) - caption = caption.rstrip('\n') - caption = caption.strip(' ') - - #truncate caption - caption_words = caption.split(' ') - if len(caption_words)>max_words: - caption = ' '.join(caption_words[:max_words]) - - return caption - -def pre_question(question,max_ques_words=50): - question = re.sub( - r"([.!\"()*#:;~])", - '', - question.lower(), - ) - question = question.rstrip(' ') - - #truncate question - question_words = question.split(' ') - if len(question_words)>max_ques_words: - question = ' '.join(question_words[:max_ques_words]) - - return question - - -def save_result(result, result_dir, filename, remove_duplicate=''): - result_file = os.path.join(result_dir, '%s_rank%d.json'%(filename,utils.get_rank())) - final_result_file = os.path.join(result_dir, '%s.json'%filename) - - json.dump(result,open(result_file,'w')) - - dist.barrier() - - if utils.is_main_process(): - # combine results from all processes - result = [] - - for rank in range(utils.get_world_size()): - result_file = os.path.join(result_dir, '%s_rank%d.json'%(filename,rank)) - res = json.load(open(result_file,'r')) - result += res - - if remove_duplicate: - result_new = [] - id_list = [] - for res in result: - if res[remove_duplicate] not in id_list: - id_list.append(res[remove_duplicate]) - result_new.append(res) - result = result_new - - json.dump(result,open(final_result_file,'w')) - print('result file saved to %s'%final_result_file) - - return final_result_file - - - -from pycocotools.coco import COCO -from pycocoevalcap.eval import COCOEvalCap -from torchvision.datasets.utils import download_url - -def coco_caption_eval(coco_gt_root, results_file, split): - urls = {'val':'https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val_gt.json', - 'test':'https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_test_gt.json'} - filenames = {'val':'coco_karpathy_val_gt.json','test':'coco_karpathy_test_gt.json'} - - download_url(urls[split],coco_gt_root) - annotation_file = os.path.join(coco_gt_root,filenames[split]) - - # create coco object and coco_result object - coco = COCO(annotation_file) - coco_result = coco.loadRes(results_file) - - # create coco_eval object by taking coco and coco_result - coco_eval = COCOEvalCap(coco, coco_result) - - # evaluate on a subset of images by setting - # coco_eval.params['image_id'] = coco_result.getImgIds() - # please remove this line when evaluating the full validation set - # coco_eval.params['image_id'] = coco_result.getImgIds() - - # evaluate results - # SPICE will take a few minutes the first time, but speeds up due to caching - coco_eval.evaluate() - - # print output evaluation scores - for metric, score in coco_eval.eval.items(): - 
print(f'{metric}: {score:.3f}') - - return coco_eval \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 14ea78cbe..c4dc50ef3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,25 +1,11 @@ -# Core Stable Diffusion Dependencies - -# Minimum Environment Dependencies for Stable Diffusion -#torch # already satisfied as 1.12.1 from base image -#torchvision # already satisfied as 0.13.1 from base image -numpy==1.22.0 # already satisfied as 1.21.5 from base image - - -# Stable Diffusion (see: https://github.com/CompVis/stable-diffusion) -transformers==4.22.2 +transformers==4.19.2 # do not change diffusers==0.3.0 invisible-watermark==0.1.5 pytorch_lightning==1.7.7 - -# Additional Stable Diffusion Requirements -# TODO: Pin external dependency versions - -#opencv-python==4.6.0.66 # Opencv python already satisfied upstream -#opencv-python-headless==4.6.0.66 # Needed to operate opencv in headless/server mode - - +open-clip-torch +loguru taming-transformers-rom1504==0.0.6 # required by ldm +wget # See: https://github.com/CompVis/taming-transformers/issues/176 # -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers # required by ldm # Note: taming package needs to be installed with -e option @@ -54,8 +40,6 @@ hydralit_components==1.0.10 # Img2text ftfy==6.1.1 fairscale==0.4.4 -pycocotools==2.0.5 -pycocoevalcap==1.2 regex timm==0.6.7 tqdm==4.64.0 diff --git a/.env_docker.example b/runpod_entrypoint.sh similarity index 59% rename from .env_docker.example rename to runpod_entrypoint.sh index 37a6df1f7..943306aa0 100644 --- a/.env_docker.example +++ b/runpod_entrypoint.sh @@ -1,3 +1,4 @@ +#!/bin/bash # This file is part of stable-diffusion-webui (https://github.com/sd-webui/stable-diffusion-webui/). # Copyright 2022 sd-webui team. @@ -13,20 +14,31 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . - -# Validate the model files on every container restart -# (useful to set to false after you're sure the model files are already in place) -VALIDATE_MODELS=true - -# Automatically relaunch the webui on crashes -WEBUI_RELAUNCH=true - -# Which webui to launch -# WEBUI_SCRIPT=webui_streamlit.py -WEBUI_SCRIPT=webui.py - -# Pass cli arguments to webui.py e.g: -# WEBUI_ARGS=--optimized --extra-models-cpu --gpu=1 --esrgan-gpu=1 --gfpgan-gpu=1 -WEBUI_ARGS= - -STREAMLIT_SERVER_HEADLESS=true +# +# Starts the webserver inside the docker container +# + +# set -x + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +cd $SCRIPT_DIR +export PYTHONPATH=$SCRIPT_DIR + +if [[ $PUBLIC_KEY ]] +then + mkdir -p ~/.ssh + chmod 700 ~/.ssh + cd ~/.ssh + echo $PUBLIC_KEY >> authorized_keys + chmod 700 -R ~/.ssh + cd / + service ssh start + echo "SSH Service Started" +fi + +cd $SCRIPT_DIR +launch_command="streamlit run ${SCRIPT_DIR}/scripts/webui_streamlit.py" + +$launch_command + +sleep infinity diff --git a/scripts/ModelManager.py b/scripts/ModelManager.py index 493aecde2..8101ae79d 100644 --- a/scripts/ModelManager.py +++ b/scripts/ModelManager.py @@ -15,48 +15,80 @@ # along with this program. If not, see . # base webui import and utils. 
from sd_utils import * - # streamlit imports #other imports -import pandas as pd -from io import StringIO # Temp imports # end of imports #--------------------------------------------------------------------------------------------------------------- +def download_file(file_name, file_path, file_url): + if not os.path.exists(file_path): + os.makedirs(file_path) + + if not os.path.exists(os.path.join(file_path , file_name)): + print('Downloading ' + file_name + '...') + # TODO - add progress bar in streamlit + # download file with `requests`` + with requests.get(file_url, stream=True) as r: + r.raise_for_status() + with open(os.path.join(file_path, file_name), 'wb') as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + + else: + print(file_name + ' already exists.') + +def download_model(models, model_name): + """ Download all files from model_list[model_name] """ + for file in models[model_name]: + download_file(file['file_name'], file['file_path'], file['file_url']) + return + def layout(): #search = st.text_input(label="Search", placeholder="Type the name of the model you want to search for.", help="") - - csvString = f""" - ,Stable Diffusion v1.4 , ./models/ldm/stable-diffusion-v1 , https://huggingface.co/CompVis/stable-diffusion-v-1-4-original - ,GFPGAN v1.4 , ./models/gfpgan , https://github.com/TencentARC/GFPGAN/releases/download/v1.3.4/GFPGANv1.4.pth - ,RealESRGAN_x4plus , ./models/realesrgan , https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth - ,RealESRGAN_x4plus_anime_6B , ./models/realesrgan , https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth - ,Waifu Diffusion v1.2 , ./models/custom , https://huggingface.co/hakurei/waifu-diffusion - ,Waifu Diffusion v1.2 Pruned , ./models/custom , https://huggingface.co/crumb/pruned-waifu-diffusion - ,TrinArt Stable Diffusion v2 , ./models/custom , https://huggingface.co/naclbit/trinart_stable_diffusion_v2 - ,Stable Diffusion Concept Library , ./models/custom/sd-concepts-library , https://github.com/sd-webui/sd-concepts-library - ,Blip Model , ./models/blip , https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model*_base_caption.pth - """ + colms = st.columns((1, 3, 5, 5)) columns = ["№",'Model Name','Save Location','Download Link'] - - # Convert String into StringIO - csvStringIO = StringIO(csvString) - df = pd.read_csv(csvStringIO, sep=",", header=None, names=columns) + + models = st.session_state["defaults"].model_manager.models for col, field_name in zip(colms, columns): # table header col.write(field_name) - - for x, model_name in enumerate(df["Model Name"]): + + for x, model_name in enumerate(models): col1, col2, col3, col4 = st.columns((1, 3, 4, 6)) col1.write(x) # index - col2.write(df['Model Name'][x]) - col3.write(df['Save Location'][x]) - col4.write(df['Download Link'][x]) \ No newline at end of file + col2.write(models[model_name]['model_name']) + col3.write(models[model_name]['save_location']) + with col4: + files_exist = 0 + for file in models[model_name]['files']: + if "save_location" in models[model_name]['files'][file]: + os.path.exists(os.path.join(models[model_name]['files'][file]['save_location'] , models[model_name]['files'][file]['file_name'])) + files_exist += 1 + elif os.path.exists(os.path.join(models[model_name]['save_location'] , models[model_name]['files'][file]['file_name'])): + files_exist += 1 + files_needed = [] + for file in models[model_name]['files']: + if "save_location" in 
models[model_name]['files'][file]: + if not os.path.exists(os.path.join(models[model_name]['files'][file]['save_location'] , models[model_name]['files'][file]['file_name'])): + files_needed.append(file) + elif not os.path.exists(os.path.join(models[model_name]['save_location'] , models[model_name]['files'][file]['file_name'])): + files_needed.append(file) + if len(files_needed) > 0: + if st.button('Download', key=models[model_name]['model_name'], help='Download ' + models[model_name]['model_name']): + for file in files_needed: + if "save_location" in models[model_name]['files'][file]: + download_file(models[model_name]['files'][file]['file_name'], models[model_name]['files'][file]['save_location'], models[model_name]['files'][file]['download_link']) + else: + download_file(models[model_name]['files'][file]['file_name'], models[model_name]['save_location'], models[model_name]['files'][file]['download_link']) + else: + st.empty() + else: + st.write('✅') \ No newline at end of file diff --git a/scripts/Settings.py b/scripts/Settings.py index 4508bb15d..4aa9f0c22 100644 --- a/scripts/Settings.py +++ b/scripts/Settings.py @@ -12,7 +12,7 @@ # GNU Affero General Public License for more details. # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# along with this program. If not, see . # base webui import and utils. from sd_utils import * @@ -28,38 +28,38 @@ # end of imports #--------------------------------------------------------------------------------------------------------------- -def layout(): +def layout(): st.header("Settings") - + with st.form("Settings"): general_tab, txt2img_tab, img2img_tab, img2txt_tab, txt2vid_tab, image_processing, textual_inversion_tab, concepts_library_tab = st.tabs( ['General', "Text-To-Image", "Image-To-Image", "Image-To-Text", "Text-To-Video", "Image processing", "Textual Inversion", "Concepts Library"]) - + with general_tab: col1, col2, col3, col4, col5 = st.columns(5, gap='large') - + device_list = [] device_properties = [(i, torch.cuda.get_device_properties(i)) for i in range(torch.cuda.device_count())] for device in device_properties: id = device[0] name = device[1].name total_memory = device[1].total_memory - + device_list.append(f"{id}: {name} ({human_readable_size(total_memory, decimal_places=0)})") - - + + with col1: - st.title("General") + st.title("General") st.session_state['defaults'].general.gpu = int(st.selectbox("GPU", device_list, help=f"Select which GPU to use. Default: {device_list[0]}").split(":")[0]) - + st.session_state['defaults'].general.outdir = str(st.text_input("Output directory", value=st.session_state['defaults'].general.outdir, help="Relative directory on which the output images after a generation will be placed. 
Default: 'outputs'")) - - # If we have custom models available on the "models/custom" + + # If we have custom models available on the "models/custom" # folder then we show a menu to select which model we want to use, otherwise we use the main model for SD custom_models_available() - + if server_state["CustomModel_available"]: st.session_state.default_model = st.selectbox("Default Model:", server_state["custom_models"], index=server_state["custom_models"].index(st.session_state['defaults'].general.default_model), @@ -75,37 +75,38 @@ def layout(): The model name that will be shown here is the same as the name\ the file for the model has on said folder, it is recommended to give the .ckpt file a name that \ will make it easier for you to distinguish it from other models. Default: Stable Diffusion v1.4") - + st.session_state['defaults'].general.default_model_config = st.text_input("Default Model Config", value=st.session_state['defaults'].general.default_model_config, help="Default model config file for inference. Default: 'configs/stable-diffusion/v1-inference.yaml'") - + st.session_state['defaults'].general.default_model_path = st.text_input("Default Model Config", value=st.session_state['defaults'].general.default_model_path, - help="Default model path. Default: 'models/ldm/stable-diffusion-v1/model.ckpt'") - + help="Default model path. Default: 'models/ldm/stable-diffusion-v1/model.ckpt'") + st.session_state['defaults'].general.GFPGAN_dir = st.text_input("Default GFPGAN directory", value=st.session_state['defaults'].general.GFPGAN_dir, - help="Default GFPGAN directory. Default: './src/gfpgan'") - + help="Default GFPGAN directory. Default: './models/gfpgan'") + st.session_state['defaults'].general.RealESRGAN_dir = st.text_input("Default RealESRGAN directory", value=st.session_state['defaults'].general.RealESRGAN_dir, - help="Default GFPGAN directory. Default: './src/realesrgan'") - + help="Default GFPGAN directory. Default: './models/realesrgan'") + RealESRGAN_model_list = ["RealESRGAN_x4plus", "RealESRGAN_x4plus_anime_6B"] st.session_state['defaults'].general.RealESRGAN_model = st.selectbox("RealESRGAN model", RealESRGAN_model_list, index=RealESRGAN_model_list.index(st.session_state['defaults'].general.RealESRGAN_model), help="Default RealESRGAN model. Default: 'RealESRGAN_x4plus'") - + Upscaler_list = ["RealESRGAN", "LDSR"] + st.session_state['defaults'].general.upscaling_method = st.selectbox("Upscaler", Upscaler_list, index=Upscaler_list.index(st.session_state['defaults'].general.upscaling_method), help="Default upscaling method. Default: 'RealESRGAN'") with col2: - st.title("Performance") - + st.title("Performance") + st.session_state["defaults"].general.gfpgan_cpu = st.checkbox("GFPGAN - CPU", value=st.session_state['defaults'].general.gfpgan_cpu, help="Run GFPGAN on the cpu. Default: False") - + st.session_state["defaults"].general.esrgan_cpu = st.checkbox("ESRGAN - CPU", value=st.session_state['defaults'].general.esrgan_cpu, help="Run ESRGAN on the cpu. Default: False") - + st.session_state["defaults"].general.extra_models_cpu = st.checkbox("Extra Models - CPU", value=st.session_state['defaults'].general.extra_models_cpu, help="Run extra models (GFGPAN/ESRGAN) on cpu. Default: False") - + st.session_state["defaults"].general.extra_models_gpu = st.checkbox("Extra Models - GPU", value=st.session_state['defaults'].general.extra_models_gpu, help="Run extra models (GFGPAN/ESRGAN) on gpu. \ Check and save in order to be able to select the GPU that each model will use. 
Default: False") @@ -113,106 +114,106 @@ def layout(): st.session_state['defaults'].general.gfpgan_gpu = int(st.selectbox("GFGPAN GPU", device_list, index=st.session_state['defaults'].general.gfpgan_gpu, help=f"Select which GPU to use. Default: {device_list[st.session_state['defaults'].general.gfpgan_gpu]}", key="gfpgan_gpu").split(":")[0]) - + st.session_state["defaults"].general.esrgan_gpu = int(st.selectbox("ESRGAN - GPU", device_list, index=st.session_state['defaults'].general.esrgan_gpu, help=f"Select which GPU to use. Default: {device_list[st.session_state['defaults'].general.esrgan_gpu]}", key="esrgan_gpu").split(":")[0]) - + st.session_state["defaults"].general.no_half = st.checkbox("No Half", value=st.session_state['defaults'].general.no_half, help="DO NOT switch the model to 16-bit floats. Default: False") - + st.session_state["defaults"].general.use_float16 = st.checkbox("Use float16", value=st.session_state['defaults'].general.use_float16, help="Switch the model to 16-bit floats. Default: False") - + precision_list = ['full','autocast'] st.session_state["defaults"].general.precision = st.selectbox("Precision", precision_list, index=precision_list.index(st.session_state['defaults'].general.precision), help="Evaluates at this precision. Default: autocast") - + st.session_state["defaults"].general.optimized = st.checkbox("Optimized Mode", value=st.session_state['defaults'].general.optimized, help="Loads the model onto the device piecemeal instead of all at once to reduce VRAM usage\ at the cost of performance. Default: False") - + st.session_state["defaults"].general.optimized_turbo = st.checkbox("Optimized Turbo Mode", value=st.session_state['defaults'].general.optimized_turbo, help="Alternative optimization mode that does not save as much VRAM but \ runs siginificantly faster. Default: False") - + st.session_state["defaults"].general.optimized_config = st.text_input("Optimized Config", value=st.session_state['defaults'].general.optimized_config, help=f"Loads alternative optimized configuration for inference. \ Default: optimizedSD/v1-inference.yaml") - + st.session_state["defaults"].general.enable_attention_slicing = st.checkbox("Enable Attention Slicing", value=st.session_state['defaults'].general.enable_attention_slicing, help="Enable sliced attention computation. When this option is enabled, the attention module will \ split the input tensor in slices, to compute attention in several steps. This is useful to save some \ memory in exchange for a small speed decrease. Only works the txt2vid tab right now. Default: False") - + st.session_state["defaults"].general.enable_minimal_memory_usage = st.checkbox("Enable Minimal Memory Usage", value=st.session_state['defaults'].general.enable_minimal_memory_usage, help="Moves only unet to fp16 and to CUDA, while keepping lighter models on CPUs \ (Not properly implemented and currently not working, check this \ - link 'https://github.com/huggingface/diffusers/pull/537' for more information on it ). Default: False") - - st.session_state["defaults"].general.update_preview = st.checkbox("Update Preview Image", value=st.session_state['defaults'].general.update_preview, - help="Enables the preview image to be updated and shown to the user on the UI during the generation.\ - If checked, once you save the settings an option to specify the frequency at which the image is updated\ - in steps will be shown, this is helpful to reduce the negative effect this option has on performance. 
\ - Default: True") - if st.session_state["defaults"].general.update_preview: - st.session_state["defaults"].general.update_preview_frequency = int(st.text_input("Update Preview Frequency", value=st.session_state['defaults'].general.update_preview_frequency, - help="Specify the frequency at which the image is updated in steps, this is helpful to reduce the \ - negative effect updating the preview image has on performance. Default: 10")) - + link 'https://github.com/huggingface/diffusers/pull/537' for more information on it ). Default: False") + + #st.session_state["defaults"].general.update_preview = st.checkbox("Update Preview Image", value=st.session_state['defaults'].general.update_preview, + #help="Enables the preview image to be updated and shown to the user on the UI during the generation.\ + #If checked, once you save the settings an option to specify the frequency at which the image is updated\ + #in steps will be shown, this is helpful to reduce the negative effect this option has on performance. \ + #Default: True") + st.session_state["defaults"].general.update_preview = True + st.session_state["defaults"].general.update_preview_frequency = st.number_input("Update Preview Frequency", value=st.session_state['defaults'].general.update_preview_frequency, + help="Specify the frequency at which the image is updated in steps, this is helpful to reduce the \ + negative effect updating the preview image has on performance. Default: 10") + with col3: st.title("Others") st.session_state["defaults"].general.use_sd_concepts_library = st.checkbox("Use the Concepts Library", value=st.session_state['defaults'].general.use_sd_concepts_library, help="Use the embeds Concepts Library, if checked, once the settings are saved an option will\ - appear to specify the directory where the concepts are stored. Default: True)") - + appear to specify the directory where the concepts are stored. Default: True)") + if st.session_state["defaults"].general.use_sd_concepts_library: st.session_state['defaults'].general.sd_concepts_library_folder = st.text_input("Concepts Library Folder", value=st.session_state['defaults'].general.sd_concepts_library_folder, help="Relative folder on which the concepts library embeds are stored. \ - Default: 'models/custom/sd-concepts-library'") - + Default: 'models/custom/sd-concepts-library'") + st.session_state['defaults'].general.LDSR_dir = st.text_input("LDSR Folder", value=st.session_state['defaults'].general.LDSR_dir, - help="Folder where LDSR is located. Default: './src/latent-diffusion'") - + help="Folder where LDSR is located. Default: './models/ldsr'") + st.session_state["defaults"].general.save_metadata = st.checkbox("Save Metadata", value=st.session_state['defaults'].general.save_metadata, help="Save metadata on the output image. Default: True") save_format_list = ["png"] st.session_state["defaults"].general.save_format = st.selectbox("Save Format",save_format_list, index=save_format_list.index(st.session_state['defaults'].general.save_format), help="Format that will be used whens saving the output images. Default: 'png'") - + st.session_state["defaults"].general.skip_grid = st.checkbox("Skip Grid", value=st.session_state['defaults'].general.skip_grid, help="Skip saving the grid output image. Default: False") if not st.session_state["defaults"].general.skip_grid: st.session_state["defaults"].general.grid_format = st.text_input("Grid Format", value=st.session_state['defaults'].general.grid_format, help="Format for saving the grid output image. 
Default: 'jpg:95'") - + st.session_state["defaults"].general.skip_save = st.checkbox("Skip Save", value=st.session_state['defaults'].general.skip_save, help="Skip saving the output image. Default: False") - - st.session_state["defaults"].general.n_rows = int(st.text_input("Number of Grid Rows", value=st.session_state['defaults'].general.n_rows, - help="Number of rows the grid wil have when saving the grid output image. Default: '-1'")) - + + st.session_state["defaults"].general.n_rows = st.number_input("Number of Grid Rows", value=st.session_state['defaults'].general.n_rows, + help="Number of rows the grid wil have when saving the grid output image. Default: '-1'") + st.session_state["defaults"].general.no_verify_input = st.checkbox("Do not Verify Input", value=st.session_state['defaults'].general.no_verify_input, help="Do not verify input to check if it's too long. Default: False") - + st.session_state["defaults"].daisi_app.running_on_daisi_io = st.checkbox("Running on Daisi.io?", value=st.session_state['defaults'].daisi_app.running_on_daisi_io, help="Specify if we are running on app.Daisi.io . Default: False") - - - + + + with col4: st.title("Streamlit Config") - + st.session_state["defaults"].general.streamlit_telemetry = st.checkbox("Enable Telemetry", value=st.session_state['defaults'].general.streamlit_telemetry, help="Enables or Disables streamlit telemetry. Default: False") st.session_state["streamlit_config"]["browser"]["gatherUsageStats"] = st.session_state["defaults"].general.streamlit_telemetry - + default_theme_list = ["light", "dark"] st.session_state["defaults"].general.default_theme = st.selectbox("Default Theme", default_theme_list, index=default_theme_list.index(st.session_state['defaults'].general.default_theme), help="Defaut theme to use as base for streamlit. Default: dark") - st.session_state["streamlit_config"]["theme"]["base"] = st.session_state["defaults"].general.default_theme - + st.session_state["streamlit_config"]["theme"]["base"] = st.session_state["defaults"].general.default_theme + with col5: st.title("Huggingface") st.session_state["defaults"].general.huggingface_token = st.text_input("Huggingface Token", value=st.session_state['defaults'].general.huggingface_token, type="password", @@ -220,652 +221,562 @@ def layout(): is used on the Text To Video tab. This token will be saved to your user config file\ and WILL NOT be share with us or anyone. You can get your access token \ at https://huggingface.co/settings/tokens. Default: None") - + with txt2img_tab: - col1, col2, col3, col4, col5 = st.columns(5, gap='large') + col1, col2, col3, col4, col5 = st.columns(5, gap='medium') with col1: st.title("Slider Parameters") - + # Width - st.session_state["defaults"].txt2img.width.value = int(st.text_input("Default Image Width", value=st.session_state['defaults'].txt2img.width.value, - help="Set the default width for the generated image. Default is: 512")) - - st.session_state["defaults"].txt2img.width.min_value = int(st.text_input("Minimum Image Width", value=st.session_state['defaults'].txt2img.width.min_value, - help="Set the default minimum value for the width slider. Default is: 64")) - - st.session_state["defaults"].txt2img.width.max_value = int(st.text_input("Maximum Image Width", value=st.session_state['defaults'].txt2img.width.max_value, - help="Set the default maximum value for the width slider. 
Default is: 2048")) - + st.session_state["defaults"].txt2img.width.value = st.number_input("Default Image Width", value=st.session_state['defaults'].txt2img.width.value, + help="Set the default width for the generated image. Default is: 512") + + st.session_state["defaults"].txt2img.width.min_value = st.number_input("Minimum Image Width", value=st.session_state['defaults'].txt2img.width.min_value, + help="Set the default minimum value for the width slider. Default is: 64") + + st.session_state["defaults"].txt2img.width.max_value = st.number_input("Maximum Image Width", value=st.session_state['defaults'].txt2img.width.max_value, + help="Set the default maximum value for the width slider. Default is: 2048") + # Height - st.session_state["defaults"].txt2img.height.value = int(st.text_input("Default Image Height", value=st.session_state['defaults'].txt2img.height.value, - help="Set the default height for the generated image. Default is: 512")) - - st.session_state["defaults"].txt2img.height.min_value = int(st.text_input("Minimum Image Height", value=st.session_state['defaults'].txt2img.height.min_value, - help="Set the default minimum value for the height slider. Default is: 64")) - - st.session_state["defaults"].txt2img.height.max_value = int(st.text_input("Maximum Image Height", value=st.session_state['defaults'].txt2img.height.max_value, - help="Set the default maximum value for the height slider. Default is: 2048")) - + st.session_state["defaults"].txt2img.height.value = st.number_input("Default Image Height", value=st.session_state['defaults'].txt2img.height.value, + help="Set the default height for the generated image. Default is: 512") + + st.session_state["defaults"].txt2img.height.min_value = st.number_input("Minimum Image Height", value=st.session_state['defaults'].txt2img.height.min_value, + help="Set the default minimum value for the height slider. Default is: 64") + + st.session_state["defaults"].txt2img.height.max_value = st.number_input("Maximum Image Height", value=st.session_state['defaults'].txt2img.height.max_value, + help="Set the default maximum value for the height slider. Default is: 2048") + with col2: # CFG - st.session_state["defaults"].txt2img.cfg_scale.value = float(st.text_input("Default CFG Scale", value=st.session_state['defaults'].txt2img.cfg_scale.value, - help="Set the default value for the CFG Scale. Default is: 7.5")) - - st.session_state["defaults"].txt2img.cfg_scale.min_value = float(st.text_input("Minimum CFG Scale Value", value=st.session_state['defaults'].txt2img.cfg_scale.min_value, - help="Set the default minimum value for the CFG scale slider. Default is: 1")) - - st.session_state["defaults"].txt2img.cfg_scale.max_value = float(st.text_input("Maximum CFG Scale Value", + st.session_state["defaults"].txt2img.cfg_scale.value = st.number_input("Default CFG Scale", value=st.session_state['defaults'].txt2img.cfg_scale.value, + help="Set the default value for the CFG Scale. Default is: 7.5") + + st.session_state["defaults"].txt2img.cfg_scale.min_value = st.number_input("Minimum CFG Scale Value", value=st.session_state['defaults'].txt2img.cfg_scale.min_value, + help="Set the default minimum value for the CFG scale slider. Default is: 1") + + st.session_state["defaults"].txt2img.cfg_scale.max_value = st.number_input("Maximum CFG Scale Value", value=st.session_state['defaults'].txt2img.cfg_scale.max_value, - help="Set the default maximum value for the CFG scale slider. 
Default is: 30")) - - st.session_state["defaults"].txt2img.cfg_scale.step = float(st.text_input("CFG Slider Steps", value=st.session_state['defaults'].txt2img.cfg_scale.step, - help="Set the default value for the number of steps on the CFG scale slider. Default is: 0.5")) - + help="Set the default maximum value for the CFG scale slider. Default is: 30") + + st.session_state["defaults"].txt2img.cfg_scale.step = st.number_input("CFG Slider Steps", value=st.session_state['defaults'].txt2img.cfg_scale.step, + help="Set the default value for the number of steps on the CFG scale slider. Default is: 0.5") # Sampling Steps - st.session_state["defaults"].txt2img.sampling_steps.value = int(st.text_input("Default Sampling Steps", value=st.session_state['defaults'].txt2img.sampling_steps.value, - help="Set the default number of sampling steps to use. Default is: 30 (with k_euler)")) - - st.session_state["defaults"].txt2img.sampling_steps.min_value = int(st.text_input("Minimum Sampling Steps", - value=st.session_state['defaults'].txt2img.sampling_steps.min_value, - help="Set the default minimum value for the sampling steps slider. Default is: 1")) - - st.session_state["defaults"].txt2img.sampling_steps.max_value = int(st.text_input("Maximum Sampling Steps", - value=st.session_state['defaults'].txt2img.sampling_steps.max_value, - help="Set the default maximum value for the sampling steps slider. Default is: 250")) - - st.session_state["defaults"].txt2img.sampling_steps.step = int(st.text_input("Sampling Slider Steps", + st.session_state["defaults"].txt2img.sampling_steps.value = st.number_input("Default Sampling Steps", value=st.session_state['defaults'].txt2img.sampling_steps.value, + help="Set the default number of sampling steps to use. Default is: 30 (with k_euler)") + + st.session_state["defaults"].txt2img.sampling_steps.min_value = st.number_input("Minimum Sampling Steps", + value=st.session_state['defaults'].txt2img.sampling_steps.min_value, + help="Set the default minimum value for the sampling steps slider. Default is: 1") + + st.session_state["defaults"].txt2img.sampling_steps.step = st.text_input("Sampling Slider Steps", value=st.session_state['defaults'].txt2img.sampling_steps.step, - help="Set the default value for the number of steps on the sampling steps slider. Default is: 10")) - - with col3: - # Batch Count - st.session_state["defaults"].txt2img.batch_count.value = int(st.text_input("Default Batch Count", - value=st.session_state['defaults'].txt2img.batch_count.value, - help="Set the default batch count to use. Default is: 1")) - - st.session_state["defaults"].txt2img.batch_count.min_value = int(st.text_input("Minimum Batch Count", - value=st.session_state['defaults'].txt2img.batch_count.min_value, - help="Set the default minimum value for the batch count slider. Default is: 1")) - - st.session_state["defaults"].txt2img.batch_count.max_value = int(st.text_input("Maximum Batch Count", - value=st.session_state['defaults'].txt2img.batch_count.max_value, - help="Set the default maximum value for the batch count slider. Default is: 100")) - - st.session_state["defaults"].txt2img.batch_count.step = int(st.text_input("Batch Count Slider Steps", - value=st.session_state['defaults'].txt2img.batch_count.step, - help="Set the default value for the number of steps on the batch count slider. 
Default is: 10")) - - # Batch Size - st.session_state["defaults"].txt2img.batch_size.value = int(st.text_input("Default Batch Size", - value=st.session_state['defaults'].txt2img.batch_size.value, - help="Set the default batch size to use. Default is: 1")) - - st.session_state["defaults"].txt2img.batch_size.min_value = int(st.text_input("Minimum Batch Size", - value=st.session_state['defaults'].txt2img.batch_size.min_value, - help="Set the default minimum value for the batch size slider. Default is: 1")) - - st.session_state["defaults"].txt2img.batch_size.max_value = int(st.text_input("Maximum Batch Size", - value=st.session_state['defaults'].txt2img.batch_size.max_value, - help="Set the default maximum value for the batch size slider. Default is: 5")) - - st.session_state["defaults"].txt2img.batch_size.step = int(st.text_input("Batch Size Slider Steps", - value=st.session_state['defaults'].txt2img.batch_size.step, - help="Set the default value for the number of steps on the batch size slider. Default is: 1")) - - with col4: + help="Set the default value for the number of steps on the sampling steps slider. Default is: 10") + + with col3: st.title("General Parameters") - + + # Batch Count + st.session_state["defaults"].txt2img.batch_count.value = st.number_input("Batch count", value=st.session_state['defaults'].txt2img.batch_count.value, + help="How many iterations or batches of images to generate in total.") + + st.session_state["defaults"].txt2img.batch_size.value = st.number_input("Batch size", value=st.session_state.defaults.txt2img.batch_size.value, + help="How many images are at once in a batch.\ + It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it \ + takes to finish generation as more images are generated at once.\ + Default: 1") + default_sampler_list = ["k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a", "k_heun", "PLMS", "DDIM"] st.session_state["defaults"].txt2img.default_sampler = st.selectbox("Default Sampler", default_sampler_list, index=default_sampler_list.index(st.session_state['defaults'].txt2img.default_sampler), help="Defaut sampler to use for txt2img. Default: k_euler") - + st.session_state['defaults'].txt2img.seed = st.text_input("Default Seed", value=st.session_state['defaults'].txt2img.seed, help="Default seed.") - + + with col4: + st.session_state["defaults"].txt2img.separate_prompts = st.checkbox("Separate Prompts", value=st.session_state['defaults'].txt2img.separate_prompts, help="Separate Prompts. Default: False") - - st.session_state["defaults"].txt2img.normalize_prompt_weights = st.checkbox("Normalize Prompt Weights", + + st.session_state["defaults"].txt2img.normalize_prompt_weights = st.checkbox("Normalize Prompt Weights", value=st.session_state['defaults'].txt2img.normalize_prompt_weights, help="Choose to normalize prompt weights. Default: True") - + st.session_state["defaults"].txt2img.save_individual_images = st.checkbox("Save Individual Images", - value=st.session_state['defaults'].txt2img.save_individual_images, + value=st.session_state['defaults'].txt2img.save_individual_images, help="Choose to save individual images. Default: True") - - st.session_state["defaults"].txt2img.save_grid = st.checkbox("Save Grid Images", value=st.session_state['defaults'].txt2img.save_grid, + + st.session_state["defaults"].txt2img.save_grid = st.checkbox("Save Grid Images", value=st.session_state['defaults'].txt2img.save_grid, help="Choose to save the grid images. 
Default: True") st.session_state["defaults"].txt2img.group_by_prompt = st.checkbox("Group By Prompt", value=st.session_state['defaults'].txt2img.group_by_prompt, help="Choose to save images grouped by their prompt. Default: False") - - st.session_state["defaults"].txt2img.save_as_jpg = st.checkbox("Save As JPG", value=st.session_state['defaults'].txt2img.save_as_jpg, + + st.session_state["defaults"].txt2img.save_as_jpg = st.checkbox("Save As JPG", value=st.session_state['defaults'].txt2img.save_as_jpg, help="Choose to save images as jpegs. Default: False") - + st.session_state["defaults"].txt2img.write_info_files = st.checkbox("Write Info Files For Images", value=st.session_state['defaults'].txt2img.write_info_files, help="Choose to write the info files along with the generated images. Default: True") st.session_state["defaults"].txt2img.use_GFPGAN = st.checkbox("Use GFPGAN", value=st.session_state['defaults'].txt2img.use_GFPGAN, help="Choose to use GFPGAN. Default: False") - - st.session_state["defaults"].txt2img.use_RealESRGAN = st.checkbox("Use RealESRGAN", value=st.session_state['defaults'].txt2img.use_RealESRGAN, - help="Choose to use RealESRGAN. Default: False") - - st.session_state["defaults"].txt2img.update_preview = st.checkbox("Update Preview Image", value=st.session_state['defaults'].txt2img.update_preview, - help="Choose to update the preview image during generation. Default: True") - - st.session_state["defaults"].txt2img.update_preview_frequency = int(st.text_input("Preview Image Update Frequency", + + st.session_state["defaults"].txt2img.use_upscaling = st.checkbox("Use Upscaling", value=st.session_state['defaults'].txt2img.use_upscaling, + help="Choose to turn on upscaling by default. Default: False") + + st.session_state["defaults"].txt2img.update_preview = True + st.session_state["defaults"].txt2img.update_preview_frequency = st.number_input("Preview Image Update Frequency", value=st.session_state['defaults'].txt2img.update_preview_frequency, - help="Set the default value for the frrquency of the preview image updates. Default is: 10")) + help="Set the default value for the frrquency of the preview image updates. Default is: 10") with col5: st.title("Variation Parameters") - - st.session_state["defaults"].txt2img.variant_amount.value = float(st.text_input("Default Variation Amount", + + st.session_state["defaults"].txt2img.variant_amount.value = st.number_input("Default Variation Amount", value=st.session_state['defaults'].txt2img.variant_amount.value, - help="Set the default variation to use. Default is: 0.0")) - - st.session_state["defaults"].txt2img.variant_amount.min_value = float(st.text_input("Minimum Variation Amount", + help="Set the default variation to use. Default is: 0.0") + + st.session_state["defaults"].txt2img.variant_amount.min_value = st.number_input("Minimum Variation Amount", value=st.session_state['defaults'].txt2img.variant_amount.min_value, - help="Set the default minimum value for the variation slider. Default is: 0.0")) - - st.session_state["defaults"].txt2img.variant_amount.max_value = float(st.text_input("Maximum Variation Amount", + help="Set the default minimum value for the variation slider. Default is: 0.0") + + st.session_state["defaults"].txt2img.variant_amount.max_value = st.number_input("Maximum Variation Amount", value=st.session_state['defaults'].txt2img.variant_amount.max_value, - help="Set the default maximum value for the variation slider. 
Default is: 1.0")) - - st.session_state["defaults"].txt2img.variant_amount.step = float(st.text_input("Variation Slider Steps", + help="Set the default maximum value for the variation slider. Default is: 1.0") + + st.session_state["defaults"].txt2img.variant_amount.step = st.number_input("Variation Slider Steps", value=st.session_state['defaults'].txt2img.variant_amount.step, - help="Set the default value for the number of steps on the variation slider. Default is: 1")) - + help="Set the default value for the number of steps on the variation slider. Default is: 1") + st.session_state['defaults'].txt2img.variant_seed = st.text_input("Default Variation Seed", value=st.session_state['defaults'].txt2img.variant_seed, help="Default variation seed.") - + with img2img_tab: col1, col2, col3, col4, col5 = st.columns(5, gap='medium') - + with col1: st.title("Image Editing") - + # Denoising - st.session_state["defaults"].img2img.denoising_strength.value = float(st.text_input("Default Denoising Amount", - value=st.session_state['defaults'].img2img.denoising_strength.value, - help="Set the default denoising to use. Default is: 0.75")) - - st.session_state["defaults"].img2img.denoising_strength.min_value = float(st.text_input("Minimum Denoising Amount", - value=st.session_state['defaults'].img2img.denoising_strength.min_value, - help="Set the default minimum value for the denoising slider. Default is: 0.0")) - - st.session_state["defaults"].img2img.denoising_strength.max_value = float(st.text_input("Maximum Denoising Amount", + st.session_state["defaults"].img2img.denoising_strength.value = st.number_input("Default Denoising Amount", + value=st.session_state['defaults'].img2img.denoising_strength.value, + help="Set the default denoising to use. Default is: 0.75") + + st.session_state["defaults"].img2img.denoising_strength.min_value = st.number_input("Minimum Denoising Amount", + value=st.session_state['defaults'].img2img.denoising_strength.min_value, + help="Set the default minimum value for the denoising slider. Default is: 0.0") + + st.session_state["defaults"].img2img.denoising_strength.max_value = st.number_input("Maximum Denoising Amount", value=st.session_state['defaults'].img2img.denoising_strength.max_value, - help="Set the default maximum value for the denoising slider. Default is: 1.0")) - - st.session_state["defaults"].img2img.denoising_strength.step = float(st.text_input("Denoising Slider Steps", - value=st.session_state['defaults'].img2img.denoising_strength.step, - help="Set the default value for the number of steps on the denoising slider. Default is: 0.01")) - + help="Set the default maximum value for the denoising slider. Default is: 1.0") + + st.session_state["defaults"].img2img.denoising_strength.step = st.number_input("Denoising Slider Steps", + value=st.session_state['defaults'].img2img.denoising_strength.step, + help="Set the default value for the number of steps on the denoising slider. Default is: 0.01") + # Masking - st.session_state["defaults"].img2img.mask_mode = int(st.text_input("Default Mask Mode", value=st.session_state['defaults'].img2img.mask_mode, - help="Set the default mask mode to use. 0 = Keep Masked Area, 1 = Regenerate Masked Area. 
Default is: 0")) - - st.session_state["defaults"].img2img.mask_restore = st.checkbox("Default Mask Restore", value=st.session_state['defaults'].img2img.mask_restore, + st.session_state["defaults"].img2img.mask_mode = st.number_input("Default Mask Mode", value=st.session_state['defaults'].img2img.mask_mode, + help="Set the default mask mode to use. 0 = Keep Masked Area, 1 = Regenerate Masked Area. Default is: 0") + + st.session_state["defaults"].img2img.mask_restore = st.checkbox("Default Mask Restore", value=st.session_state['defaults'].img2img.mask_restore, help="Mask Restore. Default: False") - - st.session_state["defaults"].img2img.resize_mode = int(st.text_input("Default Resize Mode", value=st.session_state['defaults'].img2img.resize_mode, - help="Set the default resizing mode. 0 = Just Resize, 1 = Crop and Resize, 3 = Resize and Fill. Default is: 0")) + + st.session_state["defaults"].img2img.resize_mode = st.number_input("Default Resize Mode", value=st.session_state['defaults'].img2img.resize_mode, + help="Set the default resizing mode. 0 = Just Resize, 1 = Crop and Resize, 3 = Resize and Fill. Default is: 0") with col2: st.title("Slider Parameters") - + # Width - st.session_state["defaults"].img2img.width.value = int(st.text_input("Default Outputted Image Width", value=st.session_state['defaults'].img2img.width.value, - help="Set the default width for the generated image. Default is: 512")) - - st.session_state["defaults"].img2img.width.min_value = int(st.text_input("Minimum Outputted Image Width", value=st.session_state['defaults'].img2img.width.min_value, - help="Set the default minimum value for the width slider. Default is: 64")) - - st.session_state["defaults"].img2img.width.max_value = int(st.text_input("Maximum Outputted Image Width", value=st.session_state['defaults'].img2img.width.max_value, - help="Set the default maximum value for the width slider. Default is: 2048")) - + st.session_state["defaults"].img2img.width.value = st.number_input("Default Outputted Image Width", value=st.session_state['defaults'].img2img.width.value, + help="Set the default width for the generated image. Default is: 512") + + st.session_state["defaults"].img2img.width.min_value = st.number_input("Minimum Outputted Image Width", value=st.session_state['defaults'].img2img.width.min_value, + help="Set the default minimum value for the width slider. Default is: 64") + + st.session_state["defaults"].img2img.width.max_value = st.number_input("Maximum Outputted Image Width", value=st.session_state['defaults'].img2img.width.max_value, + help="Set the default maximum value for the width slider. Default is: 2048") + # Height - st.session_state["defaults"].img2img.height.value = int(st.text_input("Default Outputted Image Height", value=st.session_state['defaults'].img2img.height.value, - help="Set the default height for the generated image. Default is: 512")) - - st.session_state["defaults"].img2img.height.min_value = int(st.text_input("Minimum Outputted Image Height", value=st.session_state['defaults'].img2img.height.min_value, - help="Set the default minimum value for the height slider. Default is: 64")) - - st.session_state["defaults"].img2img.height.max_value = int(st.text_input("Maximum Outputted Image Height", value=st.session_state['defaults'].img2img.height.max_value, - help="Set the default maximum value for the height slider. 
Default is: 2048")) - + st.session_state["defaults"].img2img.height.value = st.number_input("Default Outputted Image Height", value=st.session_state['defaults'].img2img.height.value, + help="Set the default height for the generated image. Default is: 512") + + st.session_state["defaults"].img2img.height.min_value = st.number_input("Minimum Outputted Image Height", value=st.session_state['defaults'].img2img.height.min_value, + help="Set the default minimum value for the height slider. Default is: 64") + + st.session_state["defaults"].img2img.height.max_value = st.number_input("Maximum Outputted Image Height", value=st.session_state['defaults'].img2img.height.max_value, + help="Set the default maximum value for the height slider. Default is: 2048") + # CFG - st.session_state["defaults"].img2img.cfg_scale.value = float(st.text_input("Default Img2Img CFG Scale", value=st.session_state['defaults'].img2img.cfg_scale.value, - help="Set the default value for the CFG Scale. Default is: 7.5")) - - st.session_state["defaults"].img2img.cfg_scale.min_value = float(st.text_input("Minimum Img2Img CFG Scale Value", + st.session_state["defaults"].img2img.cfg_scale.value = st.number_input("Default Img2Img CFG Scale", value=st.session_state['defaults'].img2img.cfg_scale.value, + help="Set the default value for the CFG Scale. Default is: 7.5") + + st.session_state["defaults"].img2img.cfg_scale.min_value = st.number_input("Minimum Img2Img CFG Scale Value", value=st.session_state['defaults'].img2img.cfg_scale.min_value, - help="Set the default minimum value for the CFG scale slider. Default is: 1")) - - st.session_state["defaults"].img2img.cfg_scale.max_value = float(st.text_input("Maximum Img2Img CFG Scale Value", - value=st.session_state['defaults'].img2img.cfg_scale.max_value, - help="Set the default maximum value for the CFG scale slider. Default is: 30")) - + help="Set the default minimum value for the CFG scale slider. Default is: 1") + + st.session_state["defaults"].img2img.cfg_scale.max_value = st.number_input("Maximum Img2Img CFG Scale Value", + value=st.session_state['defaults'].img2img.cfg_scale.max_value, + help="Set the default maximum value for the CFG scale slider. Default is: 30") + with col3: - st.session_state["defaults"].img2img.cfg_scale.step = float(st.text_input("Img2Img CFG Slider Steps", - value=st.session_state['defaults'].img2img.cfg_scale.step, - help="Set the default value for the number of steps on the CFG scale slider. Default is: 0.5")) - + st.session_state["defaults"].img2img.cfg_scale.step = st.number_input("Img2Img CFG Slider Steps", + value=st.session_state['defaults'].img2img.cfg_scale.step, + help="Set the default value for the number of steps on the CFG scale slider. Default is: 0.5") + # Sampling Steps - st.session_state["defaults"].img2img.sampling_steps.value = int(st.text_input("Default Img2Img Sampling Steps", + st.session_state["defaults"].img2img.sampling_steps.value = st.number_input("Default Img2Img Sampling Steps", value=st.session_state['defaults'].img2img.sampling_steps.value, - help="Set the default number of sampling steps to use. Default is: 30 (with k_euler)")) - - st.session_state["defaults"].img2img.sampling_steps.min_value = int(st.text_input("Minimum Img2Img Sampling Steps", + help="Set the default number of sampling steps to use. 
Default is: 30 (with k_euler)") + + st.session_state["defaults"].img2img.sampling_steps.min_value = st.number_input("Minimum Img2Img Sampling Steps", value=st.session_state['defaults'].img2img.sampling_steps.min_value, - help="Set the default minimum value for the sampling steps slider. Default is: 1")) - - st.session_state["defaults"].img2img.sampling_steps.max_value = int(st.text_input("Maximum Img2Img Sampling Steps", - value=st.session_state['defaults'].img2img.sampling_steps.max_value, - help="Set the default maximum value for the sampling steps slider. Default is: 250")) - - st.session_state["defaults"].img2img.sampling_steps.step = int(st.text_input("Img2Img Sampling Slider Steps", - value=st.session_state['defaults'].img2img.sampling_steps.step, - help="Set the default value for the number of steps on the sampling steps slider. Default is: 10")) - + help="Set the default minimum value for the sampling steps slider. Default is: 1") + + + st.session_state["defaults"].img2img.sampling_steps.step = st.number_input("Img2Img Sampling Slider Steps", + value=st.session_state['defaults'].img2img.sampling_steps.step, + help="Set the default value for the number of steps on the sampling steps slider. Default is: 10") + # Batch Count - st.session_state["defaults"].img2img.batch_count.value = int(st.text_input("Default Img2Img Batch Count", - value=st.session_state['defaults'].img2img.batch_count.value, - help="Set the default batch count to use. Default is: 1")) - - st.session_state["defaults"].img2img.batch_count.min_value = int(st.text_input("Minimum Img2Img Batch Count", - value=st.session_state['defaults'].img2img.batch_count.min_value, - help="Set the default minimum value for the batch count slider. Default is: 1")) - - st.session_state["defaults"].img2img.batch_count.max_value = int(st.text_input("Maximum Img2Img Batch Count", - value=st.session_state['defaults'].img2img.batch_count.max_value, - help="Set the default maximum value for the batch count slider. Default is: 100")) - - st.session_state["defaults"].img2img.batch_count.step = int(st.text_input("Img2Img Batch Count Slider Steps", - value=st.session_state['defaults'].img2img.batch_count.step, - help="Set the default value for the number of steps on the batch count slider. Default is: 10")) - - # Batch Size - st.session_state["defaults"].img2img.batch_size.value = int(st.text_input("Default Img2Img Batch Size", - value=st.session_state['defaults'].img2img.batch_size.value, - help="Set the default batch size to use. Default is: 1")) - - st.session_state["defaults"].img2img.batch_size.min_value = int(st.text_input("Minimum Img2Img Batch Size", - value=st.session_state['defaults'].img2img.batch_size.min_value, - help="Set the default minimum value for the batch size slider. Default is: 1")) - - st.session_state["defaults"].img2img.batch_size.max_value = int(st.text_input("Maximum Img2Img Batch Size", - value=st.session_state['defaults'].img2img.batch_size.max_value, - help="Set the default maximum value for the batch size slider. Default is: 5")) - - st.session_state["defaults"].img2img.batch_size.step = int(st.text_input("Img2Img Batch Size Slider Steps", - value=st.session_state['defaults'].img2img.batch_size.step, - help="Set the default value for the number of steps on the batch size slider. 
Default is: 1")) + st.session_state["defaults"].img2img.batch_count.value = st.number_input("Img2img Batch count", value=st.session_state["defaults"].img2img.batch_count.value, + help="How many iterations or batches of images to generate in total.") + + st.session_state["defaults"].img2img.batch_size.value = st.number_input("Img2img Batch size", value=st.session_state["defaults"].img2img.batch_size.value, + help="How many images are at once in a batch.\ + It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it \ + takes to finish generation as more images are generated at once.\ + Default: 1") with col4: # Inference Steps - st.session_state["defaults"].img2img.num_inference_steps.value = int(st.text_input("Default Inference Steps", + st.session_state["defaults"].img2img.num_inference_steps.value = st.number_input("Default Inference Steps", value=st.session_state['defaults'].img2img.num_inference_steps.value, - help="Set the default number of inference steps to use. Default is: 200")) - - st.session_state["defaults"].img2img.num_inference_steps.min_value = int(st.text_input("Minimum Sampling Steps", + help="Set the default number of inference steps to use. Default is: 200") + + st.session_state["defaults"].img2img.num_inference_steps.min_value = st.number_input("Minimum Sampling Steps", value=st.session_state['defaults'].img2img.num_inference_steps.min_value, - help="Set the default minimum value for the inference steps slider. Default is: 10")) - - st.session_state["defaults"].img2img.num_inference_steps.max_value = int(st.text_input("Maximum Sampling Steps", + help="Set the default minimum value for the inference steps slider. Default is: 10") + + st.session_state["defaults"].img2img.num_inference_steps.max_value = st.number_input("Maximum Sampling Steps", value=st.session_state['defaults'].img2img.num_inference_steps.max_value, - help="Set the default maximum value for the inference steps slider. Default is: 500")) - - st.session_state["defaults"].img2img.num_inference_steps.step = int(st.text_input("Inference Slider Steps", + help="Set the default maximum value for the inference steps slider. Default is: 500") + + st.session_state["defaults"].img2img.num_inference_steps.step = st.number_input("Inference Slider Steps", value=st.session_state['defaults'].img2img.num_inference_steps.step, help="Set the default value for the number of steps on the inference steps slider.\ - Default is: 10")) - + Default is: 10") + # Find Noise Steps - st.session_state["defaults"].img2img.find_noise_steps.value = int(st.text_input("Default Find Noise Steps", + st.session_state["defaults"].img2img.find_noise_steps.value = st.number_input("Default Find Noise Steps", value=st.session_state['defaults'].img2img.find_noise_steps.value, - help="Set the default number of find noise steps to use. Default is: 100")) - - st.session_state["defaults"].img2img.find_noise_steps.min_value = int(st.text_input("Minimum Find Noise Steps", + help="Set the default number of find noise steps to use. Default is: 100") + + st.session_state["defaults"].img2img.find_noise_steps.min_value = st.number_input("Minimum Find Noise Steps", value=st.session_state['defaults'].img2img.find_noise_steps.min_value, - help="Set the default minimum value for the find noise steps slider. Default is: 0")) - - st.session_state["defaults"].img2img.find_noise_steps.max_value = int(st.text_input("Maximum Find Noise Steps", + help="Set the default minimum value for the find noise steps slider. 
Default is: 0") + + st.session_state["defaults"].img2img.find_noise_steps.max_value = st.number_input("Maximum Find Noise Steps", value=st.session_state['defaults'].img2img.find_noise_steps.max_value, - help="Set the default maximum value for the find noise steps slider. Default is: 500")) - - st.session_state["defaults"].img2img.find_noise_steps.step = int(st.text_input("Find Noise Slider Steps", + help="Set the default maximum value for the find noise steps slider. Default is: 500") + + st.session_state["defaults"].img2img.find_noise_steps.step = st.number_input("Find Noise Slider Steps", value=st.session_state['defaults'].img2img.find_noise_steps.step, help="Set the default value for the number of steps on the find noise steps slider. \ - Default is: 10")) - + Default is: 10") + with col5: st.title("General Parameters") - + default_sampler_list = ["k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a", "k_heun", "PLMS", "DDIM"] st.session_state["defaults"].img2img.sampler_name = st.selectbox("Default Img2Img Sampler", default_sampler_list, index=default_sampler_list.index(st.session_state['defaults'].img2img.sampler_name), help="Defaut sampler to use for img2img. Default: k_euler") - + st.session_state['defaults'].img2img.seed = st.text_input("Default Img2Img Seed", value=st.session_state['defaults'].img2img.seed, help="Default seed.") - + st.session_state["defaults"].img2img.separate_prompts = st.checkbox("Separate Img2Img Prompts", value=st.session_state['defaults'].img2img.separate_prompts, help="Separate Prompts. Default: False") - + st.session_state["defaults"].img2img.normalize_prompt_weights = st.checkbox("Normalize Img2Img Prompt Weights", - value=st.session_state['defaults'].img2img.normalize_prompt_weights, + value=st.session_state['defaults'].img2img.normalize_prompt_weights, help="Choose to normalize prompt weights. Default: True") - + st.session_state["defaults"].img2img.save_individual_images = st.checkbox("Save Individual Img2Img Images", value=st.session_state['defaults'].img2img.save_individual_images, help="Choose to save individual images. Default: True") - + st.session_state["defaults"].img2img.save_grid = st.checkbox("Save Img2Img Grid Images", value=st.session_state['defaults'].img2img.save_grid, help="Choose to save the grid images. Default: True") - st.session_state["defaults"].img2img.group_by_prompt = st.checkbox("Group By Img2Img Prompt", - value=st.session_state['defaults'].img2img.group_by_prompt, + st.session_state["defaults"].img2img.group_by_prompt = st.checkbox("Group By Img2Img Prompt", + value=st.session_state['defaults'].img2img.group_by_prompt, help="Choose to save images grouped by their prompt. Default: False") - + st.session_state["defaults"].img2img.save_as_jpg = st.checkbox("Save Img2Img As JPG", value=st.session_state['defaults'].img2img.save_as_jpg, help="Choose to save images as jpegs. Default: False") - - st.session_state["defaults"].img2img.write_info_files = st.checkbox("Write Info Files For Img2Img Images", + + st.session_state["defaults"].img2img.write_info_files = st.checkbox("Write Info Files For Img2Img Images", value=st.session_state['defaults'].img2img.write_info_files, help="Choose to write the info files along with the generated images. Default: True") st.session_state["defaults"].img2img.use_GFPGAN = st.checkbox("Img2Img Use GFPGAN", value=st.session_state['defaults'].img2img.use_GFPGAN, help="Choose to use GFPGAN. 
Default: False") - + st.session_state["defaults"].img2img.use_RealESRGAN = st.checkbox("Img2Img Use RealESRGAN", value=st.session_state['defaults'].img2img.use_RealESRGAN, help="Choose to use RealESRGAN. Default: False") - - st.session_state["defaults"].img2img.update_preview = st.checkbox("Update Img2Img Preview Image", value=st.session_state['defaults'].img2img.update_preview, - help="Choose to update the preview image during generation. Default: True") - - st.session_state["defaults"].img2img.update_preview_frequency = int(st.text_input("Img2Img Preview Image Update Frequency", - value=st.session_state['defaults'].img2img.update_preview_frequency, - help="Set the default value for the frrquency of the preview image updates. Default is: 10")) - + + st.session_state["defaults"].img2img.update_preview = True + st.session_state["defaults"].img2img.update_preview_frequency = st.number_input("Img2Img Preview Image Update Frequency", + value=st.session_state['defaults'].img2img.update_preview_frequency, + help="Set the default value for the frrquency of the preview image updates. Default is: 10") + st.title("Variation Parameters") - - st.session_state["defaults"].img2img.variant_amount = float(st.text_input("Default Img2Img Variation Amount", + + st.session_state["defaults"].img2img.variant_amount = st.number_input("Default Img2Img Variation Amount", value=st.session_state['defaults'].img2img.variant_amount, - help="Set the default variation to use. Default is: 0.0")) - + help="Set the default variation to use. Default is: 0.0") + # I THINK THESE ARE MISSING FROM THE CONFIG FILE - # st.session_state["defaults"].img2img.variant_amount.min_value = float(st.text_input("Minimum Img2Img Variation Amount", + # st.session_state["defaults"].img2img.variant_amount.min_value = st.number_input("Minimum Img2Img Variation Amount", # value=st.session_state['defaults'].img2img.variant_amount.min_value, help="Set the default minimum value for the variation slider. Default is: 0.0")) - - # st.session_state["defaults"].img2img.variant_amount.max_value = float(st.text_input("Maximum Img2Img Variation Amount", + + # st.session_state["defaults"].img2img.variant_amount.max_value = st.number_input("Maximum Img2Img Variation Amount", # value=st.session_state['defaults'].img2img.variant_amount.max_value, help="Set the default maximum value for the variation slider. Default is: 1.0")) - - # st.session_state["defaults"].img2img.variant_amount.step = float(st.text_input("Img2Img Variation Slider Steps", + + # st.session_state["defaults"].img2img.variant_amount.step = st.number_input("Img2Img Variation Slider Steps", # value=st.session_state['defaults'].img2img.variant_amount.step, help="Set the default value for the number of steps on the variation slider. Default is: 1")) - - st.session_state['defaults'].img2img.variant_seed = st.text_input("Default Img2Img Variation Seed", + + st.session_state['defaults'].img2img.variant_seed = st.text_input("Default Img2Img Variation Seed", value=st.session_state['defaults'].img2img.variant_seed, help="Default variation seed.") with img2txt_tab: - col1 = st.columns(1, gap="large") - + col1 = st.columns(1, gap="large") + st.title("Image-To-Text") - st.session_state["defaults"].img2txt.batch_size = int(st.text_input("Default Img2Txt Batch Size", value=st.session_state['defaults'].img2txt.batch_size, - help="Set the default batch size for Img2Txt. 
Default is: 420?")) + st.session_state["defaults"].img2txt.batch_size = st.number_input("Default Img2Txt Batch Size", value=st.session_state['defaults'].img2txt.batch_size, + help="Set the default batch size for Img2Txt. Default is: 420?") - st.session_state["defaults"].img2txt.blip_image_eval_size = int(st.text_input("Default Blip Image Size Evaluation", + st.session_state["defaults"].img2txt.blip_image_eval_size = st.number_input("Default Blip Image Size Evaluation", value=st.session_state['defaults'].img2txt.blip_image_eval_size, - help="Set the default value for the blip image evaluation size. Default is: 512")) - + help="Set the default value for the blip image evaluation size. Default is: 512") + with txt2vid_tab: col1, col2, col3, col4, col5 = st.columns(5, gap="medium") - + with col1: st.title("Slider Parameters") - + # Width - st.session_state["defaults"].txt2vid.width.value = int(st.text_input("Default txt2vid Image Width", - value=st.session_state['defaults'].txt2vid.width.value, - help="Set the default width for the generated image. Default is: 512")) - - st.session_state["defaults"].txt2vid.width.min_value = int(st.text_input("Minimum txt2vid Image Width", - value=st.session_state['defaults'].txt2vid.width.min_value, - help="Set the default minimum value for the width slider. Default is: 64")) - - st.session_state["defaults"].txt2vid.width.max_value = int(st.text_input("Maximum txt2vid Image Width", - value=st.session_state['defaults'].txt2vid.width.max_value, - help="Set the default maximum value for the width slider. Default is: 2048")) - + st.session_state["defaults"].txt2vid.width.value = st.number_input("Default txt2vid Image Width", + value=st.session_state['defaults'].txt2vid.width.value, + help="Set the default width for the generated image. Default is: 512") + + st.session_state["defaults"].txt2vid.width.min_value = st.number_input("Minimum txt2vid Image Width", + value=st.session_state['defaults'].txt2vid.width.min_value, + help="Set the default minimum value for the width slider. Default is: 64") + + st.session_state["defaults"].txt2vid.width.max_value = st.number_input("Maximum txt2vid Image Width", + value=st.session_state['defaults'].txt2vid.width.max_value, + help="Set the default maximum value for the width slider. Default is: 2048") + # Height - st.session_state["defaults"].txt2vid.height.value = int(st.text_input("Default txt2vid Image Height", - value=st.session_state['defaults'].txt2vid.height.value, - help="Set the default height for the generated image. Default is: 512")) - - st.session_state["defaults"].txt2vid.height.min_value = int(st.text_input("Minimum txt2vid Image Height", - value=st.session_state['defaults'].txt2vid.height.min_value, - help="Set the default minimum value for the height slider. Default is: 64")) - - st.session_state["defaults"].txt2vid.height.max_value = int(st.text_input("Maximum txt2vid Image Height", - value=st.session_state['defaults'].txt2vid.height.max_value, - help="Set the default maximum value for the height slider. Default is: 2048")) - + st.session_state["defaults"].txt2vid.height.value = st.number_input("Default txt2vid Image Height", + value=st.session_state['defaults'].txt2vid.height.value, + help="Set the default height for the generated image. Default is: 512") + + st.session_state["defaults"].txt2vid.height.min_value = st.number_input("Minimum txt2vid Image Height", + value=st.session_state['defaults'].txt2vid.height.min_value, + help="Set the default minimum value for the height slider. 
Default is: 64") + + st.session_state["defaults"].txt2vid.height.max_value = st.number_input("Maximum txt2vid Image Height", + value=st.session_state['defaults'].txt2vid.height.max_value, + help="Set the default maximum value for the height slider. Default is: 2048") + # CFG - st.session_state["defaults"].txt2vid.cfg_scale.value = float(st.text_input("Default txt2vid CFG Scale", + st.session_state["defaults"].txt2vid.cfg_scale.value = st.number_input("Default txt2vid CFG Scale", value=st.session_state['defaults'].txt2vid.cfg_scale.value, - help="Set the default value for the CFG Scale. Default is: 7.5")) - - st.session_state["defaults"].txt2vid.cfg_scale.min_value = float(st.text_input("Minimum txt2vid CFG Scale Value", - value=st.session_state['defaults'].txt2vid.cfg_scale.min_value, - help="Set the default minimum value for the CFG scale slider. Default is: 1")) - - st.session_state["defaults"].txt2vid.cfg_scale.max_value = float(st.text_input("Maximum txt2vid CFG Scale Value", - value=st.session_state['defaults'].txt2vid.cfg_scale.max_value, - help="Set the default maximum value for the CFG scale slider. Default is: 30")) - - st.session_state["defaults"].txt2vid.cfg_scale.step = float(st.text_input("txt2vid CFG Slider Steps", + help="Set the default value for the CFG Scale. Default is: 7.5") + + st.session_state["defaults"].txt2vid.cfg_scale.min_value = st.number_input("Minimum txt2vid CFG Scale Value", + value=st.session_state['defaults'].txt2vid.cfg_scale.min_value, + help="Set the default minimum value for the CFG scale slider. Default is: 1") + + st.session_state["defaults"].txt2vid.cfg_scale.max_value = st.number_input("Maximum txt2vid CFG Scale Value", + value=st.session_state['defaults'].txt2vid.cfg_scale.max_value, + help="Set the default maximum value for the CFG scale slider. Default is: 30") + + st.session_state["defaults"].txt2vid.cfg_scale.step = st.number_input("txt2vid CFG Slider Steps", value=st.session_state['defaults'].txt2vid.cfg_scale.step, - help="Set the default value for the number of steps on the CFG scale slider. Default is: 0.5")) - + help="Set the default value for the number of steps on the CFG scale slider. Default is: 0.5") + with col2: # Sampling Steps - st.session_state["defaults"].txt2vid.sampling_steps.value = int(st.text_input("Default txt2vid Sampling Steps", - value=st.session_state['defaults'].txt2vid.sampling_steps.value, - help="Set the default number of sampling steps to use. Default is: 30 (with k_euler)")) - - st.session_state["defaults"].txt2vid.sampling_steps.min_value = int(st.text_input("Minimum txt2vid Sampling Steps", + st.session_state["defaults"].txt2vid.sampling_steps.value = st.number_input("Default txt2vid Sampling Steps", + value=st.session_state['defaults'].txt2vid.sampling_steps.value, + help="Set the default number of sampling steps to use. Default is: 30 (with k_euler)") + + st.session_state["defaults"].txt2vid.sampling_steps.min_value = st.number_input("Minimum txt2vid Sampling Steps", value=st.session_state['defaults'].txt2vid.sampling_steps.min_value, - help="Set the default minimum value for the sampling steps slider. Default is: 1")) - - st.session_state["defaults"].txt2vid.sampling_steps.max_value = int(st.text_input("Maximum txt2vid Sampling Steps", - value=st.session_state['defaults'].txt2vid.sampling_steps.max_value, - help="Set the default maximum value for the sampling steps slider. 
Default is: 250")) - - st.session_state["defaults"].txt2vid.sampling_steps.step = int(st.text_input("txt2vid Sampling Slider Steps", - value=st.session_state['defaults'].txt2vid.sampling_steps.step, - help="Set the default value for the number of steps on the sampling steps slider. Default is: 10")) - + help="Set the default minimum value for the sampling steps slider. Default is: 1") + + st.session_state["defaults"].txt2vid.sampling_steps.step = st.number_input("txt2vid Sampling Slider Steps", + value=st.session_state['defaults'].txt2vid.sampling_steps.step, + help="Set the default value for the number of steps on the sampling steps slider. Default is: 10") + # Batch Count - st.session_state["defaults"].txt2vid.batch_count.value = int(st.text_input("Default txt2vid Batch Count", - value=st.session_state['defaults'].txt2vid.batch_count.value, - help="Set the default batch count to use. Default is: 1")) - - st.session_state["defaults"].txt2vid.batch_count.min_value = int(st.text_input("Minimum txt2vid Batch Count", - value=st.session_state['defaults'].img2img.batch_count.min_value, - help="Set the default minimum value for the batch count slider. Default is: 1")) - - st.session_state["defaults"].img2img.batch_count.max_value = int(st.text_input("Maximum txt2vid Batch Count", - value=st.session_state['defaults'].txt2vid.batch_count.max_value, - help="Set the default maximum value for the batch count slider. Default is: 100")) - - st.session_state["defaults"].txt2vid.batch_count.step = int(st.text_input("txt2vid Batch Count Slider Steps", - value=st.session_state['defaults'].txt2vid.batch_count.step, - help="Set the default value for the number of steps on the batch count slider. Default is: 10")) - - # Batch Size - st.session_state["defaults"].txt2vid.batch_size.value = int(st.text_input("Default txt2vid Batch Size", - value=st.session_state['defaults'].txt2vid.batch_size.value, - help="Set the default batch size to use. Default is: 1")) - - st.session_state["defaults"].txt2vid.batch_size.min_value = int(st.text_input("Minimum txt2vid Batch Size", - value=st.session_state['defaults'].txt2vid.batch_size.min_value, - help="Set the default minimum value for the batch size slider. Default is: 1")) - - st.session_state["defaults"].txt2vid.batch_size.max_value = int(st.text_input("Maximum txt2vid Batch Size", - value=st.session_state['defaults'].txt2vid.batch_size.max_value, - help="Set the default maximum value for the batch size slider. Default is: 5")) - - st.session_state["defaults"].txt2vid.batch_size.step = int(st.text_input("txt2vid Batch Size Slider Steps", - value=st.session_state['defaults'].txt2vid.batch_size.step, - help="Set the default value for the number of steps on the batch size slider. 
Default is: 1")) - + st.session_state["defaults"].txt2vid.batch_count.value = st.number_input("txt2vid Batch count", value=st.session_state['defaults'].txt2vid.batch_count.value, + help="How many iterations or batches of images to generate in total.") + + st.session_state["defaults"].txt2vid.batch_size.value = st.number_input("txt2vid Batch size", value=st.session_state.defaults.txt2vid.batch_size.value, + help="How many images are at once in a batch.\ + It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it \ + takes to finish generation as more images are generated at once.\ + Default: 1") + # Inference Steps - st.session_state["defaults"].txt2vid.num_inference_steps.value = int(st.text_input("Default Txt2Vid Inference Steps", - value=st.session_state['defaults'].txt2vid.num_inference_steps.value, - help="Set the default number of inference steps to use. Default is: 200")) - - st.session_state["defaults"].txt2vid.num_inference_steps.min_value = int(st.text_input("Minimum Txt2Vid Sampling Steps", + st.session_state["defaults"].txt2vid.num_inference_steps.value = st.number_input("Default Txt2Vid Inference Steps", + value=st.session_state['defaults'].txt2vid.num_inference_steps.value, + help="Set the default number of inference steps to use. Default is: 200") + + st.session_state["defaults"].txt2vid.num_inference_steps.min_value = st.number_input("Minimum Txt2Vid Sampling Steps", value=st.session_state['defaults'].txt2vid.num_inference_steps.min_value, - help="Set the default minimum value for the inference steps slider. Default is: 10")) - - st.session_state["defaults"].txt2vid.num_inference_steps.max_value = int(st.text_input("Maximum Txt2Vid Sampling Steps", + help="Set the default minimum value for the inference steps slider. Default is: 10") + + st.session_state["defaults"].txt2vid.num_inference_steps.max_value = st.number_input("Maximum Txt2Vid Sampling Steps", value=st.session_state['defaults'].txt2vid.num_inference_steps.max_value, - help="Set the default maximum value for the inference steps slider. Default is: 500")) - - st.session_state["defaults"].txt2vid.num_inference_steps.step = int(st.text_input("Txt2Vid Inference Slider Steps", - value=st.session_state['defaults'].txt2vid.num_inference_steps.step, - help="Set the default value for the number of steps on the inference steps slider. Default is: 10")) - + help="Set the default maximum value for the inference steps slider. Default is: 500") + st.session_state["defaults"].txt2vid.num_inference_steps.step = st.number_input("Txt2Vid Inference Slider Steps", + value=st.session_state['defaults'].txt2vid.num_inference_steps.step, + help="Set the default value for the number of steps on the inference steps slider. 
Default is: 10") + with col3: st.title("General Parameters") - - st.session_state['defaults'].txt2vid.default_model = st.text_input("Default Txt2Vid Model", value=st.session_state['defaults'].txt2vid.default_model, + + st.session_state['defaults'].txt2vid.default_model = st.text_input("Default Txt2Vid Model", value=st.session_state['defaults'].txt2vid.default_model, help="Default: CompVis/stable-diffusion-v1-4") - + # INSERT CUSTOM_MODELS_LIST HERE - + default_sampler_list = ["k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a", "k_heun", "PLMS", "DDIM"] - st.session_state["defaults"].txt2vid.default_sampler = st.selectbox("Default txt2vid Sampler", default_sampler_list, + st.session_state["defaults"].txt2vid.default_sampler = st.selectbox("Default txt2vid Sampler", default_sampler_list, index=default_sampler_list.index(st.session_state['defaults'].txt2vid.default_sampler), help="Defaut sampler to use for txt2vid. Default: k_euler") - + st.session_state['defaults'].txt2vid.seed = st.text_input("Default txt2vid Seed", value=st.session_state['defaults'].txt2vid.seed, help="Default seed.") - - st.session_state['defaults'].txt2vid.scheduler_name = st.text_input("Default Txt2Vid Scheduler", + + st.session_state['defaults'].txt2vid.scheduler_name = st.text_input("Default Txt2Vid Scheduler", value=st.session_state['defaults'].txt2vid.scheduler_name, help="Default scheduler.") - + st.session_state["defaults"].txt2vid.separate_prompts = st.checkbox("Separate txt2vid Prompts", value=st.session_state['defaults'].txt2vid.separate_prompts, help="Separate Prompts. Default: False") - - st.session_state["defaults"].txt2vid.normalize_prompt_weights = st.checkbox("Normalize txt2vid Prompt Weights", - value=st.session_state['defaults'].txt2vid.normalize_prompt_weights, + + st.session_state["defaults"].txt2vid.normalize_prompt_weights = st.checkbox("Normalize txt2vid Prompt Weights", + value=st.session_state['defaults'].txt2vid.normalize_prompt_weights, help="Choose to normalize prompt weights. Default: True") - - st.session_state["defaults"].txt2vid.save_individual_images = st.checkbox("Save Individual txt2vid Images", - value=st.session_state['defaults'].txt2vid.save_individual_images, + + st.session_state["defaults"].txt2vid.save_individual_images = st.checkbox("Save Individual txt2vid Images", + value=st.session_state['defaults'].txt2vid.save_individual_images, help="Choose to save individual images. Default: True") - + st.session_state["defaults"].txt2vid.save_video = st.checkbox("Save Txt2Vid Video", value=st.session_state['defaults'].txt2vid.save_video, help="Choose to save the Txt2Vid video. Default: True") - - st.session_state["defaults"].txt2vid.group_by_prompt = st.checkbox("Group By txt2vid Prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt, + + st.session_state["defaults"].txt2vid.group_by_prompt = st.checkbox("Group By txt2vid Prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt, help="Choose to save images grouped by their prompt. Default: False") - + st.session_state["defaults"].txt2vid.save_as_jpg = st.checkbox("Save txt2vid As JPG", value=st.session_state['defaults'].txt2vid.save_as_jpg, help="Choose to save images as jpegs. Default: False") - + # Need more info for the Help dialog... 
- st.session_state["defaults"].txt2vid.do_loop = st.checkbox("Loop Generations", value=st.session_state['defaults'].txt2vid.do_loop, + st.session_state["defaults"].txt2vid.do_loop = st.checkbox("Loop Generations", value=st.session_state['defaults'].txt2vid.do_loop, help="Choose to loop or something, IDK.... Default: False") - - st.session_state["defaults"].txt2vid.max_frames = int(st.text_input("Txt2Vid Max Video Frames", value=st.session_state['defaults'].txt2vid.max_frames, - help="Set the default value for the number of video frames generated. Default is: 100")) - - st.session_state["defaults"].txt2vid.write_info_files = st.checkbox("Write Info Files For txt2vid Images", value=st.session_state['defaults'].txt2vid.write_info_files, + + st.session_state["defaults"].txt2vid.max_frames = st.number_input("Txt2Vid Max Video Frames", value=st.session_state['defaults'].txt2vid.max_frames, + help="Set the default value for the number of video frames generated. Default is: 100") + + st.session_state["defaults"].txt2vid.write_info_files = st.checkbox("Write Info Files For txt2vid Images", value=st.session_state['defaults'].txt2vid.write_info_files, help="Choose to write the info files along with the generated images. Default: True") st.session_state["defaults"].txt2vid.use_GFPGAN = st.checkbox("txt2vid Use GFPGAN", value=st.session_state['defaults'].txt2vid.use_GFPGAN, help="Choose to use GFPGAN. Default: False") - + st.session_state["defaults"].txt2vid.use_RealESRGAN = st.checkbox("txt2vid Use RealESRGAN", value=st.session_state['defaults'].txt2vid.use_RealESRGAN, help="Choose to use RealESRGAN. Default: False") - - st.session_state["defaults"].txt2vid.update_preview = st.checkbox("Update txt2vid Preview Image", value=st.session_state['defaults'].txt2vid.update_preview, - help="Choose to update the preview image during generation. Default: True") - - st.session_state["defaults"].txt2vid.update_preview_frequency = int(st.text_input("txt2vid Preview Image Update Frequency", - value=st.session_state['defaults'].txt2vid.update_preview_frequency, - help="Set the default value for the frrquency of the preview image updates. Default is: 10")) - + + st.session_state["defaults"].txt2vid.update_preview = True + st.session_state["defaults"].txt2vid.update_preview_frequency = st.number_input("txt2vid Preview Image Update Frequency", + value=st.session_state['defaults'].txt2vid.update_preview_frequency, + help="Set the default value for the frrquency of the preview image updates. Default is: 10") + with col4: st.title("Variation Parameters") - - st.session_state["defaults"].txt2vid.variant_amount.value = float(st.text_input("Default txt2vid Variation Amount", + + st.session_state["defaults"].txt2vid.variant_amount.value = st.number_input("Default txt2vid Variation Amount", value=st.session_state['defaults'].txt2vid.variant_amount.value, - help="Set the default variation to use. Default is: 0.0")) - - st.session_state["defaults"].txt2vid.variant_amount.min_value = float(st.text_input("Minimum txt2vid Variation Amount", - value=st.session_state['defaults'].txt2vid.variant_amount.min_value, - help="Set the default minimum value for the variation slider. Default is: 0.0")) - - st.session_state["defaults"].txt2vid.variant_amount.max_value = float(st.text_input("Maximum txt2vid Variation Amount", - value=st.session_state['defaults'].txt2vid.variant_amount.max_value, - help="Set the default maximum value for the variation slider. 
Default is: 1.0")) - - st.session_state["defaults"].txt2vid.variant_amount.step = float(st.text_input("txt2vid Variation Slider Steps", + help="Set the default variation to use. Default is: 0.0") + + st.session_state["defaults"].txt2vid.variant_amount.min_value = st.number_input("Minimum txt2vid Variation Amount", + value=st.session_state['defaults'].txt2vid.variant_amount.min_value, + help="Set the default minimum value for the variation slider. Default is: 0.0") + + st.session_state["defaults"].txt2vid.variant_amount.max_value = st.number_input("Maximum txt2vid Variation Amount", + value=st.session_state['defaults'].txt2vid.variant_amount.max_value, + help="Set the default maximum value for the variation slider. Default is: 1.0") + + st.session_state["defaults"].txt2vid.variant_amount.step = st.number_input("txt2vid Variation Slider Steps", value=st.session_state['defaults'].txt2vid.variant_amount.step, - help="Set the default value for the number of steps on the variation slider. Default is: 1")) - - st.session_state['defaults'].txt2vid.variant_seed = st.text_input("Default txt2vid Variation Seed", + help="Set the default value for the number of steps on the variation slider. Default is: 1") + + st.session_state['defaults'].txt2vid.variant_seed = st.text_input("Default txt2vid Variation Seed", value=st.session_state['defaults'].txt2vid.variant_seed, help="Default variation seed.") - + with col5: st.title("Beta Parameters") - + # Beta Start - st.session_state["defaults"].txt2vid.beta_start.value = float(st.text_input("Default txt2vid Beta Start Value", - value=st.session_state['defaults'].txt2vid.beta_start.value, - help="Set the default variation to use. Default is: 0.0")) - - st.session_state["defaults"].txt2vid.beta_start.min_value = float(st.text_input("Minimum txt2vid Beta Start Amount", - value=st.session_state['defaults'].txt2vid.beta_start.min_value, - help="Set the default minimum value for the variation slider. Default is: 0.0")) - - st.session_state["defaults"].txt2vid.beta_start.max_value = float(st.text_input("Maximum txt2vid Beta Start Amount", - value=st.session_state['defaults'].txt2vid.beta_start.max_value, - help="Set the default maximum value for the variation slider. Default is: 1.0")) - - st.session_state["defaults"].txt2vid.beta_start.step = float(st.text_input("txt2vid Beta Start Slider Steps", value=st.session_state['defaults'].txt2vid.beta_start.step, - help="Set the default value for the number of steps on the variation slider. Default is: 1")) - + st.session_state["defaults"].txt2vid.beta_start.value = st.number_input("Default txt2vid Beta Start Value", + value=st.session_state['defaults'].txt2vid.beta_start.value, + help="Set the default variation to use. Default is: 0.0") + + st.session_state["defaults"].txt2vid.beta_start.min_value = st.number_input("Minimum txt2vid Beta Start Amount", + value=st.session_state['defaults'].txt2vid.beta_start.min_value, + help="Set the default minimum value for the variation slider. Default is: 0.0") + + st.session_state["defaults"].txt2vid.beta_start.max_value = st.number_input("Maximum txt2vid Beta Start Amount", + value=st.session_state['defaults'].txt2vid.beta_start.max_value, + help="Set the default maximum value for the variation slider. Default is: 1.0") + + st.session_state["defaults"].txt2vid.beta_start.step = st.number_input("txt2vid Beta Start Slider Steps", value=st.session_state['defaults'].txt2vid.beta_start.step, + help="Set the default value for the number of steps on the variation slider. 
Default is: 1") + st.session_state["defaults"].txt2vid.beta_start.format = st.text_input("Default txt2vid Beta Start Format", value=st.session_state['defaults'].txt2vid.beta_start.format, help="Set the default Beta Start Format. Default is: %.5\f") - + # Beta End - st.session_state["defaults"].txt2vid.beta_end.value = float(st.text_input("Default txt2vid Beta End Value", value=st.session_state['defaults'].txt2vid.beta_end.value, - help="Set the default variation to use. Default is: 0.0")) - - st.session_state["defaults"].txt2vid.beta_end.min_value = float(st.text_input("Minimum txt2vid Beta End Amount", value=st.session_state['defaults'].txt2vid.beta_end.min_value, - help="Set the default minimum value for the variation slider. Default is: 0.0")) - - st.session_state["defaults"].txt2vid.beta_end.max_value = float(st.text_input("Maximum txt2vid Beta End Amount", value=st.session_state['defaults'].txt2vid.beta_end.max_value, - help="Set the default maximum value for the variation slider. Default is: 1.0")) - - st.session_state["defaults"].txt2vid.beta_end.step = float(st.text_input("txt2vid Beta End Slider Steps", value=st.session_state['defaults'].txt2vid.beta_end.step, - help="Set the default value for the number of steps on the variation slider. Default is: 1")) - - st.session_state["defaults"].txt2vid.beta_end.format = st.text_input("Default txt2vid Beta End Format", value=st.session_state['defaults'].txt2vid.beta_start.format, + st.session_state["defaults"].txt2vid.beta_end.value = st.number_input("Default txt2vid Beta End Value", value=st.session_state['defaults'].txt2vid.beta_end.value, + help="Set the default variation to use. Default is: 0.0") + + st.session_state["defaults"].txt2vid.beta_end.min_value = st.number_input("Minimum txt2vid Beta End Amount", value=st.session_state['defaults'].txt2vid.beta_end.min_value, + help="Set the default minimum value for the variation slider. Default is: 0.0") + + st.session_state["defaults"].txt2vid.beta_end.max_value = st.number_input("Maximum txt2vid Beta End Amount", value=st.session_state['defaults'].txt2vid.beta_end.max_value, + help="Set the default maximum value for the variation slider. Default is: 1.0") + + st.session_state["defaults"].txt2vid.beta_end.step = st.number_input("txt2vid Beta End Slider Steps", value=st.session_state['defaults'].txt2vid.beta_end.step, + help="Set the default value for the number of steps on the variation slider. Default is: 1") + + st.session_state["defaults"].txt2vid.beta_end.format = st.text_input("Default txt2vid Beta End Format", value=st.session_state['defaults'].txt2vid.beta_start.format, help="Set the default Beta Start Format. Default is: %.5\f") - + with image_processing: col1, col2, col3, col4, col5 = st.columns(5, gap="large") with col1: st.title("GFPGAN") - - st.session_state["defaults"].gfpgan.strength = int(st.text_input("Default Img2Txt Batch Size", value=st.session_state['defaults'].gfpgan.strength, - help="Set the default global strength for GFPGAN. Default is: 100")) + + st.session_state["defaults"].gfpgan.strength = st.number_input("Default Img2Txt Batch Size", value=st.session_state['defaults'].gfpgan.strength, + help="Set the default global strength for GFPGAN. 
Default is: 100") with col2: st.title("GoBig") with col3: @@ -874,46 +785,46 @@ def layout(): st.title("LDSR") with col5: st.title("GoLatent") - + with textual_inversion_tab: st.title("Textual Inversion") - - st.session_state['defaults'].textual_inversion.pretrained_model_name_or_path = st.text_input("Default Textual Inversion Model Path", + + st.session_state['defaults'].textual_inversion.pretrained_model_name_or_path = st.text_input("Default Textual Inversion Model Path", value=st.session_state['defaults'].textual_inversion.pretrained_model_name_or_path, help="Default: models/ldm/stable-diffusion-v1-4") - - st.session_state['defaults'].textual_inversion.tokenizer_name = st.text_input("Default Img2Img Variation Seed", value=st.session_state['defaults'].textual_inversion.tokenizer_name, + + st.session_state['defaults'].textual_inversion.tokenizer_name = st.text_input("Default Img2Img Variation Seed", value=st.session_state['defaults'].textual_inversion.tokenizer_name, help="Default tokenizer seed.") - + with concepts_library_tab: st.title("Concepts Library") - #st.info("Under Construction. :construction_worker:") + #st.info("Under Construction. :construction_worker:") col1, col2, col3, col4, col5 = st.columns(5, gap='large') with col1: - st.session_state["defaults"].concepts_library.concepts_per_page = int(st.text_input("Concepts Per Page", value=st.session_state['defaults'].concepts_library.concepts_per_page, - help="Number of concepts per page to show on the Concepts Library. Default: '12'")) - - # add space for the buttons at the bottom + st.session_state["defaults"].concepts_library.concepts_per_page = st.number_input("Concepts Per Page", value=st.session_state['defaults'].concepts_library.concepts_per_page, + help="Number of concepts per page to show on the Concepts Library. Default: '12'") + + # add space for the buttons at the bottom st.markdown("---") - + # We need a submit button to save the Settings # as well as one to reset them to the defaults, just in case. 
_, _, save_button_col, reset_button_col, _, _ = st.columns([1,1,1,1,1,1], gap="large") with save_button_col: save_button = st.form_submit_button("Save") - + with reset_button_col: reset_button = st.form_submit_button("Reset") - + if save_button: OmegaConf.save(config=st.session_state.defaults, f="configs/webui/userconfig_streamlit.yaml") loaded = OmegaConf.load("configs/webui/userconfig_streamlit.yaml") - assert st.session_state.defaults == loaded - + assert st.session_state.defaults == loaded + # if (os.path.exists(".streamlit/config.toml")): - with open(".streamlit/config.toml", "w") as toml_file: + with open(".streamlit/config.toml", "w") as toml_file: toml.dump(st.session_state["streamlit_config"], toml_file) - + if reset_button: st.session_state["defaults"] = OmegaConf.load("configs/webui/webui_streamlit.yaml") \ No newline at end of file diff --git a/scripts/bridgeData_template.py b/scripts/bridgeData_template.py index 365ce9869..483ab2953 100644 --- a/scripts/bridgeData_template.py +++ b/scripts/bridgeData_template.py @@ -14,4 +14,8 @@ # If you put this to 32, it is equivalent to 1024x1024 pixels horde_max_power = 8 # Set this to false, if you do not want your worker to receive requests for NSFW generations -horde_nsfw = True \ No newline at end of file +horde_nsfw = True +# A list of words which you do not want to your worker to accept +horde_blacklist = [] +# A list of words for which you always want to allow the NSFW censor filter, even when this worker is in NSFW mode +horde_censorlist = [] diff --git a/scripts/img2img.py b/scripts/img2img.py index aa481b964..62412670f 100644 --- a/scripts/img2img.py +++ b/scripts/img2img.py @@ -12,7 +12,7 @@ # GNU Affero General Public License for more details. # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# along with this program. If not, see . # base webui import and utils. from sd_utils import * @@ -30,7 +30,7 @@ import skimage from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler -# Temp imports +# Temp imports # end of imports @@ -45,14 +45,14 @@ except: pass -def img2img(prompt: str = '', init_info: any = None, init_info_mask: any = None, mask_mode: int = 0, mask_blur_strength: int = 3, +def img2img(prompt: str = '', init_info: any = None, init_info_mask: any = None, mask_mode: int = 0, mask_blur_strength: int = 3, mask_restore: bool = False, ddim_steps: int = 50, sampler_name: str = 'DDIM', n_iter: int = 1, cfg_scale: float = 7.5, denoising_strength: float = 0.8, seed: int = -1, noise_mode: int = 0, find_noise_steps: str = "", height: int = 512, width: int = 512, resize_mode: int = 0, fp = None, variant_amount: float = None, variant_seed: int = None, ddim_eta:float = 0.0, write_info_files:bool = True, separate_prompts:bool = False, normalize_prompt_weights:bool = True, save_individual_images: bool = True, save_grid: bool = True, group_by_prompt: bool = True, - save_as_jpg: bool = True, use_GFPGAN: bool = True, GFPGAN_model: str = 'GFPGANv1.3', + save_as_jpg: bool = True, use_GFPGAN: bool = True, GFPGAN_model: str = 'GFPGANv1.4', use_RealESRGAN: bool = True, RealESRGAN_model: str = "RealESRGAN_x4plus_anime_6B", use_LDSR: bool = True, LDSR_model: str = "model", loopback: bool = False, @@ -167,7 +167,7 @@ def init(): init_image = 2. * image - 1. 
init_image = init_image.to(server_state["device"]) - init_latent = (server_state["model"] if not st.session_state['defaults'].general.optimized else server_state["modelFS"]).get_first_stage_encoding((server_state["model"] if not st.session_state['defaults'].general.optimized else modelFS).encode_first_stage(init_image)) # move to latent space + init_latent = (server_state["model"] if not st.session_state['defaults'].general.optimized else server_state["modelFS"]).get_first_stage_encoding((server_state["model"] if not st.session_state['defaults'].general.optimized else server_state["modelFS"]).encode_first_stage(init_image)) # move to latent space if st.session_state['defaults'].general.optimized: mem = torch.cuda.memory_allocated()/1e6 @@ -234,7 +234,7 @@ def sample(init_data, x, conditioning, unconditional_conditioning, sampler_name) from skimage import exposure do_color_correction = True except: - print("Install scikit-image to perform color correction on loopback") + print("Install scikit-image to perform color correction on loopback") for i in range(n_iter): if do_color_correction and i == 0: @@ -356,28 +356,28 @@ def sample(init_data, x, conditioning, unconditional_conditioning, sampler_name) del sampler return output_images, seed, info, stats - + # def layout(): with st.form("img2img-inputs"): st.session_state["generation_mode"] = "img2img" - + img2img_input_col, img2img_generate_col = st.columns([10,1]) with img2img_input_col: #prompt = st.text_area("Input Text","") prompt = st.text_input("Input Text","", placeholder="A corgi wearing a top hat as an oil painting.") - + # Every form must have a submit button, the extra blank spaces is a temp way to align it with the input field. Needs to be done in CSS or some other way. img2img_generate_col.write("") img2img_generate_col.write("") generate_button = img2img_generate_col.form_submit_button("Generate") - - + + # creating the page layout using columns - col1_img2img_layout, col2_img2img_layout, col3_img2img_layout = st.columns([1,2,2], gap="small") - + col1_img2img_layout, col2_img2img_layout, col3_img2img_layout = st.columns([1,2,2], gap="small") + with col1_img2img_layout: - # If we have custom models available on the "models/custom" + # If we have custom models available on the "models/custom" #folder then we show a menu to select which model we want to use, otherwise we use the main model for SD custom_models_available() if server_state["CustomModel_available"]: @@ -386,36 +386,35 @@ def layout(): help="Select the model you want to use. This option is only available if you have custom models \ on your 'models/custom' folder. The model name that will be shown here is the same as the name\ the file for the model has on said folder, it is recommended to give the .ckpt file a name that \ - will make it easier for you to distinguish it from other models. Default: Stable Diffusion v1.4") + will make it easier for you to distinguish it from other models. 
Default: Stable Diffusion v1.4") else: st.session_state["custom_model"] = "Stable Diffusion v1.4" - - - st.session_state["sampling_steps"] = st.slider("Sampling Steps", value=st.session_state['defaults'].img2img.sampling_steps.value, - min_value=st.session_state['defaults'].img2img.sampling_steps.min_value, - max_value=st.session_state['defaults'].img2img.sampling_steps.max_value, - step=st.session_state['defaults'].img2img.sampling_steps.step) - + + + st.session_state["sampling_steps"] = st.number_input("Sampling Steps", value=st.session_state['defaults'].img2img.sampling_steps.value, + min_value=st.session_state['defaults'].img2img.sampling_steps.min_value, + step=st.session_state['defaults'].img2img.sampling_steps.step) + sampler_name_list = ["k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a", "k_heun", "PLMS", "DDIM"] - st.session_state["sampler_name"] = st.selectbox("Sampling method",sampler_name_list, - index=sampler_name_list.index(st.session_state['defaults'].img2img.sampler_name), help="Sampling method to use.") - + st.session_state["sampler_name"] = st.selectbox("Sampling method",sampler_name_list, + index=sampler_name_list.index(st.session_state['defaults'].img2img.sampler_name), help="Sampling method to use.") + width = st.slider("Width:", min_value=st.session_state['defaults'].img2img.width.min_value, max_value=st.session_state['defaults'].img2img.width.max_value, value=st.session_state['defaults'].img2img.width.value, step=st.session_state['defaults'].img2img.width.step) height = st.slider("Height:", min_value=st.session_state['defaults'].img2img.height.min_value, max_value=st.session_state['defaults'].img2img.height.max_value, value=st.session_state['defaults'].img2img.height.value, step=st.session_state['defaults'].img2img.height.step) - seed = st.text_input("Seed:", value=st.session_state['defaults'].img2img.seed, help=" The seed to use, if left blank a random seed will be generated.") - + seed = st.text_input("Seed:", value=st.session_state['defaults'].img2img.seed, help=" The seed to use, if left blank a random seed will be generated.") + cfg_scale = st.slider("CFG (Classifier Free Guidance Scale):", min_value=st.session_state['defaults'].img2img.cfg_scale.min_value, max_value=st.session_state['defaults'].img2img.cfg_scale.max_value, value=st.session_state['defaults'].img2img.cfg_scale.value, step=st.session_state['defaults'].img2img.cfg_scale.step, help="How strongly the image should follow the prompt.") - - st.session_state["denoising_strength"] = st.slider("Denoising Strength:", value=st.session_state['defaults'].img2img.denoising_strength.value, + + st.session_state["denoising_strength"] = st.slider("Denoising Strength:", value=st.session_state['defaults'].img2img.denoising_strength.value, min_value=st.session_state['defaults'].img2img.denoising_strength.min_value, max_value=st.session_state['defaults'].img2img.denoising_strength.max_value, - step=st.session_state['defaults'].img2img.denoising_strength.step) - - + step=st.session_state['defaults'].img2img.denoising_strength.step) + + mask_expander = st.empty() with mask_expander.expander("Mask"): mask_mode_list = ["Mask", "Inverted mask", "Image alpha"] @@ -424,8 +423,8 @@ def layout(): \"Inverted mask\" modifies the image where the mask is black. \"Image alpha\" modifies the image where the image is transparent." 
) mask_mode = mask_mode_list.index(mask_mode) - - + + noise_mode_list = ["Seed", "Find Noise", "Matched Noise", "Find+Matched Noise"] noise_mode = st.selectbox( "Noise Mode", noise_mode_list, @@ -435,58 +434,54 @@ def layout(): find_noise_steps = st.slider("Find Noise Steps", value=st.session_state['defaults'].img2img.find_noise_steps.value, min_value=st.session_state['defaults'].img2img.find_noise_steps.min_value, max_value=st.session_state['defaults'].img2img.find_noise_steps.max_value, step=st.session_state['defaults'].img2img.find_noise_steps.step) - + with st.expander("Batch Options"): - batch_count = st.slider("Batch count.", min_value=st.session_state['defaults'].img2img.batch_count.min_value, max_value=st.session_state['defaults'].img2img.batch_count.max_value, - value=st.session_state['defaults'].img2img.batch_count.value, step=st.session_state['defaults'].img2img.batch_count.step, - help="How many iterations or batches of images to generate in total.") - - batch_size = st.slider("Batch size", min_value=st.session_state['defaults'].img2img.batch_size.min_value, max_value=st.session_state['defaults'].img2img.batch_size.max_value, - value=st.session_state['defaults'].img2img.batch_size.value, step=st.session_state['defaults'].img2img.batch_size.step, - help="How many images are at once in a batch. It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish \ - generation as more images are generated at once.Default: 1") - + st.session_state["batch_count"] = int(st.text_input("Batch count.", value=st.session_state['defaults'].img2img.batch_count.value, + help="How many iterations or batches of images to generate in total.")) + + st.session_state["batch_size"] = int(st.text_input("Batch size", value=st.session_state.defaults.img2img.batch_size.value, + help="How many images are at once in a batch.\ + It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ + Default: 1")) + with st.expander("Preview Settings"): - st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].img2img.update_preview, - help="If enabled the image preview will be updated during the generation instead of at the end. \ - You can use the Update Preview \Frequency option bellow to customize how frequent it's updated. \ - By default this is enabled and the frequency is set to 1 step.") - + st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].img2img.update_preview_frequency, help="Frequency in steps at which the the preview image is updated. 
By default the frequency \ - is set to 1 step.") - # + is set to 1 step.") + # with st.expander("Advanced"): - separate_prompts = st.checkbox("Create Prompt Matrix.", value=st.session_state['defaults'].img2img.separate_prompts, - help="Separate multiple prompts using the `|` character, and get all combinations of them.") - normalize_prompt_weights = st.checkbox("Normalize Prompt Weights.", value=st.session_state['defaults'].img2img.normalize_prompt_weights, - help="Ensure the sum of all weights add up to 1.0") - loopback = st.checkbox("Loopback.", value=st.session_state['defaults'].img2img.loopback, help="Use images from previous batch when creating next batch.") - random_seed_loopback = st.checkbox("Random loopback seed.", value=st.session_state['defaults'].img2img.random_seed_loopback, help="Random loopback seed") - img2img_mask_restore = st.checkbox("Only modify regenerated parts of image", - value=st.session_state['defaults'].img2img.mask_restore, - help="Enable to restore the unmasked parts of the image with the input, may not blend as well but preserves detail") - save_individual_images = st.checkbox("Save individual images.", value=st.session_state['defaults'].img2img.save_individual_images, - help="Save each image generated before any filter or enhancement is applied.") - save_grid = st.checkbox("Save grid",value=st.session_state['defaults'].img2img.save_grid, help="Save a grid with all the images generated into a single image.") - group_by_prompt = st.checkbox("Group results by prompt", value=st.session_state['defaults'].img2img.group_by_prompt, - help="Saves all the images with the same prompt into the same folder. \ - When using a prompt matrix each prompt combination will have its own folder.") - write_info_files = st.checkbox("Write Info file", value=st.session_state['defaults'].img2img.write_info_files, - help="Save a file next to the image with informartion about the generation.") - save_as_jpg = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].img2img.save_as_jpg, help="Saves the images as jpg instead of png.") - + with st.expander("Output Settings"): + separate_prompts = st.checkbox("Create Prompt Matrix.", value=st.session_state['defaults'].img2img.separate_prompts, + help="Separate multiple prompts using the `|` character, and get all combinations of them.") + normalize_prompt_weights = st.checkbox("Normalize Prompt Weights.", value=st.session_state['defaults'].img2img.normalize_prompt_weights, + help="Ensure the sum of all weights add up to 1.0") + loopback = st.checkbox("Loopback.", value=st.session_state['defaults'].img2img.loopback, help="Use images from previous batch when creating next batch.") + random_seed_loopback = st.checkbox("Random loopback seed.", value=st.session_state['defaults'].img2img.random_seed_loopback, help="Random loopback seed") + img2img_mask_restore = st.checkbox("Only modify regenerated parts of image", + value=st.session_state['defaults'].img2img.mask_restore, + help="Enable to restore the unmasked parts of the image with the input, may not blend as well but preserves detail") + save_individual_images = st.checkbox("Save individual images.", value=st.session_state['defaults'].img2img.save_individual_images, + help="Save each image generated before any filter or enhancement is applied.") + save_grid = st.checkbox("Save grid",value=st.session_state['defaults'].img2img.save_grid, help="Save a grid with all the images generated into a single image.") + group_by_prompt = st.checkbox("Group results by prompt", 
value=st.session_state['defaults'].img2img.group_by_prompt, + help="Saves all the images with the same prompt into the same folder. \ + When using a prompt matrix each prompt combination will have its own folder.") + write_info_files = st.checkbox("Write Info file", value=st.session_state['defaults'].img2img.write_info_files, + help="Save a file next to the image with information about the generation.") + save_as_jpg = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].img2img.save_as_jpg, help="Saves the images as jpg instead of png.") + # # check if GFPGAN, RealESRGAN and LDSR are available. if "GFPGAN_available" not in st.session_state: GFPGAN_available() - + if "RealESRGAN_available" not in st.session_state: RealESRGAN_available() - + if "LDSR_available" not in st.session_state: LDSR_available() - + if st.session_state["GFPGAN_available"] or st.session_state["RealESRGAN_available"] or st.session_state["LDSR_available"]: with st.expander("Post-Processing"): face_restoration_tab, upscaling_tab = st.tabs(["Face Restoration", "Upscaling"]) @@ -496,78 +491,93 @@ def layout(): #with st.expander("Face Restoration"): #if st.session_state["GFPGAN_available"]: #with st.expander("GFPGAN"): - st.session_state["use_GFPGAN"] = st.checkbox("Use GFPGAN", value=st.session_state['defaults'].txt2img.use_GFPGAN, + st.session_state["use_GFPGAN"] = st.checkbox("Use GFPGAN", value=st.session_state['defaults'].img2img.use_GFPGAN, help="Uses the GFPGAN model to improve faces after the generation.\ This greatly improve the quality and consistency of faces but uses\ extra VRAM. Disable if you need the extra VRAM.") - + st.session_state["GFPGAN_model"] = st.selectbox("GFPGAN model", st.session_state["GFPGAN_models"], - index=st.session_state["GFPGAN_models"].index(st.session_state['defaults'].general.GFPGAN_model)) - + index=st.session_state["GFPGAN_models"].index(st.session_state['defaults'].general.GFPGAN_model)) + #st.session_state["GFPGAN_strenght"] = st.slider("Effect Strenght", min_value=1, max_value=100, value=1, step=1, help='') - + else: - st.session_state["use_GFPGAN"] = False - + st.session_state["use_GFPGAN"] = False + with upscaling_tab: - #with st.expander("Upscaling"): - # RealESRGAN and LDSR used for upscaling. + st.session_state['us_upscaling'] = st.checkbox("Use Upscaling", value=st.session_state['defaults'].img2img.use_upscaling) + + # RealESRGAN and LDSR used for upscaling. if st.session_state["RealESRGAN_available"] or st.session_state["LDSR_available"]: - + upscaling_method_list = [] if st.session_state["RealESRGAN_available"]: upscaling_method_list.append("RealESRGAN") if st.session_state["LDSR_available"]: upscaling_method_list.append("LDSR") - + st.session_state["upscaling_method"] = st.selectbox("Upscaling Method", upscaling_method_list, index=upscaling_method_list.index(st.session_state['defaults'].general.upscaling_method)) - + if st.session_state["RealESRGAN_available"]: - # with st.expander("RealESRGAN"): - st.session_state["use_RealESRGAN"] = st.checkbox("Use RealESRGAN", value=st.session_state['defaults'].txt2img.use_RealESRGAN, - help="Uses the RealESRGAN model to upscale the images after the generation.\ - This greatly improve the quality and lets you have high resolution images but \ - uses extra VRAM. 
Disable if you need the extra VRAM.") - - st.session_state["RealESRGAN_model"] = st.selectbox("RealESRGAN model", st.session_state["RealESRGAN_models"], - index=st.session_state["RealESRGAN_models"].index(st.session_state['defaults'].general.RealESRGAN_model)) + with st.expander("RealESRGAN"): + if st.session_state["upscaling_method"] == "RealESRGAN" and st.session_state['us_upscaling']: + st.session_state["use_RealESRGAN"] = True + else: + st.session_state["use_RealESRGAN"] = False + + st.session_state["RealESRGAN_model"] = st.selectbox("RealESRGAN model", st.session_state["RealESRGAN_models"], + index=st.session_state["RealESRGAN_models"].index(st.session_state['defaults'].general.RealESRGAN_model)) else: st.session_state["use_RealESRGAN"] = False st.session_state["RealESRGAN_model"] = "RealESRGAN_x4plus" - - + + # if st.session_state["LDSR_available"]: - #with st.expander("LDSR"): - st.session_state["use_LDSR"] = st.checkbox("Use LDSR", value=st.session_state['defaults'].txt2img.use_LDSR, - help="Uses the LDSR model to upscale the images after the generation.\ - This greatly improve the quality and lets you have high resolution images but \ - uses extra VRAM. Disable if you need the extra VRAM.") - - st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"], - index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model)) + with st.expander("LDSR"): + if st.session_state["upscaling_method"] == "LDSR" and st.session_state['us_upscaling']: + st.session_state["use_LDSR"] = True + else: + st.session_state["use_LDSR"] = False + + st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"], + index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model)) + + st.session_state["ldsr_sampling_steps"] = int(st.text_input("Sampling Steps", value=st.session_state['defaults'].img2img.LDSR_config.sampling_steps, + help="")) + + st.session_state["preDownScale"] = int(st.text_input("PreDownScale", value=st.session_state['defaults'].img2img.LDSR_config.preDownScale, + help="")) + + st.session_state["postDownScale"] = int(st.text_input("postDownScale", value=st.session_state['defaults'].img2img.LDSR_config.postDownScale, + help="")) + + downsample_method_list = ['Nearest', 'Lanczos'] + st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list, + index=downsample_method_list.index(st.session_state['defaults'].img2img.LDSR_config.downsample_method)) + else: st.session_state["use_LDSR"] = False - st.session_state["LDSR_model"] = "model" - + st.session_state["LDSR_model"] = "model" + with st.expander("Variant"): variant_amount = st.slider("Variant Amount:", value=st.session_state['defaults'].img2img.variant_amount, min_value=0.0, max_value=1.0, step=0.01) variant_seed = st.text_input("Variant Seed:", value=st.session_state['defaults'].img2img.variant_seed, help="The seed to use when generating a variant, if left blank a random seed will be generated.") - - + + with col2_img2img_layout: editor_tab = st.tabs(["Editor"]) - + editor_image = st.empty() st.session_state["editor_image"] = editor_image - - st.form_submit_button("Refresh") - + masked_image_holder = st.empty() image_holder = st.empty() - + + st.form_submit_button("Refresh") + uploaded_images = st.file_uploader( "Upload Image", accept_multiple_files=False, type=["png", "jpg", "jpeg", "webp"], help="Upload an image which will be used for the image to image generation.", @@ -576,9 +586,9 @@ 
def layout(): image = Image.open(uploaded_images).convert('RGBA') new_img = image.resize((width, height)) image_holder.image(new_img) - + mask_holder = st.empty() - + uploaded_masks = st.file_uploader( "Upload Mask", accept_multiple_files=False, type=["png", "jpg", "jpeg", "webp"], help="Upload an mask image which will be used for masking the image to image generation.", @@ -592,7 +602,7 @@ def layout(): mask = Image.alpha_composite(background, mask) mask = mask.resize((width, height)) mask_holder.image(mask) - + if uploaded_images and uploaded_masks: if mask_mode != 2: final_img = new_img.copy() @@ -605,45 +615,46 @@ def layout(): elif mask_mode == 1: alpha_layer = alpha_layer.point(lambda a: a * strength) alpha_layer = ImageOps.invert(alpha_layer) - + final_img.putalpha(alpha_layer) - + with masked_image_holder.container(): st.text("Masked Image Preview") st.image(final_img) - - + + with col3_img2img_layout: result_tab = st.tabs(["Result"]) - + # create an empty container for the image, progress bar, etc so we can update it later and use session_state to hold them globally. preview_image = st.empty() st.session_state["preview_image"] = preview_image - + #st.session_state["loading"] = st.empty() - + st.session_state["progress_bar_text"] = st.empty() st.session_state["progress_bar"] = st.empty() - - + + message = st.empty() - + #if uploaded_images: #image = Image.open(uploaded_images).convert('RGB') ##img_array = np.array(image) # if you want to pass it to OpenCV #new_img = image.resize((width, height)) #st.image(new_img, use_column_width=True) - - + + if generate_button: #print("Loading models") # load the models when we hit the generate button for the first time, it wont be loaded after that so dont worry. with col3_img2img_layout: with hc.HyLoader('Loading Models...', hc.Loaders.standard_loaders,index=[0]): - load_models(st.session_state["use_LDSR"], st.session_state["LDSR_model"], st.session_state["use_GFPGAN"], - st.session_state["GFPGAN_model"] , st.session_state["use_RealESRGAN"], - st.session_state["RealESRGAN_model"], server_state["CustomModel_available"], st.session_state["custom_model"]) - + load_models(use_LDSR=st.session_state["use_LDSR"], LDSR_model=st.session_state["LDSR_model"], + use_GFPGAN=st.session_state["use_GFPGAN"], GFPGAN_model=st.session_state["GFPGAN_model"] , + use_RealESRGAN=st.session_state["use_RealESRGAN"], RealESRGAN_model=st.session_state["RealESRGAN_model"], + CustomModel_available=server_state["CustomModel_available"], custom_model=st.session_state["custom_model"]) + if uploaded_images: image = Image.open(uploaded_images).convert('RGBA') new_img = image.resize((width, height)) @@ -652,30 +663,30 @@ def layout(): if uploaded_masks: mask = Image.open(uploaded_masks).convert('RGBA') new_mask = mask.resize((width, height)) - + try: output_images, seed, info, stats = img2img(prompt=prompt, init_info=new_img, init_info_mask=new_mask, mask_mode=mask_mode, mask_restore=img2img_mask_restore, ddim_steps=st.session_state["sampling_steps"], - sampler_name=st.session_state["sampler_name"], n_iter=batch_count, + sampler_name=st.session_state["sampler_name"], n_iter=st.session_state["batch_count"], cfg_scale=cfg_scale, denoising_strength=st.session_state["denoising_strength"], variant_seed=variant_seed, - seed=seed, noise_mode=noise_mode, find_noise_steps=find_noise_steps, width=width, - height=height, variant_amount=variant_amount, + seed=seed, noise_mode=noise_mode, find_noise_steps=find_noise_steps, width=width, + height=height, variant_amount=variant_amount, 
ddim_eta=st.session_state.defaults.img2img.ddim_eta, write_info_files=write_info_files, separate_prompts=separate_prompts, normalize_prompt_weights=normalize_prompt_weights, - save_individual_images=save_individual_images, save_grid=save_grid, + save_individual_images=save_individual_images, save_grid=save_grid, group_by_prompt=group_by_prompt, save_as_jpg=save_as_jpg, use_GFPGAN=st.session_state["use_GFPGAN"], GFPGAN_model=st.session_state["GFPGAN_model"], use_RealESRGAN=st.session_state["use_RealESRGAN"], RealESRGAN_model=st.session_state["RealESRGAN_model"], use_LDSR=st.session_state["use_LDSR"], LDSR_model=st.session_state["LDSR_model"], loopback=loopback ) - + #show a message when the generation is complete. message.success('Render Complete: ' + info + '; Stats: ' + stats, icon="✅") - + except (StopException, KeyError): print(f"Received Streamlit StopException") - + # this will render all the images at the end of the generation but its better if its moved to a second tab inside col2 and shown as a gallery. # use the current col2 first tab to show the preview_img and update it as its generated. #preview_image.image(output_images, width=750) diff --git a/scripts/img2txt.py b/scripts/img2txt.py index 49e4150d0..778ffe73d 100644 --- a/scripts/img2txt.py +++ b/scripts/img2txt.py @@ -152,13 +152,11 @@ def generate_caption(pil_image): #print (caption) return caption[0] - def load_list(filename): with open(filename, 'r', encoding='utf-8', errors='replace') as f: items = [line.strip() for line in f.readlines()] return items - def rank(model, image_features, text_array, top_count=1): top_count = min(top_count, len(text_array)) text_tokens = clip.tokenize([text for text in text_array]).cuda() @@ -181,9 +179,9 @@ def clear_cuda(): def batch_rank(model, image_features, text_array, batch_size=st.session_state["defaults"].img2txt.batch_size): - batch_count = len(text_array) // batch_size + batch_size = min(batch_size, len(text_array)) + batch_count = int(len(text_array) / batch_size) batches = [text_array[i*batch_size:(i+1)*batch_size] for i in range(batch_count)] - batches.append(text_array[batch_count*batch_size:]) ranks = [] for batch in batches: ranks += rank(model, image_features, batch) @@ -225,12 +223,21 @@ def interrogate(image, models): st.session_state["log_message"].code(f"Interrogating with {model_name}...", language='') if model_name not in server_state["clip_models"]: + if not st.session_state["defaults"].img2txt.keep_all_models_loaded: + model_to_delete = [] + for model in server_state["clip_models"]: + if model != model_name: + model_to_delete.append(model) + for model in model_to_delete: + del server_state["clip_models"][model] + del server_state["preprocesses"][model] + clear_cuda() if model_name == 'ViT-H-14': - server_state["clip_models"][model_name], _, server_state["preprocesses"][model_name] = open_clip.create_model_and_transforms(model_name, pretrained='laion2b_s32b_b79k', cache_dir='user_data/model_cache/clip') + server_state["clip_models"][model_name], _, server_state["preprocesses"][model_name] = open_clip.create_model_and_transforms(model_name, pretrained='laion2b_s32b_b79k', cache_dir='models/clip') elif model_name == 'ViT-g-14': - server_state["clip_models"][model_name], _, server_state["preprocesses"][model_name] = open_clip.create_model_and_transforms(model_name, pretrained='laion2b_s12b_b42k', cache_dir='user_data/model_cache/clip') + server_state["clip_models"][model_name], _, server_state["preprocesses"][model_name] = 
open_clip.create_model_and_transforms(model_name, pretrained='laion2b_s12b_b42k', cache_dir='models/clip') else: - server_state["clip_models"][model_name], server_state["preprocesses"][model_name] = clip.load(model_name, device=device, download_root='user_data/model_cache/clip') + server_state["clip_models"][model_name], server_state["preprocesses"][model_name] = clip.load(model_name, device=device, download_root='models/clip') server_state["clip_models"][model_name] = server_state["clip_models"][model_name].cuda().eval() images = server_state["preprocesses"][model_name](image).unsqueeze(0).cuda() @@ -316,16 +323,18 @@ def img2txt(): models = [] - if st.session_state["ViTB32"]: - models.append('ViT-B/32') - if st.session_state['ViTB16']: - models.append('ViT-B/16') - if st.session_state["ViTL14"]: + if st.session_state["ViT-L/14"]: models.append('ViT-L/14') if st.session_state["ViT-H-14"]: models.append('ViT-H-14') if st.session_state["ViT-g-14"]: models.append('ViT-g-14') + + if st.session_state["ViTB32"]: + models.append('ViT-B/32') + if st.session_state['ViTB16']: + models.append('ViT-B/16') + if st.session_state["ViTL14_336px"]: models.append('ViT-L/14@336px') if st.session_state["RN101"]: @@ -337,7 +346,7 @@ def img2txt(): if st.session_state["RN50x16"]: models.append('RN50x16') if st.session_state["RN50x64"]: - models.append('RN50x64') + models.append('RN50x64') # if str(image_path_or_url).startswith('http://') or str(image_path_or_url).startswith('https://'): #image = Image.open(requests.get(image_path_or_url, stream=True).raw).convert('RGB') @@ -375,14 +384,16 @@ def layout(): #st.subheader("Input Image") st.session_state["uploaded_image"] = st.file_uploader('Input Image', type=['png', 'jpg', 'jpeg'], accept_multiple_files=True) - st.subheader("CLIP models") - with st.expander("Stable Diffusion", expanded=True): - st.session_state["ViTL14"] = st.checkbox("ViTL14", value=True, help="For StableDiffusion you can just use ViTL14.") - - with st.expander("Others"): - st.info("For DiscoDiffusion and JAX enable all the same models here as you intend to use when generating your images.") + with st.expander("CLIP models", expanded=True): + st.session_state["ViT-L/14"] = st.checkbox("ViT-L/14", value=True, help="ViT-L/14 model.") st.session_state["ViT-H-14"] = st.checkbox("ViT-H-14", value=False, help="ViT-H-14 model.") st.session_state["ViT-g-14"] = st.checkbox("ViT-g-14", value=False, help="ViT-g-14 model.") + + + + with st.expander("Others"): + st.info("For DiscoDiffusion and JAX enable all the same models here as you intend to use when generating your images.") + st.session_state["ViTL14_336px"] = st.checkbox("ViTL14_336px", value=False, help="ViTL14_336px model.") st.session_state["ViTB16"] = st.checkbox("ViTB16", value=False, help="ViTB16 model.") st.session_state["ViTB32"] = st.checkbox("ViTB32", value=False, help="ViTB32 model.") @@ -390,8 +401,8 @@ def layout(): st.session_state["RN50x4"] = st.checkbox("RN50x4", value=False, help="RN50x4 model.") st.session_state["RN50x16"] = st.checkbox("RN50x16", value=False, help="RN50x16 model.") st.session_state["RN50x64"] = st.checkbox("RN50x64", value=False, help="RN50x64 model.") - st.session_state["RN101"] = st.checkbox("RN101", value=False, help="RN101 model.") - + st.session_state["RN101"] = st.checkbox("RN101", value=False, help="RN101 model.") + # # st.subheader("Logs:") @@ -448,14 +459,14 @@ def layout(): if generate_button: # if model, pipe, RealESRGAN or GFPGAN is in st.session_state remove the model and pipe form session_state so 
that they are reloaded. - if "model" in st.session_state and st.session_state["defaults"].general.optimized: - del st.session_state["model"] - if "pipe" in st.session_state and st.session_state["defaults"].general.optimized: - del st.session_state["pipe"] - if "RealESRGAN" in st.session_state and st.session_state["defaults"].general.optimized: - del st.session_state["RealESRGAN"] - if "GFPGAN" in st.session_state and st.session_state["defaults"].general.optimized: - del st.session_state["GFPGAN"] + if "model" in server_state and st.session_state["defaults"].general.optimized: + del server_state["model"] + if "pipe" in server_state and st.session_state["defaults"].general.optimized: + del server_state["pipe"] + if "RealESRGAN" in server_state and st.session_state["defaults"].general.optimized: + del server_state["RealESRGAN"] + if "GFPGAN" in server_state and st.session_state["defaults"].general.optimized: + del server_state["GFPGAN"] # run clip interrogator img2txt() diff --git a/scripts/modeldownload.py b/scripts/modeldownload.py index f98fee585..acaa1d319 100644 --- a/scripts/modeldownload.py +++ b/scripts/modeldownload.py @@ -25,30 +25,30 @@ def updateModels(): # os.system('wget https://cdn-lfs.huggingface.co/repos/ab/41/ab41ccb635cd5bd124c8eac1b5796b4f64049c9453c4e50d51819468ca69ceb8/14749efc0ae8ef0329391ad4436feb781b402f4fece4883c7ad8d10556d8a36a?response-content-disposition=attachment%3B%20filename%3D%22modelfull.ckpt%22 -o models/ldm/stable-diffusion-v1/model.ckpt') # os.rename('models/ldm/stable-diffusion-v1/modelfull.ckpt','models/ldm/stable-diffusion-v1/model.ckpt') - if op.exists('src/realesrgan/experiments/pretrained_models/RealESRGAN_x4plus.pth') and op.exists('src/realesrgan/experiments/pretrained_models/RealESRGAN_x4plus_anime_6B.pth'): + if op.exists('models/realesrgan/RealESRGAN_x4plus.pth') and op.exists('models/realesrgan/RealESRGAN_x4plus_anime_6B.pth'): pass else: - os.system('wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P src/realesrgan/experiments/pretrained_models') - os.system('wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P src/realesrgan/experiments/pretrained_models') + os.system('wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth -P models/realesrgan') + os.system('wget https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth -P models/realesrgan') - if op.exists('src/gfpgan/experiments/pretrained_models/GFPGANv1.3.pth'): + if op.exists('models/gfpgan/GFPGANv1.3.pth'): pass else: - os.system('wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth -P src/gfpgan/experiments/pretrained_models') + os.system('wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth -P models/gfpgan') - if op.exists('src/latent-diffusion'): + if op.exists('models/ldsr'): pass else: os.system('git clone https://github.com/devilismyfriend/latent-diffusion.git') - os.system('mv latent-diffusion src/latent-diffusion') + os.system('mv latent-diffusion models/ldsr') - if op.exists('src/latent-diffusion/experiments/pretrained_models/model.ckpt'): + if op.exists('models/ldsr/model.ckpt'): pass else: - os.mkdir('src/latent-diffusion/experiments') - os.mkdir('src/latent-diffusion/experiments/pretrained_models') - os.system('wget https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1 -o src/latent-diffusion/experiments/pretrained_models/project.yaml') - # 
os.rename('src/latent-diffusion/experiments/pretrained_models/index.html?dl=1', 'src/latent-diffusion/experiments/pretrained_models/project.yaml') - os.system('wget https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1 -o src/latent-diffusion/experiments/pretrained_models/model.ckpt') - # os.rename('src/latent-diffusion/experiments/pretrained_models/index.html?dl=1', 'src/latent-diffusion/experiments/pretrained_models/model.ckpt') + os.mkdir('models/ldsr/experiments') + os.mkdir('models/ldsr') + os.system('wget https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1 -o models/ldsr/project.yaml') + # os.rename('models/ldsr/index.html?dl=1', 'models/ldsr/project.yaml') + os.system('wget https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1 -o models/ldsr/model.ckpt') + # os.rename('models/ldsr/index.html?dl=1', 'models/ldsr/model.ckpt') \ No newline at end of file diff --git a/scripts/relauncher.py b/scripts/relauncher.py index 3ce2e9173..e56cde990 100644 --- a/scripts/relauncher.py +++ b/scripts/relauncher.py @@ -40,6 +40,7 @@ additional_arguments = "" parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. This value increases the amount of logging seen in your screen") parser.add_argument('-n', '--horde_name', action="store", required=False, type=str, help="The server name for the Horde. It will be shown to the world and there can be only one.") parser.add_argument('--bridge', action="store_true", required=False, default=False, help="When specified, start the stable horde bridge instead of the webui.") args = parser.parse_args() @@ -48,6 +49,10 @@ additional_arguments += f' --bridge' if args.horde_name: additional_arguments += f' --horde_name "{args.horde_name}"' + if args.verbosity: + for iter in range(args.verbosity): + additional_arguments += ' -v' + print(f"Additional args: {additional_arguments}") diff --git a/scripts/scn2img.py b/scripts/scn2img.py index ef96d3efd..55837ee36 100644 --- a/scripts/scn2img.py +++ b/scripts/scn2img.py @@ -15,6 +15,7 @@ import torch from frontend.job_manager import JobInfo +from frontend.image_metadata import ImageMetadata scn2img_cache = { "seed": None, @@ -279,8 +280,17 @@ def get_scn2img(MemUsageMonitor:Type, save_sample:Callable, get_next_sequence_nu opt = opt or argparse.Namespace() def next_seed(s): - s = seed_to_int(s) - return random.Random(s).randint(0, 2**32 - 1) + return random.Random(seed_to_int(s)).randint(0, 2**32 - 1) + + class SeedGenerator: + def __init__(self, seed): + self._seed = seed_to_int(seed) + def next_seed(self): + seed = self._seed + self._seed = next_seed(self._seed) + return seed + def peek_seed(self): + return self._seed def scn2img(prompt: str, toggles: List[int], seed: Union[int, str, None], fp = None, job_info: JobInfo = None): global scn2img_cache @@ -336,11 +346,6 @@ def log_exception(*args, **kwargs): log_info("scn2img_cache") log_info(list(scn2img_cache["cache"].keys())) - def gen_seeds(seed): - while True: - yield seed - seed = next_seed(seed) - def is_seed_invalid(s): result = ( (type(s) != int) @@ -631,20 +636,28 @@ def parse_scene_args(scene): return scene - def save_sample_scn2img(img, obj): + def save_sample_scn2img(img, obj, name, seed): if img is None: return base_count = get_next_sequence_number(outpath) filename = "[SEED]_result" filename = f"{base_count:05}-" + filename filename = filename.replace("[SEED]", str(seed)) - 
save_sample(img, outpath, filename, jpg_sample, None, None, None, None, None, False, None, None, None, None, None, None, None, None, None, False) - if write_info_files or write_sample_info_to_log_file: - info_dict = { - "prompt": prompt, - "scene_object": str(obj), - "seed": seed - } + wrapped = SceneObject( + func=name, + title=obj.title, + args={"seed":seed}, + depth=obj.depth-1, + children=[obj] + ) + info_dict = { + "prompt": prompt, + "scene_object": str(wrapped), + "seed": seed + } + metadata = ImageMetadata(prompt=info_dict["scene_object"], seed=seed, width=img.size[0], height=img.size[0]) + ImageMetadata.set_on_image(img, metadata) + save_sample(img, outpath, filename, jpg_sample, None, None, None, None, None, False, None, None, None, None, None, None, None, None, None, False, False) if write_info_files: filename_i = os.path.join(outpath, filename) with open(f"{filename_i}.yaml", "w", encoding="utf8") as f: @@ -931,11 +944,11 @@ def output_img(img): output_image_set.add(img_id) output_images.append(img) - def render_intermediate(img, obj): + def render_intermediate(img, obj, name, seed): if output_intermediates: output_img(img) if not skip_save: - save_sample_scn2img(img, obj) + save_sample_scn2img(img, obj, name, seed) return img def render_3d(img, obj): @@ -944,7 +957,7 @@ def render_3d(img, obj): if obj["transform3d"] == True: d2r = math.pi / 180.0 depth_model = obj["transform3d_depth_model"] if "transform3d_depth_model" in obj else 1 - depth_near = obj["transform3d_depth_near"] if "transform3d_depth_near" in obj else 0.1 + depth_near = obj["transform3d_depth_near"] if "transform3d_depth_near" in obj else 0.1 depth_scale = obj["transform3d_depth_scale"] if "transform3d_depth_scale" in obj else 1.0 from_hfov = obj["transform3d_from_hfov"] if "transform3d_from_hfov" in obj else (45*d2r) from_pose = obj["transform3d_from_pose"] if "transform3d_from_pose" in obj else (0,0,0, 0,0,0) @@ -983,6 +996,7 @@ def render_3d(img, obj): return img def render_image(seeds, obj): + start_seed = seeds.peek_seed() img = create_image(obj["size"], obj["color"]) img = blend_objects( seeds, @@ -993,7 +1007,7 @@ def render_image(seeds, obj): img = resize_image(img, obj["resize"], obj["crop"]) # if img is None: log_warn(f"result of render_image({obj}) is None") img = render_3d(img, obj) - img = render_intermediate(img, obj) + img = render_intermediate(img, obj, "render_image", start_seed) return img def prepare_img2img_kwargs(seeds, obj, img): @@ -1025,7 +1039,7 @@ def prepare_img2img_kwargs(seeds, obj, img): if is_seed_valid(s): img2img_kwargs["seed"] = int(s) else: - img2img_kwargs["seed"] = next(seeds) + img2img_kwargs["seed"] = seeds.next_seed() log_info('img2img_kwargs["seed"]', img2img_kwargs["seed"]) @@ -1047,7 +1061,7 @@ def prepare_img2img_kwargs(seeds, obj, img): "image": img.convert("RGB").convert("RGBA"), "mask": img.getchannel("A") } - # render_intermediate(img2img_kwargs["init_info_mask"]["mask"].convert("RGBA"), obj) + # render_intermediate(img2img_kwargs["init_info_mask"]["mask"].convert("RGBA"), obj, "img2img_init_info_mask", start_seed) log_info("img2img_kwargs") log_info(img2img_kwargs) @@ -1079,7 +1093,7 @@ def prepare_txt2img_kwargs(seeds, obj): if is_seed_valid(s): txt2img_kwargs["seed"] = int(s) else: - txt2img_kwargs["seed"] = next(seeds) + txt2img_kwargs["seed"] = seeds.next_seed() log_info('txt2img_kwargs["seed"]', txt2img_kwargs["seed"]) @@ -1102,6 +1116,7 @@ def prepare_txt2img_kwargs(seeds, obj): return txt2img_kwargs def render_img2img(seeds, obj): + start_seed = 
seeds.peek_seed() global scn2img_cache if obj["size"] is None: obj["size"] = (img2img_defaults["width"], img2img_defaults["height"]) @@ -1112,7 +1127,7 @@ def render_img2img(seeds, obj): obj.children ) img = render_mask(seeds, obj, img) - img = render_intermediate(img, obj) + img = render_intermediate(img, obj, "render_img2img_input", start_seed) img2img_kwargs = prepare_img2img_kwargs(seeds, obj, img) @@ -1161,10 +1176,11 @@ def render_img2img(seeds, obj): img = resize_image(img, obj["resize"], obj["crop"]) if img is None: log_warn(f"result of render_img2img({obj}) is None") img = render_3d(img, obj) - img = render_intermediate(img, obj) + img = render_intermediate(img, obj, "render_img2img", start_seed) return img def render_txt2img(seeds, obj): + start_seed = seeds.peek_seed() global scn2img_cache txt2img_kwargs = prepare_txt2img_kwargs(seeds, obj) @@ -1213,14 +1229,16 @@ def render_txt2img(seeds, obj): img = resize_image(img, obj["resize"], obj["crop"]) if img is None: log_warn(f"result of render_txt2img({obj}) is None") img = render_3d(img, obj) - img = render_intermediate(img, obj) + img = render_intermediate(img, obj, "render_txt2img", start_seed) return img def render_object(seeds, obj): # log_trace(f"render_object({str(obj)})") if "initial_seed" in obj: - seeds = gen_seeds(obj["initial_seed"]) + # create new generator rather than resetting current generator, + # so that seeds generator from function argument is not changed. + seeds = SeedGenerator(obj["initial_seed"]) if obj.func == "scene": assert(len(obj.children) == 1) @@ -1240,7 +1258,9 @@ def render_scn2img(seeds, obj): result = [] if "initial_seed" in obj: - seeds = gen_seeds(obj["initial_seed"]) + # create new generator rather than resetting current generator, + # so that seeds generator from function argument is not changed. + seeds = SeedGenerator(obj["initial_seed"]) if obj.func == "scn2img": # Note on seed generation and for-loops instead of @@ -1257,6 +1277,7 @@ def render_scn2img(seeds, obj): result.append(render_object(seeds, obj)) return result + start_seed = seeds.peek_seed() for img in render_scn2img(seeds, scene): if output_intermediates: # img already in output, do nothing here @@ -1267,7 +1288,7 @@ def render_scn2img(seeds, obj): if skip_save: # individual image save was skipped, # we need to save them now - save_sample_scn2img(img, scene) + save_sample_scn2img(img, scene, "render_scene", start_seed) return output_images @@ -1285,7 +1306,7 @@ def render_scn2img(seeds, obj): log_info(scene) # log_info("comments", comments) - render_scene(output_images, scene, gen_seeds(seed)) + render_scene(output_images, scene, SeedGenerator(seed)) log_info("output_images", output_images) # log_info("comments", comments) diff --git a/scripts/sd_utils.py b/scripts/sd_utils.py index 50707e922..2b12425eb 100644 --- a/scripts/sd_utils.py +++ b/scripts/sd_utils.py @@ -12,9 +12,10 @@ # GNU Affero General Public License for more details. # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# along with this program. If not, see . # base webui import and utils. #from webui_streamlit import st +import gfpgan import hydralit as st @@ -65,7 +66,7 @@ from ldm.util import ismap -# Temp imports +# Temp imports #from basicsr.utils.registry import ARCH_REGISTRY @@ -82,7 +83,7 @@ # remove some annoying deprecation warnings that show every now and then. 
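A minimal, self-contained sketch of the seed handling introduced in scn2img.py above: seeds come from a deterministic chain, next_seed() consumes one link, peek_seed() records the seed an object starts from without advancing the chain, and building a fresh SeedGenerator for an "initial_seed" leaves the caller's chain untouched. This mirrors the class in the diff but simplifies seed_to_int to plain ints.

import random

def _next_seed(s: int) -> int:
    # derive the following seed deterministically from the current one
    return random.Random(s).randint(0, 2**32 - 1)

class SeedGenerator:
    """Deterministic seed chain: the same starting seed yields the same sequence."""
    def __init__(self, seed: int):
        self._seed = seed
    def next_seed(self) -> int:
        seed = self._seed
        self._seed = _next_seed(self._seed)
        return seed
    def peek_seed(self) -> int:
        # look at the upcoming seed without consuming it
        return self._seed

seeds = SeedGenerator(42)
start = seeds.peek_seed()                        # seed this render starts from
first, second = seeds.next_seed(), seeds.next_seed()
assert start == first
assert SeedGenerator(42).next_seed() == first    # reproducible across runs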
warnings.filterwarnings("ignore", category=DeprecationWarning) -warnings.filterwarnings("ignore", category=UserWarning) +warnings.filterwarnings("ignore", category=UserWarning) # this is a fix for Windows users. Without it, javascript files will be served with text/html content-type and the bowser will not show any UI mimetypes.init() @@ -94,16 +95,19 @@ if not "defaults" in st.session_state: st.session_state["defaults"] = {} - + st.session_state["defaults"] = OmegaConf.load("configs/webui/webui_streamlit.yaml") if (os.path.exists("configs/webui/userconfig_streamlit.yaml")): user_defaults = OmegaConf.load("configs/webui/userconfig_streamlit.yaml") - st.session_state["defaults"] = OmegaConf.merge(st.session_state["defaults"], user_defaults) + try: + st.session_state["defaults"] = OmegaConf.merge(st.session_state["defaults"], user_defaults) + except KeyError: + st.experimental_rerun() else: OmegaConf.save(config=st.session_state.defaults, f="configs/webui/userconfig_streamlit.yaml") loaded = OmegaConf.load("configs/webui/userconfig_streamlit.yaml") - assert st.session_state.defaults == loaded + assert st.session_state.defaults == loaded if (os.path.exists(".streamlit/config.toml")): st.session_state["streamlit_config"] = toml.load(".streamlit/config.toml") @@ -112,7 +116,7 @@ if os.path.exists("scripts/modeldownload.py"): import modeldownload modeldownload.updateModels() - + # #app = st.HydraApp(title='Stable Diffusion WebUI', favicon="", sidebar_state="expanded", #hide_streamlit_markers=False, allow_url_nav=True , clear_cross_app_sessions=False) @@ -155,7 +159,7 @@ if save_quality < 0: # e.g. webp:-100 for lossless mode save_lossless = True save_quality = abs(save_quality) - + # this should force GFPGAN and RealESRGAN onto the selected gpu as well os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # see issue #152 os.environ["CUDA_VISIBLE_DEVICES"] = str(st.session_state["defaults"].general.gpu) @@ -164,7 +168,7 @@ # # functions to load css locally OR remotely starts here. Options exist for future flexibility. Called as st.markdown with unsafe_allow_html as css injection -# TODO, maybe look into async loading the file especially for remote fetching +# TODO, maybe look into async loading the file especially for remote fetching def local_css(file_name): with open(file_name) as f: st.markdown(f'', unsafe_allow_html=True) @@ -177,7 +181,7 @@ def load_css(isLocal, nameOrURL): local_css(nameOrURL) else: remote_css(nameOrURL) - + def set_page_title(title): """ Simple function to allows us to change the title dynamically. @@ -206,7 +210,7 @@ def set_page_title(title): title.text = '{title}' " /> """) - + def human_readable_size(size, decimal_places=3): """Return a human readable size from bytes.""" for unit in ['B','KB','MB','GB','TB']: @@ -216,15 +220,15 @@ def human_readable_size(size, decimal_places=3): return f"{size:.{decimal_places}f}{unit}" -def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_model='GFPGANv1.3', use_RealESRGAN=False, RealESRGAN_model="RealESRGAN_x4plus", +def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_model='GFPGANv1.4', use_RealESRGAN=False, RealESRGAN_model="RealESRGAN_x4plus", CustomModel_available=False, custom_model="Stable Diffusion v1.4"): """Load the different models. We also reuse the models that are already in memory to speed things up instead of loading them again. 
""" - + print ("Loading models.") if "progress_bar_text" in st.session_state: st.session_state["progress_bar_text"].text("") - + # Generate random run ID # Used to link runs linked w/ continue_prev_run which is not yet implemented @@ -241,7 +245,7 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m else: if "LDSR" in server_state: del server_state["LDSR"] - + # Load GFPGAN if os.path.exists(st.session_state["defaults"].general.LDSR_dir): try: @@ -250,11 +254,11 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m except Exception: import traceback print(f"Error loading LDSR:", file=sys.stderr) - print(traceback.format_exc(), file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) else: if "LDSR" in server_state: - del server_state["LDSR"] - + del server_state["LDSR"] + with server_state_lock["GFPGAN"]: if use_GFPGAN: if "GFPGAN" in server_state and server_state["GFPGAN"].name == GFPGAN_model: @@ -262,7 +266,7 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m else: if "GFPGAN" in server_state: del server_state["GFPGAN"] - + # Load GFPGAN if os.path.exists(st.session_state["defaults"].general.GFPGAN_dir): try: @@ -271,40 +275,40 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m except Exception: import traceback print(f"Error loading GFPGAN:", file=sys.stderr) - print(traceback.format_exc(), file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) else: if "GFPGAN" in server_state: - del server_state["GFPGAN"] + del server_state["GFPGAN"] with server_state_lock["RealESRGAN"]: if use_RealESRGAN: if "RealESRGAN" in server_state and server_state["RealESRGAN"].model.name == RealESRGAN_model: print("RealESRGAN already loaded") else: - #Load RealESRGAN + #Load RealESRGAN try: # We first remove the variable in case it has something there, # some errors can load the model incorrectly and leave things in memory. del server_state["RealESRGAN"] except KeyError: pass - + if os.path.exists(st.session_state["defaults"].general.RealESRGAN_dir): # st.session_state is used for keeping the models in memory across multiple pages or runs. server_state["RealESRGAN"] = load_RealESRGAN(RealESRGAN_model) print("Loaded RealESRGAN with model "+ server_state["RealESRGAN"].model.name) - + else: if "RealESRGAN" in server_state: - del server_state["RealESRGAN"] + del server_state["RealESRGAN"] with server_state_lock["model"], server_state_lock["modelCS"], server_state_lock["modelFS"], server_state_lock["loaded_model"]: - + if "model" in server_state: if "model" in server_state and server_state["loaded_model"] == custom_model: - # TODO: check if the optimized mode was changed? + # TODO: check if the optimized mode was changed? print("Model already loaded") - + return else: try: @@ -312,46 +316,46 @@ def load_models(use_LDSR = False, LDSR_model='model', use_GFPGAN=False, GFPGAN_m del server_state["modelCS"] del server_state["modelFS"] del server_state["loaded_model"] - + except KeyError: pass - + # if the model from txt2vid is in memory we need to remove it to improve performance. 
with server_state_lock["pipe"]: if "pipe" in server_state: - del server_state["pipe"] - + del server_state["pipe"] + if "textual_inversion" in st.session_state: del st.session_state['textual_inversion'] - + # At this point the model is either # not loaded yet or have been evicted: # load new model into memory server_state["custom_model"] = custom_model - + config, device, model, modelCS, modelFS = load_sd_model(custom_model) - + server_state["device"] = device server_state["model"] = model - + server_state["modelCS"] = modelCS server_state["modelFS"] = modelFS server_state["loaded_model"] = custom_model - - #trying to disable multiprocessing as it makes it so streamlit cant stop when the + + #trying to disable multiprocessing as it makes it so streamlit cant stop when the # model is loaded in memory and you need to kill the process sometimes. - + server_state["model"].args.use_multiprocessing_for_evaluation = False - - + + if st.session_state.defaults.general.enable_attention_slicing: - server_state["model"].enable_attention_slicing() - - if st.session_state.defaults.general.enable_minimal_memory_usage: - server_state["model"].enable_minimal_memory_usage() - + server_state["model"].enable_attention_slicing() + + if st.session_state.defaults.general.enable_minimal_memory_usage: + server_state["model"].enable_minimal_memory_usage() + print("Model loaded.") - + return True @@ -401,7 +405,7 @@ def run(self): except: print(f"[{self.name}] Unable to initialize NVIDIA management. No memory stats. \n") return - print(f"[{self.name}] Recording max memory usage...\n") + print(f"[{self.name}] Recording memory usage...\n") # Missing context #handle = pynvml.nvmlDeviceGetHandleByIndex(st.session_state['defaults'].general.gpu) handle = pynvml.nvmlDeviceGetHandleByIndex(0) @@ -485,9 +489,9 @@ def _fft2(data): out_fft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128) out_fft[:,:] = np.fft.fft2(np.fft.fftshift(data),norm="ortho") out_fft[:,:] = np.fft.ifftshift(out_fft[:,:]) - + return out_fft - + def _ifft2(data): if data.ndim > 2: # has channels out_ifft = np.zeros((data.shape[0], data.shape[1], data.shape[2]), dtype=np.complex128) @@ -499,14 +503,14 @@ def _ifft2(data): out_ifft = np.zeros((data.shape[0], data.shape[1]), dtype=np.complex128) out_ifft[:,:] = np.fft.ifft2(np.fft.fftshift(data),norm="ortho") out_ifft[:,:] = np.fft.ifftshift(out_ifft[:,:]) - + return out_ifft - + def _get_gaussian_window(width, height, std=3.14, mode=0): window_scale_x = float(width / min(width, height)) window_scale_y = float(height / min(width, height)) - + window = np.zeros((width, height)) x = (np.arange(width) / width * 2. - 1.) * window_scale_x for y in range(height): @@ -515,7 +519,7 @@ def _get_gaussian_window(width, height, std=3.14, mode=0): window[:, y] = np.exp(-(x**2+fy**2) * std) else: window[:, y] = (1/((x**2+1.) * (fy**2+1.))) ** (std/3.14) # hey wait a minute that's not gaussian - + return window def _get_masked_window_rgb(np_mask_grey, hardness=1.): @@ -528,14 +532,14 @@ def _get_masked_window_rgb(np_mask_grey, hardness=1.): np_mask_rgb[:,:,c] = hardened[:] return np_mask_rgb -def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation): +def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation): """ Explanation: Getting good results in/out-painting with stable diffusion can be challenging. 
Although there are simpler effective solutions for in-painting, out-painting can be especially challenging because there is no color data in the masked area to help prompt the generator. Ideally, even for in-painting we'd like work effectively without that data as well. Provided here is my take on a potential solution to this problem. - + By taking a fourier transform of the masked src img we get a function that tells us the presence and orientation of each feature scale in the unmasked src. Shaping the init/seed noise for in/outpainting to the same distribution of feature scales, orientations, and positions increases output coherence by helping keep features aligned. This technique is applicable to any continuous generation task such as audio or video, each of which can @@ -543,61 +547,61 @@ def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation): or stereo sound the "color tone" or histogram of the seed noise can be matched to improve quality (using scikit-image currently) This method is quite robust and has the added benefit of being fast independently of the size of the out-painted area. The effects of this method include things like helping the generator integrate the pre-existing view distance and camera angle. - + Carefully managing color and brightness with histogram matching is also essential to achieving good coherence. - + noise_q controls the exponent in the fall-off of the distribution can be any positive number, lower values means higher detail (range > 0, default 1.) color_variation controls how much freedom is allowed for the colors/palette of the out-painted area (range 0..1, default 0.01) This code is provided as is under the Unlicense (https://unlicense.org/) Although you have no obligation to do so, if you found this code helpful please find it in your heart to credit me [parlance-zz]. - + Questions or comments can be sent to parlance@fifth-harmonic.com (https://github.com/parlance-zz/) This code is part of a new branch of a discord bot I am working on integrating with diffusers (https://github.com/parlance-zz/g-diffuser-bot) - + """ global DEBUG_MODE global TMP_ROOT_PATH - + width = _np_src_image.shape[0] height = _np_src_image.shape[1] num_channels = _np_src_image.shape[2] np_src_image = _np_src_image[:] * (1. - np_mask_rgb) - np_mask_grey = (np.sum(np_mask_rgb, axis=2)/3.) - np_src_grey = (np.sum(np_src_image, axis=2)/3.) + np_mask_grey = (np.sum(np_mask_rgb, axis=2)/3.) + np_src_grey = (np.sum(np_src_image, axis=2)/3.) 
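The docstring above describes the core trick: take an FFT of the (windowed) unmasked source, keep its magnitude spectrum, and use it to shape white noise so the init noise shares the source's feature scales and orientations. A stripped-down, single-channel sketch of just that shaping step, assuming a float image in [0, 1] and skipping the windowing and histogram matching done by the full function:

import numpy as np

def shape_noise_to_image(src: np.ndarray, noise_q: float = 1.0) -> np.ndarray:
    """Shape white noise so its spectrum follows the source image's spectrum."""
    src_fft = np.fft.fft2(src, norm="ortho")
    src_dist = np.absolute(src_fft)             # feature-scale statistics
    src_phase = src_fft / (src_dist + 1e-12)    # orientation/position information

    noise_fft = np.fft.fft2(np.random.random_sample(src.shape), norm="ortho")
    shaped_fft = np.absolute(noise_fft) ** 2 * (src_dist ** noise_q) * src_phase

    shaped = np.real(np.fft.ifft2(shaped_fft, norm="ortho"))
    shaped -= shaped.min()
    shaped /= shaped.max()
    return shaped

shaped_noise = shape_noise_to_image(np.random.rand(64, 64))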
all_mask = np.ones((width, height), dtype=bool) img_mask = np_mask_grey > 1e-6 ref_mask = np_mask_grey < 1e-3 - + windowed_image = _np_src_image * (1.-_get_masked_window_rgb(np_mask_grey)) windowed_image /= np.max(windowed_image) windowed_image += np.average(_np_src_image) * np_mask_rgb# / (1.-np.average(np_mask_rgb)) # rather than leave the masked area black, we get better results from fft by filling the average unmasked color #windowed_image += np.average(_np_src_image) * (np_mask_rgb * (1.- np_mask_rgb)) / (1.-np.average(np_mask_rgb)) # compensate for darkening across the mask transition area #_save_debug_img(windowed_image, "windowed_src_img") - + src_fft = _fft2(windowed_image) # get feature statistics from masked src img src_dist = np.absolute(src_fft) src_phase = src_fft / src_dist #_save_debug_img(src_dist, "windowed_src_dist") - + noise_window = _get_gaussian_window(width, height, mode=1) # start with simple gaussian noise noise_rgb = np.random.random_sample((width, height, num_channels)) - noise_grey = (np.sum(noise_rgb, axis=2)/3.) + noise_grey = (np.sum(noise_rgb, axis=2)/3.) noise_rgb *= color_variation # the colorfulness of the starting noise is blended to greyscale with a parameter for c in range(num_channels): noise_rgb[:,:,c] += (1. - color_variation) * noise_grey - + noise_fft = _fft2(noise_rgb) for c in range(num_channels): noise_fft[:,:,c] *= noise_window noise_rgb = np.real(_ifft2(noise_fft)) shaped_noise_fft = _fft2(noise_rgb) shaped_noise_fft[:,:,:] = np.absolute(shaped_noise_fft[:,:,:])**2 * (src_dist ** noise_q) * src_phase # perform the actual shaping - + brightness_variation = 0.#color_variation # todo: temporarily tieing brightness variation to color variation for now contrast_adjusted_np_src = _np_src_image[:] * (brightness_variation + 1.) - brightness_variation * 2. - + # scikit-image is used for histogram matching, very convenient! shaped_noise = np.real(_ifft2(shaped_noise_fft)) shaped_noise -= np.min(shaped_noise) @@ -605,20 +609,20 @@ def get_matched_noise(_np_src_image, np_mask_rgb, noise_q, color_variation): shaped_noise[img_mask,:] = skimage.exposure.match_histograms(shaped_noise[img_mask,:]**1., contrast_adjusted_np_src[ref_mask,:], channel_axis=1) shaped_noise = _np_src_image[:] * (1. - np_mask_rgb) + shaped_noise * np_mask_rgb #_save_debug_img(shaped_noise, "shaped_noise") - + matched_noise = np.zeros((width, height, num_channels)) matched_noise = shaped_noise[:] #matched_noise[all_mask,:] = skimage.exposure.match_histograms(shaped_noise[all_mask,:], _np_src_image[ref_mask,:], channel_axis=1) #matched_noise = _np_src_image[:] * (1. - np_mask_rgb) + matched_noise * np_mask_rgb - + #_save_debug_img(matched_noise, "matched_noise") - + """ todo: color_variation doesnt have to be a single number, the overall color tone of the out-painted area could be param controlled """ - - return np.clip(matched_noise, 0., 1.) + + return np.clip(matched_noise, 0., 1.) # @@ -672,11 +676,11 @@ def find_noise_for_image(model, device, init_image, prompt, steps=200, cond_scal # def folder_picker(label="Select:", value="", help="", folder_button_label="Select", folder_button_help="", folder_button_key=""): - """A folder picker that has a text_input field next to it and a button to select the folder. + """A folder picker that has a text_input field next to it and a button to select the folder. 
Returns the text_input field with the folder path.""" import tkinter as tk from tkinter import filedialog - import string + import string # Set up tkinter root = tk.Tk() @@ -697,22 +701,22 @@ def folder_picker(label="Select:", value="", help="", folder_button_label="Selec # Folder picker button #st.title('Folder Picker') #st.write('Please select a folder:') - + # Create a label and add a random number of invisible characters # to it so no two buttons inside a form are the same. #folder_button_label = ''.join(random.choice(f"{folder_button_label}") for _ in range(5)) folder_button_label = f"{str(folder_button_label)}{'‎' * random.randint(1, 500)}" clicked = folder_button_key + '‎' * random.randint(5, 500) - + #try: #clicked = folder_picker.button(folder_button_label, help=folder_button_help, key=folder_button_key) #except StreamlitAPIException: clicked = folder_picker.form_submit_button(folder_button_label, help=folder_button_help) if clicked: - dirname = dirname.text_input(label, filedialog.askdirectory(master=root), help=help) + dirname = dirname.text_input(label, filedialog.askdirectory(master=root), help=help) else: - dirname = dirname.text_input(label, value, help=help) + dirname = dirname.text_input(label, value, help=help) return dirname @@ -817,7 +821,7 @@ def get_model(self): model, step = self.load_model_from_config(config, path_ckpt) return model - + def get_custom_cond(mode): dest = "data/example_conditioning" @@ -868,7 +872,7 @@ def select_cond_path(mode): return selected_path ''' - + ''' # Google Collab stuff @@ -1031,38 +1035,33 @@ def get_cond(mode, selected_path): @torch.no_grad() - + @torch.no_grad() - - def superResolution(self,image,ddimSteps=100,preDownScale='None',postDownScale='None'): + + def superResolution(self, image, ddimSteps = 100, preDownScale = 1, postDownScale = 1, downsample_method= "Lanczos"): + """ + #Run settings + + diffusion_steps = int(ddimSteps) #@param [25, 50, 100, 250, 500, 1000] + eta = 1.0 #@param {type: 'raw'} + stride = 0 #not working atm + + # ####Scaling options: + # Downsampling to 256px first will often improve the final image and runs faster. + + # You can improve sharpness without upscaling by upscaling and then downsampling to the original size (i.e. Super Resolution) + preDownScale: Values ['None', '2', '4'] + + postDownScale: Values ['None', 'Original Size', '2', '4'] + + # Nearest gives sharper results, but may look more pixellated. Lancoz is much higher quality, but result may be less crisp. 
+ downsample_method = 'Lanczos' #@param ['Nearest', 'Lanczos'] + """ + diffMode = 'superresolution' model = self.load_model_from_config() - #@title Import location - #@markdown ***File height and width should be multiples of 64, or image will be padded.*** - - #@markdown *To change upload settings without adding more, run and cancel upload* - #import_method = 'Directory' #@param ['Google Drive', 'Upload'] - #output_subfolder_name = 'processed' #@param {type: 'string'} - - #@markdown Drive method options: - #drive_directory = '/content/drive/MyDrive/upscaleTest' #@param {type: 'string'} - - #@markdown Upload method options: - #remove_previous_uploads = False #@param {type: 'boolean'} - #save_output_to_drive = False #@param {type: 'boolean'} - #zip_if_not_drive = False #@param {type: 'boolean'} - ''' - os.makedirs(pathInput+'/content/input'.replace('\\',os.sep).replace('/',os.sep), exist_ok=True) - output_directory = os.getcwd()+f'/content/output/{output_subfolder_name}'.replace('\\',os.sep).replace('/',os.sep) - os.makedirs(output_directory, exist_ok=True) - uploaded_img = pathInput+'/content/input/'.replace('\\',os.sep).replace('/',os.sep) - pathInput, dirsInput, filesInput = next(os.walk(pathInput+'/content/input').replace('\\',os.sep).replace('/',os.sep)) - file_count = len(filesInput) - print(f'Found {file_count} files total') - ''' - #Run settings @@ -1072,49 +1071,34 @@ def superResolution(self,image,ddimSteps=100,preDownScale='None',postDownScale=' # ####Scaling options: # Downsampling to 256px first will often improve the final image and runs faster. - + # You can improve sharpness without upscaling by upscaling and then downsampling to the original size (i.e. Super Resolution) - pre_downsample = preDownScale #@param ['None', '1/2', '1/4'] + pre_downsample = preDownScale #@param ['None', '2', '4'] - post_downsample = postDownScale #@param ['None', 'Original Size', '1/2', '1/4'] + post_downsample = postDownScale #@param ['None', 'Original Size', '2', '4'] # Nearest gives sharper results, but may look more pixellated. Lancoz is much higher quality, but result may be less crisp. 
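With the new superResolution signature above, preDownScale and postDownScale are plain integer factors (1 meaning "leave as is") rather than option strings like '1/2', so the label-to-factor branching disappears. A small sketch of the pre-downsample / upscale / post-downsample flow, with fake_upscale_4x standing in for the actual LDSR pass:

from PIL import Image

def fake_upscale_4x(img: Image.Image) -> Image.Image:
    # stand-in for the diffusion upscaler
    return img.resize((img.width * 4, img.height * 4), Image.LANCZOS)

def super_resolution(img: Image.Image, pre_down: int = 1, post_down: int = 1) -> Image.Image:
    if pre_down != 1:    # downsampling first runs faster and often improves the result
        img = img.resize((img.width // pre_down, img.height // pre_down), Image.LANCZOS)
    img = fake_upscale_4x(img)
    if post_down != 1:   # upscale then shrink back down for a sharpening effect
        img = img.resize((img.width // post_down, img.height // post_down), Image.LANCZOS)
    return img

result = super_resolution(Image.new("RGB", (256, 256)), pre_down=2, post_down=2)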
- downsample_method = 'Lanczos' #@param ['Nearest', 'Lanczos'] + #downsample_method = 'Lanczos' #@param ['Nearest', 'Lanczos'] overwrite_prior_runs = True #@param {type: 'boolean'} - #pathProcessed, dirsProcessed, filesProcessed = next(os.walk(output_directory)) - - #for img in filesInput: - # if img in filesProcessed and overwrite_prior_runs is False: - # print(f'Skipping {img}: Already processed') - # continue gc.collect() torch.cuda.empty_cache() - #dir = pathInput - #filepath = os.path.join(dir, img).replace('\\',os.sep).replace('/',os.sep) im_og = image width_og, height_og = im_og.size #Downsample Pre - if pre_downsample == '1/2': - downsample_rate = 2 - elif pre_downsample == '1/4': - downsample_rate = 4 - else: - downsample_rate = 1 + + downsample_rate = preDownScale + # get system temp directory - #dir = tempfile.gettempdir() width_downsampled_pre = width_og//downsample_rate height_downsampled_pre = height_og//downsample_rate if downsample_rate != 1: print(f'Downsampling from [{width_og}, {height_og}] to [{width_downsampled_pre}, {height_downsampled_pre}]') im_og = im_og.resize((width_downsampled_pre, height_downsampled_pre), Image.LANCZOS) - #os.makedirs(dir, exist_ok=True) - #im_og.save(dir + '/ldsr/temp.png'.replace('\\',os.sep).replace('/',os.sep)) - #filepath = dir + '/ldsr/temp.png'.replace('\\',os.sep).replace('/',os.sep) logs = self.run(model["model"], im_og, diffMode, diffusion_steps, eta) @@ -1124,16 +1108,11 @@ def superResolution(self,image,ddimSteps=100,preDownScale='None',postDownScale=' sample = (sample + 1.) / 2. * 255 sample = sample.numpy().astype(np.uint8) sample = np.transpose(sample, (0, 2, 3, 1)) - #print(sample.shape) + a = Image.fromarray(sample[0]) #Downsample Post - if post_downsample == '1/2': - downsample_rate = 2 - elif post_downsample == '1/4': - downsample_rate = 4 - else: - downsample_rate = 1 + downsample_rate = postDownScale width, height = a.size width_downsampled_post = width//downsample_rate @@ -1151,19 +1130,10 @@ def superResolution(self,image,ddimSteps=100,preDownScale='None',postDownScale=' print(f'Downsampling from [{width}, {height}] to Original Size [{width_og}, {height_og}]') a = a.resize((width_og, height_og), aliasing) - #display.display(a) - #a.save(f'{output_directory}/{img}') del model gc.collect() torch.cuda.empty_cache() - ''' - if import_method != 'Google Drive' and zip_if_not_drive is True: - print('Zipping files') - current_time = datetime.now().strftime('%y%m%d-%H%M%S_%f') - output_zip_name = 'output'+str(current_time)+'.zip' - #!zip -r {output_zip_name} {output_directory} - print(f'Zipped outputs in {output_zip_name}') - ''' + print(f'Processing finished!') return a @@ -1211,37 +1181,36 @@ def torch_gc(): @retry(tries=5) #@st.experimental_memo(persist="disk", show_spinner=False, suppress_st_warning=True) -def load_GFPGAN(model_name='GFPGANv1.3'): +def load_GFPGAN(model_name='GFPGANv1.4'): #model_name = 'GFPGANv1.3' - - model_path = os.path.join(st.session_state['defaults'].general.GFPGAN_dir, 'experiments/pretrained_models', model_name + '.pth') - - if not os.path.isfile(model_path): - model_path = os.path.join(st.session_state['defaults'].general.GFPGAN_dir, model_name + '.pth') - + + model_path = os.path.join(st.session_state['defaults'].general.GFPGAN_dir, model_name + '.pth') + + #if not os.path.isfile(model_path): + #model_path = os.path.join(st.session_state['defaults'].general.GFPGAN_dir, model_name + '.pth') + if not os.path.isfile(model_path): raise Exception("GFPGAN model not found at path "+model_path) 
sys.path.append(os.path.abspath(st.session_state['defaults'].general.GFPGAN_dir)) from gfpgan import GFPGANer - with server_state_lock['GFPGAN']: if st.session_state['defaults'].general.gfpgan_cpu or st.session_state['defaults'].general.extra_models_cpu: server_state['GFPGAN'] = GFPGANer(model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None, device=torch.device('cpu')) - + elif st.session_state['defaults'].general.extra_models_gpu: server_state['GFPGAN'] = GFPGANer(model_path=model_path, upscale=1, arch='clean', channel_multiplier=2, bg_upsampler=None, device=torch.device(f"cuda:{st.session_state['defaults'].general.gfpgan_gpu}")) else: server_state['GFPGAN'] = GFPGANer(model_path=model_path, upscale=1, arch='clean', - channel_multiplier=2, bg_upsampler=None, + channel_multiplier=2, bg_upsampler=None, device=torch.device(f"cuda:{st.session_state['defaults'].general.gpu}")) - - # Add the model_name to model loaded so we can later + + # Add the model_name to model loaded so we can later # check if its the same when we change it on the UI. server_state['GFPGAN'].name = model_name - + return server_state['GFPGAN'] @retry(tries=5) @@ -1252,11 +1221,11 @@ def load_RealESRGAN(model_name: str): 'RealESRGAN_x4plus_anime_6B': RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) } - model_path = os.path.join(st.session_state['defaults'].general.RealESRGAN_dir, 'experiments/pretrained_models', model_name + '.pth') - + model_path = os.path.join(st.session_state['defaults'].general.RealESRGAN_dir, model_name + '.pth') + if not os.path.isfile(model_path): model_path = os.path.join(st.session_state['defaults'].general.RealESRGAN_dir, model_name + '.pth') - + if not os.path.exists(model_path): raise Exception(model_name+".pth not found at path "+model_path) @@ -1267,19 +1236,19 @@ def load_RealESRGAN(model_name: str): if st.session_state['defaults'].general.esrgan_cpu or st.session_state['defaults'].general.extra_models_cpu: server_state['RealESRGAN'] = RealESRGANer(scale=2, model_path=model_path, model=RealESRGAN_models[model_name], pre_pad=0, half=False) # cpu does not support half - + server_state['RealESRGAN'].device = torch.device('cpu') server_state['RealESRGAN'].model.to('cpu') - + elif st.session_state['defaults'].general.extra_models_gpu: server_state['RealESRGAN'] = RealESRGANer(scale=2, model_path=model_path, model=RealESRGAN_models[model_name], pre_pad=0, half=not st.session_state['defaults'].general.no_half, device=torch.device(f"cuda:{st.session_state['defaults'].general.esrgan_gpu}")) else: server_state['RealESRGAN'] = RealESRGANer(scale=2, model_path=model_path, model=RealESRGAN_models[model_name], pre_pad=0, half=not st.session_state['defaults'].general.no_half, device=torch.device(f"cuda:{st.session_state['defaults'].general.gpu}")) - - # Add the model_name to model loaded so we can later - # check if its the same when we change it on the UI. + + # Add the model_name to model loaded so we can later + # check if its the same when we change it on the UI. 
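Both loaders in this hunk pick a device the same way: extra models can be forced onto the CPU, given a dedicated GPU, or share the main GPU from the config. That policy in isolation, with the parameter names as assumptions drawn from the branches above:

from typing import Optional
import torch

def pick_device(force_cpu: bool, dedicated_gpu: Optional[int], main_gpu: int) -> torch.device:
    # force_cpu     ~ gfpgan_cpu / esrgan_cpu / extra_models_cpu
    # dedicated_gpu ~ gfpgan_gpu / esrgan_gpu when extra_models_gpu is enabled
    if force_cpu or not torch.cuda.is_available():
        return torch.device("cpu")
    if dedicated_gpu is not None:
        return torch.device(f"cuda:{dedicated_gpu}")
    return torch.device(f"cuda:{main_gpu}")

device = pick_device(force_cpu=False, dedicated_gpu=None, main_gpu=0)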
server_state['RealESRGAN'].model.name = model_name return server_state['RealESRGAN'] @@ -1289,10 +1258,10 @@ def load_RealESRGAN(model_name: str): def load_LDSR(model_name="model", config="project", checking=False): #model_name = 'model' #yaml_name = 'project' - + model_path = os.path.join(st.session_state['defaults'].general.LDSR_dir, model_name + '.ckpt') yaml_path = os.path.join(st.session_state['defaults'].general.LDSR_dir, config + '.yaml') - + if not os.path.isfile(model_path): raise Exception("LDSR model not found at path " + model_path) if not os.path.isfile(yaml_path): @@ -1303,9 +1272,9 @@ def load_LDSR(model_name="model", config="project", checking=False): #sys.path.append(os.path.abspath(st.session_state['defaults'].general.LDSR_dir)) #from LDSR import LDSR server_state['LDSR'] = LDSR(model_path, yaml_path) - + server_state['LDSR'].name = model_name - + return server_state['LDSR'] # @@ -1326,16 +1295,16 @@ def load_LDSR(model_name="model", config="project", checking=False): #print("Error loading LDSR:", file=sys.stderr) #print(traceback.format_exc(), file=sys.stderr) #else: - #print("LDSR not found at path, please make sure you have cloned the LDSR repo to ./src/latent-diffusion/") + #print("LDSR not found at path, please make sure you have cloned the LDSR repo to ./models/ldsr/") #try_loading_LDSR('model',checking=True) #@retry(tries=5) -def load_sd_model(model_name: str): +def load_sd_model(model_name: str): """Loads Stable Diffusion model by name""" ckpt_path = st.session_state.defaults.general.default_model_path - + if model_name != st.session_state.defaults.general.default_model: ckpt_path = os.path.join("models", "custom", f"{model_name}.ckpt") @@ -1444,7 +1413,7 @@ def generation_callback(img, i=0): raise StopException try: - if i == 0: + if i == 0: if img['i']: i = img['i'] except TypeError: pass @@ -1456,7 +1425,7 @@ def generation_callback(img, i=0): #print (img) #print (type(img)) # The following lines will convert the tensor we got on img to an actual image we can render on the UI. - # It can probably be done in a better way for someone who knows what they're doing. I don't. + # It can probably be done in a better way for someone who knows what they're doing. I don't. #print (img,isinstance(img, torch.Tensor)) if isinstance(img, torch.Tensor): x_samples_ddim = (server_state["model"].to('cuda') if not st.session_state['defaults'].general.optimized else server_state["modelFS"].to('cuda') @@ -1467,20 +1436,20 @@ def generation_callback(img, i=0): x_samples_ddim = (server_state["model"].to('cuda') if not st.session_state['defaults'].general.optimized else server_state["modelFS"].to('cuda') ).decode_first_stage(img["denoised"]).to('cuda') - x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) - + x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0) + if x_samples_ddim.ndimension() == 4: pil_images = [transforms.ToPILImage()(x.squeeze_(0)) for x in x_samples_ddim] pil_image = image_grid(pil_images, 1) else: pil_image = transforms.ToPILImage()(x_samples_ddim.squeeze_(0)) - - + + # update image on the UI so we can see the progress - st.session_state["preview_image"].image(pil_image) + st.session_state["preview_image"].image(pil_image) # Show a progress bar so we can keep track of the progress even when the image progress is not been shown, - # Dont worry, it doesnt affect the performance. + # Dont worry, it doesnt affect the performance. 
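generation_callback above turns the current latents into a UI preview by decoding them, rescaling from [-1, 1] to [0, 1], and converting to PIL. The tensor-to-image step on its own, assuming an already-decoded (N, 3, H, W) tensor:

import torch
from torchvision import transforms

def to_preview_images(decoded: torch.Tensor):
    # decoded: (N, 3, H, W) in [-1, 1], as returned by decode_first_stage
    x = torch.clamp((decoded + 1.0) / 2.0, min=0.0, max=1.0)
    to_pil = transforms.ToPILImage()
    return [to_pil(sample) for sample in x]

previews = to_preview_images(torch.rand(2, 3, 64, 64) * 2 - 1)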
if st.session_state["generation_mode"] == "txt2img": percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps)) st.session_state["progress_bar_text"].text( @@ -1496,7 +1465,7 @@ def generation_callback(img, i=0): percent = int(100 * float(i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps)/float(st.session_state.sampling_steps)) st.session_state["progress_bar_text"].text( f"Running step: {i+1 if i+1 < st.session_state.sampling_steps else st.session_state.sampling_steps}/{st.session_state.sampling_steps}" - f"{percent if percent < 100 else 100}%") + f"{percent if percent < 100 else 100}%") st.session_state["progress_bar"].progress(percent if percent < 100 else 100) @@ -1506,10 +1475,10 @@ def generation_callback(img, i=0): [^:]+ # match one or more non ':' characters ) # end 'prompt' (?: # non-capture group - :+ # match one or more ':' characters + :+ # match one or more ':' characters (?P # capture group for 'weight' -?\\d+(?:\\.\\d+)? # match positive or negative decimal number - )? # end weight capture group, make optional + )? # end weight capture group, make optional \\s* # strip spaces after weight | # OR $ # else, if no ':' then match end of line @@ -1551,22 +1520,22 @@ def slerp(device, t, v0:torch.Tensor, v1:torch.Tensor, DOT_THRESHOLD=0.9995): # @st.experimental_memo(persist="disk", show_spinner=False, suppress_st_warning=True) def optimize_update_preview_frequency(current_chunk_speed, previous_chunk_speed_list, update_preview_frequency, update_preview_frequency_list): - """Find the optimal update_preview_frequency value maximizing + """Find the optimal update_preview_frequency value maximizing performance while minimizing the time between updates.""" from statistics import mean - + previous_chunk_avg_speed = mean(previous_chunk_speed_list) - + previous_chunk_speed_list.append(current_chunk_speed) current_chunk_avg_speed = mean(previous_chunk_speed_list) - + if current_chunk_avg_speed >= previous_chunk_avg_speed: #print(f"{current_chunk_speed} >= {previous_chunk_speed}") update_preview_frequency_list.append(update_preview_frequency + 1) else: #print(f"{current_chunk_speed} <= {previous_chunk_speed}") update_preview_frequency_list.append(update_preview_frequency - 1) - + update_preview_frequency = round(mean(update_preview_frequency_list)) return current_chunk_speed, previous_chunk_speed_list, update_preview_frequency, update_preview_frequency_list @@ -1593,10 +1562,18 @@ def load_learned_embed_in_clip(learned_embeds_path, text_encoder, tokenizer, tok # separate token and the embeds if learned_embeds_path.endswith('.pt'): - print(loaded_learned_embeds['string_to_token']) - trained_token = list(loaded_learned_embeds['string_to_token'].keys())[0] - embeds = list(loaded_learned_embeds['string_to_param'].values())[0] - + # old format + # token = * so replace with file directory name when converting + trained_token = os.path.basename(learned_embeds_path) + params_dict = { + trained_token: torch.tensor(list(loaded_learned_embeds['string_to_param'].items())[0][1]) + } + learned_embeds_path = os.path.splitext(learned_embeds_path)[0] + '.bin' + torch.save(params_dict, learned_embeds_path) + loaded_learned_embeds = torch.load(learned_embeds_path, map_location="cpu") + trained_token = list(loaded_learned_embeds.keys())[0] + embeds = loaded_learned_embeds[trained_token] + elif learned_embeds_path.endswith('.bin'): trained_token = list(loaded_learned_embeds.keys())[0] embeds 
= loaded_learned_embeds[trained_token] @@ -1740,7 +1717,7 @@ def enable_minimal_memory_usage(model): torch.cuda.empty_cache() torch_gc() - + def check_prompt_length(prompt, comments): """this function tests if prompt is too long, and if so, adds a message to comments""" @@ -1767,12 +1744,12 @@ def custom_models_available(): # Allow for custom models to be used instead of the default one, # an example would be Waifu-Diffusion or any other fine tune of stable diffusion server_state["custom_models"]:sorted = [] - + for root, dirs, files in os.walk(os.path.join("models", "custom")): for file in files: - if os.path.splitext(file)[1] == '.ckpt': + if os.path.splitext(file)[1] == '.ckpt': server_state["custom_models"].append(os.path.splitext(file)[0]) - + with server_state_lock["CustomModel_available"]: if len(server_state["custom_models"]) > 0: server_state["CustomModel_available"] = True @@ -1784,18 +1761,33 @@ def custom_models_available(): def GFPGAN_available(): #with server_state_lock["GFPGAN_models"]: # - # Allow for custom models to be used instead of the default one, - # an example would be Waifu-Diffusion or any other fine tune of stable diffusion + st.session_state["GFPGAN_models"]:sorted = [] + model = st.session_state["defaults"].model_manager.models.gfpgan + + files_available = 0 + for file in model['files']: + if "save_location" in model['files'][file]: + if os.path.exists(os.path.join(model['files'][file]['save_location'], model['files'][file]['file_name'] )): + files_available += 1 + + elif os.path.exists(os.path.join(model['save_location'], model['files'][file]['file_name'] )): + base_name = os.path.splitext(model['files'][file]['file_name'])[0] + if "GFPGANv" in base_name: + st.session_state["GFPGAN_models"].append(base_name) + files_available += 1 + + # we need to show the other models from previous verions that we have on the + # same directory in case we want to see how they perform vs each other. 
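Further up in this hunk, load_learned_embed_in_clip now converts old-style .pt concept files (with string_to_token / string_to_param dicts) into the simpler .bin layout, a plain {token: tensor} mapping keyed by the placeholder token. A hedged sketch of that conversion, using the file's base name as the token the way the diff does:

import os
import torch

def convert_pt_to_bin(learned_embeds_path: str) -> str:
    """Rewrite an old-format .pt concept embedding as a {token: tensor} .bin file."""
    loaded = torch.load(learned_embeds_path, map_location="cpu")
    # the old format stored the vector under string_to_param with '*' as the token,
    # so the file name is reused as the placeholder token instead
    trained_token = os.path.basename(learned_embeds_path)
    embeds = list(loaded["string_to_param"].values())[0]
    bin_path = os.path.splitext(learned_embeds_path)[0] + ".bin"
    torch.save({trained_token: embeds.clone().detach()}, bin_path)
    return bin_path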
for root, dirs, files in os.walk(st.session_state['defaults'].general.GFPGAN_dir): for file in files: - if os.path.splitext(file)[1] == '.pth': - st.session_state["GFPGAN_models"].append(os.path.splitext(file)[0]) + if os.path.splitext(file)[1] == '.pth': + if os.path.splitext(file)[0] not in st.session_state["GFPGAN_models"]: + st.session_state["GFPGAN_models"].append(os.path.splitext(file)[0]) + - #print (len(st.session_state["GFPGAN_models"])) - #with server_state_lock["GFPGAN_available"]: - if len(st.session_state["GFPGAN_models"]) > 0: + if len(st.session_state["GFPGAN_models"]) > 0 and files_available == len(model['files']): st.session_state["GFPGAN_available"] = True else: st.session_state["GFPGAN_available"] = False @@ -1807,13 +1799,12 @@ def RealESRGAN_available(): # Allow for custom models to be used instead of the default one, # an example would be Waifu-Diffusion or any other fine tune of stable diffusion st.session_state["RealESRGAN_models"]:sorted = [] + model = st.session_state["defaults"].model_manager.models.realesrgan + for file in model['files']: + if os.path.exists(os.path.join(model['save_location'], model['files'][file]['file_name'] )): + base_name = os.path.splitext(model['files'][file]['file_name'])[0] + st.session_state["RealESRGAN_models"].append(base_name) - for root, dirs, files in os.walk(st.session_state['defaults'].general.RealESRGAN_dir): - for file in files: - if os.path.splitext(file)[1] == '.pth': - st.session_state["RealESRGAN_models"].append(os.path.splitext(file)[0]) - - #with server_state_lock["RealESRGAN_available"]: if len(st.session_state["RealESRGAN_models"]) > 0: st.session_state["RealESRGAN_available"] = True else: @@ -1825,22 +1816,25 @@ def LDSR_available(): # Allow for custom models to be used instead of the default one, # an example would be Waifu-Diffusion or any other fine tune of stable diffusion st.session_state["LDSR_models"]:sorted = [] - - for root, dirs, files in os.walk(st.session_state['defaults'].general.LDSR_dir): - for file in files: - if os.path.splitext(file)[1] == '.ckpt': - st.session_state["LDSR_models"].append(os.path.splitext(file)[0]) - - #print (st.session_state['defaults'].general.LDSR_dir) - #print (st.session_state["LDSR_models"]) - #with server_state_lock["LDSR_available"]: - if len(st.session_state["LDSR_models"]) > 0: + files_available = 0 + model = st.session_state["defaults"].model_manager.models.ldsr + for file in model['files']: + if os.path.exists(os.path.join(model['save_location'], model['files'][file]['file_name'] )): + base_name = os.path.splitext(model['files'][file]['file_name'])[0] + extension = os.path.splitext(model['files'][file]['file_name'])[1] + if extension == ".ckpt": + st.session_state["LDSR_models"].append(base_name) + files_available += 1 + if files_available == len(model['files']): st.session_state["LDSR_available"] = True else: - st.session_state["LDSR_available"] = False - + st.session_state["LDSR_available"] = False + + -def save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, + + +def save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode, save_individual_images, model_name): @@ -1999,7 +1993,7 @@ def classToArrays( items, seed, n_iter ): texts.append( item.text 
) parts.append( f"Seed: {itemseed}\n" + "\n".join(item.parts) ) seeds.append( itemseed ) - itemseed += 1 + itemseed += 1 return seeds, texts, parts @@ -2020,14 +2014,14 @@ def classToArrays( items, seed, n_iter ): # def process_images( outpath, func_init, func_sample, prompt, seed, sampler_name, save_grid, batch_size, - n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN: bool = True, GFPGAN_model: str = 'GFPGANv1.3', + n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN: bool = True, GFPGAN_model: str = 'GFPGANv1.4', use_RealESRGAN: bool = False, realesrgan_model_name:str = 'RealESRGAN_x4plus', use_LDSR:bool = False, LDSR_model_name:str = 'model', ddim_eta=0.0, normalize_prompt_weights=True, init_img=None, init_mask=None, mask_blur_strength=3, mask_restore=False, denoising_strength=0.75, noise_mode=0, find_noise_steps=1, resize_mode=None, uses_loopback=False, uses_random_seed_loopback=False, sort_samples=True, write_info_files=True, jpg_sample=False, variant_amount=0.0, variant_seed=None, save_individual_images: bool = True): """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch""" - + torch_gc() # start time after garbage collection (or before?) start_time = time.time() @@ -2037,25 +2031,25 @@ def process_images( mem_mon = MemUsageMonitor('MemMon') mem_mon.start() - + if st.session_state.defaults.general.use_sd_concepts_library: - prompt_tokens = re.findall('<([a-zA-Z0-9-]+)>', prompt) + prompt_tokens = re.findall('<([a-zA-Z0-9-]+)>', prompt) if prompt_tokens: # compviz tokenizer = (server_state["model"] if not st.session_state['defaults'].general.optimized else server_state["modelCS"]).cond_stage_model.tokenizer text_encoder = (server_state["model"] if not st.session_state['defaults'].general.optimized else server_state["modelCS"]).cond_stage_model.transformer - + # diffusers #tokenizer = pipe.tokenizer #text_encoder = pipe.text_encoder - + ext = ('pt', 'bin') - - if len(prompt_tokens) > 1: + + if len(prompt_tokens) > 1: for token_name in prompt_tokens: - embedding_path = os.path.join(st.session_state['defaults'].general.sd_concepts_library_folder, token_name) + embedding_path = os.path.join(st.session_state['defaults'].general.sd_concepts_library_folder, token_name) if os.path.exists(embedding_path): for files in os.listdir(embedding_path): if files.endswith(ext): @@ -2065,11 +2059,11 @@ def process_images( if os.path.exists(embedding_path): for files in os.listdir(embedding_path): if files.endswith(ext): - load_learned_embed_in_clip(f"{os.path.join(embedding_path, files)}", text_encoder, tokenizer, f"<{prompt_tokens[0]}>") - + load_learned_embed_in_clip(f"{os.path.join(embedding_path, files)}", text_encoder, tokenizer, f"<{prompt_tokens[0]}>") + # - - + + os.makedirs(outpath, exist_ok=True) sample_path = os.path.join(outpath, "samples") @@ -2140,9 +2134,9 @@ def process_images( target_seed_randomizer = seed_to_int('') # random seed torch.manual_seed(seed) # this has to be the single starting seed (not per-iteration) base_x = create_random_tensors([opt_C, height // opt_f, width // opt_f], seeds=[seed]) - # we don't want all_seeds to be sequential from starting seed with variants, - # since that makes the same variants each time, - # so we add target_seed_randomizer as a random offset + # we don't want all_seeds to be sequential from starting seed with variants, + # since that makes the same variants each time, + # so we add target_seed_randomizer as a random offset for si in 
range(len(all_seeds)): all_seeds[si] += target_seed_randomizer @@ -2197,7 +2191,7 @@ def process_images( x = create_random_tensors(shape, seeds=seeds) if variant_amount > 0.0: # we are making variants - # using variant_seed as sneaky toggle, + # using variant_seed as sneaky toggle, # when not None or '' use the variant_seed # otherwise use seeds if variant_seed != None and variant_seed != '': @@ -2249,44 +2243,22 @@ def process_images( st.session_state["preview_image"].image(image) - if use_GFPGAN and server_state["GFPGAN"] is not None and not use_RealESRGAN: - st.session_state["progress_bar_text"].text("Running GFPGAN on image %d of %d..." % (i+1, len(x_samples_ddim))) - - torch_gc() - cropped_faces, restored_faces, restored_img = server_state["GFPGAN"].enhance(x_sample[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True) - - gfpgan_sample = restored_img[:,:,::-1] - gfpgan_image = Image.fromarray(gfpgan_sample) - - #if st.session_state["GFPGAN_strenght"]: - #gfpgan_sample = Image.blend(image, gfpgan_image, st.session_state["GFPGAN_strenght"]) - - gfpgan_filename = original_filename + '-gfpgan' - - save_sample(gfpgan_image, sample_path_i, gfpgan_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, - normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, - uses_random_seed_loopback, save_grid, sort_samples, sampler_name, ddim_eta, - n_iter, batch_size, i, denoising_strength, resize_mode, False, server_state["loaded_model"]) - - output_images.append(gfpgan_image) #287 - run_images.append(gfpgan_image) - - if simple_templating: - grid_captions.append( captions[i] + "\ngfpgan" ) - # - elif use_GFPGAN and server_state["GFPGAN"] is not None and not use_LDSR: + if use_GFPGAN and server_state["GFPGAN"] is not None and not use_RealESRGAN and not use_LDSR: st.session_state["progress_bar_text"].text("Running GFPGAN on image %d of %d..." % (i+1, len(x_samples_ddim))) + if server_state["GFPGAN"].name != GFPGAN_model: + load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name, use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) + torch_gc() cropped_faces, restored_faces, restored_img = server_state["GFPGAN"].enhance(x_sample[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True) - + gfpgan_sample = restored_img[:,:,::-1] gfpgan_image = Image.fromarray(gfpgan_sample) - + #if st.session_state["GFPGAN_strenght"]: - #gfpgan_sample = Image.blend(image, gfpgan_image, st.session_state["GFPGAN_strenght"]) - + #gfpgan_sample = Image.blend(image, gfpgan_image, st.session_state["GFPGAN_strenght"]) + gfpgan_filename = original_filename + '-gfpgan' save_sample(gfpgan_image, sample_path_i, gfpgan_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, @@ -2298,8 +2270,9 @@ def process_images( run_images.append(gfpgan_image) if simple_templating: - grid_captions.append( captions[i] + "\ngfpgan" ) + grid_captions.append( captions[i] + "\ngfpgan" ) + # elif use_RealESRGAN and server_state["RealESRGAN"] is not None and not use_GFPGAN: st.session_state["progress_bar_text"].text("Running RealESRGAN on image %d of %d..." % (i+1, len(x_samples_ddim))) #skip_save = True # #287 >_> @@ -2327,9 +2300,9 @@ def process_images( if simple_templating: grid_captions.append( captions[i] + "\nesrgan" ) - + # - elif use_LDSR and server_state["LDSR"] is not None and not use_GFPGAN: + elif use_LDSR and "LDSR" in server_state and not use_GFPGAN: print ("Running LDSR on image %d of %d..." 
% (i+1, len(x_samples_ddim))) st.session_state["progress_bar_text"].text("Running LDSR on image %d of %d..." % (i+1, len(x_samples_ddim))) #skip_save = True # #287 >_> @@ -2339,24 +2312,68 @@ def process_images( #try_loading_RealESRGAN(realesrgan_model_name) load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name, use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) - result = server_state["LDSR"].superResolution(image, 2, 2, 2) + result = server_state["LDSR"].superResolution(image, ddimSteps = st.session_state["ldsr_sampling_steps"], + preDownScale = st.session_state["preDownScale"], postDownScale = st.session_state["postDownScale"], + downsample_method=st.session_state["downsample_method"]) + ldsr_filename = original_filename + '-ldsr4x' - ldsr_sample = result[:,:,::-1] - ldsr_image = Image.fromarray(ldsr_sample) + #ldsr_sample = result[:,:,::-1] + #ldsr_image = Image.fromarray(ldsr_sample) #save_sample(image, sample_path_i, original_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, #normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, skip_save, #save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode) - save_sample(esrgan_image, sample_path_i, ldsr_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, + save_sample(result, sample_path_i, ldsr_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode, False, server_state["loaded_model"]) - output_images.append(ldsr_image) #287 - run_images.append(ldsr_image) + output_images.append(result) #287 + run_images.append(result) if simple_templating: - grid_captions.append( captions[i] + "\nldsr" ) + grid_captions.append( captions[i] + "\nldsr" ) + + # + elif use_LDSR and "LDSR" in server_state and use_GFPGAN and "GFPGAN" in server_state: + print ("Running GFPGAN+LDSR on image %d of %d..." % (i+1, len(x_samples_ddim))) + st.session_state["progress_bar_text"].text("Running GFPGAN+LDSR on image %d of %d..." 
% (i+1, len(x_samples_ddim))) + + if server_state["GFPGAN"].name != GFPGAN_model: + load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name, use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) + + torch_gc() + cropped_faces, restored_faces, restored_img = server_state["GFPGAN"].enhance(x_sample[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True) + + gfpgan_sample = restored_img[:,:,::-1] + gfpgan_image = Image.fromarray(gfpgan_sample) + + if server_state["LDSR"].name != LDSR_model_name: + #try_loading_RealESRGAN(realesrgan_model_name) + load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name, use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) + + #LDSR.superResolution(gfpgan_image, ddimSteps=100, preDownScale='None', postDownScale='None', downsample_method="Lanczos") + result = server_state["LDSR"].superResolution(gfpgan_image, ddimSteps = st.session_state["ldsr_sampling_steps"], + preDownScale = st.session_state["preDownScale"], postDownScale = st.session_state["postDownScale"], + downsample_method=st.session_state["downsample_method"]) + + ldsr_filename = original_filename + '-gfpgan-ldsr2x' + #ldsr_sample = result[:,:,::-1] + #ldsr_image = Image.fromarray(result) + + #save_sample(image, sample_path_i, original_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, + #normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, skip_save, + #save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode) + + save_sample(result, sample_path_i, ldsr_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, + normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, + save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode, False, server_state["loaded_model"]) + + output_images.append(result) #287 + run_images.append(result) + + if simple_templating: + grid_captions.append( captions[i] + "\ngfpgan-ldsr" ) elif use_RealESRGAN and server_state["RealESRGAN"] is not None and use_GFPGAN and server_state["GFPGAN"] is not None: st.session_state["progress_bar_text"].text("Running GFPGAN+RealESRGAN on image %d of %d..." % (i+1, len(x_samples_ddim))) @@ -2383,34 +2400,9 @@ def process_images( if simple_templating: grid_captions.append( captions[i] + "\ngfpgan_esrgan" ) - + # - elif use_LDSR and server_state["LDSR"] is not None and use_GFPGAN and server_state["GFPGAN"] is not None: - st.session_state["progress_bar_text"].text("Running GFPGAN+LDSR on image %d of %d..." 
% (i+1, len(x_samples_ddim))) - #skip_save = True # #287 >_> - torch_gc() - cropped_faces, restored_faces, restored_img = server_state["LDSR"].enhance(x_sample[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True) - gfpgan_sample = restored_img[:,:,::-1] - - if server_state["LDSR"].model.name != ldsr_model_name: - #try_loading_RealESRGAN(realesrgan_model_name) - load_models(use_LDSR=use_LDSR, LDSR_model=LDSR_model_name,use_GFPGAN=use_GFPGAN, use_RealESRGAN=use_RealESRGAN, RealESRGAN_model=realesrgan_model_name) - - output, img_mode = server_state["LDSR"].enhance(gfpgan_sample[:,:,::-1]) - gfpgan_ldsr_filename = original_filename + '-gfpgan-ldsr4x' - gfpgan_ldsr_sample = output[:,:,::-1] - gfpgan_ldsr_image = Image.fromarray(gfpgan_ldsr_sample) - - save_sample(gfpgan_ldsr_image, sample_path_i, gfpgan_ldsr_filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, - normalize_prompt_weights, False, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, - save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode, False, server_state["loaded_model"]) - - output_images.append(gfpgan_ldsr_image) #287 - run_images.append(gfpgan_ldsr_image) - - if simple_templating: - grid_captions.append( captions[i] + "\ngfpgan_ldsr" ) - + else: output_images.append(image) run_images.append(image) @@ -2438,7 +2430,7 @@ def process_images( image = Image.composite(init_img, image, init_mask) if save_individual_images: - save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, + save_sample(image, sample_path_i, filename, jpg_sample, prompts, seeds, width, height, steps, cfg_scale, normalize_prompt_weights, use_GFPGAN, write_info_files, prompt_matrix, init_img, uses_loopback, uses_random_seed_loopback, save_grid, sort_samples, sampler_name, ddim_eta, n_iter, batch_size, i, denoising_strength, resize_mode, save_individual_images, server_state["loaded_model"]) @@ -2561,4 +2553,3 @@ def convert_pt_to_bin_and_load(input_file, text_encoder, tokenizer, placeholder_ torch.save(params_dict, "learned_embeds.bin") load_learned_embed_in_clip("learned_embeds.bin", text_encoder, tokenizer, placeholder_token) print("loaded", placeholder_token) - \ No newline at end of file diff --git a/scripts/txt2img.py b/scripts/txt2img.py index 0f6130875..283245db8 100644 --- a/scripts/txt2img.py +++ b/scripts/txt2img.py @@ -29,7 +29,7 @@ from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.plms import PLMSSampler -# Temp imports +# Temp imports # end of imports @@ -95,7 +95,7 @@ def txt2img(prompt: str, ddim_steps: int, sampler_name: str, n_iter: int, batch_ height: int, width: int, separate_prompts:bool = False, normalize_prompt_weights:bool = True, save_individual_images: bool = True, save_grid: bool = True, group_by_prompt: bool = True, save_as_jpg: bool = True, use_GFPGAN: bool = True, GFPGAN_model: str = 'GFPGANv1.3', use_RealESRGAN: bool = False, - RealESRGAN_model: str = "RealESRGAN_x4plus_anime_6B", use_LDSR: bool = True, LDSR_model: str = "model", + RealESRGAN_model: str = "RealESRGAN_x4plus_anime_6B", use_LDSR: bool = True, LDSR_model: str = "model", fp = None, variant_amount: float = None, variant_seed: int = None, ddim_eta:float = 0.0, write_info_files:bool = True): @@ -153,7 +153,7 @@ def sample(init_data, x, conditioning, unconditional_conditioning, sampler_name) use_RealESRGAN=st.session_state["use_RealESRGAN"], realesrgan_model_name=RealESRGAN_model, 
use_LDSR=st.session_state["use_LDSR"], - LDSR_model_name=LDSR_model, + LDSR_model_name=LDSR_model, ddim_eta=ddim_eta, normalize_prompt_weights=normalize_prompt_weights, save_individual_images=save_individual_images, @@ -173,7 +173,7 @@ def sample(init_data, x, conditioning, unconditional_conditioning, sampler_name) #err_msg = f'CRASHED:
Please wait while the program restarts.' #stats = err_msg #return [], seed, 'err', stats - + # def layout(): with st.form("txt2img-inputs"): @@ -200,25 +200,31 @@ def layout(): seed = st.text_input("Seed:", value=st.session_state['defaults'].txt2img.seed, help=" The seed to use, if left blank a random seed will be generated.") with st.expander("Batch Options"): - batch_count = st.slider("Batch count.", min_value=st.session_state['defaults'].txt2img.batch_count.min_value, max_value=st.session_state['defaults'].txt2img.batch_count.max_value, - value=st.session_state['defaults'].txt2img.batch_count.value, step=st.session_state['defaults'].txt2img.batch_count.step, - help="How many iterations or batches of images to generate in total.") + #batch_count = st.slider("Batch count.", min_value=st.session_state['defaults'].txt2img.batch_count.min_value, max_value=st.session_state['defaults'].txt2img.batch_count.max_value, + #value=st.session_state['defaults'].txt2img.batch_count.value, step=st.session_state['defaults'].txt2img.batch_count.step, + #help="How many iterations or batches of images to generate in total.") + + #batch_size = st.slider("Batch size", min_value=st.session_state['defaults'].txt2img.batch_size.min_value, max_value=st.session_state['defaults'].txt2img.batch_size.max_value, + #value=st.session_state.defaults.txt2img.batch_size.value, step=st.session_state.defaults.txt2img.batch_size.step, + #help="How many images are at once in a batch.\ + #It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ + #Default: 1") + + st.session_state["batch_count"] = int(st.text_input("Batch count.", value=st.session_state['defaults'].txt2img.batch_count.value, + help="How many iterations or batches of images to generate in total.")) - batch_size = st.slider("Batch size", min_value=st.session_state['defaults'].txt2img.batch_size.min_value, max_value=st.session_state['defaults'].txt2img.batch_size.max_value, - value=st.session_state.defaults.txt2img.batch_size.value, step=st.session_state.defaults.txt2img.batch_size.step, - help="How many images are at once in a batch.\ - It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ - Default: 1") + st.session_state["batch_size"] = int(st.text_input("Batch size", value=st.session_state.defaults.txt2img.batch_size.value, + help="How many images are at once in a batch.\ + It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes \ + to finish generation as more images are generated at once.\ + Default: 1") ) with st.expander("Preview Settings"): - st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].txt2img.update_preview, - help="If enabled the image preview will be updated during the generation instead of at the end. \ - You can use the Update Preview \Frequency option bellow to customize how frequent it's updated. \ - By default this is enabled and the frequency is set to 1 step.") + st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].txt2img.update_preview_frequency, help="Frequency in steps at which the the preview image is updated. 
By default the frequency \ - is set to 1 step.") + is set to 10 step.") with col2: preview_tab, gallery_tab = st.tabs(["Preview", "Gallery"]) @@ -232,17 +238,17 @@ def layout(): # create an empty container for the image, progress bar, etc so we can update it later and use session_state to hold them globally. st.session_state["preview_image"] = st.empty() - + st.session_state["progress_bar_text"] = st.empty() st.session_state["progress_bar_text"].info("Nothing but crickets here, try generating something first.") - + st.session_state["progress_bar"] = st.empty() message = st.empty() - + with gallery_tab: - st.session_state["gallery"] = st.empty() + st.session_state["gallery"] = st.empty() st.session_state["gallery"].info("Nothing but crickets here, try generating something first.") with col3: @@ -258,44 +264,45 @@ def layout(): the file for the model has on said folder, it is recommended to give the .ckpt file a name that \ will make it easier for you to distinguish it from other models. Default: Stable Diffusion v1.4") - st.session_state.sampling_steps = st.slider("Sampling Steps", value=st.session_state.defaults.txt2img.sampling_steps.value, - min_value=st.session_state.defaults.txt2img.sampling_steps.min_value, - max_value=st.session_state['defaults'].txt2img.sampling_steps.max_value, - step=st.session_state['defaults'].txt2img.sampling_steps.step) + st.session_state.sampling_steps = st.number_input("Sampling Steps", value=st.session_state.defaults.txt2img.sampling_steps.value, + min_value=st.session_state.defaults.txt2img.sampling_steps.min_value, + step=st.session_state['defaults'].txt2img.sampling_steps.step, + help="Set the default number of sampling steps to use. Default is: 30 (with k_euler)") sampler_name_list = ["k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a", "k_heun", "PLMS", "DDIM"] sampler_name = st.selectbox("Sampling method", sampler_name_list, index=sampler_name_list.index(st.session_state['defaults'].txt2img.default_sampler), help="Sampling method to use. Default: k_euler") with st.expander("Advanced"): - separate_prompts = st.checkbox("Create Prompt Matrix.", value=st.session_state['defaults'].txt2img.separate_prompts, - help="Separate multiple prompts using the `|` character, and get all combinations of them.") - - normalize_prompt_weights = st.checkbox("Normalize Prompt Weights.", value=st.session_state['defaults'].txt2img.normalize_prompt_weights, - help="Ensure the sum of all weights add up to 1.0") - - save_individual_images = st.checkbox("Save individual images.", value=st.session_state['defaults'].txt2img.save_individual_images, - help="Save each image generated before any filter or enhancement is applied.") - - save_grid = st.checkbox("Save grid",value=st.session_state['defaults'].txt2img.save_grid, help="Save a grid with all the images generated into a single image.") - group_by_prompt = st.checkbox("Group results by prompt", value=st.session_state['defaults'].txt2img.group_by_prompt, - help="Saves all the images with the same prompt into the same folder. 
When using a prompt matrix each prompt combination will have its own folder.") - - write_info_files = st.checkbox("Write Info file", value=st.session_state['defaults'].txt2img.write_info_files, - help="Save a file next to the image with informartion about the generation.") - - save_as_jpg = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].txt2img.save_as_jpg, help="Saves the images as jpg instead of png.") - + with st.expander("Output Settings"): + separate_prompts = st.checkbox("Create Prompt Matrix.", value=st.session_state['defaults'].txt2img.separate_prompts, + help="Separate multiple prompts using the `|` character, and get all combinations of them.") + + normalize_prompt_weights = st.checkbox("Normalize Prompt Weights.", value=st.session_state['defaults'].txt2img.normalize_prompt_weights, + help="Ensure the sum of all weights add up to 1.0") + + save_individual_images = st.checkbox("Save individual images.", value=st.session_state['defaults'].txt2img.save_individual_images, + help="Save each image generated before any filter or enhancement is applied.") + + save_grid = st.checkbox("Save grid",value=st.session_state['defaults'].txt2img.save_grid, help="Save a grid with all the images generated into a single image.") + group_by_prompt = st.checkbox("Group results by prompt", value=st.session_state['defaults'].txt2img.group_by_prompt, + help="Saves all the images with the same prompt into the same folder. When using a prompt matrix each prompt combination will have its own folder.") + + write_info_files = st.checkbox("Write Info file", value=st.session_state['defaults'].txt2img.write_info_files, + help="Save a file next to the image with informartion about the generation.") + + save_as_jpg = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].txt2img.save_as_jpg, help="Saves the images as jpg instead of png.") + # check if GFPGAN, RealESRGAN and LDSR are available. - if "GFPGAN_available" not in st.session_state: - GFPGAN_available() - - if "RealESRGAN_available" not in st.session_state: - RealESRGAN_available() - - if "LDSR_available" not in st.session_state: - LDSR_available() - + #if "GFPGAN_available" not in st.session_state: + GFPGAN_available() + + #if "RealESRGAN_available" not in st.session_state: + RealESRGAN_available() + + #if "LDSR_available" not in st.session_state: + LDSR_available() + if st.session_state["GFPGAN_available"] or st.session_state["RealESRGAN_available"] or st.session_state["LDSR_available"]: with st.expander("Post-Processing"): face_restoration_tab, upscaling_tab = st.tabs(["Face Restoration", "Upscaling"]) @@ -309,57 +316,73 @@ def layout(): help="Uses the GFPGAN model to improve faces after the generation.\ This greatly improve the quality and consistency of faces but uses\ extra VRAM. Disable if you need the extra VRAM.") - + st.session_state["GFPGAN_model"] = st.selectbox("GFPGAN model", st.session_state["GFPGAN_models"], - index=st.session_state["GFPGAN_models"].index(st.session_state['defaults'].general.GFPGAN_model)) - + index=st.session_state["GFPGAN_models"].index(st.session_state['defaults'].general.GFPGAN_model)) + #st.session_state["GFPGAN_strenght"] = st.slider("Effect Strenght", min_value=1, max_value=100, value=1, step=1, help='') - + else: - st.session_state["use_GFPGAN"] = False - + st.session_state["use_GFPGAN"] = False + with upscaling_tab: - #with st.expander("Upscaling"): - # RealESRGAN and LDSR used for upscaling. 
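The reworked upscaling tab that follows replaces per-upscaler checkboxes with a single "Use Upscaling" toggle plus a method selectbox, so only the upscaler matching the selected method is enabled. A minimal sketch of that flag derivation as a pure function (an editor's illustration; the function and variable names are not helpers from the patch):

```python
from typing import Tuple

def resolve_upscalers(use_upscaling: bool, method: str) -> Tuple[bool, bool]:
    """Return (use_RealESRGAN, use_LDSR) for the chosen upscaling method."""
    # Only the upscaler matching the selected method runs, and only when the
    # "Use Upscaling" toggle is on; everything else stays disabled.
    return (
        use_upscaling and method == "RealESRGAN",
        use_upscaling and method == "LDSR",
    )

# Example: upscaling enabled with LDSR selected -> RealESRGAN off, LDSR on.
assert resolve_upscalers(True, "LDSR") == (False, True)
assert resolve_upscalers(False, "RealESRGAN") == (False, False)
```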
+ st.session_state['use_upscaling'] = st.checkbox("Use Upscaling", value=st.session_state['defaults'].txt2img.use_upscaling) + + # RealESRGAN and LDSR used for upscaling. if st.session_state["RealESRGAN_available"] or st.session_state["LDSR_available"]: - + upscaling_method_list = [] if st.session_state["RealESRGAN_available"]: upscaling_method_list.append("RealESRGAN") if st.session_state["LDSR_available"]: upscaling_method_list.append("LDSR") - + + #print (st.session_state["RealESRGAN_available"]) st.session_state["upscaling_method"] = st.selectbox("Upscaling Method", upscaling_method_list, - index=upscaling_method_list.index(st.session_state['defaults'].general.upscaling_method)) - + index=upscaling_method_list.index(str(st.session_state['defaults'].general.upscaling_method))) + if st.session_state["RealESRGAN_available"]: - # with st.expander("RealESRGAN"): - st.session_state["use_RealESRGAN"] = st.checkbox("Use RealESRGAN", value=st.session_state['defaults'].txt2img.use_RealESRGAN, - help="Uses the RealESRGAN model to upscale the images after the generation.\ - This greatly improve the quality and lets you have high resolution images but \ - uses extra VRAM. Disable if you need the extra VRAM.") - - st.session_state["RealESRGAN_model"] = st.selectbox("RealESRGAN model", st.session_state["RealESRGAN_models"], - index=st.session_state["RealESRGAN_models"].index(st.session_state['defaults'].general.RealESRGAN_model)) + with st.expander("RealESRGAN"): + if st.session_state["upscaling_method"] == "RealESRGAN" and st.session_state['use_upscaling']: + st.session_state["use_RealESRGAN"] = True + else: + st.session_state["use_RealESRGAN"] = False + + st.session_state["RealESRGAN_model"] = st.selectbox("RealESRGAN model", st.session_state["RealESRGAN_models"], + index=st.session_state["RealESRGAN_models"].index(st.session_state['defaults'].general.RealESRGAN_model)) else: st.session_state["use_RealESRGAN"] = False st.session_state["RealESRGAN_model"] = "RealESRGAN_x4plus" - - + + # if st.session_state["LDSR_available"]: - #with st.expander("LDSR"): - st.session_state["use_LDSR"] = st.checkbox("Use LDSR", value=st.session_state['defaults'].txt2img.use_LDSR, - help="Uses the LDSR model to upscale the images after the generation.\ - This greatly improve the quality and lets you have high resolution images but \ - uses extra VRAM. 
Disable if you need the extra VRAM.") - - st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"], - index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model)) + with st.expander("LDSR"): + if st.session_state["upscaling_method"] == "LDSR" and st.session_state['use_upscaling']: + st.session_state["use_LDSR"] = True + else: + st.session_state["use_LDSR"] = False + + st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"], + index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model)) + + st.session_state["ldsr_sampling_steps"] = int(st.text_input("Sampling Steps", value=st.session_state['defaults'].txt2img.LDSR_config.sampling_steps, + help="")) + + st.session_state["preDownScale"] = int(st.text_input("PreDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.preDownScale, + help="")) + + st.session_state["postDownScale"] = int(st.text_input("postDownScale", value=st.session_state['defaults'].txt2img.LDSR_config.postDownScale, + help="")) + + downsample_method_list = ['Nearest', 'Lanczos'] + st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list, + index=downsample_method_list.index(st.session_state['defaults'].txt2img.LDSR_config.downsample_method)) + else: st.session_state["use_LDSR"] = False - st.session_state["LDSR_model"] = "model" - + st.session_state["LDSR_model"] = "model" + with st.expander("Variant"): variant_amount = st.slider("Variant Amount:", value=st.session_state['defaults'].txt2img.variant_amount.value, min_value=st.session_state['defaults'].txt2img.variant_amount.min_value, max_value=st.session_state['defaults'].txt2img.variant_amount.max_value, @@ -375,13 +398,13 @@ def layout(): generate_button = generate_col1.form_submit_button("Generate") # - if generate_button: - + if generate_button: + with col2: with hc.HyLoader('Loading Models...', hc.Loaders.standard_loaders,index=[0]): load_models(use_LDSR=st.session_state["use_LDSR"], LDSR_model=st.session_state["LDSR_model"], - use_GFPGAN=st.session_state["use_GFPGAN"], GFPGAN_model=st.session_state["GFPGAN_model"] , - use_RealESRGAN=st.session_state["use_RealESRGAN"], RealESRGAN_model=st.session_state["RealESRGAN_model"], + use_GFPGAN=st.session_state["use_GFPGAN"], GFPGAN_model=st.session_state["GFPGAN_model"] , + use_RealESRGAN=st.session_state["use_RealESRGAN"], RealESRGAN_model=st.session_state["RealESRGAN_model"], CustomModel_available=server_state["CustomModel_available"], custom_model=st.session_state["custom_model"]) @@ -389,12 +412,12 @@ def layout(): #print(st.session_state['use_LDSR']) #try: # - - output_images, seeds, info, stats = txt2img(prompt, st.session_state.sampling_steps, sampler_name, batch_count, batch_size, + + output_images, seeds, info, stats = txt2img(prompt, st.session_state.sampling_steps, sampler_name, st.session_state["batch_count"], st.session_state["batch_size"], cfg_scale, seed, height, width, separate_prompts, normalize_prompt_weights, save_individual_images, - save_grid, group_by_prompt, save_as_jpg, st.session_state["use_GFPGAN"], st.session_state['GFPGAN_model'], + save_grid, group_by_prompt, save_as_jpg, st.session_state["use_GFPGAN"], st.session_state['GFPGAN_model'], use_RealESRGAN=st.session_state["use_RealESRGAN"], RealESRGAN_model=st.session_state["RealESRGAN_model"], - use_LDSR=st.session_state["use_LDSR"], LDSR_model=st.session_state["LDSR_model"], + use_LDSR=st.session_state["use_LDSR"], 
LDSR_model=st.session_state["LDSR_model"], variant_amount=variant_amount, variant_seed=variant_seed, write_info_files=write_info_files) message.success('Render Complete: ' + info + '; Stats: ' + stats, icon="✅") @@ -432,11 +455,11 @@ def layout(): #st.session_state['historyTab'] = [history_tab,col1,col2,col3,PlaceHolder,col1_cont,col2_cont,col3_cont] - + with gallery_tab: print(seeds) sdGallery(output_images) - + #except (StopException, KeyError): #print(f"Received Streamlit StopException") diff --git a/scripts/txt2vid.py b/scripts/txt2vid.py index 75335c3fb..773b87e94 100644 --- a/scripts/txt2vid.py +++ b/scripts/txt2vid.py @@ -12,7 +12,7 @@ # GNU Affero General Public License for more details. # You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . +# along with this program. If not, see . # base webui import and utils. from sd_utils import * @@ -54,22 +54,11 @@ pass class plugin_info(): - plugname = "txt2img" + plugname = "txt2vid" description = "Text to Image" isTab = True displayPriority = 1 - -if os.path.exists(os.path.join(st.session_state['defaults'].general.GFPGAN_dir, "experiments", "pretrained_models", "GFPGANv1.3.pth")): - server_state["GFPGAN_available"] = True -else: - server_state["GFPGAN_available"] = False - -if os.path.exists(os.path.join(st.session_state['defaults'].general.RealESRGAN_dir, "experiments","pretrained_models", f"{st.session_state['defaults'].txt2vid.RealESRGAN_model}.pth")): - server_state["RealESRGAN_available"] = True -else: - server_state["RealESRGAN_available"] = False - # # ----------------------------------------------------------------------------- @@ -159,13 +148,13 @@ def diffuse( if st.session_state['defaults'].txt2vid.update_preview_frequency == step_counter or step_counter == st.session_state.sampling_steps: if st.session_state.dynamic_preview_frequency: - st.session_state["current_chunk_speed"], + st.session_state["current_chunk_speed"], st.session_state["previous_chunk_speed_list"], st.session_state['defaults'].txt2vid.update_preview_frequency, st.session_state["avg_update_preview_frequency"] = optimize_update_preview_frequency(st.session_state["current_chunk_speed"], - st.session_state["previous_chunk_speed_list"], - st.session_state['defaults'].txt2vid.update_preview_frequency, - st.session_state["update_preview_frequency_list"]) + st.session_state["previous_chunk_speed_list"], + st.session_state['defaults'].txt2vid.update_preview_frequency, + st.session_state["update_preview_frequency_list"]) #scale and decode the image latents with vae cond_latents_2 = 1 / 0.18215 * cond_latents @@ -177,8 +166,8 @@ def diffuse( st.session_state["preview_image"].image(image2) - step_counter = 0 - + step_counter = 0 + duration = timeit.default_timer() - start st.session_state["current_chunk_speed"] = duration @@ -206,7 +195,7 @@ def diffuse( f"{frames_percent if frames_percent < 100 else 100}% {st.session_state.frame_duration:.2f}{st.session_state.frame_speed}" ) st.session_state["progress_bar"].progress(percent if percent < 100 else 100) - + #scale and decode the image latents with vae cond_latents_2 = 1 / 0.18215 * cond_latents image = pipe.vae.decode(cond_latents_2) @@ -214,32 +203,29 @@ def diffuse( # generate output numpy image as uint8 image = torch.clamp((image["sample"] + 1.0) / 2.0, min=0.0, max=1.0) image2 = transforms.ToPILImage()(image.squeeze_(0)) - + return image2 # -@st.experimental_singleton(show_spinner=False, suppress_st_warning=True) def 
load_diffusers_model(weights_path,torch_device): with server_state_lock["model"]: if "model" in server_state: del server_state["model"] - + if "textual_inversion" in st.session_state: - del st.session_state['textual_inversion'] - + del st.session_state['textual_inversion'] + try: with server_state_lock["pipe"]: - if not "pipe" in st.session_state or st.session_state["weights_path"] != weights_path: + if "pipe" not in server_state: if ("weights_path" in st.session_state) and st.session_state["weights_path"] != weights_path: del st.session_state["weights_path"] - + st.session_state["weights_path"] = weights_path - # if folder "user_data/model_cache/stable-diffusion-v1-4" exists, load the model from there + # if folder "models/diffusers/stable-diffusion-v1-4" exists, load the model from there if weights_path == "CompVis/stable-diffusion-v1-4": - model_path = os.path.join("user_data", "model_cache", "stable-diffusion-v1-4") - elif weights_path == "hakurei/waifu-diffusion": - model_path = os.path.join("user_data", "model_cache", "waifu-diffusion") + model_path = os.path.join("models", "diffusers", "stable-diffusion-v1-4") if not os.path.exists(model_path + "/model_index.json"): server_state["pipe"] = StableDiffusionPipeline.from_pretrained( @@ -257,17 +243,17 @@ def load_diffusers_model(weights_path,torch_device): torch_dtype=torch.float16 if st.session_state['defaults'].general.use_float16 else None, revision="fp16" if not st.session_state['defaults'].general.no_half else None ) - + server_state["pipe"].unet.to(torch_device) server_state["pipe"].vae.to(torch_device) server_state["pipe"].text_encoder.to(torch_device) - + if st.session_state.defaults.general.enable_attention_slicing: server_state["pipe"].enable_attention_slicing() - - if st.session_state.defaults.general.enable_minimal_memory_usage: + + if st.session_state.defaults.general.enable_minimal_memory_usage: server_state["pipe"].enable_minimal_memory_usage() - + print("Tx2Vid Model Loaded") else: print("Tx2Vid Model already Loaded") @@ -276,7 +262,7 @@ def load_diffusers_model(weights_path,torch_device): "You need a huggingface token in order to use the Text to Video tab. Use the Settings page from the sidebar on the left to add your token." ) raise OSError("You need a huggingface token in order to use the Text to Video tab. 
Use the Settings page from the sidebar on the left to add your token.") - + # def txt2vid( # -------------------------------------- @@ -400,34 +386,32 @@ def txt2vid( klms_scheduler = LMSDiscreteScheduler( beta_start=beta_start, beta_end=beta_end, beta_schedule=beta_schedule ) - + SCHEDULERS = dict(default=default_scheduler, ddim=ddim_scheduler, klms=klms_scheduler) - # ------------------------------------------------------------------------------ - #st.session_state["progress_bar_text"].text("Loading models...") - with st.session_state["progress_bar_text"].container(): - with hc.HyLoader('Loading Models...', hc.Loaders.standard_loaders,index=[0]): - try: + if "pipe" not in server_state: + with st.session_state["progress_bar_text"].container(): + with hc.HyLoader('Loading Models...', hc.Loaders.standard_loaders,index=[0]): if "model" in st.session_state: del st.session_state["model"] - except: - pass - - #print (st.session_state["weights_path"] != weights_path) - - load_diffusers_model(weights_path, torch_device) - + load_diffusers_model(weights_path, torch_device) + else: + print("Model already loaded") + + if "pipe" not in server_state: + print('wtf') + server_state["pipe"].scheduler = SCHEDULERS[scheduler] - + server_state["pipe"].use_multiprocessing_for_evaluation = False - server_state["pipe"].use_multiprocessed_decoding = False - + server_state["pipe"].use_multiprocessed_decoding = False + if do_loop: prompts = str([prompts, prompts]) seeds = [seeds, seeds] #first_seed, *seeds = seeds #prompts.append(prompts) - #seeds.append(first_seed) + #seeds.append(first_seed) # get the conditional text embeddings based on the prompt text_input = server_state["pipe"].tokenizer(prompts, padding="max_length", max_length=server_state["pipe"].tokenizer.model_max_length, truncation=True, return_tensors="pt") @@ -436,7 +420,7 @@ def txt2vid( # if st.session_state.defaults.general.use_sd_concepts_library: - prompt_tokens = re.findall('<([a-zA-Z0-9-]+)>', prompts) + prompt_tokens = re.findall('<([a-zA-Z0-9-]+)>', prompts) if prompt_tokens: # compviz @@ -449,10 +433,10 @@ def txt2vid( ext = ('pt', 'bin') #print (prompt_tokens) - - if len(prompt_tokens) > 1: + + if len(prompt_tokens) > 1: for token_name in prompt_tokens: - embedding_path = os.path.join(st.session_state['defaults'].general.sd_concepts_library_folder, token_name) + embedding_path = os.path.join(st.session_state['defaults'].general.sd_concepts_library_folder, token_name) if os.path.exists(embedding_path): for files in os.listdir(embedding_path): if files.endswith(ext): @@ -462,7 +446,7 @@ def txt2vid( if os.path.exists(embedding_path): for files in os.listdir(embedding_path): if files.endswith(ext): - load_learned_embed_in_clip(f"{os.path.join(embedding_path, files)}", text_encoder, tokenizer, f"<{prompt_tokens[0]}>") + load_learned_embed_in_clip(f"{os.path.join(embedding_path, files)}", text_encoder, tokenizer, f"<{prompt_tokens[0]}>") # sample a source init1 = torch.randn((1, server_state["pipe"].unet.in_channels, height // 8, width // 8), device=torch_device) @@ -497,19 +481,19 @@ def txt2vid( with autocast("cuda"): image = diffuse(server_state["pipe"], cond_embeddings, init, num_inference_steps, cfg_scale, eta) - + if st.session_state["save_individual_images"] and not st.session_state["use_GFPGAN"] and not st.session_state["use_RealESRGAN"]: #im = Image.fromarray(image) outpath = os.path.join(full_path, 'frame%06d.png' % frame_index) image.save(outpath, quality=quality) - + # send the image to the UI to update it 
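As a standalone look at the concepts-library lookup used above (an illustrative sketch, not code from the patch): the prompt is scanned for `<token>` markers and each token is resolved to `.pt`/`.bin` embedding files inside a per-token folder. The library path in the example is a placeholder for the configured `sd_concepts_library_folder`.

```python
import os
import re

def find_concept_embeddings(prompt: str, library_folder: str) -> dict:
    """Map each <token> in the prompt to any .pt/.bin embedding files on disk."""
    found = {}
    for token_name in re.findall(r"<([a-zA-Z0-9-]+)>", prompt):
        token_dir = os.path.join(library_folder, token_name)
        if os.path.isdir(token_dir):
            found[token_name] = [
                os.path.join(token_dir, name)
                for name in os.listdir(token_dir)
                if name.endswith((".pt", ".bin"))
            ]
    return found

# Example: two concept tokens referenced in one prompt.
print(find_concept_embeddings("portrait of <my-style> in <other-concept>", "sd-concepts-library"))
```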
#st.session_state["preview_image"].image(im) - + #append the frames to the frames list so we can use them later. frames.append(np.asarray(image)) - - + + # #try: #if st.session_state["use_GFPGAN"] and server_state["GFPGAN"] is not None and not st.session_state["use_RealESRGAN"]: @@ -521,18 +505,18 @@ def txt2vid( cropped_faces, restored_faces, restored_img = server_state["GFPGAN"].enhance(np.array(image)[:,:,::-1], has_aligned=False, only_center_face=False, paste_back=True) gfpgan_sample = restored_img[:,:,::-1] gfpgan_image = Image.fromarray(gfpgan_sample) - + outpath = os.path.join(full_path, 'frame%06d.png' % frame_index) gfpgan_image.save(outpath, quality=quality) - + #append the frames to the frames list so we can use them later. - frames.append(np.asarray(gfpgan_image)) - + frames.append(np.asarray(gfpgan_image)) + st.session_state["preview_image"].image(gfpgan_image) #except AttributeError: #print("Cant perform GFPGAN, skipping.") #pass - + #increase frame_index counter. frame_index += 1 @@ -599,7 +583,7 @@ def layout(): generate_button = generate_col1.form_submit_button("Generate") # creating the page layout using columns - col1, col2, col3 = st.columns([1,2,1], gap="large") + col1, col2, col3 = st.columns([1,2,1], gap="large") with col1: width = st.slider("Width:", min_value=st.session_state['defaults'].txt2vid.width.min_value, max_value=st.session_state['defaults'].txt2vid.width.max_value, @@ -611,9 +595,10 @@ def layout(): step=st.session_state['defaults'].txt2vid.cfg_scale.step, help="How strongly the image should follow the prompt.") #uploaded_images = st.file_uploader("Upload Image", accept_multiple_files=False, type=["png", "jpg", "jpeg", "webp"], - #help="Upload an image which will be used for the image to image generation.") + #help="Upload an image which will be used for the image to image generation.") seed = st.text_input("Seed:", value=st.session_state['defaults'].txt2vid.seed, help=" The seed to use, if left blank a random seed will be generated.") - #batch_count = st.slider("Batch count.", min_value=1, max_value=100, value=st.session_state['defaults'].txt2vid.batch_count, step=1, help="How many iterations or batches of images to generate in total.") + #batch_count = st.slider("Batch count.", min_value=1, max_value=100, value=st.session_state['defaults'].txt2vid.batch_count, + # step=1, help="How many iterations or batches of images to generate in total.") #batch_size = st.slider("Batch size", min_value=1, max_value=250, value=st.session_state['defaults'].txt2vid.batch_size, step=1, #help="How many images are at once in a batch.\ #It increases the VRAM usage a lot but if you have enough VRAM it can reduce the time it takes to finish generation as more images are generated at once.\ @@ -622,19 +607,20 @@ def layout(): st.session_state["max_frames"] = int(st.text_input("Max Frames:", value=st.session_state['defaults'].txt2vid.max_frames, help="Specify the max number of frames you want to generate.")) with st.expander("Preview Settings"): - st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].txt2vid.update_preview, - help="If enabled the image preview will be updated during the generation instead of at the end. \ - You can use the Update Preview \Frequency option bellow to customize how frequent it's updated. 
\ - By default this is enabled and the frequency is set to 1 step.") + #st.session_state["update_preview"] = st.checkbox("Update Image Preview", value=st.session_state['defaults'].txt2vid.update_preview, + #help="If enabled the image preview will be updated during the generation instead of at the end. \ + #You can use the Update Preview \Frequency option bellow to customize how frequent it's updated. \ + #By default this is enabled and the frequency is set to 1 step.") + st.session_state["update_preview"] = st.session_state["defaults"].general.update_preview st.session_state["update_preview_frequency"] = st.text_input("Update Image Preview Frequency", value=st.session_state['defaults'].txt2vid.update_preview_frequency, help="Frequency in steps at which the the preview image is updated. By default the frequency \ is set to 1 step.") - + # - - - + + + with col2: preview_tab, gallery_tab = st.tabs(["Preview", "Gallery"]) @@ -680,15 +666,13 @@ def layout(): #custom_model = "CompVis/stable-diffusion-v1-4" #st.session_state["weights_path"] = f"CompVis/{slugify(custom_model.lower())}" - st.session_state.sampling_steps = st.slider("Sampling Steps", value=st.session_state['defaults'].txt2vid.sampling_steps.value, + st.session_state.sampling_steps = st.number_input("Sampling Steps", value=st.session_state['defaults'].txt2vid.sampling_steps.value, min_value=st.session_state['defaults'].txt2vid.sampling_steps.min_value, - max_value=st.session_state['defaults'].txt2vid.sampling_steps.max_value, - step=st.session_state['defaults'].txt2vid.sampling_steps.step, help="Number of steps between each pair of sampled points") - - st.session_state.num_inference_steps = st.slider("Inference Steps:", value=st.session_state['defaults'].txt2vid.num_inference_steps.value, + step=st.session_state['defaults'].txt2vid.sampling_steps.step, help="Number of steps between each pair of sampled points") + + st.session_state.num_inference_steps = st.number_input("Inference Steps:", value=st.session_state['defaults'].txt2vid.num_inference_steps.value, min_value=st.session_state['defaults'].txt2vid.num_inference_steps.min_value, step=st.session_state['defaults'].txt2vid.num_inference_steps.step, - max_value=st.session_state['defaults'].txt2vid.num_inference_steps.max_value, help="Higher values (e.g. 
100, 200 etc) can create better images.") #sampler_name_list = ["k_lms", "k_euler", "k_euler_a", "k_dpm_2", "k_dpm_2_a", "k_heun", "PLMS", "DDIM"] @@ -710,53 +694,125 @@ def layout(): #help="Press the Enter key to summit, when 'No' is selected you can use the Enter key to write multiple lines.") with st.expander("Advanced"): - st.session_state["separate_prompts"] = st.checkbox("Create Prompt Matrix.", value=st.session_state['defaults'].txt2vid.separate_prompts, - help="Separate multiple prompts using the `|` character, and get all combinations of them.") - st.session_state["normalize_prompt_weights"] = st.checkbox("Normalize Prompt Weights.", - value=st.session_state['defaults'].txt2vid.normalize_prompt_weights, help="Ensure the sum of all weights add up to 1.0") - st.session_state["save_individual_images"] = st.checkbox("Save individual images.", - value=st.session_state['defaults'].txt2vid.save_individual_images, - help="Save each image generated before any filter or enhancement is applied.") - st.session_state["save_video"] = st.checkbox("Save video",value=st.session_state['defaults'].txt2vid.save_video, - help="Save a video with all the images generated as frames at the end of the generation.") - - st.session_state["group_by_prompt"] = st.checkbox("Group results by prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt, - help="Saves all the images with the same prompt into the same folder. When using a prompt matrix each prompt combination will have its own folder.") - st.session_state["write_info_files"] = st.checkbox("Write Info file", value=st.session_state['defaults'].txt2vid.write_info_files, - help="Save a file next to the image with informartion about the generation.") - st.session_state["dynamic_preview_frequency"] = st.checkbox("Dynamic Preview Frequency", value=st.session_state['defaults'].txt2vid.dynamic_preview_frequency, - help="This option tries to find the best value at which we can update \ - the preview image during generation while minimizing the impact it has in performance. Default: True") - st.session_state["do_loop"] = st.checkbox("Do Loop", value=st.session_state['defaults'].txt2vid.do_loop, - help="Do loop") - st.session_state["save_as_jpg"] = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].txt2vid.save_as_jpg, help="Saves the images as jpg instead of png.") - - if server_state["GFPGAN_available"]: - st.session_state["use_GFPGAN"] = st.checkbox("Use GFPGAN", value=st.session_state['defaults'].txt2vid.use_GFPGAN, - help="Uses the GFPGAN model to improve faces after the generation. This greatly improve the quality and consistency \ - of faces but uses extra VRAM. Disable if you need the extra VRAM.") - else: - st.session_state["use_GFPGAN"] = False - - if server_state["RealESRGAN_available"]: - st.session_state["use_RealESRGAN"] = st.checkbox("Use RealESRGAN", value=st.session_state['defaults'].txt2vid.use_RealESRGAN, - help="Uses the RealESRGAN model to upscale the images after the generation. \ - This greatly improve the quality and lets you have high resolution images but \ - uses extra VRAM. 
Disable if you need the extra VRAM.") - st.session_state["RealESRGAN_model"] = st.selectbox("RealESRGAN model", ["RealESRGAN_x4plus", "RealESRGAN_x4plus_anime_6B"], index=0) - else: - st.session_state["use_RealESRGAN"] = False - st.session_state["RealESRGAN_model"] = "RealESRGAN_x4plus" - + with st.expander("Output Settings"): + st.session_state["separate_prompts"] = st.checkbox("Create Prompt Matrix.", value=st.session_state['defaults'].txt2vid.separate_prompts, + help="Separate multiple prompts using the `|` character, and get all combinations of them.") + st.session_state["normalize_prompt_weights"] = st.checkbox("Normalize Prompt Weights.", + value=st.session_state['defaults'].txt2vid.normalize_prompt_weights, help="Ensure the sum of all weights add up to 1.0") + st.session_state["save_individual_images"] = st.checkbox("Save individual images.", + value=st.session_state['defaults'].txt2vid.save_individual_images, + help="Save each image generated before any filter or enhancement is applied.") + st.session_state["save_video"] = st.checkbox("Save video",value=st.session_state['defaults'].txt2vid.save_video, + help="Save a video with all the images generated as frames at the end of the generation.") + + st.session_state["group_by_prompt"] = st.checkbox("Group results by prompt", value=st.session_state['defaults'].txt2vid.group_by_prompt, + help="Saves all the images with the same prompt into the same folder. When using a prompt matrix each prompt combination will have its own folder.") + st.session_state["write_info_files"] = st.checkbox("Write Info file", value=st.session_state['defaults'].txt2vid.write_info_files, + help="Save a file next to the image with informartion about the generation.") + st.session_state["dynamic_preview_frequency"] = st.checkbox("Dynamic Preview Frequency", value=st.session_state['defaults'].txt2vid.dynamic_preview_frequency, + help="This option tries to find the best value at which we can update \ + the preview image during generation while minimizing the impact it has in performance. Default: True") + st.session_state["do_loop"] = st.checkbox("Do Loop", value=st.session_state['defaults'].txt2vid.do_loop, + help="Do loop") + st.session_state["save_as_jpg"] = st.checkbox("Save samples as jpg", value=st.session_state['defaults'].txt2vid.save_as_jpg, help="Saves the images as jpg instead of png.") + + # + if "GFPGAN_available" not in st.session_state: + GFPGAN_available() + + if "RealESRGAN_available" not in st.session_state: + RealESRGAN_available() + + if "LDSR_available" not in st.session_state: + LDSR_available() + + if st.session_state["GFPGAN_available"] or st.session_state["RealESRGAN_available"] or st.session_state["LDSR_available"]: + with st.expander("Post-Processing"): + face_restoration_tab, upscaling_tab = st.tabs(["Face Restoration", "Upscaling"]) + with face_restoration_tab: + # GFPGAN used for face restoration + if st.session_state["GFPGAN_available"]: + #with st.expander("Face Restoration"): + #if st.session_state["GFPGAN_available"]: + #with st.expander("GFPGAN"): + st.session_state["use_GFPGAN"] = st.checkbox("Use GFPGAN", value=st.session_state['defaults'].txt2vid.use_GFPGAN, + help="Uses the GFPGAN model to improve faces after the generation.\ + This greatly improve the quality and consistency of faces but uses\ + extra VRAM. 
Disable if you need the extra VRAM.") + + st.session_state["GFPGAN_model"] = st.selectbox("GFPGAN model", st.session_state["GFPGAN_models"], + index=st.session_state["GFPGAN_models"].index(st.session_state['defaults'].general.GFPGAN_model)) + + #st.session_state["GFPGAN_strenght"] = st.slider("Effect Strenght", min_value=1, max_value=100, value=1, step=1, help='') + + else: + st.session_state["use_GFPGAN"] = False + + with upscaling_tab: + st.session_state['us_upscaling'] = st.checkbox("Use Upscaling", value=st.session_state['defaults'].txt2vid.use_upscaling) + # RealESRGAN and LDSR used for upscaling. + if st.session_state["RealESRGAN_available"] or st.session_state["LDSR_available"]: + + upscaling_method_list = [] + if st.session_state["RealESRGAN_available"]: + upscaling_method_list.append("RealESRGAN") + if st.session_state["LDSR_available"]: + upscaling_method_list.append("LDSR") + + st.session_state["upscaling_method"] = st.selectbox("Upscaling Method", upscaling_method_list, + index=upscaling_method_list.index(st.session_state['defaults'].general.upscaling_method)) + + if st.session_state["RealESRGAN_available"]: + with st.expander("RealESRGAN"): + if st.session_state["upscaling_method"] == "RealESRGAN" and st.session_state['us_upscaling']: + st.session_state["use_RealESRGAN"] = True + else: + st.session_state["use_RealESRGAN"] = False + + st.session_state["RealESRGAN_model"] = st.selectbox("RealESRGAN model", st.session_state["RealESRGAN_models"], + index=st.session_state["RealESRGAN_models"].index(st.session_state['defaults'].general.RealESRGAN_model)) + else: + st.session_state["use_RealESRGAN"] = False + st.session_state["RealESRGAN_model"] = "RealESRGAN_x4plus" + + + # + if st.session_state["LDSR_available"]: + with st.expander("LDSR"): + if st.session_state["upscaling_method"] == "LDSR" and st.session_state['us_upscaling']: + st.session_state["use_LDSR"] = True + else: + st.session_state["use_LDSR"] = False + + st.session_state["LDSR_model"] = st.selectbox("LDSR model", st.session_state["LDSR_models"], + index=st.session_state["LDSR_models"].index(st.session_state['defaults'].general.LDSR_model)) + + st.session_state["ldsr_sampling_steps"] = st.number_input("Sampling Steps", value=st.session_state['defaults'].txt2vid.LDSR_config.sampling_steps, + help="") + + st.session_state["preDownScale"] = st.number_input("PreDownScale", value=st.session_state['defaults'].txt2vid.LDSR_config.preDownScale, + help="") + + st.session_state["postDownScale"] = st.number_input("postDownScale", value=st.session_state['defaults'].txt2vid.LDSR_config.postDownScale, + help="") + + downsample_method_list = ['Nearest', 'Lanczos'] + st.session_state["downsample_method"] = st.selectbox("Downsample Method", downsample_method_list, + index=downsample_method_list.index(st.session_state['defaults'].txt2vid.LDSR_config.downsample_method)) + + else: + st.session_state["use_LDSR"] = False + st.session_state["LDSR_model"] = "model" + with st.expander("Variant"): - st.session_state["variant_amount"] = st.slider("Variant Amount:", value=st.session_state['defaults'].txt2vid.variant_amount.value, + st.session_state["variant_amount"] = st.number_input("Variant Amount:", value=st.session_state['defaults'].txt2vid.variant_amount.value, min_value=st.session_state['defaults'].txt2vid.variant_amount.min_value, max_value=st.session_state['defaults'].txt2vid.variant_amount.max_value, step=st.session_state['defaults'].txt2vid.variant_amount.step) - - st.session_state["variant_seed"] = st.text_input("Variant Seed:", 
value=st.session_state['defaults'].txt2vid.seed, + + st.session_state["variant_seed"] = st.text_input("Variant Seed:", value=st.session_state['defaults'].txt2vid.seed, help="The seed to use when generating a variant, if left blank a random seed will be generated.") - + #st.session_state["beta_start"] = st.slider("Beta Start:", value=st.session_state['defaults'].txt2vid.beta_start.value, #min_value=st.session_state['defaults'].txt2vid.beta_start.min_value, #max_value=st.session_state['defaults'].txt2vid.beta_start.max_value, @@ -769,7 +825,7 @@ def layout(): #print("Loading models") # load the models when we hit the generate button for the first time, it wont be loaded after that so dont worry. #load_models(False, st.session_state["use_GFPGAN"], True, st.session_state["RealESRGAN_model"]) - + if st.session_state["use_GFPGAN"]: if "GFPGAN" in st.session_state: print("GFPGAN already loaded") @@ -784,22 +840,22 @@ def layout(): except Exception: import traceback print("Error loading GFPGAN:", file=sys.stderr) - print(traceback.format_exc(), file=sys.stderr) + print(traceback.format_exc(), file=sys.stderr) else: if "GFPGAN" in st.session_state: - del server_state["GFPGAN"] + del server_state["GFPGAN"] #try: # run video generation video, seed, info, stats = txt2vid(prompts=prompt, gpu=st.session_state["defaults"].general.gpu, num_steps=st.session_state.sampling_steps, max_frames=int(st.session_state.max_frames), - num_inference_steps=st.session_state.num_inference_steps, - cfg_scale=cfg_scale,do_loop=st.session_state["do_loop"], - seeds=seed, quality=100, eta=0.0, width=width, - height=height, weights_path=custom_model, scheduler=scheduler_name, - disable_tqdm=False, beta_start=st.session_state['defaults'].txt2vid.beta_start.value, - beta_end=st.session_state['defaults'].txt2vid.beta_end.value, - beta_schedule=beta_scheduler_type, starting_image=None) + num_inference_steps=st.session_state.num_inference_steps, + cfg_scale=cfg_scale,do_loop=st.session_state["do_loop"], + seeds=seed, quality=100, eta=0.0, width=width, + height=height, weights_path=custom_model, scheduler=scheduler_name, + disable_tqdm=False, beta_start=st.session_state['defaults'].txt2vid.beta_start.value, + beta_end=st.session_state['defaults'].txt2vid.beta_end.value, + beta_schedule=beta_scheduler_type, starting_image=None) #message.success('Done!', icon="✅") message.success('Render Complete: ' + info + '; Stats: ' + stats, icon="✅") diff --git a/scripts/webui.py b/scripts/webui.py index 4178246ef..263f9d7d3 100644 --- a/scripts/webui.py +++ b/scripts/webui.py @@ -33,12 +33,12 @@ parser.add_argument("--extra-models-cpu", action='store_true', help="run extra models (GFGPAN/ESRGAN) on cpu", default=False) parser.add_argument("--extra-models-gpu", action='store_true', help="run extra models (GFGPAN/ESRGAN) on gpu", default=False) parser.add_argument("--gfpgan-cpu", action='store_true', help="run GFPGAN on cpu", default=False) -parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./src/gfpgan' if os.path.exists('./src/gfpgan') else './GFPGAN')) # i disagree with where you're putting it but since all guidefags are doing it this way, there you go +parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=('./models/gfpgan' if os.path.exists('./models/gfpgan') else './GFPGAN')) # i disagree with where you're putting it but since all guidefags are doing it this way, there you go parser.add_argument("--gfpgan-gpu", type=int, help="run GFPGAN on specific gpu (overrides --gpu) ", default=0) 
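The directory flags in this hunk now prefer the relocated `./models/...` layout and only fall back to a sibling checkout when that folder is missing. A self-contained sketch of the same conditional-default pattern (illustrative only; flag names mirror the patch):

```python
import argparse
import os

parser = argparse.ArgumentParser()
# Prefer the relocated ./models/gfpgan folder; fall back to a sibling ./GFPGAN checkout.
parser.add_argument(
    "--gfpgan-dir",
    type=str,
    default=("./models/gfpgan" if os.path.exists("./models/gfpgan") else "./GFPGAN"),
)
# Same pattern for the LDSR model directory.
parser.add_argument(
    "--ldsr-dir",
    type=str,
    default=("./models/ldsr" if os.path.exists("./models/ldsr") else "./LDSR"),
)
opt = parser.parse_args([])
print(opt.gfpgan_dir, opt.ldsr_dir)
```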
parser.add_argument("--gpu", type=int, help="choose which GPU to use if you have multiple", default=0) parser.add_argument("--grid-format", type=str, help="png for lossless png files; jpg:quality for lossy jpeg; webp:quality for lossy webp, or webp:-compression for lossless webp", default="jpg:95") parser.add_argument("--inbrowser", action='store_true', help="automatically launch the interface in a new tab on the default browser", default=False) -parser.add_argument("--ldsr-dir", type=str, help="LDSR directory", default=('./src/latent-diffusion' if os.path.exists('./src/latent-diffusion') else './LDSR')) +parser.add_argument("--ldsr-dir", type=str, help="LDSR directory", default=('./models/ldsr' if os.path.exists('./models/ldsr') else './LDSR')) parser.add_argument("--n_rows", type=int, default=-1, help="rows in the grid; use -1 for autodetect and 0 for n_rows to be same as batch_size (default: -1)",) parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats", default=False) parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware accleration in browser)", default=False) @@ -53,7 +53,7 @@ parser.add_argument("--filename_format", type=str, nargs="?", help="filenames format", default=None) parser.add_argument("--port", type=int, help="choose the port for the gradio webserver to use", default=7860) parser.add_argument("--precision", type=str, help="evaluate at this precision", choices=["full", "autocast"], default="autocast") -parser.add_argument("--realesrgan-dir", type=str, help="RealESRGAN directory", default=('./src/realesrgan' if os.path.exists('./src/realesrgan') else './RealESRGAN')) +parser.add_argument("--realesrgan-dir", type=str, help="RealESRGAN directory", default=('./models/realesrgan' if os.path.exists('./models/realesrgan') else './RealESRGAN')) parser.add_argument("--realesrgan-model", type=str, help="Upscaling model for RealESRGAN", default=('RealESRGAN_x4plus')) parser.add_argument("--save-metadata", action='store_true', help="Store generation parameters in the output png. Drop saved png into Image Lab to read parameters", default=False) parser.add_argument("--share-password", type=str, help="Sharing is open by default, use this to set a password. Username: webui", default=None) @@ -71,7 +71,10 @@ parser.add_argument('--horde_url', action="store", required=False, type=str, help="The SH Horde URL. Where the bridge will pickup prompts and send the finished generations.") parser.add_argument('--horde_priority_usernames',type=str, action='append', required=False, help="Usernames which get priority use in this horde instance. The owner's username is always in this list.") parser.add_argument('--horde_max_power',type=int, required=False, help="How much power this instance has to generate pictures. 
Min: 2") -parser.add_argument('--horde_nsfw', action='store_true', required=False, help="Set to false if you do not want this worker generating NSFW images.") +parser.add_argument('--horde_sfw', action='store_true', required=False, help="Set to true if you do not want this worker generating NSFW images.") +parser.add_argument('--horde_blacklist', nargs='+', required=False, help="List the words that you want to blacklist.") +parser.add_argument('--horde_censorlist', nargs='+', required=False, help="List the words that you want to censor.") +parser.add_argument('--horde_censor_nsfw', action='store_true', required=False, help="Set to true if you want this bridge worker to censor NSFW images.") opt = parser.parse_args() #Should not be needed anymore @@ -369,8 +372,8 @@ def torch_gc(): def load_LDSR(checking=False): model_name = 'model' yaml_name = 'project' - model_path = os.path.join(LDSR_dir, 'experiments/pretrained_models', model_name + '.ckpt') - yaml_path = os.path.join(LDSR_dir, 'experiments/pretrained_models', yaml_name + '.yaml') + model_path = os.path.join(LDSR_dir, model_name + '.ckpt') + yaml_path = os.path.join(LDSR_dir, yaml_name + '.yaml') if not os.path.isfile(model_path): raise Exception("LDSR model not found at path "+model_path) if not os.path.isfile(yaml_path): @@ -384,7 +387,7 @@ def load_LDSR(checking=False): return LDSRObject def load_GFPGAN(checking=False): model_name = 'GFPGANv1.3' - model_path = os.path.join(GFPGAN_dir, 'experiments/pretrained_models', model_name + '.pth') + model_path = os.path.join(GFPGAN_dir, model_name + '.pth') if not os.path.isfile(model_path): raise Exception("GFPGAN model not found at path "+model_path) if checking == True: @@ -407,7 +410,7 @@ def load_RealESRGAN(model_name: str, checking = False): 'RealESRGAN_x4plus_anime_6B': RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4) } - model_path = os.path.join(RealESRGAN_dir, 'experiments/pretrained_models', model_name + '.pth') + model_path = os.path.join(RealESRGAN_dir, model_name + '.pth') if not os.path.isfile(model_path): raise Exception(model_name+".pth not found at path "+model_path) if checking == True: @@ -467,7 +470,7 @@ def try_loading_LDSR(model_name: str,checking=False): print("Error loading LDSR:", file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) else: - print("LDSR not found at path, please make sure you have cloned the LDSR repo to ./src/latent-diffusion/") + print("LDSR not found at path, please make sure you have cloned the LDSR repo to ./models/ldsr/") try_loading_LDSR('model',checking=True) def load_SD_model(): @@ -2649,16 +2652,25 @@ def run_headless(): print() @logger.catch -def run_bridge(interval, api_key, horde_name, horde_url, priority_usernames, horde_max_pixels, horde_nsfw): +def run_bridge(interval, api_key, horde_name, horde_url, priority_usernames, horde_max_pixels, horde_nsfw, horde_censor_nsfw, horde_blacklist, horde_censorlist): current_id = None current_payload = None loop_retry = 0 while True: + if loop_retry > 10 and current_id: + logger.error(f"Exceeded retry count {loop_retry} for generation id {current_id}. 
Aborting generation!") + current_id = None + current_payload = None + current_generation = None + loop_retry = 0 + elif current_id: + logger.debug(f"Retrying ({loop_retry}/10) for generation id {current_id}...") gen_dict = { "name": horde_name, "max_pixels": horde_max_pixels, "priority_usernames": priority_usernames, "nsfw": horde_nsfw, + "blacklist": horde_blacklist, } headers = {"apikey": api_key} if current_id: @@ -2708,8 +2720,18 @@ def run_bridge(interval, api_key, horde_name, horde_url, priority_usernames, hor current_id = None current_payload = None current_generation = None + loop_retry = 0 time.sleep(10) continue + current_payload['toggles'] = current_payload.get('toggles', [1,4]) + # In bridge-mode, matrix is prepared on the horde and split in multiple nodes + if 0 in current_payload['toggles']: + current_payload['toggles'].remove(0) + if 8 not in current_payload['toggles']: + if horde_censor_nsfw and not horde_nsfw: + current_payload['toggles'].append(8) + elif any(word in current_payload['prompt'] for word in horde_censorlist): + current_payload['toggles'].append(8) images, seed, info, stats = txt2img(**current_payload) buffer = BytesIO() # We send as WebP to avoid using all the horde bandwidth @@ -2723,19 +2745,19 @@ def run_bridge(interval, api_key, horde_name, horde_url, priority_usernames, hor "max_pixels": horde_max_pixels, } current_generation = seed - while current_id and current_generation: + while current_id and current_generation != None: try: submit_req = requests.post(horde_url + '/api/v2/generate/submit', json = submit_dict, headers = headers) try: submit = submit_req.json() except json.decoder.JSONDecodeError: - logger.error(f"Something has gone wrong with {horde_url} during submit. Please inform its administrator!") + logger.error(f"Something has gone wrong with {horde_url} during submit. Please inform its administrator! (Retry {loop_retry}/10)") time.sleep(interval) continue if submit_req.status_code == 404: logger.warning(f"The generation we were working on got stale. Aborting!") elif not submit_req.ok: - logger.warning(f"During gen submit, server {horde_url} responded with status code {submit_req.status_code}: {submit['message']}. Waiting for 10 seconds...") + logger.warning(f"During gen submit, server {horde_url} responded with status code {submit_req.status_code}: {submit['message']}. Waiting for 10 seconds... (Retry {loop_retry}/10)") if 'errors' in submit: logger.warning(f"Detailed Request Errors: {submit['errors']}") time.sleep(10) @@ -2745,8 +2767,9 @@ def run_bridge(interval, api_key, horde_name, horde_url, priority_usernames, hor current_id = None current_payload = None current_generation = None + loop_retry = 0 except requests.exceptions.ConnectionError: - logger.warning(f"Server {horde_url} unavailable during submit. Waiting 10 seconds...") + logger.warning(f"Server {horde_url} unavailable during submit. Waiting 10 seconds... 
(Retry {loop_retry}/10)") time.sleep(10) continue time.sleep(interval) @@ -2782,15 +2805,27 @@ def __init__(self): horde_priority_usernames = opt.horde_priority_usernames if opt.horde_priority_usernames else cd.horde_priority_usernames horde_max_power = opt.horde_max_power if opt.horde_max_power else cd.horde_max_power try: - horde_nsfw = opt.horde_nsfw if opt.horde_nsfw else cd.horde_nsfw + horde_nsfw = not opt.horde_sfw if opt.horde_sfw else cd.horde_nsfw except AttributeError: horde_nsfw = True + try: + horde_censor_nsfw = opt.horde_censor_nsfw if opt.horde_censor_nsfw else cd.horde_censor_nsfw + except AttributeError: + horde_censor_nsfw = False + try: + horde_blacklist = opt.horde_blacklist if opt.horde_blacklist else cd.horde_blacklist + except AttributeError: + horde_blacklist = [] + try: + horde_censorlist = opt.horde_censorlist if opt.horde_censorlist else cd.horde_censorlist + except AttributeError: + horde_censorlist = [] if horde_max_power < 2: horde_max_power = 2 horde_max_pixels = 64*64*8*horde_max_power logger.info(f"Joining Horde with parameters: API Key '{horde_api_key}'. Server Name '{horde_name}'. Horde URL '{horde_url}'. Max Pixels {horde_max_pixels}") try: - run_bridge(1, horde_api_key, horde_name, horde_url, horde_priority_usernames, horde_max_pixels, horde_nsfw) + run_bridge(1, horde_api_key, horde_name, horde_url, horde_priority_usernames, horde_max_pixels, horde_nsfw, horde_censor_nsfw, horde_blacklist, horde_censorlist) except KeyboardInterrupt: logger.info(f"Keyboard Interrupt Received. Ending Bridge") else: diff --git a/scripts/webui_streamlit.py b/scripts/webui_streamlit.py index 0950a0a66..63f287f63 100644 --- a/scripts/webui_streamlit.py +++ b/scripts/webui_streamlit.py @@ -100,13 +100,13 @@ def layout(): # check if the models exist on their respective folders with server_state_lock["GFPGAN_available"]: - if os.path.exists(os.path.join(st.session_state["defaults"].general.GFPGAN_dir, "experiments", "pretrained_models", "GFPGANv1.3.pth")): + if os.path.exists(os.path.join(st.session_state["defaults"].general.GFPGAN_dir, f"{st.session_state['defaults'].general.GFPGAN_model}.pth")): server_state["GFPGAN_available"] = True else: server_state["GFPGAN_available"] = False with server_state_lock["RealESRGAN_available"]: - if os.path.exists(os.path.join(st.session_state["defaults"].general.RealESRGAN_dir, "experiments","pretrained_models", f"{st.session_state['defaults'].general.RealESRGAN_model}.pth")): + if os.path.exists(os.path.join(st.session_state["defaults"].general.RealESRGAN_dir, f"{st.session_state['defaults'].general.RealESRGAN_model}.pth")): server_state["RealESRGAN_available"] = True else: server_state["RealESRGAN_available"] = False @@ -124,7 +124,10 @@ def layout(): if tabs =='Stable Diffusion': # set the page url and title st.experimental_set_query_params(page='stable-diffusion') - set_page_title("Stable Diffusion Playground") + try: + set_page_title("Stable Diffusion Playground") + except NameError: + st.experimental_rerun() txt2img_tab, img2img_tab, txt2vid_tab, img2txt_tab, concept_library_tab = st.tabs(["Text-to-Image", "Image-to-Image", "Text-to-Video", "Image-To-Text", diff --git a/webui.sh b/webui.sh index 624cb2b39..7fdce4397 100755 --- a/webui.sh +++ b/webui.sh @@ -104,35 +104,35 @@ sd_model_loading () { # Checks to see if the upscaling models exist in their correct locations. 
If they do not they will be downloaded as required post_processor_model_loading () { # Check to see if GFPGAN has been added yet, if not it will download it and place it in the proper directory - if [ -f "$DIRECTORY/src/gfpgan/experiments/pretrained_models/GFPGANv1.3.pth" ]; then + if [ -f "$DIRECTORY/models/gfpgan/GFPGANv1.3.pth" ]; then printf "GFPGAN already exists. Continuing...\n\n" else printf "Downloading GFPGAN model. Please wait...\n" - wget $GFPGAN_MODEL -P $DIRECTORY/src/gfpgan/experiments/pretrained_models + wget $GFPGAN_MODEL -P $DIRECTORY/models/gfpgan fi # Check to see if realESRGAN has been added yet, if not it will download it and place it in the proper directory - if [ -f "$DIRECTORY/src/realesrgan/experiments/pretrained_models/RealESRGAN_x4plus.pth" ]; then + if [ -f "$DIRECTORY/models/realesrgan/RealESRGAN_x4plus.pth" ]; then printf "realESRGAN already exists. Continuing...\n\n" else printf "Downloading realESRGAN model. Please wait...\n" - wget $REALESRGAN_MODEL -P $DIRECTORY/src/realesrgan/experiments/pretrained_models - wget $REALESRGAN_ANIME_MODEL -P $DIRECTORY/src/realesrgan/experiments/pretrained_models + wget $REALESRGAN_MODEL -P $DIRECTORY/models/realesrgan + wget $REALESRGAN_ANIME_MODEL -P $DIRECTORY/models/realesrgan fi # Check to see if LDSR has been added yet, if not it will be cloned and its models downloaded to the correct directory - if [ -f "$DIRECTORY/src/latent-diffusion/experiments/pretrained_models/model.ckpt" ]; then + if [ -f "$DIRECTORY/models/ldsr/model.ckpt" ]; then printf "LDSR already exists. Continuing...\n\n" else printf "Cloning LDSR and downloading model. Please wait...\n" git clone $LATENT_DIFFUSION_REPO - mv latent-diffusion $DIRECTORY/src/latent-diffusion - mkdir $DIRECTORY/src/latent-diffusion/experiments - mkdir $DIRECTORY/src/latent-diffusion/experiments/pretrained_models - wget $LSDR_CONFIG -P $DIRECTORY/src/latent-diffusion/experiments/pretrained_models - mv $DIRECTORY/src/latent-diffusion/experiments/pretrained_models/index.html?dl=1 $DIRECTORY/src/latent-diffusion/experiments/pretrained_models/project.yaml - wget $LSDR_MODEL -P $DIRECTORY/src/latent-diffusion/experiments/pretrained_models - mv $DIRECTORY/src/latent-diffusion/experiments/pretrained_models/index.html?dl=1 $DIRECTORY/src/latent-diffusion/experiments/pretrained_models/model.ckpt + mv latent-diffusion $DIRECTORY/models/ldsr + mkdir $DIRECTORY/models/ldsr/experiments + mkdir $DIRECTORY/models/ldsr + wget $LSDR_CONFIG -P $DIRECTORY/models/ldsr + mv $DIRECTORY/models/ldsr/index.html?dl=1 $DIRECTORY/models/ldsr/project.yaml + wget $LSDR_MODEL -P $DIRECTORY/models/ldsr + mv $DIRECTORY/models/ldsr/index.html?dl=1 $DIRECTORY/models/ldsr/model.ckpt fi # Check to see if SD Concepts has been added yet, if not it will download it and place it in the proper directory