Skip to content

Commit

Permalink
The Merge (#1705)
Browse files Browse the repository at this point in the history
  • Loading branch information
ZeroCool940711 committed Dec 5, 2022
2 parents 09b64d4 + d90c44c commit 5291437
Show file tree
Hide file tree
Showing 83 changed files with 12,064 additions and 2,043 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,12 @@ condaenv.*.requirements.txt
# Repo-specific
# =========================================================================== #
/configs/webui/userconfig_streamlit.yaml
/configs/webui/userconfig_flet.yaml
/custom-conda-path.txt
!/src/components/*
!/src/pages/*
/src/*
/inputs
/outputs
/model_cache
/log/**/*.png
Expand Down
24 changes: 24 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Git submodule registry for the backend services added by "The Merge".
# URLs are relative (../../<org>/<repo>.git), so they resolve against this
# superproject's own remote host rather than hard-coding github.com.
[submodule "backend"]
path = backend
url = ../../Sygil-Dev/dalle-flow.git
# NOTE(review): "backend" above and "backend/dalle_flow" below both point at
# Sygil-Dev/dalle-flow.git, which checks the same repository out inside its
# own tree. That looks unintentional — confirm the intended URL for "backend".
[submodule "backend/clip-as-service"]
path = backend/clip-as-service
url = ../../jina-ai/clip-as-service.git
[submodule "backend/clipseg"]
path = backend/clipseg
url = ../../timojl/clipseg.git
[submodule "backend/dalle_flow"]
path = backend/dalle_flow
url = ../../Sygil-Dev/dalle-flow.git
[submodule "backend/glid-3-xl"]
path = backend/glid-3-xl
url = ../../jina-ai/glid-3-xl.git
[submodule "backend/latent-diffusion"]
path = backend/latent-diffusion
url = ../../CompVis/latent-diffusion.git
[submodule "backend/stable-diffusion"]
path = backend/stable-diffusion
url = ../../AmericanPresidentJimmyCarter/stable-diffusion.git
[submodule "backend/SwinIR"]
path = backend/SwinIR
url = ../../jina-ai/SwinIR.git
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@ SHELL ["/bin/bash", "-c"]
ENV PYTHONPATH=/sd

EXPOSE 8501
COPY ./entrypoint.sh /sd/
COPY ./data/DejaVuSans.ttf /usr/share/fonts/truetype/
COPY ./data/ /sd/data/
copy ./images/ /sd/images/
copy ./scripts/ /sd/scripts/
copy ./ldm/ /sd/ldm/
copy ./frontend/ /sd/frontend/
copy ./configs/ /sd/configs/
copy ./configs/webui/webui_streamlit.yaml /sd/configs/webui/userconfig_streamlit.yaml
copy ./.streamlit/ /sd/.streamlit/
COPY ./entrypoint.sh /sd/
copy ./optimizedSD/ /sd/optimizedSD/
ENTRYPOINT /sd/entrypoint.sh

RUN mkdir -p ~/.streamlit/
Expand Down
3 changes: 2 additions & 1 deletion Dockerfile_base
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ SHELL ["/bin/bash", "-c"]
WORKDIR /install

RUN apt-get update && \
apt-get install -y wget curl git build-essential zip unzip nano openssh-server libgl1 && \
apt-get install -y wget curl git build-essential zip unzip nano openssh-server libgl1 libsndfile1-dev && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

COPY ./requirements.txt /install/
COPY ./setup.py /install/

RUN /opt/conda/bin/python -m pip install -r /install/requirements.txt

Expand Down
4 changes: 3 additions & 1 deletion Dockerfile_runpod
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,19 @@ SHELL ["/bin/bash", "-c"]
ENV PYTHONPATH=/sd

EXPOSE 8501
COPY ./runpod_entrypoint.sh /sd/entrypoint.sh
COPY ./data/DejaVuSans.ttf /usr/share/fonts/truetype/
COPY ./configs/ /sd/configs/
copy ./configs/webui/webui_streamlit.yaml /sd/configs/webui/userconfig_streamlit.yaml
COPY ./data/ /sd/data/
COPY ./frontend/ /sd/frontend/
COPY ./gfpgan/ /sd/gfpgan/
COPY ./images/ /sd/images/
COPY ./ldm/ /sd/ldm/
COPY ./models/ /sd/models/
copy ./optimizedSD/ /sd/optimizedSD/
COPY ./scripts/ /sd/scripts/
COPY ./.streamlit/ /sd/.streamlit/
COPY ./runpod_entrypoint.sh /sd/entrypoint.sh
ENTRYPOINT /sd/entrypoint.sh

RUN mkdir -p ~/.streamlit/
Expand Down
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

## Installation instructions for:

- **[Windows](https://sygil-dev.github.io/sygil-webui/docs/1.windows-installation.html)**
- **[Linux](https://sygil-dev.github.io/sygil-webui/docs/2.linux-installation.html)**
- **[Windows](https://sygil-dev.github.io/sygil-webui/docs/Installation/windows-installation)**
- **[Linux](https://sygil-dev.github.io/sygil-webui/docs/Installation/linux-installation)**

### Want to ask a question or request a feature?

Expand Down Expand Up @@ -118,7 +118,7 @@ Please see the [Streamlit Documentation](docs/4.streamlit-interface.md) to learn

**Note: the Gradio interface is no longer being actively developed by Sygil.Dev and is only receiving bug fixes.**

Please see the [Gradio Documentation](docs/5.gradio-interface.md) to learn more.
Please see the [Gradio Documentation](https://sygil-dev.github.io/sygil-webui/docs/Gradio/gradio-interface/) to learn more.

## Image Upscalers

Expand Down Expand Up @@ -146,13 +146,13 @@ Put them into the `sygil-webui/models/realesrgan` directory.

### LSDR

Download **LDSR** [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [model last.cpkt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename last.ckpt to model.ckpt and place both under `sygil-webui/models/ldsr/`
Download **LDSR** [project.yaml](https://heibox.uni-heidelberg.de/f/31a76b13ea27482981b4/?dl=1) and [model last.ckpt](https://heibox.uni-heidelberg.de/f/578df07c8fc04ffbadf3/?dl=1). Rename `last.ckpt` to `model.ckpt` and place both under `sygil-webui/models/ldsr/`

### GoBig, and GoLatent *(Currently on the Gradio version Only)*

More powerful upscalers that uses a seperate Latent Diffusion model to more cleanly upscale images.
More powerful upscalers that uses a separate Latent Diffusion model to more cleanly upscale images.

Please see the [Image Enhancers Documentation](docs/6.image_enhancers.md) to learn more.
Please see the [Post-Processing Documentation](https://sygil-dev.github.io/sygil-webui/docs/post-processing) to learn more.

-----

Expand All @@ -162,12 +162,12 @@ Please see the [Image Enhancers Documentation](docs/6.image_enhancers.md) to lea

*Stable Diffusion was made possible thanks to a collaboration with [Stability AI](https://stability.ai/) and [Runway](https://runwayml.com/) and builds upon our previous work:*

[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/)<br/>
[**High-Resolution Image Synthesis with Latent Diffusion Models**](https://ommer-lab.com/research/latent-diffusion-models/)
[Robin Rombach](https://github.com/rromb)\*,
[Andreas Blattmann](https://github.com/ablattmann)\*,
[Dominik Lorenz](https://github.com/qp-qp),
[Patrick Esser](https://github.com/pesser),
[Björn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)<br/>
[Björn Ommer](https://hci.iwr.uni-heidelberg.de/Staff/bommer)

**CVPR '22 Oral**

Expand All @@ -194,7 +194,7 @@ Details on the training procedure and data, as well as the intended use of the m

## Comments

- Our codebase for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion)
- Our code base for the diffusion models builds heavily on [OpenAI's ADM codebase](https://github.com/openai/guided-diffusion)
and [https://github.com/lucidrains/denoising-diffusion-pytorch](https://github.com/lucidrains/denoising-diffusion-pytorch).
Thanks for open-sourcing!

Expand Down
1 change: 1 addition & 0 deletions backend/SwinIR
Submodule SwinIR added at 41d8c9
1 change: 1 addition & 0 deletions backend/clip-as-service
Submodule clip-as-service added at 9bb7d1
1 change: 1 addition & 0 deletions backend/clipseg
Submodule clipseg added at 656e0c
1 change: 1 addition & 0 deletions backend/dalle_flow
Submodule dalle_flow added at 491c52
1 change: 1 addition & 0 deletions backend/glid-3-xl
Submodule glid-3-xl added at b21a3a
1 change: 1 addition & 0 deletions backend/latent-diffusion
Submodule latent-diffusion added at a506df
1 change: 1 addition & 0 deletions backend/stable-diffusion
Submodule stable-diffusion added at 2de63e
68 changes: 68 additions & 0 deletions configs/stable-diffusion/v2-inference-v.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Stable Diffusion 2.x inference config — v-prediction variant (768-v models).
# Indentation reconstructed: the scraped copy was flattened to column 0, which
# breaks the nested model/params structure. Booleans normalized to YAML 1.2
# canonical lowercase true/false (same values under PyYAML's 1.1 loader).
model:
  base_learning_rate: 1.0e-4
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    # "v" selects v-prediction parameterization (vs. default eps-prediction).
    parameterization: "v"
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false  # we set this to false because this is an inference only config

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: true
        use_fp16: true
        image_size: 32  # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64  # need to fix for flash-attn
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024
        legacy: false

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: true
        layer: "penultimate"
67 changes: 67 additions & 0 deletions configs/stable-diffusion/v2-inference.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Stable Diffusion 2.x inference config — standard eps-prediction variant
# (identical to v2-inference-v.yaml except it omits parameterization: "v").
# Indentation reconstructed: the scraped copy was flattened to column 0, which
# breaks the nested model/params structure. Booleans normalized to YAML 1.2
# canonical lowercase true/false (same values under PyYAML's 1.1 loader).
model:
  base_learning_rate: 1.0e-4
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 200
    timesteps: 1000
    first_stage_key: "jpg"
    cond_stage_key: "txt"
    image_size: 64
    channels: 4
    cond_stage_trainable: false
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: false  # we set this to false because this is an inference only config

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        use_checkpoint: true
        use_fp16: true
        image_size: 32  # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [4, 2, 1]
        num_res_blocks: 2
        channel_mult: [1, 2, 4, 4]
        num_head_channels: 64  # need to fix for flash-attn
        use_spatial_transformer: true
        use_linear_in_transformer: true
        transformer_depth: 1
        context_dim: 1024
        legacy: false

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          #attn_type: "vanilla-xformers"
          double_z: true
          z_channels: 4
          resolution: 256
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
      params:
        freeze: true
        layer: "penultimate"

0 comments on commit 5291437

Please sign in to comment.