index.html

<!DOCTYPE html>
<!-- Project page for "Semantify: Simplifying the Control of 3D Morphable Models using CLIP". -->
<html lang="en">
<head>
  <meta charset="utf-8">
  <!-- Description is kept on a single line so the attribute value carries no embedded newline. -->
  <meta name="description"
        content="Semantify is based on a self-supervised method that utilizes the semantic power of CLIP language-vision model to build a mapping between semantic descriptors to 3DMM model coefficients.">
  <meta name="keywords" content="3DMM, CLIP, Semantify">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Semantify: Simplifying the Control of 3D Morphable Models using CLIP</title>

  <!-- Stylesheets: locally hosted Bulma (plus carousel/slider plugins) and Font Awesome,
       Academicons from a CDN, then the page's own rules last. -->
  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" type="image/svg+xml" href="./static/images/76_adobe_express.svg">

  <!-- Scripts: everything except Font Awesome loads synchronously, in document order.
       NOTE(review): index.js presumably relies on jQuery and the Bulma plugins being
       loaded before it; confirm before adding defer/async or reordering. -->
  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>
</head>
<body>

<!-- Title block: paper title, authors with affiliation markers, venue, and resource links. -->
<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">Semantify: Simplifying the Control of 3D Morphable Models using CLIP</h1>
          <!-- Authors; the superscript number refers to the affiliation list below. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block">
              <a href="https://www.linkedin.com/in/omer-gralnik-723a71196/">Omer Gralnik</a><sup>1</sup>,</span>
            <span class="author-block">
              <a href="http://niessnerlab.org/members/guy_gafni/profile.html">Guy Gafni</a><sup>2</sup>,</span>
            <!-- Last author: no trailing comma. -->
            <span class="author-block">
              <a href="https://faculty.runi.ac.il/arik/site/index.asp">Ariel Shamir</a><sup>1</sup>
            </span>
          </div>

          <!-- Affiliations. -->
          <div class="is-size-5 publication-authors">
            <span class="author-block"><sup>1</sup>Reichman University,</span>
            <span class="author-block"><sup>2</sup>Technical University of Munich</span>
          </div>
          <div class="is-size-4 publication-authors">
            <strong>ICCV 2023, Paris</strong>
          </div>

          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- PDF Link. -->
              <span class="link-block">
                <a href="https://openaccess.thecvf.com/content/ICCV2023/papers/Gralnik_Semantify_Simplifying_the_Control_of_3D_Morphable_Models_Using_CLIP_ICCV_2023_paper.pdf"
                   class="external-link button is-normal is-rounded is-dark">
                  <!-- Icons are decorative; the adjacent text names the link. -->
                  <span class="icon">
                    <i class="fas fa-file-pdf" aria-hidden="true"></i>
                  </span>
                  <span>Paper</span>
                </a>
              </span>
              <!-- arXiv Link. -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2308.07415"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="ai ai-arxiv" aria-hidden="true"></i>
                  </span>
                  <span>arXiv</span>
                </a>
              </span>
              <!-- Code Link. -->
              <span class="link-block">
                <a href="https://github.com/Omergral/Semantify"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-github" aria-hidden="true"></i>
                  </span>
                  <span>Code</span>
                </a>
              </span>
            </div>

          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- Teaser figure followed by its caption. -->
<section class="hero teaser">
  <div class="container is-max-desktop">
    <div class="hero-body">
      <!-- Alt text summarises the two panels, (a) and (b), described in the caption. -->
      <img src="./static/images/semantify_teaser.png"
           class="teaser-image"
           alt="Semantify teaser figure: (a) an interactive application with one slider per semantic descriptor; (b) zero-shot fitting of a 3D model to an image.">
      <h2 class="subtitle has-text-centered">
        <!-- The span is now closed explicitly; it was previously left open until the end of the heading. -->
        <span>Semantify offers a method to create and edit a 3D parametric model using semantically meaningful descriptors.
        Semantify is based on a self-supervised method that utilizes the semantic power of CLIP language-vision model to build a mapping between semantic descriptors to
        3DMM model coefficients. This can be used in an interactive application defining a slider for each descriptor (a), or to fit a model to an image in a zero shot manner by feeding the image into CLIP and obtaining a vector of semantic scores that can be mapped to shape parameters (b).</span>
      </h2>
    </div>
  </div>
</section>

<!-- Main content: abstract, embedded video, and method overview figure with caption. -->
<section class="section">
  <div class="container is-max-desktop">
    <!-- Abstract. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <p>
            We present Semantify: a self-supervised method that utilizes the semantic power of CLIP language-vision foundation model to simplify the control of 3D morphable models. 
          </p>
          <p>
            Given a parametric model, training data is created by randomly sampling the model's parameters, creating various shapes and rendering them. The similarity between the output images and a set of word descriptors is calculated in CLIP's latent space. 
            Our key idea is first to choose a small set of semantically meaningful and disentangled descriptors that characterize the 3DMM, and then learn a non-linear mapping from scores across this set to the parametric coefficients of the given 3DMM. The non-linear mapping is defined by training a neural network without a human-in-the-loop. 
          </p>
          <p>
            We present results on numerous 3DMMs: body shape models, face shape and expression models, as well as animal shapes. We demonstrate how our method defines a simple slider interface for intuitive modeling, and show how the mapping can be used to instantly fit a 3D parametric body shape to in-the-wild images. 
          </p>
        </div>
      </div>
    </div>
    <!--/ Abstract. -->

    <!-- Paper video. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Video</h2>
        <div class="publication-video">
          <iframe src="https://www.youtube.com/embed/umiDWAPUcL8" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
        </div>
      </div>
    </div>
    <!--/ Paper video. -->

    <!-- Overview. -->
    <div class="columns is-centered has-text-centered" style="margin-top: 20px">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Method Overview</h2>
        <!-- Alt text summarises the two stages, (a) and (b), described in the caption below. -->
        <img src="./static/images/method.png"
             alt="Method overview diagram: (a) rendered views of a 3DMM mesh are scored against semantic descriptors in CLIP latent space; (b) a network is trained to map the semantic scores to the 3DMM coefficients.">
        <div class="content has-text-justified" style="padding-top: 15px">
          <p>Learning a mapping from Semantic to Parametric space. (a) Given a coefficient vector &xi; we create the 3DMM mesh.
          The mesh is rendered from several views. Each rendered image I' is passed into CLIP along with a set of semantic descriptors &ell;.
          The difference between each descriptor and the image in CLIP latent space is calculated and stored in the corresponding entry of the similarity vector &Omega;.
          (b) Using a large set of such random pairs of (&Omega;,&xi;), we train a network to learn the mapping from semantic space to parametric space.</p>
        </div>
      </div>
    </div>
    <!--/ Overview. -->
  <!-- Closes the container opened at the top of this section; this end tag was previously commented out. -->
  </div>
</section>


<section class="section">
  <div class="container is-max-desktop">

    <!-- Animation. -->
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Results</h2>

        <!-- Interpolating. -->
        <h3 class="title is-4">Interpolating states</h3>
        <div class="content has-text-justified">
          <p>
            Here we visualize a subset of our semantic change of basis on some popular 3D morphable models. Note: this applet is only an illustration and changes one axis at a time. It may take time to load, so please wait until all models appear. To fully interact with our semantic basis, we recommend using our <a href="https://github.com/Omergral/Semantify">code</a>.
          </p>
        </div>
        <!-- SMPLX Male: one panel per descriptor (label, image wrapper, range slider).
             The wrapper starts with "Loading..." placeholder text.
             NOTE(review): the wrapper and slider ids are presumably hooks for
             static/js/index.js; confirm before renaming any of them. -->
        <div style="text-align: center; font-size: large;"><strong>SMPLX Male</strong> [Pavlakos et al. 2019]</div>

        <div class="columns is-vcentered" style="margin-top: 10px">
          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Muscular</div>
              <div id="interpolation-image-wrapper-muscular">
                Loading...
              </div>
              <!-- aria-label gives the slider an accessible name; the visible label is a plain div. -->
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-muscular"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMPLX Male: Muscular">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Long Torso</div>
              <div id="interpolation-image-wrapper-long-torso">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-long-torso"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMPLX Male: Long Torso">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Petite</div>
              <div id="interpolation-image-wrapper-petite">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-petite"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMPLX Male: Petite">
            </div>
          </div>
          <br>
        </div>
        <!-- FLAME Expression: one panel per descriptor (label, image wrapper, range slider).
             The wrapper starts with "Loading..." placeholder text.
             NOTE(review): the wrapper and slider ids are presumably hooks for
             static/js/index.js; confirm before renaming any of them. -->
        <div style="text-align: center; font-size: large;"><strong>FLAME Expression</strong> [Li et al. 2017]</div>
        <div class="columns is-vcentered" style="margin-top: 10px">
          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Smiling</div>
              <div id="interpolation-image-wrapper-smiling">
                Loading...
              </div>
              <!-- aria-label gives the slider an accessible name; the visible label is a plain div. -->
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-smiling"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="FLAME Expression: Smiling">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Open Mouth</div>
              <div id="interpolation-image-wrapper-open-mouth">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-open-mouth"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="FLAME Expression: Open Mouth">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Serious</div>
              <div id="interpolation-image-wrapper-serious">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-serious"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="FLAME Expression: Serious">
            </div>
          </div>
          <br>
        </div>
        <!-- SMPL Neutral: one panel per descriptor (label, image wrapper, range slider).
             The wrapper starts with "Loading..." placeholder text.
             NOTE(review): the wrapper and slider ids are presumably hooks for
             static/js/index.js; confirm before renaming any of them. -->
        <div style="text-align: center; font-size: large;"><strong>SMPL Neutral</strong> [Loper et al. 2015]</div>
        <div class="columns is-vcentered" style="margin-top: 10px">
          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <!-- NOTE(review): the visible label reads "Fat" while the ids end in "-thin";
                   confirm which descriptor this panel actually shows. -->
              <div style="text-align: center; font-size: large;">Fat</div>
              <div id="interpolation-image-wrapper-thin">
                Loading...
              </div>
              <!-- aria-label mirrors the visible label to give the slider an accessible name. -->
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-thin"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMPL Neutral: Fat">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Small</div>
              <div id="interpolation-image-wrapper-small">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-small"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMPL Neutral: Small">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Masculine</div>
              <div id="interpolation-image-wrapper-masculin">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-masculin"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMPL Neutral: Masculine">
            </div>
          </div>
          <br>
        </div>
        <!-- FLAME Shape: one panel per descriptor (label, image wrapper, range slider).
             The wrapper starts with "Loading..." placeholder text.
             NOTE(review): the wrapper and slider ids are presumably hooks for
             static/js/index.js; confirm before renaming any of them. -->
        <div style="text-align: center; font-size: large;"><strong>FLAME Shape</strong> [Li et al. 2017]</div>
        <div class="columns is-vcentered" style="margin-top: 10px">
          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Fat</div>
              <div id="interpolation-image-wrapper-fat">
                Loading...
              </div>
              <!-- aria-label gives the slider an accessible name; the visible label is a plain div. -->
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-fat"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="FLAME Shape: Fat">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Ears Sticking Out</div>
              <div id="interpolation-image-wrapper-ears-out">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-ears-out"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="FLAME Shape: Ears Sticking Out">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Long Neck</div>
              <div id="interpolation-image-wrapper-long-neck">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-long-neck"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="FLAME Shape: Long Neck">
            </div>
          </div>
          <br>
        </div>
        <!-- SMAL: one panel per descriptor (label, image wrapper, range slider).
             The wrapper starts with "Loading..." placeholder text.
             NOTE(review): the wrapper and slider ids are presumably hooks for
             static/js/index.js; confirm before renaming any of them. -->
        <div style="text-align: center; font-size: large;"><strong>SMAL</strong> [Zuffi et al. 2017]</div>
        <div class="columns is-vcentered" style="margin-top: 10px">
          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Lion</div>
              <div id="interpolation-image-wrapper-lion">
                Loading...
              </div>
              <!-- aria-label gives the slider an accessible name; the visible label is a plain div. -->
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-lion"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMAL: Lion">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Cow</div>
              <div id="interpolation-image-wrapper-cow">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-cow"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMAL: Cow">
            </div>
          </div>

          <div class="column columns nested-columns is-vcentered interpolation-panel">
            <div class="column interpolation-video-column">
              <div style="text-align: center; font-size: large;">Cat</div>
              <div id="interpolation-image-wrapper-cat">
                Loading...
              </div>
              <input class="slider is-fullwidth is-large is-info"
                     id="interpolation-slider-cat"
                     step="1" min="0" max="100" value="50" type="range"
                     aria-label="SMAL: Cat">
            </div>
          </div>
          <br>
        <!-- Closes this row's "columns" container, which previously had no end tag of its own. -->
        </div>
        <!--/ Interpolating. -->

      </div>
    </div>
    <!--/ Animation. -->

  </div>
</section>


<section class="section" id="BibTeX">
  <!-- Citation entry for the paper. The <pre> element preserves whitespace, so the
       entry's line breaks and indentation are rendered exactly as written. -->
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <pre><code>@InProceedings{Gralnik_2023_ICCV,
    author    = {Gralnik, Omer and Gafni, Guy and Shamir, Ariel},
    title     = {Semantify: Simplifying the Control of 3D Morphable Models Using CLIP},
    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},
    month     = {October},
    year      = {2023},
    pages     = {14554-14564}
}</code></pre>
  </div>
</section>


<!-- Page footer: site license and attribution for the website template. -->
<footer class="footer">
  <div class="container">
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">
          <p>
            <!-- License link uses https rather than plain http. -->
            This website is licensed under a <a rel="license"
                                                href="https://creativecommons.org/licenses/by-sa/4.0/">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>.
          </p>
          <p>
            This means you are free to borrow the <a
              href="https://github.com/nerfies/nerfies.github.io">source code</a> of this website,
            we just ask that you link back to this page in the footer.
            Please remember to remove the analytics code included in the header of the website which
            you do not want on your website.
          </p>
        </div>
      </div>
    </div>
  </div>
</footer>

</body>
</html>