Merge pull request #600 from PAIR-code/dev
v0.4.1 Release
jameswex committed Dec 21, 2021
2 parents b0ff829 + 9fd2da3 commit b46c0ca
Showing 204 changed files with 4,611 additions and 1,098 deletions.
33 changes: 20 additions & 13 deletions Dockerfile
@@ -2,34 +2,31 @@
# https://hub.docker.com/_/python
FROM python:3.7-slim

# Default demo app command to run.
ENV APP_COMMAND "lit_nlp.examples.lm_demo:get_wsgi_app()"

# Copy local code to the container image.
ENV APP_HOME /app
WORKDIR $APP_HOME
COPY . ./

# Update Ubuntu packages and install basic utils
RUN apt-get update
RUN apt-get install -y wget curl gnupg2 gcc g++

# Install yarn
RUN curl -sS https://dl.yarnpkg.com/debian/pubkey.gpg | apt-key add -
RUN echo "deb https://dl.yarnpkg.com/debian/ stable main" | tee /etc/apt/sources.list.d/yarn.list
RUN echo "deb https://dl.yarnpkg.com/debian/ stable main" | \
tee /etc/apt/sources.list.d/yarn.list
RUN apt update && apt -y install yarn

# Install Anaconda
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
&& bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/anaconda3 \
&& rm Miniconda3-latest-Linux-x86_64.sh
&& bash Miniconda3-latest-Linux-x86_64.sh -b -p /opt/anaconda3 \
&& rm Miniconda3-latest-Linux-x86_64.sh

# Set path to conda
ENV PATH /opt/anaconda3/bin:$PATH

# Copy local code to the container image.
ENV APP_HOME /app
WORKDIR $APP_HOME
COPY . ./

# Set up conda environment with production dependencies
# This step is slow as it installs many packages.
COPY environment.yml .
RUN conda env create -f environment.yml

# Workaround for 'conda activate' depending on shell features
@@ -50,5 +47,15 @@ WORKDIR lit_nlp/client
RUN yarn && yarn build && rm -rf node_modules/*
WORKDIR $APP_HOME

# Default demo app command to run.
ARG DEFAULT_DEMO="glue_demo"
ENV DEMO_NAME $DEFAULT_DEMO

ARG DEFAULT_PORT="5432"
ENV DEMO_PORT $DEFAULT_PORT

# Run LIT server
CMD exec gunicorn -c lit_nlp/examples/gunicorn_config.py $APP_COMMAND
ENTRYPOINT exec gunicorn \
-c lit_nlp/examples/gunicorn_config.py \
--bind="0.0.0.0:$DEMO_PORT" \
"lit_nlp.examples.$DEMO_NAME:get_wsgi_app()"
25 changes: 21 additions & 4 deletions RELEASE.md
@@ -1,20 +1,37 @@
# Language Interpretability Tool releases

## Release 0.4.1

This is a bug-fix release aimed at improving visual clarity and streamlining
common workflows.

The UI has been slightly revamped, bugs have been fixed, and new capabilities
have been added. Notable changes include:

- Adds "open in new tab" feature to LIT Notebook widget
- Adds support for `SparseMultilabelPreds` to LIME
- Improves color consistency across the UI
- Switches to NumPy instead of scikit-learn for PCA
- Ensures all built-in demos are compatible with the Docker image
- Updates the Dockerfile to support run-time `DEMO_NAME` and `DEMO_PORT` args
- Fixes a rendering bug in the Confusion Matrix related to column and row spans
  when "hide empty labels" is turned on

## Release 0.4

This release adds a lot of new features. The website and documentation have
been updated accordingly.

The UI has been slightly revamped, bugs have been fixed, and new capabilities
have been added. Noteable changes include:
have been added. Notable changes include:
- Support for Google Cloud Vertex AI notebooks.
- Preliminary support for tabular and image data, in addition to NLP models.
- Addition of TCAV global interpretability method.
- New counterfactual generators for ablating or flipping text tokens for
minimal changes to flip predictions.
- New counterfactual generator for tabular data for minimal changes to flip
predictions.
- Partial depdence plots for tabular input features.
- Partial dependence plots for tabular input features.
- Ability to set binary classification thresholds separately for different
facets of the dataset
- Controls to find optimal thresholds across facets given different fairness
@@ -26,12 +43,12 @@ This release adds the ability to use LIT directly in colab and jupyter
notebooks. The website and documentation have been updated accordingly.

The UI has been slightly revamped, bugs have been fixed, and new capabilities
have been added. Noteable changes include:
have been added. Notable changes include:
- Notebook mode added.
- New annotated text visualization module added.
- Allow saving/loading of generated datapoints, and dynamic adding of new
datasets by path in the UI.
- Added syncronized scrolling between duplicated modules when comparing
- Added synchronized scrolling between duplicated modules when comparing
datapoints or models.
- Added a focus service for visually linking focus (i.e. hover) states between
components.
Binary file modified docs/assets/images/actor_to_actress.png
Binary file modified docs/assets/images/actress_relative_cav.png
Binary file modified docs/assets/images/lit-coref-compare.png
Binary file modified docs/assets/images/lit-coref-data.png
Binary file modified docs/assets/images/lit-coref-metric-top.png
Binary file modified docs/assets/images/lit-coref-metrics.png
Binary file modified docs/assets/images/lit-coref-pred.png
Binary file modified docs/assets/images/lit-coref-select.png
Binary file modified docs/assets/images/lit-datatable-search.png
Binary file modified docs/assets/images/lit-metrics-not.png
Binary file modified docs/assets/images/lit-not-saliency.png
Binary file modified docs/assets/images/lit-saliency.png
Binary file modified docs/assets/images/lit-sim-search.png
Binary file modified docs/assets/images/lit-t5.png
Binary file modified docs/assets/images/lit-toolbars.gif
Binary file modified docs/assets/images/lit-tweet.gif
Binary file removed docs/assets/images/lit-workspaces.jpg
Binary file added docs/assets/images/lit-workspaces.png
Binary file modified docs/assets/images/lit_data_table_annotated.png
Binary file modified docs/assets/images/lit_slice_editor_annotated.png
Binary file modified docs/assets/images/lit_tcav_screen_annotated.png
Binary file modified docs/assets/images/lit_tcav_settings_annotated.png
Binary file modified docs/assets/images/tcav_result.png
Binary file modified docs/assets/images/tcav_results_2.png
2 changes: 1 addition & 1 deletion docs/demos/coref.html
@@ -1,5 +1,5 @@
<html>
<script>
window.location.replace("http://35.225.219.27/");
window.location.replace("http://34.86.17.245:5431/");
</script>
</html>
2 changes: 1 addition & 1 deletion docs/demos/glue.html
@@ -1,5 +1,5 @@
<html>
<script>
window.location.replace("http://34.69.145.124/");
window.location.replace("http://34.86.17.245:5432/");
</script>
</html>
2 changes: 1 addition & 1 deletion docs/demos/images.html
@@ -1,5 +1,5 @@
<html>
<script>
window.location.replace("http://104.155.142.190/");
window.location.replace("http://34.86.17.245:5433/");
</script>
</html>
6 changes: 3 additions & 3 deletions docs/demos/index.html
@@ -122,12 +122,12 @@
<div class="demo-card-title"><a href="https://colab.research.google.com/github/PAIR-code/lit/blob/main/lit_nlp/examples/notebooks/LIT_sentiment_classifier.ipynb" target="_blank">Notebook usage</a></div>
<div class="demo-card-tags"> <span class="demo-tag"> BERT </span> <span class="demo-tag"> binary classification </span> <span class="demo-tag"> notebooks </span>
</div>
<div class="demo-card-data-source-title">DATA SOURCES</div>
<div class="demo-card-data-source-title">DATA SOURCE</div>
<div class="demo-card-data-source">
Stanford Sentiment Treebank
<a href="" target="_blank">Stanford Sentiment Treebank</a>
</div>
<div class="demo-card-copy">Use LIT directly inside a Colab notebook. Explore binary classification for sentiment analysis using SST2 from the General Language Understanding Evaluation (GLUE) benchmark suite.</div>
<div class="demo-card-cta-button"><a href="/lithttps://colab.research.google.com/github/PAIR-code/lit/blob/main/lit_nlp/examples/notebooks/LIT_sentiment_classifier.ipynb"></a></div>
<div class="demo-card-cta-button"><a href="https://colab.research.google.com/github/PAIR-code/lit/blob/main/lit_nlp/examples/notebooks/LIT_sentiment_classifier.ipynb"></a></div>
</div>
<div class="demo-card mdl-cell mdl-cell--6-col mdl-cell--4-col-tablet mdl-cell--4-col-phone">
<div class="demo-card-title"><a href="/lit/demos/coref.html" target="_blank">Gender bias in coreference systems</a></div>
2 changes: 1 addition & 1 deletion docs/demos/lm.html
@@ -1,5 +1,5 @@
<html>
<script>
window.location.replace("http://104.197.40.13/");
window.location.replace("http://34.86.17.245:5433/");
</script>
</html>
2 changes: 1 addition & 1 deletion docs/demos/penguins.html
@@ -1,5 +1,5 @@
<html>
<script>
window.location.replace("http://35.222.78.90/");
window.location.replace("http://34.86.17.245:5435/");
</script>
</html>
2 changes: 1 addition & 1 deletion docs/demos/t5.html
@@ -1,5 +1,5 @@
<html>
<script>
window.location.replace("http://34.68.143.28/");
window.location.replace("http://34.86.17.245:5436/");
</script>
</html>
4 changes: 2 additions & 2 deletions docs/index.html
@@ -98,7 +98,7 @@
<li>Does my model behave consistently if I change things like textual style, verb tense, or pronoun gender?</li>
</ul>
<p>LIT contains many built-in capabilities but is also customizable, with the ability to add custom interpretability techniques, metrics calculations, counterfactual generators, visualizations, and more.</p>
<p>LIT also contains preliminary support for non-language models, working with tabular and image data. For a similar tool built to explore general-purpose machine learning models, check out the <a href="https://whatif-tool.dev">What-If Tool</a>.</p>
<p>In addition to language, LIT also includes preliminary support for models operating on tabular and image data. For a similar tool built to explore general-purpose machine learning models, check out the <a href="https://whatif-tool.dev">What-If Tool</a>.</p>
<p>LIT can be run as a standalone server, or inside of python notebook environments such as Colab, Jupyter, and Google Cloud Vertex AI Notebooks.</p>
</div>
<div class="spacer" style="height:50px;"></div>
@@ -155,7 +155,7 @@ <h3 class="home-card-title">Version 0.4</h3>
</div>
<div class="mdl-cell mdl-cell--6-col mdl-cell--4-col-tablet mdl-cell--8-col-phone">
<img class="home-card-image" src="/lit/assets/images/LIT_Contribute.png"/>
<div class="home-card-action">CODE</div>
<div class="home-card-action">DOCS</div>
<h3 class="home-card-title">Documentation</h3>
<div class="home-card-desc">LIT is open-source and easily extensible to new models, tasks, and more.</div>
<div class="home-card-cta-button"><a href="https://github.com/PAIR-code/lit/wiki" target="_blank" >View documentation</a><img class="external-arrow" src="/lit/assets/images/arrow-link-out.png"/></div>
2 changes: 1 addition & 1 deletion docs/tutorials/generation/index.html
@@ -90,7 +90,7 @@ <h2>Debugging a Text Generator</h2>
from the T5 decoder. With one click, we retrieve the 25 nearest neighbors to our datapoint of interest from the training set and add them to the LIT UI for inspection. We can see through the search capability in the data table that the words “captain” and “former” appear 34 and 16 times in these examples–along with 3 occurrences of “replaced by” and two occurrences of “by former”. This suggests a strong prior toward our erroneous phrase from the training data most related to our datapoint of interest.</p>
<div class="mdl-cell mdl-cell--12-col mdl-cell--6-col-tablet mdl-cell--4-col-phone">
<img class="tutorial-image" src="/lit/assets/images/lit-datatable-search.png"/>
<div class="tutorial-caption">Above: An example of the existance of "by former" in a similar datapoint from the training set.</div>
<div class="tutorial-caption">Above: An example of the existence of "by former" in a similar datapoint from the training set.</div>
</div>

</div>
6 changes: 3 additions & 3 deletions docs/tutorials/tcav/index.html
@@ -118,9 +118,9 @@ <h4>Create a Concept</h4>
<div class="tutorial-caption">1: Selecting a slice as a concept and 2: running TCAV with the specified options</div>
</div>
<h4>Interpreting TCAV scores</h4>
<p>Once we run TCAV, we see an entry in the table in the TCAV module for each concept tested. Each concept gets a CAV (“Concept Activation Vector”) score between 0 and 1 describing the concept’s effect on the prediction of the class in question. What you want to look at is where the blue bar (CAV score) is relative to the black line (reference point). The reference point indicates the effect that slices made of randomly-chosen datapoints outside of the concept being tested has on prediction of the class. For a well-calibrated classifier, the reference point will usually be near 0.5 (i.e. no effect).</p>
<p>A blue bar extending beyond or falling short of the black line means the concept is influencing the prediction. If the blue bar extends beyond the black line, the concept is positively influencing the prediction; if it falls short, it is negatively influencing.</p>
<p>In our example, the CAV score of ~.85 indicates that our “acting” concept has a strong positive effect on the prediction of this class. So we have found that this concept has a positive effect on predicting positive sentiment for our classifier.</p>
<p>Once we run TCAV, we see an entry in the table in the TCAV module for each concept tested. Each concept gets a CAV (“Concept Activation Vector”) score between 0 and 1 describing the concept’s effect on the prediction of the class in question. What matters is where the blue bar (CAV score) is relative to the black line (reference point). The reference point indicates the effect that slices made of randomly-chosen datapoints outside of the concept being tested has on prediction of the class. For a well-calibrated classifier, the reference point will usually be near 0.5 (i.e. no effect).</p>
<p>A blue bar extending right or left of the black line means the concept is influencing the prediction. If the blue bar extends to the right of the black line, the concept is positively influencing the prediction. Conversely, if the bar extended to the left, it is negatively influencing. In either case, the larger the bar, the greater the influence.</p>
<p>In our example, the CAV score of ~0.91 indicates that our “acting” concept has a strong positive effect on the prediction of this class. So we have found that this concept has a positive effect on predicting positive sentiment for our classifier.</p>
<div class="mdl-cell mdl-cell--12-col mdl-cell--6-col-tablet mdl-cell--4-col-phone">
<img class="tutorial-image" src="/lit/assets/images/tcav_result.png"/>
<div class="tutorial-caption">TCAV results for our “acting” concept</div>
6 changes: 3 additions & 3 deletions docs/tutorials/tour/index.html
@@ -84,7 +84,7 @@ <h2>A Quick Tour of the Language Interpretability Tool</h2>
<h3>Building blocks - modules, groups, and workspaces</h3>
<p><strong>Modules, groups, and workspaces</strong> form the building blocks of LIT. Modules are discrete windows in which you can perform a specific set of tasks or analyses. Workspaces display combinations of modules known as groups, so you can view different visualizations and interpretability methods side-by-side.</p>
<div class="mdl-cell mdl-cell--12-col mdl-cell--6-col-tablet mdl-cell--4-col-phone">
<img class="tutorial-image" src="/lit/assets/images/lit-workspaces.jpg"/>
<img class="tutorial-image" src="/lit/assets/images/lit-workspaces.png"/>
<div class="tutorial-caption">Above: Building blocks of the Language Interpretability Tool: (1) Modules, (2) Groups, (3) Static workspace, (4) Group-based workspace.</div>
</div>
<p>LIT is divided into two workspaces - a Main workspace in the upper half of the interface, and a Group-based workspace in the lower half.</p>
@@ -108,7 +108,7 @@ <h3>Toolbars</h3>
<ul>
<li>Select data points by relationship, or by slice.</li>
<li>Choose a feature to color data points, across all modules.</li>
<li>Track the datapoint you’re looking at, navigate to the next, mark a datapoint as a favorite, or clear your selection.</li>
<li>Track the datapoint you are looking at, navigate to the next, mark a datapoint as a favorite, or clear your selection.</li>
<li>Select the active models and dataset, including multiple models to compare.</li>
</ul>
<div class="mdl-cell mdl-cell--12-col mdl-cell--6-col-tablet mdl-cell--4-col-phone">
@@ -123,7 +123,7 @@ <h3>Using Modules</h3>
<div class="info-box-title">Visualizations that scale</div>
<div class="info-box-text">Visualizations and results within modules can scale depending on if you're looking at one data point, multiple data points, one or multiple models. For instance, turning on the “compare datapoints” toggle allows you to compare a selected datapoint (known as the reference datapoint) to others by presenting individual results side-by-side within relevant modules.</div>
</div>
<p>Now that you’re familiar with LIT’s interface, take LIT for a spin in our <a href="../../demos">demos</a> or explore different <a href="../">case studies</a>.</p>
<p>Now that you are familiar with LIT’s interface, take LIT for a spin in our <a href="../../demos">demos</a> or explore different <a href="../">case studies</a>.</p>

</div>
<div class="mdl-cell--4-col hide-me">
8 changes: 7 additions & 1 deletion documentation/api.md
@@ -803,7 +803,13 @@ passing in a dict of models and a dict of datasets, similar to the
parameter that specifies the height in pixels to render the LIT UI.

Then, in its own output cell, call the `render` method on the widget object to
render the LIT UI. The LIT UI can be rendered in multiple cells if desired.
render the LIT UI. The LIT UI can be rendered in multiple cells if desired. The
LIT UI can also be rendered in its own browser tab, outside of the notebook, by
passing the parameter `open_in_new_tab=True` to the `render` method. The
`render` method can also take an optional configuration object specifying
options for rendering the LIT UI, such as the selected layout, current display
tab, dataset, and models. See
[notebook.py](../lit_nlp/notebook.py) for details.
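
For example, a minimal sketch of the notebook workflow, assuming the
`LitWidget` class defined in [notebook.py](../lit_nlp/notebook.py) and
pre-built `models` and `datasets` dicts as described above:

```python
from lit_nlp import notebook

# `models` and `datasets` are dicts mapping names to LIT model and dataset
# objects, as described above; their construction is omitted here.
widget = notebook.LitWidget(models, datasets, height=600)

# Render the UI inline in the current output cell (this can be repeated in
# other cells if desired)...
widget.render()

# ...or open the UI in its own browser tab instead of rendering it inline.
widget.render(open_in_new_tab=True)
```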

The widget has a `stop` method which shuts down the widget's server. This can be
important for freeing up resources if you plan to create multiple LIT widget
