In [None]:
# Copyright 2021 Google LLC
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
# Notebook authors: Kevin P. Murphy (murphyk@gmail.com)
# and Mahmoud Soliman (mjs@aucegypt.edu)

# This notebook reproduces figures for chapter 21 from the book
# "Probabilistic Machine Learning: An Introduction"
# by Kevin Murphy (MIT Press, 2021).
# Book pdf is available from http://probml.ai

<a href="https://opensource.org/licenses/MIT" target="_parent"><img src="https://img.shields.io/github/license/probml/pyprobml"/></a>

<a href="https://colab.research.google.com/github/probml/pml-book/blob/main/pml1/figure_notebooks/chapter21_clustering_figures.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Figure 21.1:<a name='21.1'></a> <a name='purity'></a> 


 Three clusters with labeled objects inside

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.1.png" width="256"/>

## Figure 21.2:<a name='21.2'></a> <a name='agglom'></a> 


(a) An example of single link clustering using city block distance. Pairs (1,3) and (4,5) are both distance 1 apart, so get merged first. (b) The resulting dendrogram. Adapted from Figure 7.5 of <a href='#Alpaydin04'>[Alp04]</a> .  
Figure(s) generated by [agglomDemo.py](https://github.com/probml/pyprobml/blob/master/scripts/agglomDemo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n agglomDemo.py

## Figure 21.3:<a name='21.3'></a> <a name='hclust'></a> 


 Illustration of (a) Single linkage. (b) Complete linkage. (c) Average linkage

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.3_A.png" width="256"/>

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.3_B.png" width="256"/>

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.3_C.png" width="256"/>

## Figure 21.4:<a name='21.4'></a> <a name='yeastHier'></a> 


 Hierarchical clustering of yeast gene expression data. (a) Single linkage. (b) Complete linkage. (c) Average linkage.  
Figure(s) generated by [hclust_yeast_demo.py](https://github.com/probml/pyprobml/blob/master/scripts/hclust_yeast_demo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n hclust_yeast_demo.py

## Figure 21.5:<a name='21.5'></a> <a name='yeast'></a> 


(a) Some yeast gene expression data plotted as a heat map. (b) Same data plotted as a time series.  
Figure(s) generated by [yeast_data_viz.py](https://github.com/probml/pyprobml/blob/master/scripts/yeast_data_viz.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n yeast_data_viz.py

## Figure 21.6:<a name='21.6'></a> <a name='yeastHierClustering'></a> 


 Hierarchical clustering applied to the yeast gene expression data. (a) The rows are permuted according to a hierarchical clustering scheme (average link agglomerative clustering), in order to bring similar rows close together. (b) 16 clusters induced by cutting the average linkage tree at a certain height.  
Figure(s) generated by [hclust_yeast_demo.py](https://github.com/probml/pyprobml/blob/master/scripts/hclust_yeast_demo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n hclust_yeast_demo.py

## Figure 21.7:<a name='21.7'></a> <a name='kmeansVoronoi'></a> 


 Illustration of K-means clustering in 2d. We show the result of using two different random seeds. Adapted from Figure 9.5 of <a href='#Geron2019'>[Aur19]</a> .  
Figure(s) generated by [kmeans_voronoi.py](https://github.com/probml/pyprobml/blob/master/scripts/kmeans_voronoi.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n kmeans_voronoi.py

## Figure 21.8:<a name='21.8'></a> <a name='yeastClustering'></a> 


 Clustering the yeast data from <a href='#21.5'>Figure 21.5</a> using K-means clustering with $K=16$. (a) Visualizing all the time series assigned to each cluster. (b) Visualizing the 16 cluster centers as prototypical time series.  
Figure(s) generated by [kmeans_yeast_demo.py](https://github.com/probml/pyprobml/blob/master/scripts/kmeans_yeast_demo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n kmeans_yeast_demo.py

## Figure 21.9:<a name='21.9'></a> <a name='vqDemo'></a> 


 An image compressed using vector quantization with a codebook of size $K$. (a) $K=2$. (b) $K=4$.  
Figure(s) generated by [vqDemo.py](https://github.com/probml/pyprobml/blob/master/scripts/vqDemo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n vqDemo.py

## Figure 21.10:<a name='21.10'></a> <a name='kmeansMinibatch'></a> 


 Illustration of batch vs mini-batch K-means clustering on the 2d data from <a href='#21.7'>Figure 21.7</a>. Left: distortion vs $K$. Right: Training time vs $K$. Adapted from Figure 9.6 of <a href='#Geron2019'>[Aur19]</a> .  
Figure(s) generated by [kmeans_minibatch.py](https://github.com/probml/pyprobml/blob/master/scripts/kmeans_minibatch.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n kmeans_minibatch.py

## Figure 21.11:<a name='21.11'></a> <a name='kmeansVsK'></a> 


 Performance of K-means and GMM vs $K$ on the 2d dataset from <a href='#21.7'>Figure 21.7</a>. (a) Distortion on validation set vs $K$.  
Figure(s) generated by [kmeans_silhouette.py](https://github.com/probml/pyprobml/blob/master/scripts/kmeans_silhouette.py) [gmm_2d.py](https://github.com/probml/pyprobml/blob/master/scripts/gmm_2d.py) [kmeans_silhouette.py](https://github.com/probml/pyprobml/blob/master/scripts/kmeans_silhouette.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute one of the scripts listed for this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n kmeans_silhouette.py

In [None]:
# Best-effort deimport of superimport, then execute one of the scripts listed for this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n gmm_2d.py

In [None]:
# Best-effort deimport of superimport, then execute one of the scripts listed for this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n kmeans_silhouette.py

## Figure 21.12:<a name='21.12'></a> <a name='kmeansSilhouetteVoronoi'></a> 


 Voronoi diagrams for K-means for different $K$ on the 2d dataset from <a href='#21.7'>Figure 21.7</a>.  
Figure(s) generated by [kmeans_silhouette.py](https://github.com/probml/pyprobml/blob/master/scripts/kmeans_silhouette.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n kmeans_silhouette.py

## Figure 21.13:<a name='21.13'></a> <a name='kmeansSilhouetteDiagram'></a> 


 Silhouette diagrams for K-means for different $K$ on the 2d dataset from <a href='#21.7'>Figure 21.7</a>.  
Figure(s) generated by [kmeans_silhouette.py](https://github.com/probml/pyprobml/blob/master/scripts/kmeans_silhouette.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n kmeans_silhouette.py

## Figure 21.14:<a name='21.14'></a> <a name='gmm2dShapes'></a> 


 Some data in 2d fit using a GMM with $K=5$ components. Left column: marginal distribution $p(\boldsymbol{x})$. Right column: visualization of each mixture distribution, and the hard assignment of points to their most likely cluster. (a-b) Full covariance. (c-d) Tied full covariance. (e-f) Diagonal covariance. (g-h) Spherical covariance. Color coding is arbitrary.  
Figure(s) generated by [gmm_2d.py](https://github.com/probml/pyprobml/blob/master/scripts/gmm_2d.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n gmm_2d.py

## Figure 21.15:<a name='21.15'></a> <a name='gmmIdentifiabilityData'></a> 


 Some 1d data, with a kernel density estimate superimposed. Adapted from Figure 6.2 of <a href='#Martin2018'>[Mar18]</a> .  
Figure(s) generated by [gmm_identifiability_pymc3.py](https://github.com/probml/pyprobml/blob/master/scripts/gmm_identifiability_pymc3.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n gmm_identifiability_pymc3.py

## Figure 21.16:<a name='21.16'></a> <a name='gmmIdentifiability'></a> 


 Illustration of the label switching problem when performing posterior inference for the parameters of a GMM. We show a KDE estimate of the posterior marginals derived from 1000 samples from 4 HMC chains. (a) Unconstrained model. Posterior is symmetric. (b) Constrained model, where we add a penalty to ensure $\mu _0 < \mu _1$. Adapted from Figure 6.6-6.7 of <a href='#Martin2018'>[Mar18]</a> .  
Figure(s) generated by [gmm_identifiability_pymc3.py](https://github.com/probml/pyprobml/blob/master/scripts/gmm_identifiability_pymc3.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n gmm_identifiability_pymc3.py

## Figure 21.17:<a name='21.17'></a> <a name='gmmChooseKkde'></a> 


 Fitting GMMs with different numbers of clusters $K$ to the data in <a href='#21.15'>Figure 21.15</a>. Black solid line is KDE fit. Solid blue line is posterior mean; faint blue lines are posterior samples. Dotted lines show the individual Gaussian mixture components, evaluated by plugging in their posterior mean parameters. Adapted from Figure 6.8 of <a href='#Martin2018'>[Mar18]</a> . 

To reproduce this figure, click the open in colab button: <a href="https://colab.research.google.com/github/probml/probml-notebooks/blob/master/notebooks/gmm_chooseK_pymc3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.17.png" width="256"/>

## Figure 21.18:<a name='21.18'></a> <a name='gmmChooseKwaic'></a> 


 WAIC scores for the different GMMs. The empty circle is the posterior mean WAIC score for each model, and the black lines represent the standard error of the mean. The solid circle is the in-sample deviance of each model, i.e., the unpenalized log-likelihood. The dashed vertical line corresponds to the maximum WAIC value. The gray triangle is the difference in WAIC score for that model compared to the best model. Adapted from Figure 6.10 of <a href='#Martin2018'>[Mar18]</a> . 

To reproduce this figure, click the open in colab button: <a href="https://colab.research.google.com/github/probml/probml-notebooks/blob/master/notebooks/gmm_chooseK_pymc3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.18.png" width="256"/>

## Figure 21.19:<a name='21.19'></a> <a name='scRings'></a> 


 Results of clustering some data. (a) K-means. (b) Spectral clustering.  
Figure(s) generated by [spectral_clustering_demo.py](https://github.com/probml/pyprobml/blob/master/scripts/spectral_clustering_demo.py) 

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

In [None]:
# Best-effort deimport of superimport, then execute the script that generates this figure.
# `%run -n` sets __name__ to the script's own name rather than '__main__'.
try_deimport()
%run -n spectral_clustering_demo.py

## Figure 21.20:<a name='21.20'></a> <a name='IRManimals'></a> 


 Illustration of biclustering. We show 5 of the 12 organism clusters, and 6 of the 33 feature clusters. The original data matrix is shown, partitioned according to the discovered clusters. From Figure 3 of <a href='#Kemp06'>[KTU06]</a> . Used with kind permission of Charles Kemp

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.20.png" width="256"/>

## Figure 21.21:<a name='21.21'></a> <a name='biclustering'></a> 


(a) Example of biclustering. Each row is assigned to a unique cluster, and each column is assigned to a unique cluster. (b) Example of multi-clustering using a nested partition model. The rows can belong to different clusters depending on which subset of column features we are looking at

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.21_A.png" width="256"/>

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.21_B.png" width="256"/>

## Figure 21.22:<a name='21.22'></a> <a name='crosscat'></a> 


 MAP estimate produced by the crosscat system when applied to a binary data matrix of animals (rows) by features (columns). See text for details. From Figure 7 of <a href='#Shafto06'>[Sha+06]</a> . Used with kind permission of Vikash Mansinghka

In [None]:
#@title Click me to run setup { display-mode: "form" }
try:
  if PYPROBML_SETUP_ALREADY_RUN:
    print('skipping setup')
except:
  PYPROBML_SETUP_ALREADY_RUN = True
  print('running setup...')
  !git clone --depth 1 https://github.com/probml/pyprobml  /pyprobml &> /dev/null 
  %cd -q /pyprobml/scripts
  %reload_ext autoreload 
  %autoreload 2
  !pip install superimport deimport -qqq
  import superimport
def try_deimport():
  try: 
    from deimport.deimport import deimport
    deimport(superimport,verbose=True)
  except Exception as e:
    print(e)
  print('finished!')

<img src="https://raw.githubusercontent.com/probml/pml-book/main/pml1/figures/images/Figure_21.22.png" width="256"/>

## References:
 <a name='Alpaydin04'>[Alp04]</a> E. Alpaydin "Introduction to machine learning". (2004). 

<a name='Geron2019'>[Aur19]</a> A. Géron "Hands-On Machine Learning with Scikit-Learn and TensorFlow: Concepts, Tools, and Techniques for Building Intelligent Systems (2nd edition)". (2019). 

<a name='Kemp06'>[KTU06]</a> C. Kemp, J. Tenenbaum and T. Y. Ueda. "Learning systems of concepts with an infinite relational model". (2006). 

<a name='Martin2018'>[Mar18]</a> O. Martin "Bayesian analysis with Python". (2018). 

<a name='Shafto06'>[Sha+06]</a> P. Shafto, C. Kemp, V. Mansinghka, M. Gordon and J. Tenenbaum. "Learning cross-cutting systems of categories". (2006). 

