Skip to content

Commit

Permalink
Add images on indexing to docs
Browse files Browse the repository at this point in the history
  • Loading branch information
J535D165 committed Dec 4, 2019
1 parent 9fa23a8 commit 7492073
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 0 deletions.
Binary file added docs/images/indexing_basic.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
60 changes: 60 additions & 0 deletions docs/images/indexing_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import numpy as np
import matplotlib.colors as mlc
import matplotlib.pyplot as mlp

# Draws two side-by-side "pair matrices" and writes them to indexing_basic.png.
# Every cell stands for one candidate record pair (x record, y record); shaded
# cells are the pairs that are kept.

PAIR_COLOR = (0.984, 0.501, 0.447)
BLANK_COLOR = (1.000, 1.000, 1.000)

# A single colormap shared by both panels: 0 -> blank cell, 1 -> shaded cell.
color_map = mlc.LinearSegmentedColormap.from_list(
    'ColorMap', [BLANK_COLOR, PAIR_COLOR])


def _plot_pair_matrix(ax, pairs, x_records, y_records, x_name, y_name, title):
    """Draw one pair matrix on ``ax``.

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    pairs : numpy.ndarray
        2-D array of 0/1 flags with shape ``(len(y_records), len(x_records))``;
        cells equal to 1 are shaded.
    x_records, y_records : list of str
        Record labels shown along the x axis (columns) and y axis (rows).
    x_name, y_name : str
        Axis captions.
    title : str
        Panel title.
    """
    # Explicit vmin/vmax so the shade of a cell depends only on its value,
    # not on how matplotlib normalises a matrix whose cells are all equal.
    ax.imshow(pairs, cmap=color_map, vmin=0, vmax=1, interpolation='none')

    # Major ticks sit on the cell centres and carry the record labels; minor
    # ticks sit on the cell borders and only serve to position the grid lines.
    # Tick counts are derived from the same list that supplies the labels.
    ax.set_xlabel(x_name, fontsize=13)
    ax.set_xticks(np.arange(0, len(x_records), 1))
    ax.set_xticks(np.arange(-0.5, len(x_records), 1), minor=True)
    ax.set_xticklabels(x_records)

    ax.set_ylabel(y_name, fontsize=13)
    ax.set_yticks(np.arange(0, len(y_records), 1))
    ax.set_yticks(np.arange(-0.5, len(y_records), 1), minor=True)
    ax.set_yticklabels(y_records)

    ax.grid(which='minor', color='k')

    ax.set_title(title, fontsize=15, fontweight='bold')


figure, axes = mlp.subplots(nrows=1, ncols=2, figsize=(8, 5))

# linking: every record of A is paired with every record of B
db_a = ['A1', 'A2', 'A3', 'A4', 'A5', 'A6']
db_b = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6']

# Rows follow the y axis (B) and columns the x axis (A).
img = np.ones((len(db_b), len(db_a)), dtype=float)

_plot_pair_matrix(axes[0], img, db_a, db_b,
                  'Dataset A', 'Dataset B', 'Linking A and B')

# dedup: A is paired with itself; keep only the strict upper triangle so that
# self-pairs and mirrored pairs are left blank
img = np.triu(np.ones((len(db_a), len(db_a)), dtype=float), 1)

_plot_pair_matrix(axes[1], img, db_a, db_a,
                  'Dataset A', 'Dataset A', 'Duplicate detection A')

figure.tight_layout()

# Save through the figure object rather than pyplot's implicit current figure.
figure.savefig("indexing_basic.png", dpi=150)
10 changes: 10 additions & 0 deletions docs/ref-index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ references for background information about indexation.
.. [christen2008] Christen, P. (2008). Febrl - A Freely Available Record
Linkage System with a Graphical User Interface.
The indexing module can be used for both linking and duplicate detection. In
case of duplicate detection, only pairs in the upper triangular part of the
matrix are returned. This means that the first record in each record pair has
the largest identifier, for example ``("A2", "A1")``, ``(5, 2)`` and ``("acb",
"abc")``. The following image shows the complete set of record pairs for both
linking and duplicate detection.

.. figure:: /images/indexing_basic.png
    :width: 100%

:class:`recordlinkage.Index` object
===================================

Expand Down

0 comments on commit 7492073

Please sign in to comment.