In [1]:
# Inject CSS that sets the `.container` element to 90% width.
# `display` and `HTML` come from the public `IPython.display` module:
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
import performance_benchmark as pbench

# Intro

There are a number of Python packages for working with Formal Concept Analysis (FCA). In this notebook we compare their performance on the basic FCA task: constructing the concept lattice from a formal context.

We consider two packages: FCApy and Concepts

_Note: more packages may be added to the comparison in the future._

# Install the competing libraries

`FCApy` package (by Egor Dudyrev, HSE Moscow): https://github.com/EgorDudyrev/FCApy 

In [3]:
!pip -q install -U fcapy[context,lattice] --user

`Concepts` package (by Sebastian Bank, University of Leipzig): https://github.com/xflr6/concepts

In [4]:
!pip -q install -U concepts --user

`fcapsy` package (by Tomáš Mikula, Palacký University): https://github.com/mikulatomas/fcapsy

Update: we dropped the `fcapsy` package from the benchmark because it now uses the `concepts` package under the hood.

# Load data

First we load some classic FCA contexts (datasets)

In [5]:
# Names of the classic FCA contexts to benchmark
contexts_to_test = [
    'animal_movement',
    'digits',
    'gewaesser',
    'lattice',
    'liveinwater',
    'tealady',
]

# Load the listed contexts through the benchmark helper module
frames_classic = pbench.load_classic_context(contexts_to_test)

Add the Bob Ross dataset, which has more objects and attributes than the classic FCA datasets.

In [6]:
# Store the Bob Ross data alongside the classic contexts under the key 'bob_ross'
frames_classic['bob_ross'] = pbench.load_bob_ross_dataframe()

These classic real-world contexts are small, so we also add some larger random contexts to the comparison.

In [7]:
# Parameter values for the random contexts: numbers of objects, numbers of attributes, and densities
n_objects_vars = [10, 30, 100]
n_attributes_vars = [10, 30, 50]
densities_vars = [0.1, 0.5, 0.9]

# NOTE(review): no random seed is passed here — confirm whether generate_random_contexts
# seeds itself; otherwise the generated contexts may differ between runs.
frames_random = pbench.generate_random_contexts(n_objects_vars, n_attributes_vars, densities_vars)

In [8]:
# Combine classic and random contexts into one mapping keyed by context name;
# on a name clash the random context overrides the classic one.
# To benchmark only the classic contexts, use `frames = dict(frames_classic)` instead.
frames = {**frames_classic, **frames_random}

# Run benchmarks

## Default lattice visualizations

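Let us take one classic FCA context, 'animal movement', and a bigger one, the 'bob ross' dataset. (Note: the cell below currently selects 'tealady' as the second context; 'bob ross' is commented out.)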

The description of Animals context:
* objects (rows) are Animals
* attributes (columns) are Actions
* the table shows whether an Animal can perform an Action

The description of Bob Ross dataset:
* objects (rows) are paintings by Bob Ross
* attributes (columns) are specific elements in these paintings
* the table shows whether an element is on a painting

In [9]:
# Contexts to visualize with both libraries
K_names = ['animal_movement', 'tealady']  # 'bob_ross' was the alternative second entry and is currently disabled

### Visualization by `concepts`

In [10]:
# Visualize each context in K_names with `concepts`; timings are printed per context.
# The last argument is presumably the output directory for the images — confirm in performance_benchmark.
pbench.visualize_by_concepts(K_names, frames, "imgs/lattice_visualization/concepts")

animal_movement
Lattice constructed in 0.001809 seconds
Executed in 0.146577 seconds
tealady
Lattice constructed in 0.034803 seconds
Executed in 0.287551 seconds


### Visualization by `fcapy`

In [11]:
# Visualize each context in K_names with `fcapy`; timings are printed per context.
# The last argument is presumably the output directory for the PNG files — confirm in performance_benchmark.
pbench.visualize_by_fcapy(K_names, frames, "imgs/lattice_visualization/fcapy")

animal_movement
Lattice constructed in 0.016397 seconds
Visualizer constructed in 0.018139 seconds




Png saved in 0.437169 seconds
tealady
Lattice constructed in 0.059656 seconds
Visualizer constructed in 0.060847 seconds
Png saved in 1.252423 seconds


## Time to construct a lattice

Run the benchmarks

In [12]:
# Number of repeated measurements per (context, library) pair
n_runs = 10
# Timeout in seconds (5 minutes); passed to compute_stats and to the plotting helpers
timeout_secs = 5*60

In [13]:
# Order the context names by ascending 'n_connections' statistic of their context
frames_order = list(frames)
frames_order.sort(key=lambda ctx_name: pbench.get_context_stat(frames[ctx_name])['n_connections'])

In [14]:
# Libraries under test ('fcapsy' is no longer benchmarked)
lib_names_vals = ['concepts', 'fcapy']
# Contexts are benchmarked in the order stored in frames_order
ctx_names_vals = frames_order
# Total number of timed runs; shown as the cell output
len(lib_names_vals) * len(ctx_names_vals) * n_runs

680

In [15]:
%%time
# Run the benchmark over every context and library, n_runs times each.
# Long-running: the saved output of this cell reports a wall time of about 13 hours.
# NOTE(review): the saved output contains MemoryError tracebacks from `test_lattice_fcapy`
# worker processes, yet '%done' reports 100.0% — confirm how compute_stats marks failed runs.
stats_df = pbench.compute_stats(ctx_names_vals, lib_names_vals, n_runs, timeout_secs, frames)
# Share of rows whose 'is_computed' flag is set
print(f"%done: {stats_df['is_computed'].mean():.1%}")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=680.0), HTML(value='')))

Process test_lattice_fcapy:
Traceback (most recent call last):
  File "/home/egor/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/egor/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 232, in test_lattice_time_by_lib_multiprocess
    L_time.value = test_lattice_time_by_lib(K, lib_name)
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 220, in test_lattice_time_by_lib
    L_time = test_lattice_time_by_func(K, lambda ctx: ConceptLattice.from_context(ctx))
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 210, in test_lattice_time_by_func
    L_func(K)
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 220, in <lambda>
    L_time

MemoryError
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/lattice.py", line 113, in __init__
    bottom_elements = super(LowerSemiLattice, self).bottom_elements
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 123, in bottom_elements
    return [el_i for el_i in range(len(self)) if len(self.sub_elements(el_i)) == 0]
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 123, in <listcomp>
    return [el_i for el_i in range(len(self)) if len(self.sub_elements(el_i)) == 0]
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 155, in _sub_elements_cache
    res = self._sub_elements_nocache(element_index)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 148, in _sub_elements_nocache
    sub_indexes = {i for i in range(len(self)) if self.leq_elements(i, element_index) and i != element_index}
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/

  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 210, in test_lattice_time_by_func
    L_func(K)
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 220, in <lambda>
    L_time = test_lattice_time_by_func(K, lambda ctx: ConceptLattice.from_context(ctx))
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/lattice/concept_lattice.py", line 237, in from_context
    ltc = algo_func(context, **kwargs_used)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/algorithms/concept_construction.py", line 497, in lindig_algorithm
    lattice = ConceptLattice(concepts, subconcepts_dict=subconcepts_dict, superconcepts_dict=superconcepts_dict)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/lattice/concept_lattice.py", line 75, in __init__
    super(ConceptLattice, self).__init__(concepts, self.concepts_leq_func, use_cache=True)
  File "/home/egor/.local/lib/python3.8/site-packag

  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 148, in <setcomp>
    sub_indexes = {i for i in range(len(self)) if self.leq_elements(i, element_index) and i != element_index}
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 264, in _leq_elements_cache
    self._cache_leq[key] = res
MemoryError
Process test_lattice_fcapy:
Traceback (most recent call last):
  File "/home/egor/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/egor/anaconda3/lib/python3.8/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 232, in test_lattice_time_by_lib_multiprocess
    L_time.value = test_lattice_time_by_lib(K, lib_name)
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 220, in test_lattice_time_by_

  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/lattice.py", line 35, in __init__
    super(UpperSemiLattice, self).__init__(elements, leq_func, use_cache, direct_subelements_dict)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/lattice.py", line 113, in __init__
    bottom_elements = super(LowerSemiLattice, self).bottom_elements
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 123, in bottom_elements
    return [el_i for el_i in range(len(self)) if len(self.sub_elements(el_i)) == 0]
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 123, in <listcomp>
    return [el_i for el_i in range(len(self)) if len(self.sub_elements(el_i)) == 0]
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 155, in _sub_elements_cache
    res = self._sub_elements_nocache(element_index)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 148, in _sub_

  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 220, in test_lattice_time_by_lib
    L_time = test_lattice_time_by_func(K, lambda ctx: ConceptLattice.from_context(ctx))
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 210, in test_lattice_time_by_func
    L_func(K)
  File "/home/egor/Documents/FCApy_benchmarks/FCA_python_packages/performance_benchmark.py", line 220, in <lambda>
    L_time = test_lattice_time_by_func(K, lambda ctx: ConceptLattice.from_context(ctx))
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/lattice/concept_lattice.py", line 237, in from_context
    ltc = algo_func(context, **kwargs_used)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/algorithms/concept_construction.py", line 497, in lindig_algorithm
    lattice = ConceptLattice(concepts, subconcepts_dict=subconcepts_dict, superconcepts_dict=superconcepts_dict)
  File "/home/egor/.local/lib/pyt

  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 155, in _sub_elements_cache
    res = self._sub_elements_nocache(element_index)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 148, in _sub_elements_nocache
    sub_indexes = {i for i in range(len(self)) if self.leq_elements(i, element_index) and i != element_index}
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 148, in <setcomp>
    sub_indexes = {i for i in range(len(self)) if self.leq_elements(i, element_index) and i != element_index}
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 264, in _leq_elements_cache
    self._cache_leq[key] = res
MemoryError
Process test_lattice_fcapy:
Traceback (most recent call last):
  File "/home/egor/anaconda3/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/egor/anaconda3/lib/python3.8/multiprocessing/process.py", line 10

  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/algorithms/concept_construction.py", line 497, in lindig_algorithm
    lattice = ConceptLattice(concepts, subconcepts_dict=subconcepts_dict, superconcepts_dict=superconcepts_dict)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/lattice/concept_lattice.py", line 75, in __init__
    super(ConceptLattice, self).__init__(concepts, self.concepts_leq_func, use_cache=True)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/lattice.py", line 35, in __init__
    super(UpperSemiLattice, self).__init__(elements, leq_func, use_cache, direct_subelements_dict)
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/lattice.py", line 113, in __init__
    bottom_elements = super(LowerSemiLattice, self).bottom_elements
  File "/home/egor/.local/lib/python3.8/site-packages/fcapy/poset/poset.py", line 123, in bottom_elements
    return [el_i for el_i in range(len(self)) if len(self.sub_elements(el_i)) == 0]
 


%done: 100.0%
CPU times: user 14.9 s, sys: 7.49 s, total: 22.4 s
Wall time: 13h 23s


In [16]:
# Persist only the rows flagged as computed; the analysis section reloads this file
stats_df[stats_df['is_computed']].to_csv('benchmark_stats.csv')

# Analyze the results

In [17]:
import pandas as pd

In [18]:
# Reload the saved statistics; the first CSV column is the index written by to_csv
stats_df = pd.read_csv('benchmark_stats.csv', index_col=0)
# Show (rows, columns), then preview the first rows as the cell output
print(stats_df.shape)
stats_df.head()

(680, 13)


Unnamed: 0,run_number,ctx_name,lib_name,is_computed,lattice_construction_time (secs),intent_time (secs),extent_time (secs),timeout_seconds,n_objects,n_attributes,n_connections,density,is_random
0,0,random_10_10_0.1,concepts,True,0.000805,1.2e-05,1.2e-05,300.0,10.0,10.0,9.0,0.09,True
1,0,random_10_10_0.1,fcapy,True,0.015417,2.5e-05,3.2e-05,300.0,10.0,10.0,9.0,0.09,True
2,0,animal_movement,concepts,True,0.00088,8e-06,1.2e-05,300.0,16.0,4.0,24.0,0.375,False
3,0,animal_movement,fcapy,True,0.002034,6e-06,1.4e-05,300.0,16.0,4.0,24.0,0.375,False
4,0,gewaesser,concepts,True,0.001318,6e-06,1.3e-05,300.0,8.0,6.0,24.0,0.5,False


In [19]:
# Replace missing timings with the timeout value (presumably so that runs without
# a measurement are plotted at the timeout — confirm against the plotting helpers).
# Only the timing columns are filled: a blanket `fillna(timeout_secs)` would also
# write the timeout into any NaN in an unrelated column such as 'density'.
time_cols = [col for col in stats_df.columns if col.endswith('_time (secs)')]
stats_df = stats_df.fillna({col: timeout_secs for col in time_cols})

In [20]:
# Context-level statistic columns handed to the saving and plotting helpers
context_stat_feats = [
    'n_objects',
    'n_attributes',
    'n_connections',
    'density',
]

In [21]:
# Save the context statistics (the columns in context_stat_feats) via the benchmark helper module
pbench.save_context_stats(stats_df, context_stat_feats)

In [22]:
# One lattice-construction-time plot per group of contexts:
# classic (is_random == False) first, then random (is_random == True).
for plot_path, want_random in (
    ('imgs/lattice_construction_time/classic_contexts.png', False),
    ('imgs/lattice_construction_time/random_contexts.png', True),
):
    # The row mask is rebuilt on each pass, right before the call that uses it
    row_mask = stats_df['is_random'] if want_random else ~stats_df['is_random']
    pbench.save_lattice_time_plot(plot_path, context_stat_feats, stats_df[row_mask], timeout_secs)

In [23]:
# Plot the intent/extent timings for all contexts (classic and random together) into one image
pbench.save_extent_intent_time_plot('imgs/intent_extent_time/all_data.png', context_stat_feats, stats_df)