# Intro

There are a number of Python packages for working with Formal Concept Analysis (FCA). In this notebook we compare their performance on the basic FCA task: constructing the concept lattice from a formal context.

We consider three packages: FCApy, fcapsy and Concepts

// More packages can be compared in the future

# Import competitor libraries

The current `FCApy` library (by Egor Dudyrev, HSE Moscow): https://github.com/EgorDudyrev/FCApy 

In [1]:
from fcapy import LIB_INSTALLED
from fcapy.context import FormalContext, converters
from fcapy.lattice import ConceptLattice
from fcapy.visualizer import Visualizer

`Concepts` package (by Sebastian Bank, University of Leipzig): https://github.com/xflr6/concepts

In [2]:
import concepts

`fcapsy` package (by Tomáš Mikula, Palacký University): https://github.com/mikulatomas/fcapsy

In [3]:
import fcapsy

# Load data

First we load some classic FCA contexts (datasets)

In [4]:
# Local imports keep this cell runnable on its own.
import os
import tempfile
import urllib.request

# Raw-file URL template for the example contexts hosted in the FCApy repository.
CXT_URL_TEMPLATE = 'https://raw.githubusercontent.com/EgorDudyrev/FCApy/main/data/{ctx_name}.cxt'

frames_classic = {}
contexts_to_test = ['animal_movement', 'digits', 'gewaesser',
                    'lattice', 'liveinwater', 'tealady']

# A private temporary directory is used instead of `rm -rf tmp; mkdir tmp`:
# it cannot delete a pre-existing `tmp` folder in the working directory,
# it works without a POSIX shell, and it is removed even if a download fails.
with tempfile.TemporaryDirectory() as tmp_dir:
    for ctx_name in contexts_to_test:
        fname = os.path.join(tmp_dir, f'{ctx_name}.cxt')
        # Unlike `wget -q`, a failed download raises here instead of leaving
        # an empty file for the parser to choke on.
        urllib.request.urlretrieve(CXT_URL_TEMPLATE.format(ctx_name=ctx_name), fname)
        ctx = converters.read_cxt(fname)
        df = ctx.to_pandas()
        # The context name travels with the frame; get_context_stat reads it back.
        df.name = ctx_name
        frames_classic[ctx_name] = df

These classic real-world contexts are small, so we add some larger random contexts to our examination.

In [5]:
import numpy as np
import pandas as pd
from itertools import product

# Fixed seed so that the same random contexts are generated on every run.
np.random.seed(42)
n_objects_vars = [10, 15]
n_attributes_vars = [10, 15]
densities_vars = [0.1, 0.5, 0.9]

# One random binary context per (n_objects, n_attributes, density) combination,
# keyed by a name that encodes those three parameters.
frames_random = {}
for n_objects, n_attributes, density in product(n_objects_vars, n_attributes_vars, densities_vars):
    incidence = np.random.binomial(1, density, size=(n_objects, n_attributes))
    frame = pd.DataFrame(
        incidence,
        index=[f"g_{i}" for i in range(n_objects)],
        columns=[f"m_{i}" for i in range(n_attributes)],
    ).astype(bool)

    frame.name = f"random_{n_objects}_{n_attributes}_{density}"
    frames_random[frame.name] = frame

In [6]:
# All contexts to benchmark: the classic ones first, then the random ones.
frames = {**frames_classic, **frames_random}

# Run benchmarks

## Default lattice visualizations

Let us take one of the classic FCA contexts: 'animal movement'

* the objects (rows) are Animals
* the attributes (columns) are Actions
* the table shows whether an Animal can perform an Action

In [7]:
# Select the 'animal_movement' context from the collected frames and display it.
df = frames['animal_movement']
df

Unnamed: 0,fly,hunt,run,swim
dove,True,False,False,False
hen,False,False,False,False
duck,True,False,False,True
goose,True,False,False,True
owl,True,True,False,False
hawk,True,True,False,False
eagle,True,True,False,False
fox,False,True,True,False
dog,False,False,True,False
wolf,False,True,True,False


### Visualization by `concepts`

The visualization can be found in the file
* _lattice_visualization_concepts.png_

In [8]:
ctx_concepts = concepts.Context(df.index, df.columns, df.values)
# `Lattice.graphviz()` only builds a graphviz.Digraph; nothing is written to disk
# unless it is rendered, and a filename passed there names the DOT source rather
# than the image. Render explicitly so that the PNG mentioned above really exists:
# the output is '<filename>.<format>', and cleanup=True removes the intermediate
# DOT file. NOTE(review): based on the concepts/graphviz docs - confirm against
# the installed versions.
dot_concepts = ctx_concepts.lattice.graphviz()
dot_concepts.render('lattice_visualization_concepts', format='png', cleanup=True);

### Visualization by `fcapy`

Visualizations can be found in the files
* _lattice_visualization_fcapy_networkx.png_ 
* _lattice_visualization_fcapy_plotly.png_

In [9]:
import matplotlib.pyplot as plt

# Build the fcapy context from the frame, then its concept lattice and a visualizer.
ctx_fcapy = FormalContext.from_pandas(df)
ltc_fcapy = ConceptLattice.from_context(ctx_fcapy)
vsl_fcapy = Visualizer(ltc_fcapy)

# Networkx-based drawing, saved to a PNG file; the figure is closed afterwards.
# NOTE(review): the title is set before drawing, which presumably relies on
# draw_networkx() drawing onto the current pyplot axes - confirm.
plt.title('Networkx lattice')
vsl_fcapy.draw_networkx()
plt.savefig('lattice_visualization_fcapy_networkx.png')
plt.close()

# Plotly figure of the same lattice, written out as a static image.
fig = vsl_fcapy.get_plotly_figure(title='Plotly lattice')
fig.write_image('lattice_visualization_fcapy_plotly.png')

## Time to construct a lattice

Functions to run the same lattice construction task with different libraries

In [10]:
from datetime import datetime
def run_concepts(frame):
    """Time the lattice construction of the `concepts` library for one context.

    Parameters
    ----------
    frame
        Binary context table; its ``index``, ``columns`` and ``values`` are
        handed to ``concepts.Context`` (the benchmark passes pandas DataFrames).

    Returns
    -------
    dict
        ``{'lattice_construction_time (secs)': <elapsed seconds as float>}``.
        Building the context itself is not included in the measurement.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; wall-clock datetime.now() can jump and is coarser.
    from time import perf_counter

    ctx = concepts.Context(frame.index, frame.columns, frame.values)

    start = perf_counter()
    # Only the attribute access is timed (presumably the lattice is built lazily here).
    # The reference is kept so the result stays alive until the timer stops.
    ltc = ctx.lattice
    elapsed = perf_counter() - start

    return {'lattice_construction_time (secs)': elapsed}

def run_fcapy(frame):
    """Time the lattice construction of the `fcapy` library for one context.

    Parameters
    ----------
    frame
        Binary context table passed to ``FormalContext.from_pandas``.

    Returns
    -------
    dict
        ``{'lattice_construction_time (secs)': <elapsed seconds as float>}``.
        Building the context itself is not included in the measurement.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; wall-clock datetime.now() can jump and is coarser.
    from time import perf_counter

    # NOTE(review): this writes to a module-level dict of fcapy, so the setting
    # persists after the call; presumably it switches off fcapy's numpy code path.
    LIB_INSTALLED['numpy'] = False
    ctx = FormalContext.from_pandas(frame)

    start = perf_counter()
    # The reference is kept so the result stays alive until the timer stops.
    ltc = ConceptLattice.from_context(ctx, algo='CbO')
    elapsed = perf_counter() - start

    return {'lattice_construction_time (secs)': elapsed}

def run_fcapsy(frame):
    """Time the lattice construction of the `fcapsy` library for one context.

    Parameters
    ----------
    frame
        Binary context table passed to ``fcapsy.Context.from_pandas``.

    Returns
    -------
    dict
        ``{'lattice_construction_time (secs)': <elapsed seconds as float>}``.
        Building the context itself is not included in the measurement.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; wall-clock datetime.now() can jump and is coarser.
    from time import perf_counter

    ctx = fcapsy.Context.from_pandas(frame)

    start = perf_counter()
    # The reference is kept so the result stays alive until the timer stops.
    ltc = fcapsy.Lattice(ctx)
    elapsed = perf_counter() - start

    return {'lattice_construction_time (secs)': elapsed}

In [11]:
def get_context_stat(frame):
    """Describe a context frame by its name, size and fill rate.

    Reads ``frame.name`` and ``frame.shape`` and sums all cells of the frame.

    Returns a dict with the keys 'ctx_name', 'n_objects' (rows),
    'n_attributes' (columns), 'n_connections' (sum over all cells) and
    'density' (n_connections divided by the number of cells).
    """
    n_objects, n_attributes = frame.shape
    n_connections = frame.sum().sum()
    return {
        'ctx_name': frame.name,
        'n_objects': n_objects,
        'n_attributes': n_attributes,
        'n_connections': n_connections,
        'density': n_connections / (n_objects * n_attributes),
    }

In [12]:
from tqdm.notebook import tqdm
import seaborn as sns

Run the benchmarks

In [None]:
n_runs = 100

# (library name, timing function) pairs, in the order they are benchmarked
benchmarked_libs = (
    ('concepts', run_concepts),
    ('fcapy', run_fcapy),
    ('fcapsy', run_fcapsy),
)

# One record per (context, library, run): timing result + library name
# + run number + context statistics.
stats_total = []
for ctx_name, frame in tqdm(frames.items(), total=len(frames)):
    ctx_stat = get_context_stat(frame)
    # 'random_15_15_0.9' is left out: the benchmark runs too slowly on this context
    if ctx_stat['ctx_name'] != 'random_15_15_0.9':
        stats_per_ctx = []
        for lib_name, run_func in benchmarked_libs:
            stats_per_ctx.extend(
                {**run_func(frame), 'library': lib_name, 'run_number': run_number, **ctx_stat}
                for run_number in tqdm(range(n_runs), leave=False, desc=lib_name)
            )
        # Results are added only once every library has finished on this context.
        stats_total.extend(stats_per_ctx)

HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

HBox(children=(FloatProgress(value=0.0, description='concepts', style=ProgressStyle(description_width='initial…

HBox(children=(FloatProgress(value=0.0, description='fcapy', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='fcapsy', style=ProgressStyle(description_width='initial')…

Benchmark results can be found in the file:
* _latice_construction_statistics.csv_

Context statistics (number of objects, attributes, etc.) are in the file:
* context_statistics.csv

In [None]:
# Collect all benchmark records in one table and flag the rows that belong to
# randomly generated contexts (their names start with 'random').
stats_df = pd.DataFrame(stats_total).assign(
    is_random=lambda records: records['ctx_name'].str.startswith('random')
)
stats_df.to_csv('latice_construction_statistics.csv')
stats_df.head()

In [None]:
# One row per context: the benchmark table repeats the context statistics for
# every run, so duplicates are dropped before saving.
context_stat_feats = ['n_objects', 'n_attributes', 'n_connections', 'density']
context_stat_df = (
    stats_df[['ctx_name', *context_stat_feats]]
    .drop_duplicates()
    .reset_index(drop=True)
)
context_stat_df.to_csv('context_statistics.csv')

In [None]:
# Default figure background as an RGBA tuple: (1,1,1,1) is opaque white.
# The alternative kept in the trailing comment has alpha 0 (fully transparent).
plt.rcParams['figure.facecolor'] = (1,1,1,1)  # (1,1,1,0)

We do not draw any plots in this notebook in order for Github to render it.

Benchmark plots can be found in the files:
* _lattice_construction_time_for_classic_contexts.png_
* _lattice_construction_time_for_random_contexts.png_
* _lattice_construction_time_all_data.png_

In [None]:
# Construction time against each context statistic, classic contexts only.
# The figure is saved to disk and closed, so nothing is rendered inline.
y_feat = 'lattice_construction_time (secs)'
width = 2
# Ceil division gives exactly the rows needed; the former `len // width + 1`
# reserved an empty extra row whenever the panels filled the grid evenly.
n_rows = max(1, -(-len(context_stat_feats) // width))

fig, axes = plt.subplots(n_rows, width, figsize=(10, 7), squeeze=False)
for ax, x_feat in zip(axes.flat, context_stat_feats):
    sns.lineplot(x=x_feat, y=y_feat, hue='library', data=stats_df[~stats_df['is_random']], ax=ax)
    ax.set_title(x_feat)
    ax.set_xlabel(x_feat)
    # The axis is linear in this figure, so the label no longer claims "(log scale)".
    ax.set_ylabel('time (secs)')
    ax.legend()
# Hide grid cells that received no panel.
for ax in axes.flat[len(context_stat_feats):]:
    ax.set_visible(False)
fig.tight_layout()
fig.subplots_adjust(top=0.9)  # leave room for the figure-level title
fig.suptitle(f"{y_feat} based on context statistics\n(for classic fca contexts)")
fig.savefig('lattice_construction_time_for_classic_contexts.png')
plt.close(fig)

In [None]:
# Construction time against each context statistic, random contexts only.
# The figure is saved to disk and closed, so nothing is rendered inline.
context_stat_feats = ['n_objects', 'n_attributes', 'n_connections', 'density']
y_feat = 'lattice_construction_time (secs)'
width = 2
# Ceil division gives exactly the rows needed; the former `len // width + 1`
# reserved an empty extra row whenever the panels filled the grid evenly.
n_rows = max(1, -(-len(context_stat_feats) // width))

fig, axes = plt.subplots(n_rows, width, figsize=(10, 10), squeeze=False)
for ax, x_feat in zip(axes.flat, context_stat_feats):
    sns.lineplot(x=x_feat, y=y_feat, hue='library', data=stats_df[stats_df['is_random']], ax=ax)
    ax.set_title(x_feat)
    ax.set_xlabel(x_feat)
    # The axis is linear in this figure, so the label no longer claims "(log scale)".
    ax.set_ylabel('time (secs)')
    ax.legend()
# Hide grid cells that received no panel.
for ax in axes.flat[len(context_stat_feats):]:
    ax.set_visible(False)
fig.tight_layout()
fig.subplots_adjust(top=0.9)  # leave room for the figure-level title
fig.suptitle(f"{y_feat} based on context statistics\n(for random contexts)")
fig.savefig('lattice_construction_time_for_random_contexts.png')
plt.close(fig)

In [None]:
# Construction time (log scale) against each context statistic, all contexts.
# The figure is saved to disk and closed, so nothing is rendered inline.
context_stat_feats = ['n_objects', 'n_attributes', 'n_connections', 'density']
y_feat = 'lattice_construction_time (secs)'
width = 2
# Ceil division gives exactly the rows needed; the former `len // width + 1`
# reserved an empty extra row whenever the panels filled the grid evenly.
n_rows = max(1, -(-len(context_stat_feats) // width))

fig, axes = plt.subplots(n_rows, width, figsize=(10, 7), squeeze=False)
for ax, x_feat in zip(axes.flat, context_stat_feats):
    sns.lineplot(x=x_feat, y=y_feat, hue='library', data=stats_df, ax=ax)
    ax.set_title(x_feat)
    ax.set_yscale('log')
    ax.set_xlabel(x_feat)
    ax.set_ylabel('time (secs) (log scale)')
    ax.legend()
# Hide grid cells that received no panel.
for ax in axes.flat[len(context_stat_feats):]:
    ax.set_visible(False)
fig.tight_layout()
fig.subplots_adjust(top=0.9)  # leave room for the figure-level title
fig.suptitle(f"{y_feat} based on context statistics\n(for random and classic contexts)")
fig.savefig('lattice_construction_time_all_data.png')
plt.close(fig)