In [1]:
%reload_ext autoreload
%autoreload 2

# Build DLMI simple
- Using user-defined changes to the structure (no database orchestrator)

## Goal
Example of how to build and search in DLMI with synthetic data.

## Steps:
1. Prepare the data
2. Instantiate LMI
3. Insert data
4. Change structure
5. Search in the structure

##### Author: Terezia Slaninakova, xslanin@fi.muni.cz
##### Date: 23.10.2022

In [2]:
from dlmi.utils import *
from dlmi.Logger import logging, get_logger_config
import logging
logging.basicConfig(level=logging.DEBUG, format=get_logger_config())
logging.debug('Initialized logger')

[2022-10-23 12:04:32,199][DEBUG][root] Initialized logger


## 1. Prepare the data

In [3]:
import numpy as np
import pandas as pd
from dlmi.LMI import LMI, Inconsistencies
data = pd.DataFrame(np.random.random((10_000, 128)))
data.head(2)

[2022-10-23 12:04:45,467][INFO ][faiss.loader] Loading faiss with AVX2 support.
[2022-10-23 12:04:45,472][INFO ][faiss.loader] Could not load library with AVX2 support due to:
ModuleNotFoundError("No module named 'faiss.swigfaiss_avx2'",)
[2022-10-23 12:04:45,475][INFO ][faiss.loader] Loading faiss.
[2022-10-23 12:04:45,542][INFO ][faiss.loader] Successfully loaded faiss.


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,118,119,120,121,122,123,124,125,126,127
0,0.174146,0.636025,0.052366,0.066236,0.833853,0.225025,0.934428,0.264884,0.791498,0.799136,...,0.212472,0.74006,0.567989,0.24877,0.5906,0.046706,0.35711,0.49438,0.508003,0.844378
1,0.922731,0.938299,0.587256,0.016207,0.495015,0.168374,0.345242,0.506989,0.963526,0.366027,...,0.46414,0.727747,0.643786,0.215733,0.987642,0.653069,0.660295,0.848699,0.605542,0.000983


## 2. Instantiate LMI

In [4]:
from dlmi.utils import load_yaml
config = load_yaml('../config.yml')

In [5]:
lmi = LMI(**config['LMI'])

## 3. Insert data into LMI

In [6]:
lmi.insert(data.iloc[:1000])

[2022-10-23 12:04:45,817][DEBUG][LeafNode] Created leaf node at `(0,)`
[2022-10-23 12:04:45,821][DEBUG][LMI] Added inconsistency: Inconsistencies.OVERFLOW at (0,)
[2022-10-23 12:04:45,824][DEBUG][LMI] Inserted node `(0,)` into LMI


In [7]:
lmi.dump_structure()

Unnamed: 0_level_0,type,children
key,Unnamed: 1_level_1,Unnamed: 2_level_1
"(0,)",LeafNode,1000


In [8]:
Inconsistencies.OVERFLOW.value

'overflow'

In [9]:
lmi.inconsistencies

{'InnerNode': {'overflow': {}, 'underflow': {}},
 'LeafNode': {'overflow': {(0,): 1000}, 'underflow': {}}}

## 4. Change the structure

In [10]:
_ = lmi.deepen(lmi.nodes[(0, )], 2)

[2022-10-23 12:04:45,972][DEBUG][LMI] ==== Deepen with (0,)
[2022-10-23 12:04:48,792][DEBUG][LMI] Removed node at `(0,)`
[2022-10-23 12:04:48,796][DEBUG][LMI] Removed inconsistency: Inconsistencies.OVERFLOW from (0,)
[2022-10-23 12:04:48,798][DEBUG][InnerNode] Created inner node at `(0,)`
[2022-10-23 12:04:48,801][DEBUG][LMI] Inserted node `(0,)` into LMI
[2022-10-23 12:04:48,804][DEBUG][LeafNode] Created leaf node at `(0, 0)`
[2022-10-23 12:04:48,807][DEBUG][LMI] Added inconsistency: Inconsistencies.OVERFLOW at (0, 0)
[2022-10-23 12:04:48,810][DEBUG][LMI] Inserted node `(0, 0)` into LMI
[2022-10-23 12:04:48,812][DEBUG][LeafNode] Created leaf node at `(0, 1)`
[2022-10-23 12:04:48,815][DEBUG][LMI] Added inconsistency: Inconsistencies.OVERFLOW at (0, 1)
[2022-10-23 12:04:48,816][DEBUG][LMI] Inserted node `(0, 1)` into LMI
[2022-10-23 12:04:48,817][DEBUG][LMI] Added inconsistency: Inconsistencies.UNDERFLOW at (0,)


In [11]:
lmi.dump_structure()

Unnamed: 0_level_0,type,children
key,Unnamed: 1_level_1,Unnamed: 2_level_1
"(0,)",InnerNode,2
"(0, 0)",LeafNode,433
"(0, 1)",LeafNode,567


In [12]:
_ = lmi.deepen(lmi.nodes[(0, 0)], 2)

[2022-10-23 12:04:48,893][DEBUG][LMI] ==== Deepen with (0, 0)
[2022-10-23 12:04:48,957][DEBUG][LMI] Removed node at `(0, 0)`
[2022-10-23 12:04:48,961][DEBUG][LMI] Removed inconsistency: Inconsistencies.OVERFLOW from (0, 0)
[2022-10-23 12:04:48,964][DEBUG][InnerNode] Created inner node at `(0, 0)`
[2022-10-23 12:04:48,966][DEBUG][LMI] Inserted node `(0, 0)` into LMI
[2022-10-23 12:04:48,969][DEBUG][LeafNode] Created leaf node at `(0, 0, 0)`
[2022-10-23 12:04:48,972][DEBUG][LMI] Added inconsistency: Inconsistencies.OVERFLOW at (0, 0, 0)
[2022-10-23 12:04:48,975][DEBUG][LMI] Inserted node `(0, 0, 0)` into LMI
[2022-10-23 12:04:48,977][DEBUG][LeafNode] Created leaf node at `(0, 0, 1)`
[2022-10-23 12:04:48,980][DEBUG][LMI] Inserted node `(0, 0, 1)` into LMI
[2022-10-23 12:04:48,981][DEBUG][LMI] Added inconsistency: Inconsistencies.UNDERFLOW at (0, 0)


In [13]:
lmi.dump_structure()

Unnamed: 0_level_0,type,children
key,Unnamed: 1_level_1,Unnamed: 2_level_1
"(0,)",InnerNode,2
"(0, 1)",LeafNode,567
"(0, 0)",InnerNode,2
"(0, 0, 0)",LeafNode,290
"(0, 0, 1)",LeafNode,143


In [14]:
lmi.retrain(lmi.nodes[(0, 0)], 4)

[2022-10-23 12:04:49,054][DEBUG][LMI] ==== Retrain with (0, 0)
[2022-10-23 12:04:49,117][DEBUG][LMI] Removed node at `(0, 0, 1)`
[2022-10-23 12:04:49,120][DEBUG][LMI] Removed node at `(0, 0, 0)`
[2022-10-23 12:04:49,123][DEBUG][LMI] Removed inconsistency: Inconsistencies.OVERFLOW from (0, 0, 0)
[2022-10-23 12:04:49,126][DEBUG][LeafNode] Created leaf node at `(0, 0, 0)`
[2022-10-23 12:04:49,129][DEBUG][LMI] Inserted node `(0, 0, 0)` into LMI
[2022-10-23 12:04:49,131][DEBUG][LeafNode] Created leaf node at `(0, 0, 1)`
[2022-10-23 12:04:49,134][DEBUG][LMI] Added inconsistency: Inconsistencies.UNDERFLOW at (0, 0, 1)
[2022-10-23 12:04:49,137][DEBUG][LMI] Inserted node `(0, 0, 1)` into LMI
[2022-10-23 12:04:49,140][DEBUG][LeafNode] Created leaf node at `(0, 0, 2)`
[2022-10-23 12:04:49,141][DEBUG][LMI] Inserted node `(0, 0, 2)` into LMI
[2022-10-23 12:04:49,142][DEBUG][LeafNode] Created leaf node at `(0, 0, 3)`
[2022-10-23 12:04:49,146][DEBUG][LMI] Inserted node `(0, 0, 3)` into LMI


In [15]:
lmi.dump_structure()

Unnamed: 0_level_0,type,children
key,Unnamed: 1_level_1,Unnamed: 2_level_1
"(0,)",InnerNode,2
"(0, 1)",LeafNode,567
"(0, 0)",InnerNode,4
"(0, 0, 0)",LeafNode,117
"(0, 0, 1)",LeafNode,12
"(0, 0, 2)",LeafNode,134
"(0, 0, 3)",LeafNode,170


## 5. Search

In [16]:
res = lmi.search(data.iloc[0], 500)
res

[2022-10-23 12:05:14,831][DEBUG][LMI] Processing: (0,)
[2022-10-23 12:05:14,835][DEBUG][LMI] Top 5 entries in PQ: [[(0, 1) 0.8267986178398132]
 [(0, 0) 0.173201322555542]]


([(0, 1)], 567)

In [17]:
lmi.pq

array([[(0, 0), 0.173201322555542]], dtype=object)

In [18]:
res = lmi.search(data.iloc[0], 500)
res

[2022-10-23 12:05:18,904][DEBUG][LMI] Processing: (0,)
[2022-10-23 12:05:18,909][DEBUG][LMI] Top 5 entries in PQ: [[(0, 1) 0.8267986178398132]
 [(0, 0) 0.173201322555542]]


([(0, 1)], 567)