# Demonstration of the Soft-Query-Evaluation-System

## Python & Schema Imports

In [1]:
import json

from db.db import DBConnector

from db.operators import *
from db.criteria import *

from models import ModelMgr
from models.embedding import GenericEmbeddingModel
from models.semantic_validation import LLaMAValidationModel

In [2]:
# Create the database connector from the project config file.
db = DBConnector("config.ini")

# One ModelMgr (built from the same config file) is shared by both models below.
m = ModelMgr("config.ini")
# Embedding model; handed to Scan as `em` in the cells further down.
em = GenericEmbeddingModel(m)
# Semantic-validation model; handed to Scan as `sv` in the cells further down.
sv = LLaMAValidationModel(m)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Load the demo data set. Each top-level key is a table name whose value holds
# a "schema" list (column name/type) and a "data" list (one dict per row).
with open("evaluation/schema.json", encoding="utf-8") as schema_file:
    database = json.loads(schema_file.read())

def cast_schema_type(schema_type: str) -> str:
    """Translate a type name from the schema file into an SQL column type.

    The lookup ignores letter case and surrounding whitespace.

    Args:
        schema_type: One of "string", "text", "integer", "boolean" or "number".

    Returns:
        "TEXT", "INTEGER", "BOOLEAN" or "DOUBLE PRECISION".

    Raises:
        ValueError: If the type name is not supported. ValueError is a subclass
            of Exception, so callers that catch Exception keep working.
    """
    sql_types = {
        "string": "TEXT",
        "text": "TEXT",
        "integer": "INTEGER",
        "boolean": "BOOLEAN",
        "number": "DOUBLE PRECISION",
    }
    normalized = schema_type.lower().strip()
    try:
        return sql_types[normalized]
    except KeyError:
        # `from None` hides the internal KeyError; the message is what matters.
        raise ValueError("Unknown schema type: " + normalized) from None

def sanitize_table_name(table_name: str):
    """Normalize a name for use as an SQL identifier in the demo schema.

    "/", " " and "-" become "_"; "(", ")" and "." are removed; the result is
    lower-cased. Other characters are left untouched.
    """
    # Single-pass character mapping; None means "drop the character".
    substitutions = str.maketrans({
        "/": "_",
        " ": "_",
        "-": "_",
        "(": None,
        ")": None,
        ".": None,
    })
    return table_name.translate(substitutions).lower()

# Rebuild the `demo` schema from the loaded JSON: drop it if present, then
# create one table per top-level key and insert that table's rows.
with db.get_cursor() as cursor:
    cursor.execute("DROP SCHEMA IF EXISTS demo CASCADE ;")
    cursor.execute("CREATE SCHEMA demo;")

    for table in database:
        table_name = f"demo.{table}"
        schema = database[table]["schema"]

        # Original field name -> sanitized SQL column name.
        column_map = {field['name']: sanitize_table_name(field['name']) for field in schema}
        columns = [column_map[field['name']] for field in schema]
        types = [cast_schema_type(field['type']) for field in schema]
        table_column = [f"{col} {col_type}" for col, col_type in zip(columns, types)]

        cursor.execute(f"CREATE TABLE {table_name} ({', '.join(table_column)})")

        print("Creating Table ", table_name)

        # The named placeholders and the INSERT text depend only on the table,
        # not on the row, so build them once instead of once per row.
        param_names = [f"%({col})s" for col in columns]
        insert_sql = f"INSERT INTO {table_name} VALUES ({', '.join(param_names)})"

        for data in database[table]["data"]:
            # Row values are bound by the driver; keys are the sanitized column names.
            params = {v: data[k] for k, v in column_map.items()}
            cursor.execute(insert_sql, params)

db.conn.commit()

Creating Table  demo.language_detection
Creating Table  demo.chemicals
Creating Table  demo.elements
Creating Table  demo.elements_phase
Creating Table  demo.movies
Creating Table  demo.movies_de
Creating Table  demo.actors
Creating Table  demo.plays_in
Creating Table  demo.top_artists_2023
Creating Table  demo.diseases
Creating Table  demo.diseases_symptom
Creating Table  demo.human_vital_sign
Creating Table  demo.companies_1
Creating Table  demo.companies_2
Creating Table  demo.user_data
Creating Table  demo.random_countries
Creating Table  demo.countries


In [3]:
# Replace the connector with one created with load_db=True, after the demo
# schema has been rebuilt by the cell above.
# NOTE(review): presumably load_db makes the connector read the existing
# tables up front — confirm against DBConnector.
db = DBConnector("config.ini", load_db=True)

## Soft Scan

Demonstrates the soft scan: a semantic search for the database table that contains the requested information.

$Scan(\text{table name})$

In [6]:
# Scan a table that exists in the demo schema (demo.actors) and show all rows.
# NOTE(review): threshold=0.8 is presumably the minimum similarity for a soft
# match — confirm against Scan.
op = Scan("demo.actors", db=db, em=em, sv=sv, threshold=0.8).open()
op.fetch_all()

[RealDictRow([('name', 'Johnny Depp'), ('birth_year', 1963)]),
 RealDictRow([('name', 'Orlando Bloom'), ('birth_year', 1977)]),
 RealDictRow([('name', 'Keira Knightley'), ('birth_year', 1985)]),
 RealDictRow([('name', 'Elijah Wood'), ('birth_year', 1981)]),
 RealDictRow([('name', 'Ian McKellen'), ('birth_year', 1939)]),
 RealDictRow([('name', 'Freddie Highmore'), ('birth_year', 1992)]),
 RealDictRow([('name', 'Viggo Mortensen'), ('birth_year', 1958)]),
 RealDictRow([('name', 'Leonardo DiCaprio'), ('birth_year', 1974)]),
 RealDictRow([('name', 'Joseph Gordon-Levitt'), ('birth_year', 1981)]),
 RealDictRow([('name', 'Elliot Page'), ('birth_year', 1987)]),
 RealDictRow([('name', 'Keanu Reeves'), ('birth_year', 1964)]),
 RealDictRow([('name', 'Carrie-Anne Moss'), ('birth_year', 1967)]),
 RealDictRow([('name', 'Laurence Fishburne'), ('birth_year', 1961)])]

In [10]:
# Failure case: the demo schema built above has no table with this name, and the
# recorded output shows the scan raising. Catch the error and display its
# message so the notebook still survives Restart & Run All.
# NOTE(review): "langauges" looks like a misspelling of "languages" — confirm
# whether the misspelling is part of the demonstration.
try:
    op = Scan("demo.langauges", db=db, em=em, sv=sv, threshold=0.8).open()
    first_row = op.fetch_one()
except Exception as exc:  # the recorded output shows a bare Exception being raised
    first_row = None
    print(f"{type(exc).__name__}: {exc}")

# Displays the row when the scan succeeds; shows nothing when it failed.
first_row

Exception: Table 'demo.actors' seams not to contain the information