# Evaluation


In [87]:
import json

import sklearn.cluster
from sklearn.cluster import DBSCAN

from db.criteria import *
from db.operators import *
from db.operators.Aggregate import *
from db.structure import *

from models.embedding import SentenceTransformerEmbeddingModel
from models.semantic_validation import LLaMAValidationModel
from models.text_generation.LLaMA import LLaMATextGenerationModel

# logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)

In [2]:
# Load Models
m = ModelMgr()
em = SentenceTransformerEmbeddingModel(m)
sv = LLaMAValidationModel(m, temperature=0.0001)
gm = LLaMATextGenerationModel(m)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## Load Data


In [72]:
def load_dummy_operators():
    with open("schema.json", "r") as schema:
        schema = json.load(schema)

    operators = {}
    for relation, data in schema.items():
        operators[relation] = Dummy(relation, [x["name"] for x in data["schema"]], data["data"])
    return operators

ops = load_dummy_operators()

# Evaluation

In [137]:
def evaluate(gt, result):
    tps, fns, fps = gt & result, gt - result, result - gt
    tp, fn, fp = len(tps), len(fns), len(fps)
    precision = round((tp / (tp + fp) if (tp + fp) > 0 else 0) * 100.0)
    recall = round((tp / (tp + fn) if (tp + fn) > 0 else 0) * 100.0)
    f1_score = round(((2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0))
    print("False Positives:", "\n".join(map(lambda x: f"\t{x}", fps)), "", sep="\n")
    print("False Negatives:", "\n".join(map(lambda x: f"\t{x}", fns)), "", sep="\n")
    return {"precision": precision, "recall": recall, "f1_score": f1_score}

## Test Case 1: Match chemical descriptions with thier common names

In [5]:
gt1 = {("H2O", "Water"), ("H2SO4", "Sulfuric Acid"), ("CH3OH", "Methanol"), ("HCl", "Hydrochloric Acid"), ("NH3", "Ammonia"), ("C2H5OH", "Ethanol"), ("C6H6", "Benzene"), ("Cl2", "Chlorine"), ("C3H6O", "Acetone"), ("NaOH", "Sodium Hydroxide"), ("C8H10N4O2", "Caffeine")}

In [6]:
op1, op2 = ops["chemicals"], ops["chemical_warnings"]
sv_template = "Is {scientific_name} the scientific name for {name}"
join_chemicals = InnerSoftJoin(op1, op2, Column("scientific_name"), Column("name"), em=em, sv=sv, threshold=0.3, use_semantic_validation=True, sv_template=sv_template)
result1 = {(x["scientific_name"], x["name"]) for x in join_chemicals}

In [7]:
evaluate(gt1, result1)

{'precision': 75, 'recall': 82, 'f1_score': 78}

## Test Case 2: Movies & Actors
### 2.1 Search for release disjunction

In [8]:
gt2_1 = {("Pirates of the Caribbean: Dead Man's Chest", ), ("Charlie and the Chocolate Factory", ), ("Inception", ), ("The Matrix", )}

In [9]:
crit1 = SoftEqual(Column("release"), Constant("2006"), em=em, threshold=0.5)
crit2 = SoftEqual(Column("release"), Constant("July"), em=em, threshold=0.5)
crit3 = SoftEqual(Column("release"), Constant("Before 2000"), em=em, threshold=0.5)
result2_2 = {(x["name"], ) for x in Select(ops["movies"], DisjunctiveCriteria([crit1, crit2, crit3]))}
evaluate(gt2_1, result2_2)

{'precision': 60, 'recall': 75, 'f1_score': 67}

In [10]:
result2_2 = {(x["name"], ) for x in Select(ops["movies"], SoftValidate("Is {release} in 2006 or is {release} in July or is {release} before 2000?", sv=sv, full_record=False))}
evaluate(gt2_1, result2_2)

{'precision': 50, 'recall': 25, 'f1_score': 33}

In [11]:
crit11 = SoftValidate("Is {release} in 2006?", sv=sv, full_record=False)
crit22 = SoftValidate("Is {release} in July?", sv=sv, full_record=False)
crit33 = SoftValidate("Is {release} before 2000?", sv=sv, full_record=False)
op = Select(ops["movies"], DisjunctiveCriteria([crit11, crit22, crit33]))

result2_2 = {(x["name"], ) for x in op}
evaluate(gt2_1, result2_2)

{'precision': 67, 'recall': 100, 'f1_score': 80}

### 2.2 Search for release conjunction

In [12]:
gt2_2 = {("Charlie and the Chocolate Factory", ), ("Inception", )}

In [13]:
crit1 = SoftEqual(Column("release"), Constant("July"), em=em, threshold=0.5)
crit2 = SoftEqual(Column("release"), Constant("After 2000"), em=em, threshold=0.5)
result2_2 = {(x["name"], ) for x in Select(ops["movies"], ConjunctiveCriteria([crit1, crit2]))}
evaluate(gt2_2, result2_2)

{'precision': 33, 'recall': 50, 'f1_score': 40}

In [14]:
result2_2 = {(x["name"], ) for x in Select(ops["movies"], SoftValidate("Is {release} in July and is {release} after 2000?", sv=sv, full_record=False))}
evaluate(gt2_2, result2_2)

{'precision': 40, 'recall': 100, 'f1_score': 57}

In [15]:
op = Select(ops["movies"], ConjunctiveCriteria([
    SoftValidate("Is {release} in July?", sv=sv, full_record=False),
    SoftValidate("Is {release} after 2000?", sv=sv, full_record=False),
]))

result2_2 = {(x["name"], ) for x in op}
evaluate(gt2_2, result2_2)

{'precision': 100, 'recall': 100, 'f1_score': 100}

### 2.3 Match german Titles

In [16]:
gt2_3 = {
    ('The Lord of the Rings: The Fellowship of the Ring', 'Der Herr der Ringe: Die Gefährten'),
    ("Pirates of the Caribbean: Dead Man's Chest", 'Pirates of the Caribbean – Fluch der Karibik 2'),
    ('The Lord of the Rings: The Return of the King', 'Der Herr der Ringe: Die Rückkehr des Königs'),
    ('Charlie and the Chocolate Factory', 'Charlie und die Schokoladenfabrik'),
    ('Inception', 'Inception'),
    ('The Matrix', 'Matrix')
}

In [17]:
result2_3 = {(x["movies.name"], x["movies_de.name"]) for x in InnerSoftJoin(ops["movies"], ops["movies_de"], None, None, em=em, threshold=0.6)}
evaluate(gt2_3, result2_3)

{'precision': 50, 'recall': 100, 'f1_score': 67}

In [18]:
result2_3 = {(x["movies.name"], x["movies_de.name"]) for x in InnerSoftJoin(ops["movies"], ops["movies_de"], Column("movies.name"), Column("movies_de.name"), em=em, threshold=0.4)}
evaluate(gt2_3, result2_3)

{'precision': 45, 'recall': 83, 'f1_score': 58}

### 2.4 Match artists

In [135]:
gt2_4 =  {
  ('Carrie-Anne Moss', 'Carrie-Anne Moss'),
  ('Elijah Wood', 'Elijah Jordan Wood'),
  ('Elijah Wood', 'Elijah Wood'),
  ('Elliot Page', 'Elliot Page'),
  ('Freddie Highmore', 'Alfred Highmore'),
  ('Ian McKellen', 'Sir Ian Murray McKellen'),
  ('Johnny Depp', 'John Christopher "Johnny" Depp II'),
  ('Johnny Depp', 'John Christopher Depp II'),
  ('Johnny Depp', 'The Mad Hatter in Alice in Wonderland Actor'),
  ('Joseph Gordon-Levitt', 'Joseph Gordon-Levitt'),
  ('Keanu Reeves', 'Keanu Charles Reeves'),
  ('Keira Knightley', 'Keira Christina Knightley'),
  ('Keira Knightley', 'Keira Knightley'),
  ('Laurence Fishburne', 'Laurence Fishburne'),
  ('Leonardo DiCaprio', 'Jack Dawson in Titanic'),
  ('Orlando Bloom', '@orlandobloom'),
  ('Orlando Bloom', 'Orlando Bloom'),
  ('Orlando Bloom', 'Orlando Jonathan Blanchard Copeland Bloom'),
  ('Viggo Mortensen', 'Viggo Mortensen')
}

In [143]:
ops = load_dummy_operators()

result2_4 = {(row["name"], row["actor/actress"]) for row in InnerSoftJoin(ops["actors"], ops["plays_in"], Column("name"), Column("actor/actress"), threshold=0.7, em=em)}

evaluate(gt2_4, result2_4)

False Positives:
	('Viggo Mortensen', 'Elijah Wood')
	('Leonardo DiCaprio', 'Elijah Wood')
	('Elijah Wood', 'Joseph Gordon-Levitt')
	('Joseph Gordon-Levitt', 'Keira Christina Knightley')
	('Laurence Fishburne', 'John Christopher Depp II')
	('Keira Knightley', 'Orlando Bloom')
	('Keira Knightley', 'Keanu Charles Reeves')
	('Keira Knightley', 'The Mad Hatter in Alice in Wonderland Actor')
	('Joseph Gordon-Levitt', 'Elijah Jordan Wood')
	('Carrie-Anne Moss', 'Elijah Wood')
	('Freddie Highmore', 'Keira Christina Knightley')
	('Freddie Highmore', 'Orlando Bloom')
	('Keira Knightley', 'Elijah Jordan Wood')
	('Freddie Highmore', 'Keanu Charles Reeves')
	('Elijah Wood', 'Jack Dawson in Titanic')
	('Freddie Highmore', 'Sean Bean')
	('Laurence Fishburne', 'The Mad Hatter in Alice in Wonderland Actor')
	('Freddie Highmore', 'The Mad Hatter in Alice in Wonderland Actor')
	('Elijah Wood', 'Keira Christina Knightley')
	('Keira Knightley', 'Laurence Fishburne')
	('Keira Knightley', 'John Christopher 

{'precision': 9, 'recall': 100, 'f1_score': 17}

# Experiments
## Soft Aggregation

In [20]:
x = SoftAggregateFaissKMeans(ops["movies"], ["name"], [StringAggregation("name", "movies")], em=em, num_clusters=5)
print([a for a in x])


Clustering 7 points in 768D to 5 clusters, redo 1 times, 20 iterations
  Preprocessing in 0.00 s
[{'name': 'The Matrix', 'movies': 'The Matrix'}, {'name': 'Inception', 'movies': 'Inception'}, {'name': 'Charlie and the Chocolate Factory', 'movies': 'Charlie and the Chocolate Factory'}, {'name': 'Pirates of the Caribbean: The Curse of the Black Pearl', 'movies': "Pirates of the Caribbean: The Curse of the Black Pearl, Pirates of the Caribbean: Dead Man's Chest"}, {'name': 'The Lord of the Rings: The Fellowship of the Ring', 'movies': 'The Lord of the Rings: The Fellowship of the Ring, The Lord of the Rings: The Return of the King'}]




In [22]:
x = SoftAggregateScikit(ops["movies"], ["name"], [StringAggregation("name", "movies")], em=em, cluster_class=sklearn.cluster.DBSCAN, cluster_params={"eps":3, "min_samples": 2})
print([a for a in x])

[{'name': 'Pirates of the Caribbean: The Curse of the Black Pearl', 'movies': "Pirates of the Caribbean: The Curse of the Black Pearl, The Lord of the Rings: The Fellowship of the Ring, Pirates of the Caribbean: Dead Man's Chest, The Lord of the Rings: The Return of the King, Charlie and the Chocolate Factory, Inception, The Matrix"}]


In [26]:
x = SoftAggregateScikit(ops["movies"], ["name"], [CountAggregation("name", "movies")], em=em, cluster_class=sklearn.cluster.SpectralClustering, cluster_params={"n_clusters": 5, "assign_labels" :'discretize', "random_state": 0})
print([a for a in x])

[{'name': 'Pirates of the Caribbean: The Curse of the Black Pearl', 'movies': 2}, {'name': 'The Lord of the Rings: The Fellowship of the Ring', 'movies': 2}, {'name': 'Charlie and the Chocolate Factory', 'movies': 1}, {'name': 'Inception', 'movies': 1}, {'name': 'The Matrix', 'movies': 1}]
