# Entity Abstractor and Recombination Step

## Load table

In [3]:
import os

import pandas as pd

In [4]:
# Location of the time-slot CSV. The original absolute path stays as the default
# so existing setups keep working; set the TIME_SLOTS_CSV environment variable to
# run the notebook against a copy stored elsewhere.
csv_file = os.environ.get("TIME_SLOTS_CSV", "/home/pandavid/uni/BA/time_slots.csv")
# Read every column with the pandas "string" dtype and replace missing cells with "-".
# NOTE(review): the rendered output shows an "Unnamed: 0" column, which suggests the
# CSV carries a saved index - confirm whether that column is wanted downstream.
table_data = pd.read_csv(csv_file, sep=",", dtype="string").fillna("-")
table_data

Unnamed: 0,Date,Slot,Name,Surname,Email,Phone,Private Notes
0,01.04.22,1000-1015,Juergen,Bieber,JurgenBieber@rhyta.com,6138848100,first meeting
1,01.04.22,1015-1030,Ralph,Eberhardt,RalphEberhardt@jourrapide.com,2622449580,wants to know about programming basics
2,01.04.22,1045-1100,Dennis,Braun,DennisBraun@teleworm.us,6020797114,interesting project advice
3,01.04.22,1100-1145,Julia,Farber,JuliaFarber@teleworm.us,5323961496,kick off
4,01.04.22,1200-1215,Sophie,Boehm,SophieBoehm@teleworm.us,3940528208,regular meet
5,04.04.22,0900-0915,Maria,Schmitz,MariaSchmitz@armyspy.com,7252323962,questions about exams
6,04.04.22,1330-1400,Steffen,Schulze,SteffenSchulze@jourrapide.com,9383460916,-
7,04.04.22,1415-1430,Katharina,Nacht,KatharinaNacht@jourrapide.com,7255610166,regular meet
8,05.04.22,1600-1615,Robert,Pfeiffer,RobertPfeiffer@armyspy.com,9519884353,questions about project
9,05.04.22,1615-1700,Laura,Freytag,LauraFreytag@rhyta.com,3417252072,kick off


## Internal tabular representation

In [5]:
from src.datamodel.Table import Table

In [6]:
# Wrap the loaded DataFrame in the project's Table model.
# NOTE(review): the two string arguments are positional; "time_slots" shows up later
# as the lifted table name, the role of "meeting planning" is not visible here -
# confirm against Table's constructor.
table = Table(table_data, "time_slots", "meeting planning")

## Use Entity Abstractor

In [7]:
from src.entity_abstractor.Abstractor import Abstractor

In [8]:
# Abstractor instance used to parse the utterance below.
abstractor = Abstractor()
# Natural-language utterance to analyse; the following cell reads it via this name.
string = "Select the slot, surname and private notes."

In [9]:
# Parse the utterance. The result is later unpacked as (sentence, subsentences).
abstractor_out = abstractor.extract_sentence_instances_from(string)

In [10]:
from src.datamodel.Sentence import Sentence
from typing import Tuple, List


def print_abstractor_output_sentence_data(abstractor_output: Tuple[Sentence, List[Sentence]], table: Table = None):
    """Print the lifting report for an abstractor result.

    Args:
        abstractor_output: ``(sentence, subsentences)`` pair.
        table: Table forwarded unchanged to ``print_sentence_data``.
            Defaults to ``None``.

    If there are no subsentences, the full sentence is reported. Otherwise
    every subsentence is reported in order and the full sentence is skipped.
    """
    sentence, subsentences = abstractor_output
    # Fall back to the whole sentence only when there is nothing finer to show.
    if len(subsentences) == 0:
        print_sentence_data(sentence, table)
        return
    # A plain loop rather than a list comprehension: the calls are made purely
    # for their printed output, so there is no list worth building.
    for subsentence in subsentences:
        print_sentence_data(subsentence, table)

def print_sentence_data(sentence: Sentence, table: Table):
    """Print the dependency tree of ``sentence`` followed by its lifted views.

    Args:
        sentence: Sentence whose ``dependency_tree`` and lifting methods are queried.
        table: Table passed to every lifting method that accepts one.
    """
    print(sentence.dependency_tree.as_conll(), "\n")

    # (label, lazy lookup, extra print arguments). The lookups are wrapped in
    # lambdas so each one runs immediately before its own line is printed.
    report_rows = (
        ("Lifted sentence: ", lambda: sentence.lifted(table), ()),
        ("Lifted column names: ", lambda: sentence.get_lifted_column_names(table), ()),
        ("Lifted table names: ", lambda: sentence.get_lifted_table_names(table), ()),
        ("Lifted cases: ", lambda: sentence.case_lifted(table), ()),
        ("Lifted column names in conditions: ", lambda: sentence.get_case_lifted_column_names(table), ()),
        ("Lifted values in conditions: ", lambda: sentence.get_lifted_values(), ("\n",)),
    )
    for label, lookup, trailer in report_rows:
        print(label, lookup(), *trailer)

In [11]:
# Report the dependency tree and lifted views for the parsed utterance.
print_abstractor_output_sentence_data(abstractor_out, table)
# Element 0 of the abstractor output is the full sentence; print its input dict.
print(abstractor_out[0].get_input_dict(table))

Select VB 0 ROOT LiftableDependencyTreeRootNode
the DT 3 det LiftableStopwordDependencyTreeNode
slot NN 1 obj LiftableObjectDependencyTreeNode
, , 3 punct LiftableStopwordDependencyTreeNode
surname NN 3 conj LiftableObjectDependencyTreeNode
and CC 8 cc LiftableStopwordDependencyTreeNode
private JJ 8 amod LiftableValueDependencyTreeNode
notes NNS 3 conj LiftableObjectDependencyTreeNode
. . 1 punct LiftableStopwordDependencyTreeNode 

Lifted sentence:  Select [,column] notes
Lifted column names:  [['slot', 'surname']]
Lifted table names:  [['time_slots']]
Lifted cases:  None
Lifted column names in conditions:  []
Lifted values in conditions:  [] 

{'column': [['slot', 'surname']], 'table': [['time_slots']]}


## Recombine with lifted program

In [12]:
from src.combination.FunctionTemplate import FunctionTemplate

In [13]:
# Program template whose placeholders are filled from the sentence's input dict.
lifted_program = "CREATE([table], [,column])"
# Empty condition template for this example.
lifted_condition = ""
grounded_program = FunctionTemplate.ground_lifted_program(
    lifted_program,
    abstractor_out[0].get_input_dict(table),
    lifted_condition,
)
print(grounded_program)

CREATE(['time_slots'], ['slot', 'surname'])


# Entity Abstraction and Recombination using the data set

In [14]:
from src.entity_abstractor.MockAbstractor import MockAbstractor

In [19]:
# abstract_utterance is called on the class itself; no MockAbstractor instance is created.
# Per the printed output, the result is a list of 3-tuples: a lifted sentence string,
# a dict with 'table' and 'condition' entries, and a lifted condition string.
mock_abstractor_out = MockAbstractor.abstract_utterance("Count all rows where date is later than today.")
print(mock_abstractor_out)

[('Count rows [table] [condition]', {'table': [['time_slots']], 'condition': [{'column': 'date', 'value': 'today'}]}, '[column] later [value]')]


In [20]:
# mock_abstractor_out[0][1] is the dict (second element) of the first result tuple
# printed by the previous cell.
# NOTE(review): the utterance says "later than", yet the condition template passed
# here is "[column] <= [value]" - confirm that this operator is the intended one.
print(
    FunctionTemplate.ground_lifted_program("SELECT([table], [condition]); COUNT([table])", mock_abstractor_out[0][1], "[column] <= [value]")
)

SELECT(['time_slots'], date <= today); COUNT(['time_slots'])
