In [1]:
import scripts.utils.nlp_utils as nlp
import scripts.utils.grammar as gra
import scripts.utils.string_handling as string_hand
import scripts.utils.data_handler as data_hand

Loading spacy ...
Finished


# Load Test Data

In [58]:
#test_data = data_hand.read_test_data("textProcessing_testKaldi.csv")
test_data = data_hand.read_test_data("kaldi_test_data_real_v1.tsv")

# Read Reference Grammar and Diff Grammar

In [50]:
reference_grammar = gra.read_grammar_and_create_map('referenceGrammar.xml')
diff_grammar = gra.read_grammar_and_create_map('diff_rg_1.xml')
grammar = gra.merge_grammars(reference_grammar, diff_grammar)

In [51]:
string_hand.clear_sentence("no i will still have a ticket for billy elliot please")

'i will still have a ticket for billy elliot'

# Meaning Map

In [52]:
def is_slice_in_list(s,l):
    len_s = len(s) #so we don't recompute length of s on every iteration
    return any(s == l[i:len_s+i] for i in range(len(l) - len_s+1))

def is_slice_in_list2(s,l):
    len_s = len(s) #so we don't recompute length of s on every iteration
    for i in range(len(l) - len_s+1):
        if s==l[i:len_s+i]:
            return i
        
def extract_by_pattern(patterns, tags, words):
    extracted_words = []
    tags_string = " ".join(tags)
    for pattern in patterns:
        pattern_string = " ".join(pattern)
        if is_slice_in_list(pattern, tags):
            pattern_start_index = is_slice_in_list2(pattern, tags)
            if len(pattern) > 1:
                pattern_end_index = tags.index(pattern[-1], pattern_start_index+1)
                if pattern_end_index - pattern_start_index + 1 == len(pattern):
                        extracted_part = ""
                        for i in range(pattern_start_index, pattern_end_index+1):
                            extracted_part += words[i] + " "
                        extracted_words.append(extracted_part[:-1])
            else:
                extracted_words.append(words[pattern_start_index])
            """
            if len(pattern) > 1:
                pattern_end_index = tags.index(pattern[-1], pattern_start_index+1)
                if pattern_end_index - pattern_start_index +1 != len(pattern):
                    pattern_end_index = tags.index(pattern[-1], pattern_end_index+1)
                    
                if pattern_end_index - pattern_start_index + 1 == len(pattern):
                    extracted_part = ""
                    for i in range(pattern_start_index, pattern_end_index+1):
                        extracted_part += words[i] + " "
                        

                    extracted_words.append(extracted_part[:-1])
            else:
                extracted_words.append(words[pattern_start_index])
            """
    return extracted_words
    

def extract_key_dt_nouns(nlp_sent):
    tags = nlp_sent[2]
    words = nlp.spacy_words_to_string_array(nlp_sent[0])
    sentence = " ".join(words)
    #words = " ".join(nlp_sent[0])
    #[['DT', 'NN', 'NN']]
    prio_one_patterns = [['DT', 'NN', 'NN'], ['DT', 'NN', 'NNS'],
                         ['DT', 'NN']]

    extracted_words = extract_by_pattern(prio_one_patterns, tags, words)
    return extracted_words
    
def extract_key_nouns(nlp_sent):
    tags = nlp_sent[2]
    words = nlp.spacy_words_to_string_array(nlp_sent[0])
    sentence = " ".join(words)
    #words = " ".join(nlp_sent[0])
    #[['DT', 'NN', 'NN']]
    prio_one_patterns = [['PRP$', 'NN', 'NN'], ['PRP$', 'NN', 'NNS'], ['PRP$', 'NNS', 'NNS'],\
                         ['PRP$', 'NN'], ['PRP$', 'NNS'], ['NN', 'NNS'], ['NN', 'NN'], ['RB', 'NN'], ['JJ', 'NNS'],\
                         ['JJ', 'NN'], ['NN'], ['NNS']]
    
    # Only when no prio_one_pattern fits
    prio_sec_patterns = [['NN'], ['NNS']]
    extracted_words = extract_by_pattern(prio_one_patterns, tags, words)
    if len(extracted_words) < 1:
        extracted_words = extract_by_pattern(prio_sec_patterns, tags, words)
    return extracted_words


def generate_dt_nouns_by_key_nouns(nlp_nouns):
    dts = ['a', 'the']
    patterns = [['NN', 'NN'], ['NN', 'NNS'], ['NN']]
    tags = nlp_nouns[2]
    words = nlp.spacy_words_to_string_array(nlp_nouns[0])
    
    generated_words = []
    for pattern in patterns:
        if pattern == tags:
            for dt in dts:
                sentence = dt + " " +" ".join(words)
                generated_words.append(sentence)
    return generated_words

def generalise_aux_verb(nlp_sent):
    tags = nlp_sent[2]
    words = nlp.spacy_words_to_string_array(nlp_sent[0])
    sentence = " ".join(words)
    patterns = [['PRP', 'MD', 'VB', 'TO'], ['PRP', 'MD', 'VB', 'DT'], ['PRP', 'VBP', 'TO']]

    extracted_words = extract_by_pattern(patterns, tags, words)
    return extracted_words 
    
def create_meaning_map(grammar):
    prompt_noun_map = {}
    for prompt in grammar:
        try:
            extracted_nouns = []
            for response in grammar[prompt]:
                nlp_s = nlp.nlp_sentence(response)
                nouns = extract_key_nouns(nlp_s)
                if len(nouns) > 0:
                    extracted_nouns.extend(nouns)
            prompt_noun_map[prompt] = list(set(extracted_nouns))
        except:
            print(prompt)
    return prompt_noun_map

## Extract Aux verbs from total reference grammar

In [75]:
found_aux_verb = []
for key in grammar:
    for item in grammar[key]:
        nlp_s = nlp.nlp_sentence(item)
        aux_verb = generalise_aux_verb(nlp_s)
        if len(aux_verb) > 0:
            found_aux_verb.extend(aux_verb)
print(len(set(found_aux_verb)))

35


In [76]:
list(set(found_aux_verb))

['i will buy these',
 'i will have these',
 "i 'd need a",
 "i 'd like the",
 'i would like a',
 "i 'd like some",
 'i wish to',
 'i should like a',
 'i will take a',
 'i should like to',
 'i will have the',
 'i need to',
 'i would like the',
 'i would like an',
 "i 'd want a",
 'i would want a',
 "i 'd like a",
 'i would need a',
 'i can buy some',
 'i have to',
 'i should like an',
 'i can buy a',
 'i will have a',
 'i will take the',
 'i would like some',
 'i will have an',
 'i would like to',
 'i will have some',
 'i want to',
 'i will take some',
 'i should like some',
 'i will take an',
 'i would need to',
 "i 'd like to",
 "i 'd like an"]

In [60]:
generalise_aux_verb(nlp.nlp_sentence("I wish to pay by card"))

['I wish to pay']

In [6]:
prompt_noun_map = create_meaning_map(grammar)

# Apply Reference Grammar And Preprocessing And Unqiue

In [83]:
false_counter = 0
correct_counter = 0

false_prompts = {}
safe_prompts = []
for prompt_unit in test_data:
    
    for dict_prompt in test_data[prompt_unit]: 
        transcript = dict_prompt['transcript']
        sentence = transcript
        if "***" in sentence:
            sentence = sentence.replace("***", "")
        
        processed = sentence
        
        
        try:
            processed = string_hand.clear_sentence(transcript)
        except:
            print(dict_prompt)


        unique_sentence = string_hand.get_unique_sentence(sentence)
        if dict_prompt['id'] == '3796':
            print(transcript)
            print(unique_sentence)

        if sentence not in grammar[prompt_unit] and  \
            processed not in grammar[prompt_unit] and \
            unique_sentence not in grammar[prompt_unit]:
                
            item = {"id": dict_prompt['id'], "prompt": str(prompt_unit),"transcript": transcript, "processed": processed, "unique": unique_sentence}
            false_counter += 1
            if prompt_unit in false_prompts:
                false_prompts[prompt_unit].append(item)
            else:
                arr = []
                arr.append(item)
                false_prompts[prompt_unit] = arr
            #false_prompts[dict_prompt['id']] = 
                #writer.write(prompt_unit + "\t" + sentence['transcript'] + "\n")
        else:
            item = {"id": dict_prompt['id'], "prompt": str(prompt_unit), "transcript": transcript, "processed": processed, "unique": unique_sentence, "method": "RG", "language": True, "meaning": True}
            safe_prompts.append(item)
print("Correct: %s" % str(len(safe_prompts)))
print("False: %s" % str(false_counter))

i would like to pay with master with master card
i would like to pay with master card
Correct: 577
False: 419


In [84]:
safe_prompts

[{'id': '3883',
  'language': True,
  'meaning': True,
  'method': 'RG',
  'processed': 'i would like the bill',
  'prompt': 'Frag: Ich möchte die Rechnung',
  'transcript': 'i would like the bill',
  'unique': 'i would like the bill'},
 {'id': '4339',
  'language': True,
  'meaning': True,
  'method': 'RG',
  'processed': 'i would like the check',
  'prompt': 'Frag: Ich möchte die Rechnung',
  'transcript': 'i would like the check',
  'unique': 'i would like the check'},
 {'id': '4581',
  'language': True,
  'meaning': True,
  'method': 'RG',
  'processed': 'can i have the bill',
  'prompt': 'Frag: Ich möchte die Rechnung',
  'transcript': 'can i have the bill',
  'unique': 'can i have the bill'},
 {'id': '4604',
  'language': True,
  'meaning': True,
  'method': 'RG',
  'processed': 'can i have the bill',
  'prompt': 'Frag: Ich möchte die Rechnung',
  'transcript': 'can i have the bill',
  'unique': 'can i have the bill'},
 {'id': '4619',
  'language': True,
  'meaning': True,
  'met

# Cluster Approach

In [85]:
def count_false_items():
    counter = 0
    for key in false_prompts:
        counter += len(false_prompts[key])
    return counter

In [86]:
count_false_items()

419

In [87]:
print(len(safe_prompts))

577


In [88]:
false_prompts['Sag: Ich möchte mit Dollars bezahlen'][0]

{'id': '3599',
 'processed': 'can i buy by dollars',
 'prompt': 'Sag: Ich möchte mit Dollars bezahlen',
 'transcript': 'can i buy by dollars',
 'unique': 'can i buy by dollars'}

## Credit Card Cluster

In [41]:
import scripts.credit_card as credit_card

In [107]:
def meaning_is_correct(prompt_unit, transcript, clear_transcript, unique_sentence):
    nlp_transcript = nlp.nlp_sentence(transcript)
    nlp_clear = nlp.nlp_sentence(transcript)
    nlp_unqiue = nlp.nlp_sentence(unique_sentence)
    extracted_nouns_t = extract_key_nouns(nlp_transcript)
    extracted_nouns_c = extract_key_nouns(nlp_clear)
    extracted_nouns_u = extract_key_nouns(nlp_unqiue)
    
    for noun in extracted_nouns_t:
        try:
            if noun in prompt_noun_map[prompt_unit]:
                return True
        except:
            print(prompt_unit)
            print(prompt_noun_map[prompt_unit])
            print(noun)
    for noun in extracted_nouns_c:
        if noun in prompt_noun_map[prompt_unit]:
            return True
    for noun in extracted_nouns_u:
        if noun in prompt_noun_map[prompt_unit]:
            return True
    return False
        #remove_from_false_prompts(false_prompts, key, item['id'], method="magic", meaning="correct", language="correct")
    

meaning_is_correct("Sag: Ich möchte mit Mastercard bezahlen", "i want pay with the master card", "i want pay with the master card", "i want pay with the master card")
#prompt_noun_map["Sag: Ich möchte mit Kreditkarte bezahlen"]

True

In [93]:
test_prompts = ["Sag: Ich möchte mit Dollars bezahlen",
"Sag: Ich möchte mit Euros bezahlen",
"Sag: Ich möchte mit Kreditkarte bezahlen",
"Sag: Ich möchte mit Mastercard bezahlen",
"Sag: Ich möchte mit Postkarte bezahlen",
"Sag: Ich möchte mit Visa bezahlen",
"Sag: Ich möchte mit Pfund bezahlen",
"Sag: Ich möchte mit Schweizer Franken bezahlen"]

false_credit_card_prompts = data_hand.get_test_data_by_false_prompts(test_prompts, false_prompts, grammar)

In [103]:
def remove_from_false_prompts(false_prompts, key, id_, method="credit card cluster", meaning=False, language=False):
    if key in false_prompts:
        items = false_prompts[key]
        for index in range(0, len(items)):
            if items[index]['id'] == id_:
                item = {"id": id_, "prompt": str(key), "transcript": items[index]['transcript'], "processed": items[index]["processed"], "method": method, "language": str(language), "meaning": str(meaning), "unique": items[index]["unique"]}
                safe_prompts.append(item)
                #del false_prompts[key][index]
                return item 

In [108]:
false_counter = 0
correct_counter = 0

print(len(false_prompts))

correct_items = []
for prompt_unit in test_prompts:
    for item in false_credit_card_prompts[prompt_unit]:
        meaning = False
        language = False
        try:
            processed =item["processed"]
            unique_sentence = item["unique"]
            transcript = item["transcript"]
        except:
            print(unique_sentence)
            continue
        if credit_card.accept_credit_card(transcript) == False and \
            credit_card.accept_credit_card(processed) == False and \
            credit_card.accept_credit_card(unique_sentence) == False:
            
            false_counter += 1
            #remove_from_false_prompts(false_prompts, key, item['id'])
        else:
            #
            
            language = True
        
        if meaning_is_correct(prompt_unit, transcript, clear_item, unique_sentence):
            meaning = True
            
        
        item = remove_from_false_prompts(false_prompts, prompt_unit, item['id'], meaning=meaning, language=language)
        if item:
            #print(item)
            if item['meaning'] == 'True' and item['language'] == 'True':
                correct_counter += 1
            correct_items.append(item)
            
print("Correct: %s" % str(correct_counter))
print("False: %s" % str(false_counter))
print(len(safe_prompts))

162
Correct: 5
False: 44
852


In [109]:
for item in correct_items:
    if item['meaning'] == str(True) and item['language']== str(True):
        print(item['id'] + ";accepted;"  + item['language'] + ";" + item['meaning'])
    else:
        print(item['id'] + ";rejected;"  + item['language'] + ";" + item['meaning'])

3599;rejected;False;True
4147;rejected;False;True
4010;rejected;False;True
4534;rejected;False;False
3728;rejected;False;False
3729;rejected;False;False
4034;accepted;True;True
4127;accepted;True;True
4128;rejected;False;True
4134;rejected;False;True
4136;rejected;False;True
4211;rejected;False;True
4212;rejected;False;True
4340;rejected;False;True
4373;rejected;False;True
4384;rejected;False;True
4520;rejected;False;True
4582;rejected;False;True
4605;rejected;False;True
4620;rejected;False;True
4672;rejected;False;True
3597;rejected;False;True
3891;rejected;True;False
3892;rejected;False;False
4058;rejected;False;False
4296;accepted;True;True
4311;rejected;False;True
4312;rejected;False;False
4681;rejected;False;True
3639;rejected;False;True
3640;rejected;False;True
3641;rejected;True;False
3716;rejected;False;False
3738;rejected;True;False
3801;rejected;False;True
4165;rejected;False;True
4222;rejected;False;False
4527;rejected;False;True
3592;rejected;False;True
4146;accepted;True;T

In [51]:
nouns = []
dt_nouns = []
for prompt in test_prompts:
    for response in grammar[prompt]:
        string_it =" ".join(extract_key_nouns(nlp.nlp_sentence(response)))
        if "ma " in string_it:
            print(response)
            print(prompt)
        else:
            nouns.extend(extract_key_nouns(nlp.nlp_sentence(response)))   
            dt_nouns.extend(extract_key_dt_nouns(nlp.nlp_sentence(response)))   

generated_nouns = []
for noun in list(set(nouns)):
    gen = generate_dt_nouns_by_key_nouns(nlp.nlp_sentence(noun))
    if len(gen) > 0:
        generated_nouns.extend(gen)

In [53]:
list(set(generated_nouns))

['the credit card',
 'the card',
 'the post card',
 'a master card',
 'a visa',
 'the credit',
 'a post',
 'a master',
 'a credit card',
 'the credit cards',
 'the postcard',
 'a post card',
 'the master',
 'the master master',
 'a mastercard',
 'the mastercard',
 'a card',
 'a master master',
 'the visa card',
 'the visa',
 'a visa card',
 'a credit cards',
 'a credit',
 'a postcard',
 'the post',
 'the master card']

In [110]:
for index in range(0, len(correct_items)):
    if correct_items[index]['id'] == '3796':
        print(correct_items[index])

{'processed': 'i would like to pay with master with master card', 'meaning': 'False', 'prompt': 'Sag: Ich möchte mit Schweizer Franken bezahlen', 'transcript': 'i would like to pay with master with master card', 'unique': 'i would like to pay with master card', 'language': 'True', 'id': '3796', 'method': 'credit card cluster'}


In [56]:
nlp.nlp_sentence("i wish to pay with visa")

([i, wish, to, pay, with, visa],
 ['i', 'wish', 'to', 'pay', 'with', 'visa'],
 ['PRP', 'VBP', 'TO', 'VB', 'IN', 'NN'],
 ['PRP', 'VBP', 'TO', 'VB', 'IN', 'NN'])

In [28]:
safe_prompts[-1]

{'id': '4529',
 'language': 'False',
 'meaning': 'True',
 'method': 'credit card cluster',
 'processed': 'i like buy with swiss francs',
 'prompt': 'Sag: Ich möchte mit Schweizer Franken bezahlen',
 'transcript': 'i like buy with swiss francs'}

## Restarant Cluster

In [277]:
import scripts.restarant as resta

In [316]:
test_prompts = ["Frag: Ich möchte die Rechnung", "Frag: Ich möchte die Dessertkarte"]
false_restarant_prompts = data_hand.get_test_data_by_false_prompts(test_prompts, false_prompts, grammar)

In [317]:
false_counter = 0
correct_counter = 0

print(len(false_prompts))
for key in test_prompts:
    for item in false_restarant_prompts[key]:
        try:
            clear_item = string_hand.clear_sentence(item["processed"])
            transcript =string_hand.clear_sentence(item["transcript"])
        except:
            continue
        if resta.accept_restarant(clear_item) == False and resta.accept_restarant(transcript) == False:
            false_counter += 1  
            remove_from_false_prompts(false_prompts, key, item['id'], method="Restarant Cluster")
        else:
            remove_from_false_prompts(false_prompts, key, item['id'], method="Restarant Cluster", meaning="correct", language="correct")
            correct_counter += 1
print("Correct: %s" % str(correct_counter))
print("False: %s" % str(false_counter))
print(len(safe_prompts))

164
Correct: 0
False: 13
641


In [318]:
false_restarant_prompts = data_hand.get_test_data_by_false_prompts(test_prompts, false_prompts, grammar)

In [319]:
count_false_items()

355

In [282]:
safe_prompts

[{'id': '3700',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i want a ticket to london',
  'prompt': 'Frag: ein Ticket nach London',
  'transcript': 'i want a ticket to london'},
 {'id': '3743',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i want a ticket to london',
  'prompt': 'Frag: ein Ticket nach London',
  'transcript': 'i want a ticket to london'},
 {'id': '4398',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i want one ticket to london',
  'prompt': 'Frag: ein Ticket nach London',
  'transcript': 'i want one ticket to london'},
 {'id': '3995',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i would like to sit in the fifth row',
  'prompt': 'Sag: Ich möchte in der fünften Reihe sitzen',
  'transcript': 'i would like to sit in the fifth row'},
 {'id': '4420',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'proce

# Magic

In [175]:
import copy

In [176]:
def insert(tree, key, value):
    #print(key)
    if key:
        first, rest = key[0], key[1:]
        if first not in tree:
            tree[first] = {}
        insert(tree[first], rest, value)
    else:
        tree['key'] = True

In [177]:
tree = {}
for prompt_unit in grammar:
    
    for response in grammar[prompt_unit]:
        tags = nlp.nlp_sentence(response)[2]
        insert(tree, tags, "true")

In [23]:
def existintree(tree, array, rest_tree):
    if len(array) == 0:
        return False
    if array[0] not in rest_tree:
        return False
    if array[0] in rest_tree:
        if 'key' in rest_tree[array[0]] and len(array) == 1:
            return True
    #print(tree[array[0]])
    
    return existintree(tree, array[1:], rest_tree[array[0]])

In [302]:
def sing_plu(nlp_sent):
    words = nlp_sent[0]
    tags = nlp_sent[2]
    if 'CD' in tags:
        index_cd = tags.index('CD')
        if str(words[index_cd]) == 'one':
            if 'NNS' not in tags:
                return True
            else:
                return False
        else:
            if len(words) > index_cd + 1 and str(tags[index_cd+1]) == 'NNS':
                return True
            
            elif 'NN' not in tags:
                return True
            else:
                return False 
    return True

def iter_items(prompts, prompt_unit):
    magic_accepted_prompts = []
    counter = 0
    for prompt in prompts:
        processed = prompt["processed"]
        transcript = prompt["transcript"]
        id_ = prompt["id"]
        nlp_processed = nlp.nlp_sentence(processed)
        nlp_processed_tags = nlp_processed[2]
        nlp_transcript = nlp.nlp_sentence(transcript)
        nlp_transcript_tags = nlp_transcript[2]
        if existintree(tree, nlp_processed_tags, copy.deepcopy(tree)) == True or \
        existintree(tree, nlp_transcript_tags, copy.deepcopy(tree)) == True:
            if sing_plu(nlp_processed) == True or sing_plu(nlp_transcript) == True:
                item = {"id": id_ , "transcript": transcript, "processed": processed}
                
                magic_accepted_prompts.append(item)
                extracted_nouns = extract_key_nouns(nlp_processed)
                for noun in extracted_nouns:
                    if noun in prompt_noun_map[prompt_unit]:
                        print(item)
                        counter += 1
                        remove_from_false_prompts(false_prompts, key, item['id'], method="magic", meaning="correct", language="correct")
                        
                
    return magic_accepted_prompts, counter

In [320]:
correct_counter = 0
magic_accepted_prompts_map = {}
for key in false_prompts:
    accepted_prompts, counter = iter_items(false_prompts[key], key)
    magic_accepted_prompts_map[key] = accepted_prompts
    correct_counter += counter
print(correct_counter)

{'transcript': 'where is the london eye room', 'processed': 'where is the london eye room', 'id': '4628'}
{'transcript': 'i want a ticket for mamma mia', 'processed': 'i want a ticket for mamma mia', 'id': '3838'}
{'transcript': 'i want a ticket for mamma mia', 'processed': 'i want a ticket for mamma mia', 'id': '3838'}
{'transcript': 'i need a ticket for thursday night', 'processed': 'i need a ticket for thursday night', 'id': '3828'}
{'transcript': 'i need a ticket for thursday night', 'processed': 'i need a ticket for thursday night', 'id': '3828'}
{'transcript': 'room for seven nights', 'processed': 'room for seven nights', 'id': '4132'}
{'transcript': 'room for seven nights', 'processed': 'room for seven nights', 'id': '4132'}
{'transcript': 'the capital of france is madrid', 'processed': 'the capital of france is madrid', 'id': '4493'}
{'transcript': 'where is the big ben', 'processed': 'where is the big ben', 'id': '3864'}
{'transcript': 'where is the big ben', 'processed': 'whe

In [238]:
print(len(safe_prompts))
safe_prompts

515


[{'id': '3700',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i want a ticket to london',
  'prompt': 'Frag: ein Ticket nach London',
  'transcript': 'i want a ticket to london'},
 {'id': '3743',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i want a ticket to london',
  'prompt': 'Frag: ein Ticket nach London',
  'transcript': 'i want a ticket to london'},
 {'id': '4398',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i want one ticket to london',
  'prompt': 'Frag: ein Ticket nach London',
  'transcript': 'i want one ticket to london'},
 {'id': '3995',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'processed': 'i would like to sit in the fifth row',
  'prompt': 'Sag: Ich möchte in der fünften Reihe sitzen',
  'transcript': 'i would like to sit in the fifth row'},
 {'id': '4420',
  'language': 'correct',
  'meaning': 'correct',
  'method': 'RG',
  'proce

In [321]:
with open("annotated_kaldi_data_v1.csv", "w") as writer:
    for item in safe_prompts:
        writer.write(item['id'] + "\t" + item['method'] + "\t" + item["prompt"] + "\t" +item['processed'] + "\t" +item['language']+ "\t" + item['meaning'] + "\n" )
    for key in false_prompts:
        for item in false_prompts[key]:
            writer.write(item['id'] + "\t" + "no method" + "\t"+ key + "\t" + item['processed'] + "\t" +'incorrect'+ "\t" + 'incorrect' + "\n" )

In [127]:
prompt_noun_map['Frag: 1 Musical-Ticket']

['ticket']

In [126]:
t_prompt = magic_accepted_prompts_map['Frag: 1 Musical-Ticket'][0]["transcript"]
print(t_prompt)
nlp_t_prompt = nlp.nlp_sentence(t_prompt)
print(nlp_t_prompt)
extract_key_nouns(nlp_t_prompt)

a ticket for wednesday evening
([a, ticket, for, wednesday, evening], ['a', 'ticket', 'for', 'wednesday', 'evening'], ['DT', 'NN', 'IN', 'NN', 'NN'], ['DT', 'NN', 'IN', 'NN', 'NN'])


['wednesday evening', 'ticket']

In [85]:
print(false_prompts['Frag: 1 Musical-Ticket'])
iter_items(false_prompts['Frag: 1 Musical-Ticket'])

[{'transcript': 'i need one musical tickets', 'processed': 'i need one musical tickets', 'id': '3835'}, {'transcript': 'can i have one musical tickets', 'processed': 'can i have one musical tickets', 'id': '3898'}, {'transcript': 'a ticket for wednesday evening', 'processed': 'a ticket for wednesday evening', 'id': '3899'}, {'transcript': 'can i have one musical tickets', 'processed': 'can i have one musical tickets', 'id': '4415'}, {'transcript': 'can i have one musical tickets', 'processed': 'can i have one musical tickets', 'id': '4548'}]
{'transcript': 'i need one musical tickets', 'processed': 'i need one musical tickets', 'id': '3835'}
i need one musical tickets
i need one musical tickets
{'transcript': 'can i have one musical tickets', 'processed': 'can i have one musical tickets', 'id': '3898'}
can i have one musical tickets
can i have one musical tickets
{'transcript': 'a ticket for wednesday evening', 'processed': 'a ticket for wednesday evening', 'id': '3899'}
a ticket for w

([{'id': '3899',
   'processed': 'a ticket for wednesday evening',
   'transcript': 'a ticket for wednesday evening'}],
 1)