# Lielo valodas modeļu izmantošana CQL (Corpus Query Language) vaicājumu ģenerēšanai marķētos teksta korpusos

## Testa kopas izveidošana
### CQL vaicājumu ģenerēšana ar skripta failu
Ar skriptu uzģenerēt dažādu tipu CQL vaicājumus.

In [21]:
import random
import json
# import csv

# Lemma pools, grouped by part of speech, from which query slots are filled.
# Every lemma appears once per list so that random sampling is uniform, and
# each lemma is filed under the part of speech the prompts will call it.
LEMMAS = {
    'noun': [
        'māja', 'darbs', 'valsts', 'cilvēks', 'gads', 'diena', 'sirds', 'jūra', 'mežs', 'piens',
        'brīvība', 'mieži', 'brilles', 'krievi', 'eksāmens', 'laiks', 'bēdas',
        'jautājums', 'bērns', 'veids', 'Latvija', 'Jānis', 'Saeima', 'amats', 'biedrība',
        'gals', 'novembris', 'pētījums', 'nauda', 'koks', 'kolēģis', 'saruna', 'Dānija',
        'laboratorija', 'garantija', 'jēga', 'slepkavība', 'tulkotāja', 'rabīns', 'trusis', 'lūsis',
        'bojāgājušais', 'dienvidaustrumi', 'čuguns', 'maršs', 'reformācija', 'daļa', 'projekts',
    ],
    'verb': [
        'būt', 'iet', 'darīt', 'redzēt', 'zināt', 'nākt', 'sākt', 'dzīvot', 'mīlēt', 'rakstīt', 'noteikt',
        'ņemt', 'pieņemt', 'gūt', 'iegūt', 'atbalstīt', 'saņemt', 'izmantot', 'veikt', 'precizēt',
        'nodrošināt', 'atrasties', 'piederēt',
    ],
    'adj': [
        'liels', 'mazs', 'jauns', 'balts', 'melns', 'skaists', 'augsts', 'labs', 'slikts', 'garš',
        'nepieciešams', 'svarīgs', 'vecs', 'auksts', 'karsts', 'daļējs', 'vasarīgs', 'spilgts',
    ],
    'adv': [
        'ātri', 'lēni', 'šodien', 'rīt', 'tagad', 'tur', 'kur', 'kāpēc', 'ļoti', 'pārāk', 'bieži',
        'vienkārši',
    ],
}

# Morphological tag patterns with their English ("nl") and Latvian ("lv")
# names; each record is expanded from a (pattern, english, latvian) triple.
TAG_MAP = [
    {"cql": tag_pattern, "nl": english_name, "lv": latvian_name}
    for tag_pattern, english_name, latvian_name in (
        ("n.*", "noun", "lietvārds"),
        ("n.m.*", "masculine noun", "vīriešu dzimtes lietvārds"),
        ("n.f.*", "feminine noun", "sieviešu dzimtes lietvārds"),
        ("v.*", "verb", "darbības vārds"),
        ("v..i.*", "indicative verb", "īstenības izteiksmes darbības vārds"),
        ("a.*", "adjective", "īpašības vārds"),
        ("r.*", "adverb", "apstākļa vārds"),
        ("c.*", "conjunction", "saiklis"),
        ("p.*", "pronoun", "vietniekvārds"),
        ("m.*", "numeral", "skaitļa vārds"),
    )
]

# Word fragments used by the wildcard templates (word endings, word
# beginnings and inner substrings), written as whitespace-separated lists.
PARTS = dict(
    suffix='nieks īgs ot oties ams āms usi eklis dams damies'.split(),
    prefix='uz sa pret pie pēc pār pa no ne iz ie caur bez at ap aiz'.split(),
    substring='simt smit māt tēv šan pil'.split(),
)

# STRUCTURES = ["s", "p", "doc", "text"] 

# Query templates. Each record carries a category ("type"), a CQL pattern and
# matching English/Latvian prompt texts, all with str.format placeholders.
# Literal CQL repetition braces are doubled ({{1,3}}) so str.format keeps them.
templates = [
    {"type": category, "pattern": cql_pattern, "nl_en": english, "nl_lv": latvian}
    for category, cql_pattern, english, latvian in (
        # --- single-token queries ---
        (
            "simple",
            '[lemma="{lemma}"]',
            "Find all occurrences of the lemma '{lemma}'.",
            "Atrast visus lemmas '{lemma}' gadījumus.",
        ),
        (
            "simple",
            '[word="{word}"]',
            "Search for the exact word form '{word}'.",
            "Meklēt precīzu vārdformu '{word}'.",
        ),
        (
            "simple",
            '[tag="{tag}"]',
            "Find any {tag_desc}.",
            "Atrast jebkuru {tag_desc_lv}.",
        ),
        (
            "simple",
            '[lemma="{lemma}" & tag="{tag}"]',
            "Find the lemma '{lemma}' specifically used as a {tag_desc}.",
            "Atrast lemmu '{lemma}', kas lietota kā {tag_desc_lv}.",
        ),
        # --- multi-token sequences ---
        (
            "sequence",
            '[lemma="{lemma}"] [lemma="{lemma1}"]',
            "Find '{lemma}' followed immediately by '{lemma1}'.",
            "Atrast '{lemma}', kam tieši seko '{lemma1}'.",
        ),
        (
            "sequence",
            '[tag="{tag1}"] [tag="{tag2}"]',
            "Find a {tag_desc1} followed immediately by a {tag_desc2}.",
            "Atrast {tag_desc1_lv}, kam seko {tag_desc2_lv}.",
        ),
        (
            "sequence",
            '[tag="{tag1}"] [tag="{tag2}"] [tag="{tag3}"]',
            "Find a {tag_desc1} followed by a {tag_desc2} and then a {tag_desc3}.",
            "Atrast {tag_desc1_lv}, kam seko {tag_desc2_lv} un tad {tag_desc3_lv}.",
        ),
        (
            "sequence",
            '[lemma="{adj}"] [lemma="{noun}"]',
            "Find the adjective '{adj}' followed immediately by the noun '{noun}'.",
            "Atrast īpašības vārdu '{adj}', kam tieši seko lietvārds '{noun}'.",
        ),
        (
            "sequence",
            '[lemma="{adv}"] [lemma="{noun}"]',
            "Find the adverb '{adv}' followed immediately by the noun '{noun}'.",
            "Atrast apstākļa vārdu '{adv}', kam tieši seko lietvārds '{noun}'.",
        ),
        (
            "sequence",
            '[tag="a.*"] [lemma="{noun}"]',
            "Find any adjective followed by the lemma '{noun}'.",
            "Atrast jebkuru īpašības vārdu, kam seko lemma '{noun}'.",
        ),
        (
            "sequence",
            '[lemma="{verb}"] []{{1,3}} [lemma="{noun}"]',
            "Find the verb '{verb}' followed by the noun '{noun}' with 1 to 3 words in between.",
            "Atrast darbības vārdu '{verb}', kam pēc 1 līdz 3 vārdiem seko lietvārds '{noun}'.",
        ),
        (
            "sequence",
            '[tag="v.*"] []{{0,2}} [tag="n.*"]',
            "Find any verb followed by any noun with up to 2 words in between.",
            "Atrast jebkuru darbības vārdu, kam līdz 2 vārdiem starpā seko lietvārds.",
        ),
        (
            "sequence",
            '[lemma="{lemma}"] []{{2,4}}',
            "Find '{lemma}' followed by 2 to 4 words.",
            "Atrast '{lemma}', kam seko 2 līdz 4 vārdi.",
        ),
        (
            "sequence",
            '[]{{1,5}} [lemma="{lemma}"]',
            "Find '{lemma}' preceded by 1 to 5 words.",
            "Atrast '{lemma}', kam pirms tā ir 1 līdz 5 vārdi.",
        ),
        # --- negated conditions ---
        (
            "complex",
            '[tag="n.*" & lemma!="{noun}"]',
            "Find all nouns except the lemma '{noun}'.",
            "Atrast visus lietvārdus, izņemot lemmu '{noun}'.",
        ),
        (
            "complex",
            '[lemma="{lemma}" & tag!="v.*"]',
            "Find usages of '{lemma}' where it is not a verb.",
            "Atrast '{lemma}' lietojumus, kur tas nav darbības vārds.",
        ),
        # --- regular-expression wildcards ---
        (
            "wildcard",
            '[word="{prefix}.*"]',
            "Find any word starting with '{prefix}'.",
            "Atrast jebkuru vārdu, kas sākas ar '{prefix}'.",
        ),
        (
            "wildcard",
            '[word=".*{suffix}"]',
            "Find any word ending with '{suffix}'.",
            "Atrast jebkuru vārdu, kas beidzas ar '{suffix}'.",
        ),
        (
            "wildcard",
            '[lemma=".*{substring}.*"]',
            "Find any lemma containing '{substring}'.",
            "Atrast jebkuru lemmu, kas satur '{substring}'.",
        ),
    )
]

# {
#     "type": "complex",
#     "pattern": '<{struct}> containing [lemma="{lemma}"]',
#     "nl_en": "Find {struct} structures that contain the lemma '{lemma}'.",
#     "nl_lv": "Atrast <{struct}> struktūras, kas satur lemmu '{lemma}'."
# }

# {
#     "type": "complex",
#     "pattern": '[lemma="{lemma}"] within <{struct}/>',
#     "nl_en": "Find the lemma '{lemma}' inside a {struct} structure.",
#     "nl_lv": "Atrast lemmu '{lemma}' struktūrā <{struct}>."
# },


def _fix_indefinite_articles(text, descriptions):
    """Turn "a <description>" into "an <description>" for vowel-initial descriptions."""
    for desc in descriptions:
        if desc and desc[0].lower() in "aeiou":
            text = text.replace(f" a {desc}", f" an {desc}")
    return text


def generate_dataset(n=1200, seed=None):
    """Generate up to ``n`` unique CQL queries with English and Latvian prompts.

    Each attempt picks a random template and fills its placeholders with
    randomly chosen lemmas, tag patterns and word fragments. A query is kept
    only if its CQL string has not been produced before.

    Args:
        n: Number of unique queries wanted.
        seed: Optional seed for a private random generator, making the result
            reproducible. With ``None`` the module-level ``random`` state is
            used.

    Returns:
        A list of dicts with the keys ``id`` (1-based position), ``category``,
        ``prompt_en``, ``prompt_lv`` and ``cql``. The list can be shorter than
        ``n``: generation stops after ``n * 5`` attempts.
    """
    rng = random if seed is None else random.Random(seed)

    dataset = []
    seen_queries = set()

    attempts = 0
    while len(dataset) < n and attempts < n * 5:
        attempts += 1

        tmpl = rng.choice(templates)

        noun = rng.choice(LEMMAS['noun'])
        noun1 = rng.choice(LEMMAS['noun'])
        verb = rng.choice(LEMMAS['verb'])
        adj = rng.choice(LEMMAS['adj'])
        adv = rng.choice(LEMMAS['adv'])

        tag_obj1 = rng.choice(TAG_MAP)
        tag_obj2 = rng.choice(TAG_MAP)
        tag_obj3 = rng.choice(TAG_MAP)

        suffix = rng.choice(PARTS['suffix'])
        substring = rng.choice(PARTS['substring'])
        prefix = rng.choice(PARTS['prefix'])

        # One mapping serves all three texts: str.format ignores keyword
        # arguments that a given template does not reference.
        fields = {
            'lemma': noun,
            'lemma1': noun1,
            # The exact-word template reuses the noun's base form as the word.
            'word': noun,
            'noun': noun,
            'verb': verb,
            'adj': adj,
            'adv': adv,
            'tag': tag_obj1['cql'],
            'tag1': tag_obj1['cql'],
            'tag2': tag_obj2['cql'],
            'tag3': tag_obj3['cql'],
            'tag_desc': tag_obj1['nl'],
            'tag_desc1': tag_obj1['nl'],
            'tag_desc2': tag_obj2['nl'],
            'tag_desc3': tag_obj3['nl'],
            'tag_desc_lv': tag_obj1['lv'],
            'tag_desc1_lv': tag_obj1['lv'],
            'tag_desc2_lv': tag_obj2['lv'],
            'tag_desc3_lv': tag_obj3['lv'],
            'suffix': suffix,
            'substring': substring,
            'prefix': prefix,
        }

        cql = tmpl['pattern'].format(**fields)
        if cql in seen_queries:
            continue
        seen_queries.add(cql)

        # Templates hard-code the article "a", which is wrong before
        # descriptions such as "adjective", "adverb" or "indicative verb".
        nl_en = _fix_indefinite_articles(
            tmpl['nl_en'].format(**fields),
            (tag_obj1['nl'], tag_obj2['nl'], tag_obj3['nl']),
        )
        nl_lv = tmpl['nl_lv'].format(**fields)

        dataset.append({
            "id": len(dataset) + 1,
            "category": tmpl['type'],
            "prompt_en": nl_en,
            "prompt_lv": nl_lv,
            "cql": cql
        })

    return dataset


if __name__ == "__main__":
    # Request 1500 queries from the generator.
    data = generate_dataset(1500)

    # Persist the result as indented JSON; ensure_ascii=False keeps the
    # Latvian letters readable instead of \u-escaping them.
    with open('latvian_cql_dataset.json', 'w', encoding='utf-8') as f:
        f.write(json.dumps(data, indent=2, ensure_ascii=False))

    print("Generating complete")

Generating complete


## Ģenerēto vaicājumu validācija
Validēt vaicājumus, izpildot pieprasījumus uz teksta korpusa serveri, un saglabāt rezultātu kā "sagaidāmo rezultātu".
Ja vaicājums nestrādā, tad tas netiek saglabāts.

In [28]:
import requests
import time
import json
import ijson
import os

# Concordance endpoint that every validation request is sent to.
API_URL = 'https://nosketch.korpuss.lv/bonito/run.cgi/concordance'
# Value sent as the 'pagesize' request parameter.
PAGESIZE = 5
# Pause between consecutive requests, in seconds (passed to time.sleep).
SLEEP_TIME = 0.2

def _build_request_params(cql_query, corpname):
    """Build the query-string parameters for one concordance request."""
    payload_data = {
        "concordance_query": [
            {
                "queryselector": "cqlrow",
                "cql": cql_query,
                "default_attr": "lemma"
            }
        ],
        "mlsort_options": [
            {
                "skey": "kw",
                "attr": "doc.id",
                "ctx": "0",
                "bward": "",
                "icase": ""
            }
        ]
    }
    return {
        'corpname': corpname,
        'pagesize': PAGESIZE,
        'json': json.dumps(payload_data, separators=(',', ':'))
    }


def _extract_kwic_phrases(api_response):
    """Return the non-empty KWIC phrases of a decoded concordance response."""
    if not isinstance(api_response, dict):
        return []
    lines_list = api_response.get('Lines')
    if not isinstance(lines_list, list):
        return []

    phrases = []
    for line in lines_list:
        if not isinstance(line, dict):
            continue
        # 'Kwic' is a list of token dicts in the JSON response, e.g.
        # [{"str": "Pie"}, {"str": "mācību"}]; the phrase joins their 'str' values.
        kwic_tokens = [
            token.get('str', '')
            for token in (line.get('Kwic') or [])
            if isinstance(token, dict)
        ]
        full_phrase = " ".join(kwic_tokens)
        if full_phrase:
            phrases.append(full_phrase)
    return phrases


def _fetch_kwic_phrases(session, cql_query, corpname, index):
    """Run one query and return its KWIC phrases; return [] on any request error."""
    try:
        response = session.get(
            API_URL,
            params=_build_request_params(cql_query, corpname),
            timeout=15,
        )
        response.raise_for_status()
        return _extract_kwic_phrases(response.json())
    except requests.exceptions.RequestException as e:
        print(f"Error requesting item {index}: {e}. Skipping item.")
    except ValueError:
        # JSON decoding errors are ValueError subclasses.
        print(f"Error parsing JSON response for item {index}. Skipping item.")
    return []


def stream_process_dataset(infile, outfile, corpname='LVK2022'):
    """Validate generated queries against the corpus and keep the working ones.

    Items are streamed from the JSON array in ``infile``. Each item's ``cql``
    query is sent to ``API_URL``; when the response yields at least one KWIC
    phrase, the item is written to ``outfile`` with the phrases stored under
    ``expected``. Items without a query, with a failed request, or without
    results are left out. ``outfile`` is overwritten.

    Args:
        infile: Path of the JSON file holding an array of query items.
        outfile: Path of the JSON file to write the validated items to.
        corpname: Corpus name sent with every request.
    """
    first_item_written = False

    # Mode 'w' truncates an existing output file, so no separate removal step
    # is needed; the session is closed even when processing fails.
    with requests.Session() as session, \
            open(infile, 'rb') as input_f, \
            open(outfile, 'w', encoding='utf-8') as output_f:

        output_f.write('[\n')
        try:
            for index, item in enumerate(ijson.items(input_f, 'item')):
                if (index + 1) % 100 == 0:
                    print(f"-> Processed {index + 1:,} items...")

                cql_query = item.get("cql")
                if not cql_query:
                    # Skip invalid items but log it
                    print(f"Skipping item {index}: No 'cql' found.")
                    continue

                kwic_results = _fetch_kwic_phrases(session, cql_query, corpname, index)

                if kwic_results:
                    item['expected'] = kwic_results
                    # Make sure both prompt keys exist in the output record.
                    item.setdefault('prompt_lv', None)
                    item.setdefault('prompt_en', None)

                    # Serialise first so a failure cannot leave half an
                    # object in the output file.
                    serialized = json.dumps(item, ensure_ascii=False, indent=4)
                    if first_item_written:
                        output_f.write(',\n')
                    output_f.write(serialized)
                    first_item_written = True

                time.sleep(SLEEP_TIME)
        finally:
            # Always terminate the array so the file stays valid JSON even
            # when the run is interrupted.
            output_f.write('\n]')


# Validate the generated dataset and write the queries that returned results.
stream_process_dataset(
    infile="latvian_cql_dataset.json",
    outfile="final_dataset.json",
)

-> Processed 100 items...
-> Processed 200 items...
-> Processed 300 items...
-> Processed 400 items...
-> Processed 500 items...
-> Processed 600 items...
-> Processed 700 items...
-> Processed 800 items...
-> Processed 900 items...
-> Processed 1,000 items...
-> Processed 1,100 items...
-> Processed 1,200 items...
-> Processed 1,300 items...
-> Processed 1,400 items...
-> Processed 1,500 items...


## Pielabot promptus ar roku

Sistēmas prompts ir angļu valodā: lomas apraksts ("helpful assistant" utt.), īsa CQL dokumentācija un arī tag (morfoloģijas) atribūta noteikumi.
### 1. solis -- LLM ģenerētie vaicājumi
Testa kopu nokopē un katram vienības testam pieraksta uzģenerēto vaicājumu.
### 2. solis -- LLM ģenerēto vaicājumu rezultātu iegūšana no korpusa
Uz korpusu veic LLM vaicājumu pieprasījumus un saglabā iegūto rezultātu.
### 3. solis -- Rezultātu analīze
Veikt rezultātu novērtēšanu un ar matplotlib uzģenerēt diagrammas un tabulas.

## Ollama

In [None]:
# Request payload holding the system prompt (task rules, the positional
# morphological tag scheme and worked examples) and the user's question.
# `question` is not defined in this cell and must already be in scope.
# NOTE(review): one dict carrying both "role": "user" and a "system" key does
# not look like a role/content chat message - confirm how `data` is sent.
data =  {
    "role": "user",
    "system": """
      You are a Latvian language corpora Sketch Engine Corpus Query Language or CQL query generator.
      Given a user request you will generate a CQL query.
      Rules:
      Output only the CQL query with no comments, explanation or extra text.
      Use the exact syntax and style shown in the example:
      Use square brackets [] for token specifications
      Query attributes: [word="VALUE"] for word forms, [lemma="VALUE"] for dictionary forms,
      [tag="VALUE"] for morphological tags.

      Follow these rules for tag criteria: 
      Position 1 (Part of Speech):
      n (noun / lietvārds), v (verb / darbības vārds), 
      a (Adjective / īpašības vārds), p (Pronoun / vietniekvārds), 
      m (Numeral / skaitļa vārds), r (Adverb / apstākļa vārds),
      q (Particle / partikulu), i (Interjection / izsauksmes vārds), 
      y (abbreviation / saīsinājums), s (preposition / prepozīcija), 
      c (Conjunction / saiklis), z (punctuation / pieturzīme),
      x (residual / bezmorfoloģijas elements)

      Position 2 (Type):
      noun: c (Common / sugas vārds), p (Proper / īpašvārds), 
      verb: m (Main / patstāvīgs), o (modal / modāls), p (phasal / fāzes), e (expression, izpausmes veida), c (to be as auxiliary or copula / palīgverbs "būt"), t (other copulas / saitiņverbi "kļūt"), a (other auxiliaries / palīgverbi “tikt”, “tapt” )
      adjective: f (qualificative / kādības), r (relative / attieksmes)
      numeral: c (cardinal / pamata), o (ordinal / kārtas), f (fractional / daļskaitlis)
      pronoun: p (personal / personas), x (reflexive / atgriezeniskais), s (possessive / piederības), d (demonstrative / norādāmais), i (indefinite / nenoteiktais), q (interrogative / jautājamais), r (relative / attieksmes), g (definite / noteiktais)   
      adverb: p (positive / pamata), c (comparative / pārākā), s (superlative / vispārākā), 0 (not applicable / nepiemīt)
      preposition: p (pre / pirms), t (post / pēc)
      conjunction: c (coordinating / sakārtojuma), s (subordinating / pakārtojuma) 
      abbreviation: n (common noun / sugasvārds), p (proper noun / īpašvārds), a (adjective / īpašības vārds), v (verbal / verbāls), r (adverb / apstāklis), d (discourse / diskursa iezīmētāji)
      punctuation: c (comma / komats), q (quote / pēdiņa), s (stop / punkts), b (bracket / iekava), d (hyphen or dash / defise vai domu zīme), o (colon / kols), x (other / citi)
      residual: f (foreign / vārds svešvalodā), n (numeral / skaitlis cipariem), o (ordinal / kārtas skaitlis cipariem), u (URI), x (other / citi)

      Position 3:
      noun (Gender / Dzimte): m (Masculine / vīriešu), f (Feminine / sieviešu), 0 (not applicable / nepiemīt)
      verb (Reflexive / Atgriezeniskums): n (no / nē), y (yes / jā)
      adjective (Gender / Dzimte):  m (Masculine / vīriešu), f (Feminine / sieviešu), 0 (not applicable / nepiemīt)
      numeral (Structure / Uzbūve): s (simple / vienkāršs), c (compound / saliktenis)
      pronoun (Person / Persona): 1 (1 / 1), 2 (2 / 2), 3 (3 / 3), 0 (not applicable, nepiemīt)
      adverb (group / grupa): q (quantitative / mēra), m (manner / veida), p (place / vietas), t (time / laika)
      preposition (number / skaitlis): s (singular / vienskaitlis), p (plural / daudzskaitlis), 0 (not applicable / nepiemīt)         

      Position 4:
      noun (Number / Skaitlis): s (singular / vienskaitlis), p (plural / daudzskaitlis), v (singulare tantum / vienskaitlinieks), d (plurale tantum / daudzskaitlinieks), 0 (not applicable / nepiemīt) 
      verb (Mood / Izteiksme): i (indicative / īstenības), r (relative / atstāstījuma), c (conditional / vēlējuma), d (debitive / vajadzības), m (imperative / pavēles), n (infinitive / nenoteiksme), p (participle / divdabis)
      adjective (Number / Skaitlis): s (singular / vienskaitlis), p (plural / daudzskaitlis), 0 (not applicable / nepiemīt) 
      numeral (Gender / Dzimte): m (masculine / vīriešu), f (feminine / sieviešu), 0 (not applicable / nepiemīt)
      pronoun (Gender / Dzimte): m (masculine / vīriešu), f (feminine / sieviešu), 0 (not applicable / nepiemīt)
      adverb (prievārdisks): n (no / nē), y (yes / jā)
      preposition (rekcija): g (genitive / ģenitīvs), d (dative / datīvs), a (accusative / akuzatīvs), 0 (not applicable / nepiemīt)

      Position 5:
      noun (Case / Locījums): n (nominative / nominatīvs), g (genitive / ģenitīvs), d (dative / datīvs), a (accusative / akuzatīvs), l (locative / lokatīvs), v (vocative / vokatīvs), 0 (not applicable / nepiemīt)
      verb (Tense / Laiks): p (present / tagadne), f (future / nākotne), s (past / pagātne), 0 (not applicable / nepiemīt)
      adjective (Case / Locījums): n (nominative / nominatīvs), g (genitive / ģenitīvs), d (dative / datīvs), a (accusative / akuzatīvs), l (locative / lokatīvs), v (vocative / vokatīvs), 0 (not applicable / nepiemīt) 
      numeral (number / Skaitlis): s (singular / vienskaitlis), p (plural / daudzskaitlis)
      pronoun: (Skaitlis): s (singular / vienskaitlis), p (plural / daudzskaitlis), 0 (not applicable / nepiemīt)

      Position 6:
      noun (Declension / Deklinācija): 1 (1st declension / 1. deklinācija), 2 (2nd declension / 2. deklinācija), 3 (3rd declension / 3. deklinācija), 4 (4th declension / 4. deklinācija), 5 (5th declension / 5. deklinācija), 6 (6th declension / 6. deklinācija), 0 (not applicable / nepiemīt), g (genitive / ģenitīvenis), r (reflexive / atgriezenisks)
      verb (Transitivity / Pārejamība): t (transitive / pārejošs), i (intransitive / nepārejošs) 
      adjective (Definiteness / Noteiktība): n (indefinite / nenoteiktais), y -  (definite / noteiktais)
      numeral (Case / Locījums): n (nominative / nominatīvs), g (genitive / ģenitīvs), d (dative / datīvs), a (accusative / akuzatīvs), l (locative / lokatīvs), v (vocative / vokatīvs), 0 (not applicable / nepiemīt) 
      pronoun (Locījums): n (nominative / nominatīvs), g (genitive / ģenitīvs), d (dative / datīvs), a (accusative / akuzatīvs), l (locative / lokatīvs)

      Position 7:
      verb (Conjugation / Konjugācija): 1, 2, 3, i (irregular / nekārtnais)
      adjective (degree / pakāpe): p (positive / pamata)  c (comparative / pārākā) s (superlative / vispārākā)
      pronoun (Negation / Noliegums): n (no / nē), y (yes / jā)

      Position 8:
      verb (Person / Persona): 1, 2, 3, 0 (not applicable / nepiemīt)

      Position 9:
      verb (Number / Skaitlis): s (singular / vienskaitlis), p (plural / daudzskaitlis), 0 (not applicable / nepiemīt)

      Position 10:
      verb (Voice / Kārta): a (active / darāmā), 0 (not applicable / nepiemīt)

      Position 11:
      verb (Negation / Noliegums): n (no / nē), y (yes / jā) 

      Implementation Guidelines:
      * Use "." to skip a specific position.
      * Use ".*" to match any character sequence following a specific position.
      * Use "[...]" for multiple values in one position.
      * Ensure all queries are contained within [attribute=" and "] markers.
      * Use [] to match any word
      * Use logical statements & (and), | (or), ! (not) for more than one criteria for one token in brackets
  
      Examples:
        User input: Find examples of “went”
        Result: [word="went"]

        User input: Find examples of all forms of "teapot" 
        Result: [lemma="teapot"]

        User input: Find 'Jānis' followed immediately by 'gads'.
        Result: [lemma=\"Jānis\"] [lemma=\"gads\"]

        User input: Find any masculine noun.
        Result: [tag="n.m.*"]

        User input: Find any verb followed by any noun with up to 2 words in between.
        Result: [tag="v.*"] []{0,2} [tag="n.*"]

        User input: Find all forms of the word ‘test’ which is a noun
        Result: [lemma="test" & tag="n.*"]

        User input: Find the word round tagged as a noun or verb
        Result: [word="round" & ( tag="n.*" | tag="v.*" )]

        User input: Find word ‘test’ which is NOT a verb
        Result: [word="test" & !tag="v.*"]

    """,
    "content": question,
}

# `response` is not assigned in this cell; it is expected to hold the model's
# reply from an earlier step.
print(response['message']['content'])