In [49]:
import pandas as pd
import numpy as np
from utils import data
import openai
import os
from absl import app, flags, logging
import time
language = 'tur'
openai.ap_key = os.getenv("OPENAI_API_KEY")

train_path = f"data/{language}/{language}.train"
dev_path = f"data/{language}/{language}.dev"
test_path = f"data/{language}/{language}.test"

In [50]:
data=pd.read_csv(train_path, sep='\t', header=None, 
                 names=["input", "output", "tags"])
data.head()

Unnamed: 0,input,output,tags
0,teklif,teklifi,N;NOM;SG;PSS3S
1,atışmak,atışmış mısın,V;IRR;SG;2;POS;PST;INTR
2,otostop,otostobun,N;NOM;SG;PSS2S
3,türemek,türememiş miydi,V;PRF;IRR;SG;3;NEG;PST;INTR
4,demek,diyor muymuş,V;PROG;IRR;SG;3;NEG;FUT;INTR


In [51]:
grouped_tags = data.groupby("tags")

In [52]:
with open("prompts/inflection_train_base.txt") as handle:
    base_text = handle.read()
with open("prompts/inflection_test_base.txt") as handle:
    test_text = handle.read()

In [53]:
test_data=pd.read_csv(test_path, sep='\t', header=None, 
                 names=["input", "output", "tags"])
test_data

Unnamed: 0,input,output,tags
0,abacı,abacıda,N;LOC;SG
1,abacı,abacılarımızdan,N;ABL;PL;PSS1P
2,abacı,abacılarımız,N;NOM;PL;PSS1P
3,abacı,abacılarında,N;LOC;PL;PSS3S
4,abacı,abacılarını,N;ACC;PL;PSS3S
...,...,...,...
28880,zurna,zurnanızda,N;LOC;SG;PSS2P
28881,zurna,zurnanızdan,N;ABL;SG;PSS2P
28882,zurna,zurnanızı,N;ACC;SG;PSS2P
28883,zurna,zurnasında,N;LOC;SG;PSS3S


In [54]:
def get_prompt(grouped_tags, test_item):
    prompt = []
    prompt_data = grouped_tags.sample(n=1)[:10]
    for index, row in prompt_data.iterrows():
        prompt.append(base_text.format(language=language, inp=row['input'],
                              tags=row['tags'],
                              output=row['output']))
        
    prompt.append(test_text.format(language=language, inp=test_item['input'], tags=test_item['tags']))
    
    return "\n".join(prompt)

In [55]:
for index, test_item in test_data.iterrows():
    prompt = get_prompt(grouped_tags, test_item)
    print(prompt)
    
    if index == 2:
        break
    response = openai.Completion.create(
                        engine="text-davinci-002",
                        prompt=prompt,
                        temperature=0.7,
                        max_tokens=100,
                        top_p=1,
                        frequency_penalty=0,
                        presence_penalty=0,
                    )
    gold = test_item['output']
    print(gold)
    current_outputs = response["choices"][0]["text"]
    print(f"This is the current output: {current_outputs}\nThis is the gold: {gold}")
    print('\n')
    time.sleep(5)
    

Q: Inflect the tur word 'hızlı' with the morphological tags of ADJ;DECL;PL;1;NEG;LGSPEC1;LGSPEC2
A: hızlı değilmişiz
Q: Inflect the tur word 'becerikli' with the morphological tags of ADJ;DECL;PL;1;NEG;PRS;LGSPEC1
A: becerikli değiliz
Q: Inflect the tur word 'küçük' with the morphological tags of ADJ;DECL;PL;1;NEG;PST;LGSPEC1
A: küçük değildik
Q: Inflect the tur word 'adsız' with the morphological tags of ADJ;DECL;PL;1;POS;LGSPEC1;LGSPEC2
A: adsızmışız
Q: Inflect the tur word 'coşkusuz' with the morphological tags of ADJ;DECL;PL;1;POS;PRS;LGSPEC1
A: coşkusuzuz
Q: Inflect the tur word 'hızlı' with the morphological tags of ADJ;DECL;PL;1;POS;PST;LGSPEC1
A: hızlıdık
Q: Inflect the tur word 'yanlış' with the morphological tags of ADJ;DECL;PL;2;NEG;LGSPEC1;LGSPEC2
A: yanlış değilmişsiniz
Q: Inflect the tur word 'mantıklı' with the morphological tags of ADJ;DECL;PL;2;NEG;PRS;LGSPEC1
A: mantıklı değilsiniz
Q: Inflect the tur word 'umutsuz' with the morphological tags of ADJ;DECL;PL;2;NEG;PST;

In [127]:
grouped_tags.sample(n=1)

Unnamed: 0,input,output,tags
453,kɬәŋqsxi,kɬәŋqsxiˀin,ADJ;FOC;SG
706,iˀɬuq,iˀɬuqeˀn,ADJ;PL
596,fsakij,fsakij,ADJ;SG
590,iχɬ,iχɬčˀin,N
1131,eˀɲɬon,eˀɲɬonxˀal,N;ABL;SG
...,...,...,...
414,čʼuf,čʼufskinen,V;NO3S+BE3S;FIN;IND;PRS
1087,ilfs,ilfskinen,V;NO3S+BE3S;FIN;IND;PST
828,χaqaŋe,әnχaqaŋeqzuznen,V;NO3S+BE3S;IPFV;FIN;IND;PRS;CAUS
1011,fiva,fivateskinen,V;NO3S+BE3S;ITER;FIN;IND;PRS
