In [1]:
from datasets import load_dataset

data = load_dataset("universal_dependencies", "ar_padt")

Downloading builder script:   0%|          | 0.00/87.8k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.33M [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/191k [00:00<?, ?B/s]

Downloading and preparing dataset universal_dependencies/ar_padt to /home/zaid/.cache/huggingface/datasets/universal_dependencies/ar_padt/2.7.0/1ac001f0e8a0021f19388e810c94599f3ac13cc45d6b5b8c69f7847b2188bdf7...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/8.04M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.03M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/6075 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/909 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/680 [00:00<?, ? examples/s]

Dataset universal_dependencies downloaded and prepared to /home/zaid/.cache/huggingface/datasets/universal_dependencies/ar_padt/2.7.0/1ac001f0e8a0021f19388e810c94599f3ac13cc45d6b5b8c69f7847b2188bdf7. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
data

DatasetDict({
    train: Dataset({
        features: ['idx', 'text', 'tokens', 'lemmas', 'upos', 'xpos', 'feats', 'head', 'deprel', 'deps', 'misc'],
        num_rows: 6075
    })
    validation: Dataset({
        features: ['idx', 'text', 'tokens', 'lemmas', 'upos', 'xpos', 'feats', 'head', 'deprel', 'deps', 'misc'],
        num_rows: 909
    })
    test: Dataset({
        features: ['idx', 'text', 'tokens', 'lemmas', 'upos', 'xpos', 'feats', 'head', 'deprel', 'deps', 'misc'],
        num_rows: 680
    })
})

In [3]:
TAGS = [
    "NOUN",
    "PUNCT",
    "ADP",
    "NUM",
    "SYM",
    "SCONJ",
    "ADJ",
    "PART",
    "DET",
    "CCONJ",
    "PROPN",
    "PRON",
    "X",
    "_",  # we need to drop this class is it is meant to refer to unsplit words. For instance, the word وكانت is split into و and كانت but it is assigned this class as a whole.
    "ADV",
    "INTJ",
    "VERB",
    "AUX",
]
tag_names_to_labels = {tag_name: tag_label for tag_label, tag_name in enumerate(TAGS)}
tag_labels_to_names = {
    tag_label: tag_name for tag_name, tag_label in tag_names_to_labels.items()
}


In [5]:
# prompt without fewshots
sys_msg = """
I wish you can generate a table of Arabic POS tags following Universal Dependencies tagset in the following format:
TOKEN:POS
Please note that I tokenized the sentence for you. Please do not change, add, combine, merge or remove any of these tokens such as ب and ه. Please consider punctuation marks as separate tokens, always. Split them as two separate tokens if they come together and classify each of them independently.
Please give me the generated table and that is it. No further discussion, explanation or extrapolation required.
""".strip()


# prompt with fewshots
# sys_msg_with_fewshots = lambda fewshots: """
# I wish you can generate a table of Arabic POS tags following Universal Dependencies tagset in the following format:
# WORD  |  POS
# Please do not forget to split punctuation marks such as quotes and classify them.
# Please, also, do not forget to split words that have CCONJ tokens such as و and ف.
# Please give me the generated table and that is it. No further discussion, explanation or extrapolation required.
# here are some examples to consider:
# """+'\n'.join(f"""
# sentence: {sentence}
# tags:
# {tags}""" for (sentence,tags) in fewshots)+"""

# Now, My sentence is:
# {input_sentence}
# """


In [6]:
def construct_tags_map_str(sentence_tokens, tags):
    assert len(sentence_tokens)==len(tags), "lengths of the tokens and its tags does not match!"
    tags_map = ""
    for word, tag_label in zip(sentence_tokens, tags):
        if tag_label == 13:  # see why in the comment in TAGS list
            continue
        tags_map += f"{word}:{tag_labels_to_names[tag_label]}"
        tags_map += "\n"
    return tags_map

In [6]:
# testing fewshots prompt
# sentences = [
#     'السلام عليكم ورحمة الله',
#     'كيف الحال'
# ]
# fewshots = [(
#     sentence,
#     construct_tags_map_str(sentence,[random.choice(range(len(TAGS))) for _ in sentence.split()])
# )for sentence in sentences]
# print(sys_msg_with_fewshots(fewshots).format(input_sentence="وكانت خسائر المجموعة الاسبانية الرسمية تراكمت في السنوات العشر الاخيرة لتبلغ 920 مليون يورو"))


In [7]:
def create_chat_prompt(input_text):
    return [
        {"role": "system", "content": sys_msg},
        {"role": "user", "content": input_text},
    ]


def create_chat_example(sentence, tags):
    return [
        {"role": "system", "content": sentence, "name": "example_user"},
        {"role": "system", "content": tags, "name": "example_assistant"},
    ]


In [8]:
!mkdir -p ../registry/data/padt

In [9]:
small_samples_data = data["train"].filter(lambda example: len(example['tokens']) < 100)
small_samples_data

Filter:   0%|          | 0/6075 [00:00<?, ? examples/s]

Dataset({
    features: ['idx', 'text', 'tokens', 'lemmas', 'upos', 'xpos', 'feats', 'head', 'deprel', 'deps', 'misc'],
    num_rows: 5774
})

In [10]:
small_samples_data = data['train'].to_pandas()
small_samples_data = small_samples_data.loc[small_samples_data['tokens'].map(len) < 50]
small_samples_data

Unnamed: 0,idx,text,tokens,lemmas,upos,xpos,feats,head,deprel,deps,misc
0,afp.20000715.0075:p1u1,برلين ترفض حصول شركة اميركية على رخصة تصنيع دب...,"[برلين, ترفض, حصول, شركة, اميركية, على, رخصة, ...","[بَرلِين, رَفَض, حُصُول, شَرِكَة, أَمِيرِكِيّ,...","[12, 16, 0, 0, 6, 2, 0, 0, 0, 1, 12, 1, 6]","[X---------, VIIA-3FS--, N------S4R, N------S2...","[{'Foreign': 'Yes'}, {'Aspect': 'Imp', 'Gender...","[2, 0, 2, 3, 4, 7, 3, 7, 8, 11, 9, 11, 9]","[nsubj, root, obj, nmod, amod, case, obl:arg, ...","[[('nsubj', 2)], [('root', 0)], [('obj', 2)], ...","[{'Vform': 'بَرلِين', 'Gloss': 'Berlin', 'Root..."
2,afp.20000715.0075:p3u1,وفي نيسان/ابريل الماضي، تخلت الدولة الاسبانية ...,"[وفي, و, في, نيسان, /, ابريل, الماضي, ،, تخلت,...","[_, وَ, فِي, نِيسَان, /, أَبرِيل, مَاضِي, ،, ت...","[13, 9, 2, 0, 1, 0, 6, 1, 16, 0, 6, 2, 0, 1, 1...","[None, C---------, P---------, N------S2I, G--...","[None, None, {'AdpType': 'Prep'}, {'Case': 'Ge...","[None, 0, 3, 8, 5, 3, 3, 6, 1, 8, 9, 12, 8, 15...","[_, root, case, obl, punct, appos, amod, punct...","[None, [('root', 0)], [('case', 3)], [('obl:فِ...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':..."
3,afp.20000715.0075:p4u1,وكانت خسائر المجموعة الاسبانية الرسمية تراكمت ...,"[وكانت, و, كانت, خسائر, المجموعة, الاسبانية, ا...","[_, وَ, كَان, خَسَارَة, مَجمُوعَة, إِسبَانِيّ,...","[13, 9, 16, 0, 0, 6, 6, 16, 2, 0, 3, 6, 13, 9,...","[None, C---------, VP-A-3FS--, N------P1R, N--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Fem...","[None, 0, 1, 2, 3, 4, 3, 2, 9, 7, 9, 9, None, ...","[_, root, parataxis, nsubj, nmod, amod, amod, ...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':..."
4,afp.20000715.0075:p5u1,واشارت صحيفة الاحد الى ان المستشار شرودر يعتبر...,"[واشارت, و, أشارت, صحيفة, الاحد, الى, ان, المس...","[_, وَ, أَشَار, صَحِيفَة, أَحَد, إِلَى, أَنَّ,...","[13, 9, 16, 0, 0, 2, 5, 0, 12, 16, 5, 0, 0, 6,...","[None, C---------, VP-A-3FS--, N------S1R, N--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Fem...","[None, 0, 1, 2, 3, 9, 9, 9, 7, 2, 19, 19, 11, ...","[_, root, parataxis, nsubj, nmod, mark, mark, ...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':..."
5,afp.20000715.0075:p6u1,وتعذر لمتحدثة باسم وزارة الدفاع الالمانية ان ت...,"[وتعذر, و, تعذر, لمتحدثة, ل, متحدثة, باسم, ب, ...","[_, وَ, تَعَذَّر, _, لِ, مُتَحَدِّثَة, _, بِ, ...","[13, 9, 16, 13, 2, 0, 13, 2, 0, 0, 0, 6, 5, 16...","[None, C---------, VP-A-3MS--, None, P--------...","[None, None, {'Aspect': 'Perf', 'Gender': 'Mas...","[None, 0, 1, None, 4, 2, None, 7, 5, 4, 7, 7, ...","[_, root, parataxis, _, case, obl:arg, _, case...","[None, [('root', 0)], [('parataxis', 1)], None...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':..."
...,...,...,...,...,...,...,...,...,...,...,...
6068,xinhua.20030511.0192:p1u1,تنسيق المواقف بين مصر وفلسطين قبيل زيارة باول ...,"[تنسيق, المواقف, بين, مصر, وفلسطين, و, فلسطين,...","[تَنسِيق, مَوقِف, بَينَ, مصر, _, وَ, فِلَسطِين...","[0, 0, 2, 12, 13, 9, 0, 2, 0, 12, 2, 0]","[N------S1R, N------P2D, PI------4-, U--------...","[{'Case': 'Nom', 'Definite': 'Cons', 'Number':...","[0, 1, 4, 1, None, 6, 4, 8, 1, 8, 11, 8]","[root, nmod, case, obl:arg, _, cc, conj, case,...","[[('root', 0)], [('nmod:gen', 1)], [('case', 4...","[{'Vform': 'تَنسِيقُ', 'Gloss': 'coordination,..."
6069,xinhua.20030511.0192:p2u1,القاهرة 11 مايو,"[القاهرة, 11, مايو]","[قَاهِرَة, 11, مَايُو]","[0, 3, 0]","[N------S1D, Q---------, N------S2I]","[{'Case': 'Nom', 'Definite': 'Def', 'Number': ...","[0, 1, 2]","[root, dep, nmod]","[[('root', 0)], [('dep', 1)], [('nmod:gen', 2)]]","[{'Vform': 'اَلقَاهِرَةُ', 'Gloss': 'Cairo', '..."
6072,xinhua.20030511.0192:p5u1,واكد شعث فى الاتصال على ضرورة قبول الجانب الإس...,"[واكد, و, أكد, شعث, فى, الاتصال, على, ضرورة, ق...","[_, وَ, أَكَّد, شعث, فِي, اِتِّصَال, عَلَى, ضَ...","[13, 9, 16, 12, 2, 0, 2, 0, 0, 0, 6, 0, 12, 0,...","[None, C---------, VP-A-3MS--, U---------, P--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Mas...","[None, 0, 1, 2, 5, 2, 7, 2, 7, 8, 9, 8, 13, 8,...","[_, root, parataxis, nsubj, case, obl, case, o...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':..."
6073,xinhua.20030511.0192:p6u1,وطالب باول اليوم الحكومة الفلسطينية الجديدة بم...,"[وطالب, و, طالب, باول, اليوم, الحكومة, الفلسطي...","[_, وَ, طَالَب, بَاوِل, يَوم, حُكُومَة, فِلَسط...","[13, 9, 16, 12, 0, 0, 6, 6, 13, 2, 0, 0, 6, 13...","[None, C---------, VP-A-3MS--, X---------, N--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Mas...","[None, 0, 1, 2, 2, 2, 5, 5, None, 9, 2, 9, 10,...","[_, root, parataxis, nsubj, obl, obj, amod, am...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':..."


In [11]:
dev_df = small_samples_data
dev_df["sample"] = dev_df.apply(
    lambda x: create_chat_example(
        sentence=' '.join([token for token,tag in zip(x["tokens"],x["upos"]) if tag != 13]),
        tags=construct_tags_map_str(sentence_tokens=x["tokens"], tags=x["upos"]),
    ),
    axis=1,
)
dev_df[["sample"]].to_json(
    "../registry/data/padt/few_shot.jsonl",
    lines=True,
    orient="records",
    force_ascii=False,
)

test_df = data["test"].to_pandas()

test_df["input"] = test_df.apply(
    lambda row: create_chat_prompt(
        input_text=' '.join([token for token,tag in zip(row["tokens"],row["upos"]) if tag != 13])
    ),
    axis=1,
)
test_df["ideal"] = test_df.apply(
    lambda row: construct_tags_map_str(
        sentence_tokens=row["tokens"],
        tags=row["upos"],
    ),
    axis=1,
)
test_df[["input", "ideal"]].to_json(
    "../registry/data/padt/samples.jsonl",
    lines=True,
    orient="records",
    force_ascii=False,
)


In [15]:
test_df

Unnamed: 0,idx,text,tokens,lemmas,upos,xpos,feats,head,deprel,deps,misc,input,ideal
0,assabah.20041005.0017:p1u1,سوريا: تعديل وزاري واسع يشمل 8 حقائب,"[سوريا, :, تعديل, وزاري, واسع, يشمل, 8, حقائب]","[سُورِيَا, :, تَعدِيل, وِزَارِيّ, وَاسِع, شَمِ...","[12, 1, 0, 6, 6, 16, 3, 0]","[X---------, G---------, N------S1I, A-----MS1...","[{'Foreign': 'Yes'}, None, {'Case': 'Nom', 'De...","[0, 1, 6, 3, 3, 1, 6, 7]","[root, punct, nsubj, amod, amod, parataxis, ob...","[[('root', 0)], [('punct', 1)], [('nsubj', 6)]...","[{'SpaceAfter': 'No', 'Vform': 'سُورِيَا', 'Gl...","[{'role': 'system', 'content': 'I wish you can...",سوريا:X\n::PUNCT\nتعديل:NOUN\nوزاري:ADJ\nواسع:...
1,assabah.20041005.0017:p2u1,دمشق (وكالات الانباء) - اجرى الرئيس السوري بشا...,"[دمشق, (, وكالات, الانباء, ), -, اجرى, الرئيس,...","[دمشق, (, وِكَالَة, نَبَأ, ), -, أَجرَى, رَئِي...","[12, 1, 0, 0, 1, 1, 16, 0, 6, 12, 12, 0, 6, 6,...","[U---------, G---------, N------P1R, N------P2...","[None, None, {'Case': 'Nom', 'Definite': 'Cons...","[0, 3, 1, 3, 3, 1, 1, 7, 8, 11, 8, 7, 12, 12, ...","[root, punct, dep, nmod, punct, punct, advcl, ...","[[('root', 0)], [('punct', 3)], [('dep', 1)], ...","[{'Vform': 'دمشق', 'Root': 'OOV', 'Translit': ...","[{'role': 'system', 'content': 'I wish you can...",دمشق:X\n(:PUNCT\nوكالات:NOUN\nالانباء:NOUN\n):...
2,assabah.20041005.0017:p3u1,واضافت المصادر ان مهدي دخل الله رئيس تحرير صحي...,"[واضافت, و, أضافت, المصادر, ان, مهدي, دخل, الل...","[_, وَ, أَضَاف, مَصدَر, أَنَّ, مهدي, دخل, الله...","[13, 9, 16, 0, 5, 12, 12, 12, 0, 0, 0, 0, 0, 1...","[None, C---------, VP-A-3FS--, N------P1D, C--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Fem...","[None, 0, 1, 2, 16, 6, 16, 6, 6, 8, 9, 10, 11,...","[_, root, parataxis, nsubj, mark, nmod, nsubj,...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':...","[{'role': 'system', 'content': 'I wish you can...",و:CCONJ\nأضافت:VERB\nالمصادر:NOUN\nان:SCONJ\nم...
3,assabah.20041005.0017:p4u1,وذكرت وكالة الانباء السورية ان التعديل شمل ثما...,"[وذكرت, و, ذكرت, وكالة, الانباء, السورية, ان, ...","[_, وَ, ذَكَر, وِكَالَة, نَبَأ, سُورِيّ, أَنَّ...","[13, 9, 16, 0, 0, 6, 5, 0, 16, 3, 0, 13, 2, 11...","[None, C---------, VP-A-3FS--, N------S1R, N--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Fem...","[None, 0, 1, 2, 3, 3, 8, 8, 2, 8, 9, None, 12,...","[_, root, parataxis, nsubj, nmod, amod, mark, ...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':...","[{'role': 'system', 'content': 'I wish you can...",و:CCONJ\nذكرت:VERB\nوكالة:NOUN\nالانباء:NOUN\n...
4,assabah.20041005.0017:p5u1,وعين اللواء كنعان الذي كان رئيسا لجهاز الامن ا...,"[وعين, و, عين, اللواء, كنعان, الذي, كان, رئيسا...","[_, وَ, عَيَّن, اللواء, كنعان, الذي, كَان, رَئ...","[13, 9, 16, 12, 12, 12, 17, 0, 13, 2, 0, 0, 6,...","[None, C---------, VP-P-3MS--, U---------, U--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Mas...","[None, 0, 1, 2, 3, 3, 7, 3, None, 9, 7, 9, 10,...","[_, root, parataxis, nsubj:pass, nmod, cc, cop...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':...","[{'role': 'system', 'content': 'I wish you can...",و:CCONJ\nعين:VERB\nاللواء:X\nكنعان:X\nالذي:X\n...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
675,xinhua.20030511.0181:p10u1,وقتل مسلحان فى مواجهة مع فريق مشرتك من الشرطة ...,"[وقتل, و, قتل, مسلحان, فى, مواجهة, مع, فريق, م...","[_, وَ, قَتَل, مُسَلَّح, فِي, مُوَاجِه, مَعَ, ...","[13, 9, 16, 0, 2, 6, 2, 0, 12, 2, 0, 13, 9, 0,...","[None, C---------, VP-P-3MS--, N------D1I, P--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Mas...","[None, 0, 1, 2, 5, 2, 7, 5, 7, 10, 7, None, 12...","[_, root, parataxis, nsubj:pass, case, obl, ca...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':...","[{'role': 'system', 'content': 'I wish you can...",و:CCONJ\nقتل:VERB\nمسلحان:NOUN\nفى:ADP\nمواجهة...
676,xinhua.20030511.0181:p11u1,ولقى مسلحان اخران مصرعهما فى اشتباك مع قوات ال...,"[ولقى, و, لقي, مسلحان, اخران, مصرعهما, مصرع, ه...","[_, وَ, لَقِي, مُسَلَّح, آخَر, _, مَصرَع, هُوَ...","[13, 9, 16, 0, 6, 13, 0, 11, 2, 12, 2, 0, 0, 2...","[None, C---------, VP-A-3MS--, N------D1I, A--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Mas...","[None, 0, 1, 2, 3, None, 2, 5, 8, 2, 10, 8, 10...","[_, root, parataxis, nsubj, amod, _, obj, nmod...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':...","[{'role': 'system', 'content': 'I wish you can...",و:CCONJ\nلقي:VERB\nمسلحان:NOUN\nاخران:ADJ\nمصر...
677,xinhua.20030511.0181:p12u1,وقتل احد مسلحى جماعة جيش محمد فى اشتباك مع قوا...,"[وقتل, و, قتل, احد, مسلحى, جماعة, جيش, محمد, ف...","[_, وَ, قَتَل, احد, مُسَلَّح, جَمَاعَة, جَيش, ...","[13, 9, 16, 12, 0, 0, 0, 0, 2, 12, 2, 0, 0, 2,...","[None, C---------, VP-P-3MS--, U---------, N--...","[None, None, {'Aspect': 'Perf', 'Gender': 'Mas...","[None, 0, 1, 2, 3, 4, 5, 6, 9, 2, 11, 9, 11, 1...","[_, root, parataxis, nsubj:pass, nmod, nmod, n...","[None, [('root', 0)], [('parataxis', 1)], [('n...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':...","[{'role': 'system', 'content': 'I wish you can...",و:CCONJ\nقتل:VERB\nاحد:X\nمسلحى:NOUN\nجماعة:NO...
678,xinhua.20030511.0181:p13u1,وفى الليلة الماضية، قتل مدني فى قرية مارهوت بم...,"[وفى, و, في, الليلة, الماضية, ،, قتل, مدني, فى...","[_, وَ, فِي, لَيلَة, مَاضِي, ،, قَتَل, مَدَنِي...","[13, 9, 2, 0, 6, 1, 16, 6, 2, 0, 12, 13, 2, 0,...","[None, C---------, P---------, N------S2D, A--...","[None, None, {'AdpType': 'Prep'}, {'Case': 'Ge...","[None, 0, 3, 6, 3, 4, 1, 6, 9, 6, 9, None, 12,...","[_, root, case, obl, amod, punct, parataxis, n...","[None, [('root', 0)], [('case', 3)], [('obl:فِ...","[None, {'Vform': 'وَ', 'Gloss': 'and', 'Root':...","[{'role': 'system', 'content': 'I wish you can...",و:CCONJ\nفي:ADP\nالليلة:NOUN\nالماضية:ADJ\n،:P...


In [12]:
import os
os.environ["OPENAI_API_KEY"] = "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"
os.environ["OpenAI-Organization"] = "org-YDVLZUQaNWf6UEhpr9txK66z"
os.environ["EVALS_THREADS"]="3"
os.environ["EVALS_THREAD_TIMEOUT"]="600"

In [13]:
!ls

ajgt.ipynb  ar-constants  metrec.ipynb	tashkeela.ipynb
apb.ipynb   easc.ipynb	  padt.ipynb	un_v1.ipynb


In [13]:
!ls ../registry/data/padt

few_shot.jsonl	samples.jsonl


In [14]:
pos_tagging_task_specs = """
padt:
    id: padt.test.v1
    metrics: [accuracy]
    description: Evaluate Arabic POS tagging
# Define the eval
padt.test.v1:
  # Specify the class name as a dotted path to the module and class
  class: evals.elsuite.pos_tagging:POSTagger
  args:
    samples_jsonl: padt/samples.jsonl
    few_shot_jsonl: padt/few_shot.jsonl # path to few shots file
    num_few_shot: 0 # max few shots to use

""".strip()
with open("../registry/evals/padt.yaml", "w") as file:
    file.write(pos_tagging_task_specs)


In [16]:
record_path = "../evals/eval_results_gpt_4_0/padt.jsonl"
!oaieval gpt-4-0314 padt --record_path {record_path} --seed 41 --modelspec_extra_options temperature=0.0 --max_samples 680

[2023-05-24 21:32:44,067] [registry.py:156] Loading registry from /home/zaid/evals/evals/registry/evals
[2023-05-24 21:32:44,178] [registry.py:156] Loading registry from /home/zaid/.evals/evals
[2023-05-24 21:32:45,162] [oaieval.py:213] [1;35mRun started: 2305241832454YODNQLR[0m
[2023-05-24 21:32:45,173] [data.py:75] Fetching padt/samples.jsonl
[2023-05-24 21:32:45,182] [eval.py:32] Evaluating 680 samples
[2023-05-24 21:32:45,186] [eval.py:152] Running in threaded mode with 3 threads!
  1%|▌                                       | 10/680 [01:35<1:04:55,  5.81s/it][2023-05-24 21:34:20,679] [_common.py:105] Backing off openai_chat_completion_create_retrying(...) for 0.9s (openai.error.RateLimitError: Rate limit reached for default-gpt-4 in organization org-YDVLZUQaNWf6UEhpr9txK66z on tokens per min. Limit: 40000 / min. Please try again in 1ms. Contact us through our help center at help.openai.com if you continue to have issues.)
[2023-05-24 21:34:21,907] [_common.py:105] Backing off op

In [63]:
import pandas as pd

with open(record_path, "r") as f:
    events_df = pd.read_json(f, lines=True)


In [64]:
events_df[events_df["final_report"].notnull()]["final_report"]


2001    {'accuracy': 0.7591028108921981}
Name: final_report, dtype: object

In [65]:
# matches_df = events_df[events_df.type == "tagging"].reset_index(drop=True)
# matches_df = matches_df.join(pd.json_normalize(matches_df.data))
# matches_df.accuracy.value_counts().plot.bar(
#     title="Correctness of generated answers", xlabel="Correctness", ylabel="Count"
# )

In [66]:
for i, r in pd.json_normalize(events_df[events_df.type == "sampling"].data).iterrows():
    print(f"Prompt: {r.prompt[-1]}")
    print(f"Sampled: {r.sampled}")
    print(f"{i}", "--" * 25)


Prompt: {'role': 'user', 'content': 'سوريا : تعديل وزاري واسع يشمل 8 حقائب'}
Sampled: سوريا:NOUN
: :PUNCT
تعديل:NOUN
وزاري:ADJ
واسع:ADJ
يشمل:VERB
8:NUM
حقائب:NOUN
0 --------------------------------------------------
Prompt: {'role': 'user', 'content': 'و ذكرت وكالة الانباء السورية ان التعديل شمل ثماني حقائب بين ها وزارتا الداخلية و الاقتصاد .'}
Sampled: و:CCONJ
ذكرت:VERB
وكالة:NOUN
الانباء:NOUN
السورية:ADJ
ان:SCONJ
التعديل:NOUN
شمل:VERB
ثماني:NUM
حقائب:NOUN
بين:ADP
ها:PRON
وزارتا:NOUN
الداخلية:NOUN
و:CCONJ
الاقتصاد:NOUN
.:PUNCT
1 --------------------------------------------------
Prompt: {'role': 'user', 'content': 'دمشق ( وكالات الانباء ) - اجرى الرئيس السوري بشار الاسد تعديلا حكومياً واسعا تم ب موجب ه إقالة وزيري الداخلية و الإعلام عن منصبي ها في حين ظل محمد ناجي العطري رئيساً ل الحكومة .'}
Sampled: دمشق:NOUN
(:PUNCT
وكالات:NOUN
ال:DET
انباء:NOUN
):PUNCT
-:PUNCT
اجرى:VERB
ال:DET
رئيس:NOUN
ال:DET
سوري:ADJ
بشار:PROPN
الاسد:PROPN
تعديلا:NOUN
حكومياً:ADV
واسعا:ADJ
تم:VERB
ب:PUNCT
موجب:NO

In [67]:
for i, r in pd.json_normalize(events_df[events_df.type == "tagging"].data).iterrows():
    print(f"Prompt: {r.prompt[-1]}")
    print(f"Sampled: {r.sampled}")
    print(f"{i}", "--" * 25)

AttributeError: 'Series' object has no attribute 'prompt'

In [68]:
len(sorted([int(i.split('.')[-1]) for i in events_df[events_df.type == "tagging"]['sample_id']]))

680

In [69]:
for  j in sorted([int(i.split('.')[-1]) for i in events_df[events_df.type == "tagging"]['sample_id']]):
    print(j)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [51]:
tagging_events = events_df[events_df.type == "tagging"]
tagging_events[tagging_events['sample_id']=='padt.test.122'].data[408]

{'accuracy': 0.872549019607843,
 'sampled': 'يبدو:VERB\nأن:SCONJ\nالقدر:NOUN\nأراد:VERB\nأن:SCONJ\nيظل:VERB\nالطيار:NOUN\nالمصري:ADJ\nعلي:ADP\nمراد:NOUN\nرمزا:NOUN\nل:ADP\nعداء:NOUN\nالمصريين:NOUN\nل:ADP\nكل:DET\nما:PRON\nهو:PRON\nإسرائيلي:ADJ\n.:PUNCT\nف:SCONJ\nالضجة:NOUN\nالتي:PRON\nأثار:VERB\nها:PRON\nمراد:NOUN\nلم:NOUN\nتنته:VERB\nبعد:PUNCT\n،:PUNCT\nعلى:ADP\nرغم:ADP\nمرور:NOUN\nشهور:NOUN\nعدة:ADJ\nعلى:ADP\nواقعة:NOUN\nرفض:NOUN\nه:PRON\nقيام:NOUN\nجنود:NOUN\nإسرائيليين:ADJ\nب:ADP\nتفتيش:NOUN\nطائرة:NOUN\nتابعة:ADJ\nل:ADP\nشركة:NOUN\n»:PUNCT\nمصر:NOUN\nل:ADP\nالطيران:NOUN\n«:PUNCT\nحطّ:VERB\nب:ADP\nها:PRON\nفي:ADP\nمطار:NOUN\nغزة:NOUN\nفي:ADP\nايلول:NOUN\n(:PUNCT\nسبتمبر:NOUN\n):PUNCT\nمن:ADP\nالعام:NOUN\nالماضي:ADJ\n.:PUNCT\nو:CCONJ\nتحول:VERB\nالطيار:NOUN\nالى:ADP\nبطل:NOUN\nشعبي:ADJ\nبعدما:SCONJ\nأخضعت:VERB\nه:PRON\nشركة:NOUN\n»:PUNCT\nمصر:NOUN\nل:ADP\nالطيران:NOUN\n«:PUNCT\nل:ADP\nالتحقيق:NOUN\nو:CCONJ\nأوقفت:VERB\nه:PRON\nعن:ADP\nالعمل:NOUN\nثم:ADV\nأحالت:VERB\nه:PRON\nعلى:ADP\

In [53]:
tagging_events[tagging_events['sample_id']=='padt.test.122'].data[432]

{'accuracy': 0.833333333333333,
 'sampled': 'يبدو:VERB\nأن:SCONJ\nالقدر:NOUN\nأراد:VERB\nأن:SCONJ\nيظل:VERB\nالطيار:NOUN\nالمصري:ADJ\nعلي:ADP\nمراد:NOUN\nرمزا:NOUN\nل:ADP\nعداء:NOUN\nالمصريين:NOUN\nل:ADP\nكل:DET\nما:PRON\nهو:PRON\nإسرائيلي:ADJ\n.:PUNCT\nف:SCONJ\nالضجة:NOUN\nالتي:PRON\nأثار:VERB\nها:PRON\nمراد:NOUN\nلم:NOUN\nتنتهي:VERB\nبعد:PUNCT\n،:PUNCT\nعلى:ADP\nرغم:ADP\nمرور:NOUN\nشهور:NOUN\nعدة:ADJ\nعلى:ADP\nواقعة:NOUN\nرفض:NOUN\nه:PRON\nقيام:NOUN\nجنود:NOUN\nإسرائيليين:ADJ\nب:PUNCT\nتفتيش:NOUN\nطائرة:NOUN\nتابعة:ADJ\nل:ADP\nشركة:NOUN\n»:PUNCT\nمصر:NOUN\nل:PUNCT\nالطيران:NOUN\n«:PUNCT\nحطّ:VERB\nب:PUNCT\nها:PRON\nفي:ADP\nمطار:NOUN\nغزة:NOUN\nفي:ADP\nايلول:NOUN\n(:PUNCT\nسبتمبر:NOUN\n):PUNCT\nمن:ADP\nالعام:NOUN\nالماضي:ADJ\n.:PUNCT\nو:CCONJ\nتحول:VERB\nالطيار:NOUN\nالى:ADP\nبطل:NOUN\nشعبي:ADJ\nبعدما:SCONJ\nأخضعت:VERB\nه:PRON\nشركة:NOUN\n»:PUNCT\nمصر:NOUN\nل:PUNCT\nالطيران:NOUN\n«:PUNCT\nل:ADP\nالتحقيق:NOUN\nو:CCONJ\nأوقفت:VERB\nه:PRON\nعن:ADP\nالعمل:NOUN\nثم:ADV\nأحالت:VERB\nه:PRON\

In [52]:
tagging_events[tagging_events['sample_id']=='padt.test.122']

Unnamed: 0,spec,run_id,event_id,sample_id,type,data,created_by,created_at
408,,230512061210P6ZP52PS,407.0,padt.test.122,tagging,"{'accuracy': 0.872549019607843, 'sampled': 'يب...",,2023-05-12 06:26:37.071275+00:00
432,,230512061210P6ZP52PS,431.0,padt.test.122,tagging,"{'accuracy': 0.833333333333333, 'sampled': 'يب...",,2023-05-12 06:28:08.192224+00:00
462,,230512061210P6ZP52PS,461.0,padt.test.122,tagging,"{'accuracy': 0.8382352941176471, 'sampled': 'ي...",,2023-05-12 06:30:00.874356+00:00
480,,230512061210P6ZP52PS,479.0,padt.test.122,tagging,"{'accuracy': 0.848039215686274, 'sampled': 'يب...",,2023-05-12 06:31:37.306904+00:00
508,,230512061210P6ZP52PS,507.0,padt.test.122,tagging,"{'accuracy': 0.848039215686274, 'sampled': 'يب...",,2023-05-12 06:33:20.730777+00:00
...,...,...,...,...,...,...,...,...
4555,,230512061210P6ZP52PS,4554.0,padt.test.122,tagging,"{'accuracy': 0.8137254901960781, 'sampled': 'ي...",,2023-05-12 13:24:55.759384+00:00
4564,,230512061210P6ZP52PS,4563.0,padt.test.122,tagging,"{'accuracy': 0.8382352941176471, 'sampled': 'ي...",,2023-05-12 13:26:31.404151+00:00
4573,,230512061210P6ZP52PS,4572.0,padt.test.122,tagging,"{'accuracy': 0.848039215686274, 'sampled': 'يب...",,2023-05-12 13:28:12.170796+00:00
4583,,230512061210P6ZP52PS,4582.0,padt.test.122,tagging,"{'accuracy': 0.8284313725490191, 'sampled': 'ي...",,2023-05-12 13:30:03.797965+00:00
