In [1]:
import os
import re
import json
import string
import pandas as pd

In [2]:
def postprocess_response(response):
    if "\n\n" in response:
        response=response.split("\n\n")[0]
    if "Q:" in response:
        response=response.split("Q:")[1]
    
    # extract the first word
    response = response.split(" ")[0]
    
    # remove punctuations in response
    response = response.translate(str.maketrans('', '', string.punctuation))

    # strip leading and trailing whitespaces
    response = response.strip()

    # lowercase
    response = response.lower()

    return response

def compute_exact_match(predictions, references):
    N_total=0
    N_correct=0
    for pred, ref in zip(predictions, references):
        if postprocess_response(pred) == postprocess_response(ref):
            
            N_correct+=1
        N_total+=1
    return N_correct/N_total * 100

In [3]:
def get_exact_match_from_json(json_path):
    with open(json_path, 'r') as f:
        data = json.load(f)
    predictions = [d['response'] for d in data]
    references = [d['target'] for d in data]
    return compute_exact_match(predictions, references)

In [17]:
n_shot=0
df=pd.DataFrame(columns=["n_errors", "add", "del", "sub", "jux", "mix"])
for i, n_errors in enumerate([0, 1, 2, 4, 8]):
    if i==0:
        em=get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_unperturbed.json")
        df.loc[i]=[n_errors]+[em]*5
    else:
        ems=[
            get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_grammatical_{error_type}_{n_errors}_errors.json")
            for error_type in ["add", "del", "sub", "jux", "mix"]
        ]
        df.loc[i]=[n_errors]+ems
# make n_errors column as type int
df["n_errors"]=df["n_errors"].astype(int)
df.set_index("n_errors", inplace=True)
df

Unnamed: 0_level_0,add,del,sub,jux,mix
n_errors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,16.536106,16.536106,16.536106,16.536106,16.536106
1,11.572426,12.840051,12.996724,11.373024,12.142145
2,8.965959,11.038314,10.888762,8.617006,10.205099
4,6.651474,8.6811,8.638371,6.174334,7.335137
8,4.764279,6.024783,5.996297,4.087737,4.970802


In [18]:
n_shot=1
df=pd.DataFrame(columns=["n_errors", "add", "del", "sub", "jux", "mix"])
for i, n_errors in enumerate([0, 1, 2, 4, 8]):
    if i==0:
        em=get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_unperturbed.json")
        df.loc[i]=[n_errors]+[em]*5
    else:
        ems=[
            get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_grammatical_{error_type}_{n_errors}_errors.json")
            for error_type in ["add", "del", "sub", "jux", "mix"]
        ]
        df.loc[i]=[n_errors]+ems
# make n_errors column as type int
df["n_errors"]=df["n_errors"].astype(int)
df.set_index("n_errors", inplace=True)
df

Unnamed: 0_level_0,add,del,sub,jux,mix
n_errors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,55.768409,55.768409,55.768409,55.768409,55.768409
1,55.10611,54.920951,54.764279,54.906708,54.863979
2,54.144709,54.329868,54.408204,53.788634,54.137587
4,53.047999,53.389831,52.926933,52.321607,52.869962
8,51.018373,50.797607,50.833215,49.166785,50.299103


In [19]:
n_shot=2
df=pd.DataFrame(columns=["n_errors", "add", "del", "sub", "jux", "mix"])
for i, n_errors in enumerate([0, 1, 2, 4, 8]):
    if i==0:
        em=get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_unperturbed.json")
        df.loc[i]=[n_errors]+[em]*5
    else:
        ems=[
            get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_grammatical_{error_type}_{n_errors}_errors.json")
            for error_type in ["add", "del", "sub", "jux", "mix"]
        ]
        df.loc[i]=[n_errors]+ems
# make n_errors column as type int
df["n_errors"]=df["n_errors"].astype(int)
df.set_index("n_errors", inplace=True)
df

Unnamed: 0_level_0,add,del,sub,jux,mix
n_errors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,57.335137,57.335137,57.335137,57.335137,57.335137
1,56.274035,56.259792,56.067512,56.103119,56.288278
2,55.654465,55.483549,55.618858,55.013531,55.618858
4,54.10198,53.852728,54.130466,53.190429,53.923942
8,51.645065,52.058111,51.609457,49.971514,51.402934


In [20]:
n_shot=3
df=pd.DataFrame(columns=["n_errors", "add", "del", "sub", "jux", "mix"])
for i, n_errors in enumerate([0, 1, 2, 4, 8]):
    if i==0:
        em=get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_unperturbed.json")
        df.loc[i]=[n_errors]+[em]*5
    else:
        ems=[
            get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_grammatical_{error_type}_{n_errors}_errors.json")
            for error_type in ["add", "del", "sub", "jux", "mix"]
        ]
        df.loc[i]=[n_errors]+ems
# make n_errors column as type int
df["n_errors"]=df["n_errors"].astype(int)
df.set_index("n_errors", inplace=True)
df

Unnamed: 0_level_0,add,del,sub,jux,mix
n_errors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,57.72682,57.72682,57.72682,57.72682,57.72682
1,56.053269,56.480558,56.003418,55.832502,56.380857
2,55.348241,55.554764,55.39097,55.10611,55.184447
4,54.144709,54.828372,54.0094,53.19755,53.774391
8,51.609457,51.773252,51.680672,49.480131,51.053981


In [21]:
n_shot=4
df=pd.DataFrame(columns=["n_errors", "add", "del", "sub", "jux", "mix"])
for i, n_errors in enumerate([0, 1, 2, 4, 8]):
    if i==0:
        em=get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_unperturbed.json")
        df.loc[i]=[n_errors]+[em]*5
    else:
        ems=[
            get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_grammatical_{error_type}_{n_errors}_errors.json")
            for error_type in ["add", "del", "sub", "jux", "mix"]
        ]
        df.loc[i]=[n_errors]+ems
# make n_errors column as type int
df["n_errors"]=df["n_errors"].astype(int)
df.set_index("n_errors", inplace=True)
df

Unnamed: 0_level_0,add,del,sub,jux,mix
n_errors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,58.211081,58.211081,58.211081,58.211081,58.211081
1,56.217063,56.387979,56.551773,56.15297,56.387979
2,56.024783,55.67583,55.469306,55.227176,55.469306
4,54.137587,54.29426,54.522148,53.090728,54.073494
8,52.157812,52.485401,51.894317,49.943028,51.303233


In [22]:
n_shot=5
df=pd.DataFrame(columns=["n_errors", "add", "del", "sub", "jux", "mix"])
for i, n_errors in enumerate([0, 1, 2, 4, 8]):
    if i==0:
        em=get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_unperturbed.json")
        df.loc[i]=[n_errors]+[em]*5
    else:
        ems=[
            get_exact_match_from_json(f"responses/mistralai/Mistral-7B-v0.1/mmlu_{n_shot}_shot_grammatical_{error_type}_{n_errors}_errors.json")
            for error_type in ["add", "del", "sub", "jux", "mix"]
        ]
        df.loc[i]=[n_errors]+ems
# make n_errors column as type int
df["n_errors"]=df["n_errors"].astype(int)
df.set_index("n_errors", inplace=True)
df

Unnamed: 0_level_0,add,del,sub,jux,mix
n_errors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,57.933343,57.933343,57.933343,57.933343,57.933343
1,56.366614,56.34525,56.494801,56.480558,56.452072
2,55.846745,55.896596,55.640222,55.362484,55.789774
4,54.493662,54.223045,54.379718,53.062242,54.123344
8,52.122205,52.314485,51.880074,50.135308,51.652186
