In [None]:
%%capture

# Bootstrap cell: move to /content, then either clone a fresh copy of the
# `mice` repository and enter it (ON_CLOUD) or step up to the parent directory.
# `%%capture` hides everything this cell prints.
%cd /content
# NOTE(review): `environ` is imported but not used anywhere in this cell.
from os import environ

# Set to False to skip the clone and run `%cd ..` instead.
ON_CLOUD = True

if ON_CLOUD:
    # NOTE(review): `getpass` and `quote` are imported but never called here,
    # and GITHUB_TOKEN is not assigned in this cell. Presumably the token was
    # meant to be read with getpass() and URL-quoted before cloning; as written
    # it must already exist as a Python or environment variable -- TODO confirm.
    from getpass import getpass
    from urllib.parse import quote
    # Remove any previous checkout so the clone below starts from a clean tree.
    ! if [ -d mice ]; then rm -Rf mice; fi
    # The token is embedded directly in the clone URL.
    ! git clone https://$GITHUB_TOKEN@github.com/Ali1858/mice.git
    %cd mice
else:
    %cd ..

In [None]:
%%capture

! pip3 install -r requirements.txt

In [None]:
import numpy as np
import pandas as pd

In [None]:
def _numeric_label_to_str(value):
    """Render a numeric label as an integer string; NaN becomes ""."""
    return "" if np.isnan(value) else str(int(value))


def read_edits(path):
    """Load a tab-separated edits file and normalise its prediction columns.

    Malformed lines are skipped with a warning instead of aborting the read.

    The columns ``new_pred``, ``orig_pred`` and ``contrast_pred`` are cleaned
    so that missing values become the empty string:

    * If ``new_pred`` was parsed as a float column (numeric labels with gaps),
      every numeric prediction column is converted to integer strings
      (``1.0`` -> ``"1"``, NaN -> ``""``). A prediction column that is not
      numeric only has its missing values filled.
    * Otherwise the columns are left as parsed, with missing values filled.

    Args:
        path: Path (or file-like object) handed to ``pandas.read_csv``.

    Returns:
        pandas.DataFrame with the cleaned prediction columns.
    """
    read_kwargs = {"sep": "\t", "lineterminator": "\n"}
    try:
        edits = pd.read_csv(path, on_bad_lines="warn", **read_kwargs)
    except TypeError:
        # pandas < 1.3 does not know `on_bad_lines`; use the older keywords.
        edits = pd.read_csv(path, error_bad_lines=False, warn_bad_lines=True, **read_kwargs)

    # Same gate as before: numeric-label handling is triggered by `new_pred`.
    numeric_labels = pd.api.types.is_float_dtype(edits['new_pred'])
    for col in ('new_pred', 'orig_pred', 'contrast_pred'):
        if numeric_labels and pd.api.types.is_numeric_dtype(edits[col]):
            edits[col] = edits[col].map(_numeric_label_to_str)
        else:
            # Assign the result back rather than using inplace=True on a
            # column selection, which is not guaranteed to update the frame.
            edits[col] = edits[col].fillna("")
    return edits

In [None]:
def get_best_edits(edits):
    """Keep only the top-ranked edit rows.

    MiCE writes every edit found in Stage 2, but only the smallest edit per
    input is evaluated; those are the rows whose ``sorted_idx`` equals 0.
    """
    is_top_ranked = edits['sorted_idx'].eq(0)
    return edits.loc[is_top_ranked]
    
def evaluate_edits(edits):
    """Compute and print summary metrics over the top-ranked edits.

    Only rows with ``sorted_idx == 0`` are scored. An edit counts as flipped
    when its ``new_pred`` equals its ``contrast_pred`` (both compared as
    strings).

    Args:
        edits: DataFrame with the columns ``sorted_idx``, ``data_idx``,
            ``minimality``, ``duration``, ``new_pred`` and ``contrast_pred``.

    Returns:
        dict with the keys ``num_total`` (distinct ``data_idx`` values),
        ``num_flipped``, ``flip_rate``, ``minimality`` (mean) and
        ``duration`` (mean). When there are no top-ranked rows, ``flip_rate``
        is NaN instead of raising ``ZeroDivisionError``.
    """
    best = edits[edits['sorted_idx'] == 0]
    flipped = best[best['new_pred'].astype(str) == best['contrast_pred'].astype(str)]
    num_total = best['data_idx'].nunique()
    num_flipped = len(flipped)
    # The flip rate is undefined without any evaluated input.
    flip_rate = num_flipped / num_total if num_total else float("nan")
    metrics = {
        "num_total": num_total,
        "num_flipped": num_flipped,
        "flip_rate": flip_rate,
        "minimality": best['minimality'].mean(),
        "duration": best['duration'].mean(),
    }
    for k, v in metrics.items():
        print(f"{k}: \t{round(v, 3)}")
    return metrics

In [None]:
def display_edits(row):
    """Print the minimality score and the original/edited text of one edit.

    ``row`` is indexed by the keys ``minimality``, ``orig_editable_seg`` and
    ``edited_editable_seg``. The minimality is shown rounded to 3 decimals.
    """
    # An HTML diff-highlighting variant (html_highlight_diffs) used to be
    # called here; the output is plain text only.
    report_lines = (
        f"MINIMALITY: \t{round(row['minimality'], 3)}",
        "",
        f"original sentence: {row['orig_editable_seg']}",
        f"edited sentence: {row['edited_editable_seg']}",
    )
    for text in report_lines:
        print(text)

def display_classif_results(rows, max_rows=1):
    """Print the label change and the edit text for the first rows of ``rows``.

    For each displayed row this prints the original label, the contrast label
    with its probability before the edit, the new label with the contrast
    probability after the edit, and then the text via ``display_edits``.

    Args:
        rows: DataFrame of edits with the columns ``orig_pred``,
            ``contrast_pred``, ``new_pred``, ``orig_contrast_prob_pred`` and
            ``new_contrast_prob_pred``, plus those read by ``display_edits``.
        max_rows: Number of leading rows to display. Defaults to 1, which is
            what the previous unconditional ``break`` enforced. Pass ``None``
            to display every row.
    """
    shown = rows if max_rows is None else rows.head(max_rows)
    for _, row in shown.iterrows():
        orig_contrast_prob_pred = round(row['orig_contrast_prob_pred'], 3)
        new_contrast_prob_pred = round(row['new_contrast_prob_pred'], 3)
        print("-----------------------")
        print(f"ORIG LABEL: \t{row['orig_pred']}")
        print(f"CONTR LABEL: \t{row['contrast_pred']} (Orig Pred Prob: {orig_contrast_prob_pred})")
        print(f"NEW LABEL: \t{row['new_pred']} (New Pred Prob: {new_contrast_prob_pred})")
        print("")
        display_edits(row)


## Stats for SOC approach

In [None]:
import numpy as np
import pandas as pd

# Placeholder: set this to the tab-separated edits file written by the SOC
# approach before running the cell.
csv_path = "" #path for csv edits for the soc approach

edits1 = read_edits(csv_path)
# Shape of the full edits table (every edit that was written).
print(edits1.shape)
# Keep only the top-ranked edit rows (sorted_idx == 0).
edits1 = get_best_edits(edits1)
print(edits1.shape)
# Prints the summary metrics and also returns them as a dict.
metrics = evaluate_edits(edits1)

(253, 16)
(40, 16)
num_total: 	40
num_flipped: 	31
flip_rate: 	0.775
minimality: 	2.447
duration: 	1031.325


  after removing the cwd from sys.path.


In [None]:


# Placeholder path; presumably the same SOC edits file as in the stats cell
# of this section -- TODO confirm.
csv_path = ""
edits1 = read_edits(csv_path)
edits1 = get_best_edits(edits1)
# NOTE(review): no random_state is passed, so a different edit is drawn on
# every run and the shown example is not reproducible.
random_rows = edits1.sample(1) # randomly choose one edit to display
display_classif_results(random_rows)
# Keep the sampled original sentence; the last cell of the notebook filters
# its own edits on `sent` to show the same input.
sent = random_rows["orig_editable_seg"].values[0]


-----------------------
ORIG LABEL: 	misc
CONTR LABEL: 	comp (Orig Pred Prob: 0.089)
NEW LABEL: 	comp (New Pred Prob: 0.698)

MINIMALITY: 	0.378

original sentence: For sale: 030 direct slot adapter card for the mac iisi (with a mac coprocessor on it as well). If this what you are looking for. Let me know, email an offer as well if you are interested. 
edited sentence:  I have sale:  A direct -order adapter card  to my mac iisi , can I install a mac coprocessor on it as  well as you are  looking for.  Let me  know, email us as well if you are interested.  


  after removing the cwd from sys.path.


## Grad-based approach

In [None]:
# Placeholder: set this to the tab-separated edits file written by the grad
# approach before running the cell.
csv_path = "" #path for csv edits for the grad approach
edits1 = read_edits(csv_path)
# Shape of the full edits table (every edit that was written).
print(edits1.shape)
# Keep only the top-ranked edit rows (sorted_idx == 0).
edits1 = get_best_edits(edits1)
# Drop rows whose original text segment is missing.
edits1 = edits1[edits1['orig_editable_seg'].notna()]
print(edits1.shape)
# Prints the summary metrics and also returns them as a dict.
metrics = evaluate_edits(edits1)

(1860, 16)
(40, 16)
num_total: 	40
num_flipped: 	39
flip_rate: 	0.975
minimality: 	0.636
duration: 	64.118


  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


In [None]:



# Placeholder path; presumably the grad-approach edits file -- TODO confirm.
csv_path = "" 
print("imdbgrad")
edits1 = read_edits(csv_path)
edits1 = get_best_edits(edits1)

# Keep only the edit whose original sentence equals `sent`, the sentence
# sampled in the SOC example cell, so both approaches are shown on the same
# input. This cell therefore only works after that SOC cell has been run.
edits1 = edits1[edits1["orig_editable_seg"] == sent] # same sentence as the SOC example
display_classif_results(edits1)


imdbgrad
-----------------------
ORIG LABEL: 	misc
CONTR LABEL: 	comp (Orig Pred Prob: 0.089)
NEW LABEL: 	comp (New Pred Prob: 0.827)

MINIMALITY: 	0.222

original sentence: For sale: 030 direct slot adapter card for the mac iisi (with a mac coprocessor on it as well). If this what you are looking for. Let me know, email an offer as well if you are interested. 
edited sentence: For  the input:  Default for the  image iisi (with a v1or on it as well).  If this what you are looking for.  Let me know, email an offer as well if you are interested.  


  after removing the cwd from sys.path.
