In [1]:
import ast
import json
import os
import pickle
import re
import string
import subprocess
import urllib.request
from pathlib import Path

import nltk
import numpy as np
import pandas as pd

In [2]:
# Download the NLTK 'punkt' package; later cells call nltk.word_tokenize.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /home/philliph/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Load IMDB training data and split each sentence into candidate prompts

In [3]:
def get_segments(sentence, label, example_index, min_tokens=6, min_prompt_tokens=4):
    """Split one sentence into every (prompt, continuation) pair at token boundaries.

    The sentence is tokenized with ``nltk.word_tokenize``. For each split point
    ``i`` from ``min_prompt_tokens`` up to and including the token count, the
    prompt is the first ``i`` tokens joined by spaces and the continuation is a
    single space followed by the remaining tokens joined by spaces. The final
    split point therefore yields the whole tokenized sentence as the prompt
    and a continuation of just ``' '``.

    Parameters
    ----------
    sentence : str
        Text to split.
    label : scalar
        Label copied onto every returned row.
    example_index : scalar
        Identifier of the source sentence, copied onto every returned row.
    min_tokens : int, default 6
        Sentences with fewer tokens than this are skipped.
    min_prompt_tokens : int, default 4
        Number of tokens in the shortest prompt.

    Returns
    -------
    pandas.DataFrame or None
        One row per split point with columns ``example_index``, ``prompt``,
        ``continuation``, ``original`` and ``label``; ``None`` when the
        sentence has fewer than ``min_tokens`` tokens.
    """
    tokens = nltk.word_tokenize(sentence)
    if len(tokens) < min_tokens:
        return None

    split_points = range(min_prompt_tokens, len(tokens) + 1)
    prompts = [' '.join(tokens[:i]) for i in split_points]
    continuations = [' ' + ' '.join(tokens[i:]) for i in split_points]

    # Scalars (example_index, original, label) are broadcast across all rows.
    return pd.DataFrame({'example_index' : example_index,
                         'prompt' : prompts,
                         'continuation' : continuations,
                         'original' : sentence,
                         'label' : label})

In [None]:
# Fetch the pickled IMDB sentence dataset, unless a local copy is already present.
imdb_pickle_path = Path("data/ds_imdb_sent.pkl")
# urlretrieve does not create missing directories, so make sure data/ exists first.
imdb_pickle_path.parent.mkdir(parents=True, exist_ok=True)
if not imdb_pickle_path.exists():
    urllib.request.urlretrieve("https://github.com/tapilab/aaai-2021-counterfactuals/raw/main/data/ds_imdb_sent.pkl", str(imdb_pickle_path))

In [None]:
class Counterfactual:
    """Bundle a training frame and a test frame under a short name.

    Attributes are ``moniker``, ``train`` and ``test``.

    NOTE(review): presumably this class has to exist before
    ``data/ds_imdb_sent.pkl`` is unpickled, since the loaded object is read
    through ``.train`` -- confirm against the pickle's origin.
    """

    def __init__(self, df_train, df_test, moniker):
        # Show the first training row; `display` is expected to be provided
        # by the notebook environment (it is not imported in this file).
        preview = df_train.head(1)
        display(preview)

        self.moniker = moniker
        self.train, self.test = df_train, df_test

In [4]:
# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load this pickle if you trust where it came from.
with open('data/ds_imdb_sent.pkl', 'rb') as f:
    imdb_sents = pickle.load(f)

dataset = {'text' : imdb_sents.train.text.values, 'label' : imdb_sents.train.label.values}

# Build the candidate prompts for every training sentence.
# Labels are remapped: -1 becomes 0 and anything else becomes 4
# (presumably the two extreme classes of the SST-5 scale used later -- confirm).
segment_frames = [
    get_segments(sentence, 0 if raw_label == -1 else 4, position)
    for position, (sentence, raw_label) in enumerate(zip(dataset['text'], dataset['label']))
]

# get_segments returns None for sentences it skips; pd.concat drops None entries.
# reset_index() without drop=True keeps each row's previous per-sentence
# position as an 'index' column.
segment_df = pd.concat(segment_frames).reset_index()

## Evaluate polarity of each candidate prompt using the self-explaining-NLP repository

#### Set paths to self-explaining-NLP repository and model files
You can download roberta-large from [here](https://huggingface.co/roberta-large/tree/main) and the checkpoints from [here](https://drive.google.com/drive/folders/1RV5OJSzN_7p-YkjkmAhq2vzhouZEtzSS?usp=sharing)

In [5]:
# Location of the self-explaining-NLP checkout, plus the model files inside it.
se_nlp_path = os.path.abspath('../Self_Explaining_Structures_Improve_NLP_Models')
roberta_large_path = os.path.join(se_nlp_path, "roberta-large")
# Raw string: "\=" is not a valid Python escape sequence, so a plain literal only
# works by accident (and warns on recent Python versions). The value is unchanged,
# so the backslashes remain part of the path text.
# NOTE(review): confirm the consumer of this path really expects the backslashes.
checkpoint_path = os.path.join(se_nlp_path, r"checkpoints/sst5_checkpoints/large/epoch\=6-valid_loss\=0.5880-valid_acc_end\=0.5909.ckpt")

#### Write candidate prompts for classifier

In [19]:
output_dir = os.path.join(se_nlp_path, 'imdb_sents_train')
Path(output_dir).mkdir(parents=True, exist_ok=True)

# One "<label>\t<prompt>" line per candidate prompt.
# The two columns are iterated directly: segment_df.iloc[i][...] would build
# a full row Series twice for every line written.
with open(os.path.join(output_dir, 'test.txt'), 'w') as fo:
    for prompt_label, prompt_text in zip(segment_df['label'], segment_df['prompt']):
        fo.write(str(prompt_label) + '\t' + prompt_text + '\n')

#### Predict polarity of candidate prompts using the SST-5 classifier

In [None]:
# Run the polarity-prediction script from inside the self-explaining-NLP checkout.
#
# The command is an argument list and shell=True is deliberately NOT used: on
# POSIX, a list combined with shell=True hands only the first item to the shell
# as the command, so the script would never receive its four arguments.
# The script name is still resolved through PATH, as it was with the shell; if
# it cannot be found, subprocess.run now raises FileNotFoundError directly.
output = subprocess.run(['predict_prompt_polarity.sh', se_nlp_path, roberta_large_path, checkpoint_path, output_dir],
                        cwd=se_nlp_path, capture_output=True)

if output.returncode == 0:
    print('Prompt predictions complete')
else:
    # stderr is captured above, so include it in the error instead of losing it.
    raise ValueError('The prompt prediction script exited with a non-zero status. Check output for additional details.\n'
                     + output.stderr.decode('utf-8', errors='replace'))

#### Load prompt predictions and identify those which are neutral

In [6]:
# Read the classifier output written into the self-explaining-NLP directory.
output_file = os.path.join(se_nlp_path, "output.txt")
with open(output_file) as f:
    raw_lines = f.read().splitlines()

# Only lines containing '<->' are kept; the text after the first
# '<->' (up to the next one, if any) is taken as the predicted class.
prompt_predictions = list(filter(lambda line: '<->' in line, raw_lines))
predicted_classes = [line.split('<->')[1] for line in prompt_predictions]
segment_df['prediction'] = predicted_classes

# Prompts predicted as class '2' are the ones treated as neutral.
is_neutral = segment_df['prediction'] == '2'
neutral_prompts = segment_df[is_neutral]

## Identify concepts using CoCo-Ex

#### Set paths to the CoCo-Ex repository and the Stanford parser
You can download the Stanford parser [here](https://nlp.stanford.edu/software/stanford-parser-full-2018-10-17.zip)

In [7]:
# Absolute path of the CoCo-Ex checkout, expected next to this project.
coco_ex_path = os.path.abspath(os.path.join('..', 'CoCo-Ex'))

# Stanford parser directory inside the CoCo-Ex checkout.
parser_subdir = "StanfordParser-3.9.2/stanford-parser-full-2018-10-17"
stanford_parser_path = os.path.join(coco_ex_path, parser_subdir)

#### Output IMDB dataset to csv and run CoCo-Ex

In [6]:
# Take the sentences as a single column and remove newline characters.
text_column = pd.DataFrame(dataset['text'])[0]
coco_df = text_column.str.replace('\n','')

# Write "<row index>;<sentence>" lines (no header) into the CoCo-Ex directory.
coco_input_csv = os.path.join(coco_ex_path, 'imdb_sents_train.csv')
coco_df.to_csv(coco_input_csv, sep=';', header=False, index=True)

In [None]:
# Run CoCo-Ex concept identification through its helper script.
#
# The command is an argument list and shell=True is deliberately NOT used: on
# POSIX, a list combined with shell=True hands only the first item to the shell
# as the command, so the script would never receive its two path arguments.
# The script name is still resolved through PATH, as it was with the shell; if
# it cannot be found, subprocess.run now raises FileNotFoundError directly.
output = subprocess.run(['identify_concepts.sh', coco_ex_path, stanford_parser_path],
                        capture_output=True)

if output.returncode == 0:
    print('Concept identification complete')
else:
    # stderr is captured above, so include it in the error instead of losing it.
    raise ValueError('The concept identification script exited with a non-zero status. Check output for additional details.\n'
                     + output.stderr.decode('utf-8', errors='replace'))

#### Read CoCo-Ex results

In [8]:
file_path = os.path.join(coco_ex_path, 'imdb_sents_train_filtered.tsv')
# on_bad_lines='skip' silently drops malformed rows. It replaces the deprecated
# error_bad_lines=False / warn_bad_lines=False pair, which newer pandas rejects.
entities_filtered = pd.read_csv(file_path, sep='\t', header=None, on_bad_lines='skip', encoding="utf-8")

coco_concepts = []
coco_sents = []
index = []
# Column 0 holds an id of the form "text:<n>", column 2 the sentence and
# column 3 the bracketed concept list. Columns are iterated directly instead of
# materialising every row with iloc.
for text_id, sent, raw_concepts in zip(entities_filtered[0], entities_filtered[2], entities_filtered[3]):
    # pd.isna is a value check; `is not np.nan` only compares object identity.
    if pd.isna(raw_concepts):
        continue
    # Drop the outer brackets, split the entries on '][', then split each entry
    # on '|': field 0 is kept as the concept when field 1 equals 'NP'.
    entries = [entry.split('|') for entry in raw_concepts[1:-1].split('][')]
    # sorted(set(...)) removes duplicates in a stable, run-independent order.
    concepts = sorted({fields[0] for fields in entries if fields[1] == 'NP'})
    coco_concepts.append(concepts)
    coco_sents.append(sent)
    index.append(int(text_id.replace('text:','')))

coco_df = pd.DataFrame({'sent' : coco_sents, 'concepts' : coco_concepts, 'example_index' : index})
# 'key' is the lower-cased sentence with every non-letter character removed.
regex = re.compile('[^a-zA-Z]')
coco_df['key'] = coco_df['sent'].apply(lambda x: regex.sub('', x.lower()))



  entities_filtered = pd.read_csv(file_path, sep='\t', header=None, error_bad_lines=False, warn_bad_lines=False, encoding="utf-8")


  entities_filtered = pd.read_csv(file_path, sep='\t', header=None, error_bad_lines=False, warn_bad_lines=False, encoding="utf-8")


## Prepare prompts and constraint sets

#### Filter neutral prompt continuations to only contain identified concepts

In [9]:
# Attach the CoCo-Ex concepts of each source sentence to its neutral prompts.
neutral_prompts = neutral_prompts.merge(coco_df, how='left', on = 'example_index')
# Keep only prompts whose sentence has concepts. The .copy() makes the result an
# independent frame, so the column assignments below do not write to a slice.
neutral_prompts = neutral_prompts[~pd.isna(neutral_prompts['concepts'])].copy()
neutral_prompts['original_continuation'] = neutral_prompts['continuation']
# Reduce each continuation to its distinct tokens that (lower-cased) appear in
# the concept list. dict.fromkeys removes duplicates while keeping first-seen
# order, so the result is identical on every run (a set has no stable order
# for strings across interpreter runs).
neutral_prompts['continuation'] = neutral_prompts.apply(
    lambda row: ' '.join(dict.fromkeys(
        tok for tok in row['original_continuation'].split() if tok.lower() in row['concepts'])),
    axis = 1)

#### Remove tokens consisting only of punctuation to avoid empty constraints

In [10]:
def _drop_punctuation_only_tokens(text):
    """Return `text` without the whitespace-separated tokens made up solely of punctuation."""
    punctuation_chars = set(string.punctuation)
    kept = [tok for tok in text.split() if set(tok) - punctuation_chars]
    return ' '.join(kept)


def _constraints_from_continuation(text):
    """Return a [lowercase, Title-case] pair for every usable token of `text`.

    A token is skipped when it is purely numeric once punctuation is removed,
    or when it contains no printable character at all.
    """
    pairs = []
    for tok in text.split():
        if tok.translate(str.maketrans('', '', string.punctuation)).isdigit():
            continue
        if not any(ch in string.printable for ch in tok):
            continue
        pairs.append([tok.lower(), tok.title()])
    return pairs


# Clean both text columns, derive the constraint sets from the cleaned
# continuation, then discard prompts left without any constraint.
neutral_prompts = neutral_prompts.assign(
    continuation = neutral_prompts['continuation'].apply(_drop_punctuation_only_tokens),
    prompt = neutral_prompts['prompt'].apply(_drop_punctuation_only_tokens))
neutral_prompts['constraints'] = neutral_prompts['continuation'].apply(_constraints_from_continuation)
has_constraints = neutral_prompts['constraints'].str.len() > 0
neutral_prompts = neutral_prompts[has_constraints]

#### Filter neutral prompts

In [11]:
# Character length of each prompt, used by the filters and the final sort.
neutral_prompts['length'] = neutral_prompts['prompt'].str.len()

# ensure the number of characters is less than NeuroLogic's max_tgt_length parameter
original_chars = neutral_prompts['original'].str.len()
neutral_prompts = neutral_prompts[original_chars < 256]

# Drop every sentence for which some prompt equals the entire sentence.
prompt_is_whole_sentence = neutral_prompts['original'] == neutral_prompts['prompt']
fully_covered = neutral_prompts.loc[prompt_is_whole_sentence, 'original']
neutral_prompts = neutral_prompts[~neutral_prompts['original'].isin(fully_covered)]

# The sentence must be more than 2 characters longer than the prompt.
chars_beyond_prompt = neutral_prompts['original'].str.len() - neutral_prompts['length']
neutral_prompts = neutral_prompts[chars_beyond_prompt > 2]

# The prompt must have at least 4 whitespace-separated tokens and be unique.
prompt_token_count = neutral_prompts['prompt'].str.split().str.len()
neutral_prompts = neutral_prompts[prompt_token_count >= 4]
neutral_prompts = neutral_prompts.drop_duplicates('prompt')

# require continuation to be >= 1 token
continuation_token_count = neutral_prompts['continuation'].str.split().str.len()
neutral_prompts = neutral_prompts[continuation_token_count >= 1]

# keep only the longest neutral prompt
longest_first = neutral_prompts.sort_values(['original','length'], ascending = False)
neutral_prompts = longest_first.drop_duplicates(['original'])

#### Create single-token prompts for NeuroCFs-1g

In [12]:
# Translation tables are built once instead of once per token.
strip_punct = str.maketrans('', '', string.punctuation)
# Removing punctuation and digits in one pass equals the two chained translate calls.
strip_punct_digits = str.maketrans('', '', string.punctuation + string.digits)
punctuation_chars = set(string.punctuation)

# Concept list per example (the first row wins when an example appears more than
# once, matching a `.values[0]` lookup). Built once so no DataFrame scan is
# needed per token.
concepts_by_example = (coco_df.drop_duplicates('example_index')
                              .set_index('example_index')['concepts']
                              .to_dict())

prompt_records = []
prompt_record_index = []
# sorted() gives a stable, ascending processing order.
for j in sorted(set(range(len(dataset['text']))).intersection(concepts_by_example)):
    label = dataset['label'][j]
    sentence = dataset['text'][j]
    tokens = nltk.word_tokenize(sentence)

    # The prompt is the first token that is non-empty after removing punctuation
    # and digits, in Title case; k ends up just past that token.
    prompt = ''
    k = 0
    while prompt == '' and k < len(tokens):
        prompt = tokens[k].translate(strip_punct_digits).title()
        k += 1
    if prompt == '':
        # No usable token (the unguarded loop would raise IndexError here).
        continue
    continuation = ' ' + ' '.join(tokens[k:])

    # filter constraints to only identified concepts
    concepts = concepts_by_example[j]
    constraints = [[tok.lower(), tok.title()] for tok in tokens[k:]
                   if tok.lower() in concepts
                   and tok not in punctuation_chars
                   and not tok.translate(strip_punct).isdigit()
                   and any(ch in string.printable for ch in tok)]

    if len(constraints) > 0:
        prompt_records.append({'example_index' : j,
                               'prompt' : prompt,
                               'continuation' : continuation,
                               'original' : sentence,
                               'constraints' : constraints,
                               'label' : 0 if label == -1 else 4})
        prompt_record_index.append(j)

# Build the frame once from plain records; the constraint lists are stored
# directly, so no str()/ast.literal_eval round trip is needed. reset_index()
# keeps the example position as an 'index' column, as before.
single_token_prompts = pd.DataFrame(
    prompt_records,
    index=prompt_record_index,
    columns=['example_index', 'prompt', 'continuation', 'original', 'constraints', 'label'],
).reset_index()

## Generate counterfactuals using NeuroLogic Decoding

#### Set path to the NeuroLogic Decoding repository

In [13]:
# Absolute path of the NeuroLogic Decoding checkout, expected next to this project.
neurologic_path = os.path.abspath(os.path.join('..', 'neurologic_decoding'))

# Directory under which the per-polarity model folders are looked up later.
experts_subdir = 'models/experts/sentiment/large/'
lm_steer_path = os.path.join(neurologic_path, experts_subdir)

#### Specify the type of NeuroCFs you want to generate

In [14]:
# Choose which prompt set to decode from: 'NeuroCFs-np' uses the neutral
# prompts, 'NeuroCFs-1g' uses the single-token prompts.
NeuroCFs_type = 'NeuroCFs-np'
# NeuroCFs_type = 'NeuroCFs-1g'

if NeuroCFs_type == 'NeuroCFs-np':
    prompts_df = neutral_prompts
elif NeuroCFs_type == 'NeuroCFs-1g':
    prompts_df = single_token_prompts
else:
    # Fail here rather than leaving prompts_df undefined (or stale from an
    # earlier run) for the cells below.
    raise ValueError("Unknown NeuroCFs_type " + repr(NeuroCFs_type) + "; expected 'NeuroCFs-np' or 'NeuroCFs-1g'.")

#### Write prompts and constraints sets for NeuroLogic Decoding

In [15]:
def write_prompts_neurologic(df, output_file):
    """Write the prompts, constraint sets and original sentences of `df` to three files.

    The files are line-aligned (one line per row of `df`) and named by
    appending a suffix to `output_file`:

    * ``<output_file>_prompts.txt``      -- the ``prompt`` column
    * ``<output_file>_constraints.json`` -- the ``constraints`` column, one JSON value per line
    * ``<output_file>_original.txt``     -- the ``original`` column

    All three are written as UTF-8 explicitly, so the result does not depend on
    the platform's default encoding (the loader in this notebook decodes the
    prompts and generations as UTF-8).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``prompt``, ``constraints`` and ``original``.
    output_file : str
        Path prefix for the three output files.
    """
    prompts = df['prompt']
    constraints = df['constraints']
    original = df['original']
    with open(output_file + '_prompts.txt', 'w', encoding='utf-8') as f:
        for item in prompts:
            f.write("%s\n" % item)
    with open(output_file + '_constraints.json', 'w', encoding='utf-8') as f:
        for item in constraints:
            json.dump(item, f)
            f.write("\n")
    with open(output_file + '_original.txt', 'w', encoding='utf-8') as f:
        for item in original:
            f.write("%s\n" % item)

In [16]:
data_dir = os.path.join(neurologic_path, 'dataset/counterfactual/imdb_sents_train/', NeuroCFs_type)
Path(data_dir).mkdir(parents=True, exist_ok=True)

# Prompts labelled below 2 go to the 'negative' files, those above 2 to the
# 'positive' files.
polarity_masks = (('negative', prompts_df['label'] < 2),
                  ('positive', prompts_df['label'] > 2))
for polarity_name, polarity_mask in polarity_masks:
    write_prompts_neurologic(prompts_df[polarity_mask], os.path.join(data_dir, polarity_name))

# Keep the complete prompt table next to the exported files, as pickle and CSV.
prompts_pickle_path = os.path.join(data_dir, 'prompts_df.pkl')
with open(prompts_pickle_path, 'wb') as f:
    pickle.dump(prompts_df, f)
prompts_csv_path = os.path.join(data_dir, 'prompts_df.csv')
prompts_df.to_csv(prompts_csv_path, index=False)

#### Run NeuroLogic Decoding

In [None]:
output_dir = os.path.join(neurologic_path, 'output', NeuroCFs_type)
Path(output_dir).mkdir(parents=True, exist_ok=True)

polarities = ['positive', 'negative']
for p in polarities:
    # q is the other polarity: prompts of polarity p are decoded with the
    # model directory named after q.
    q = list(set(polarities).difference([p]))[0]
    prompt_file = os.path.join(data_dir, p + '_prompts.txt')
    constraint_file = os.path.join(data_dir, p + '_constraints.json')
    output_file = os.path.join(output_dir, p + "_to_" + q + ".txt")
    model_dir = os.path.join(lm_steer_path, 'finetuned_gpt2_' + q)

    # The command is an argument list and shell=True is deliberately NOT used:
    # on POSIX, a list combined with shell=True hands only the first item to the
    # shell as the command, so the script would never receive its five arguments.
    # The script name is still resolved through PATH, as it was with the shell;
    # if it cannot be found, subprocess.run now raises FileNotFoundError directly.
    output = subprocess.run(['run_neurologic.sh', neurologic_path, prompt_file, constraint_file, output_file, model_dir],
                            capture_output=True)

    if output.returncode == 0:
        print('Generation of ' + p + ' counterfactuals complete')
    else:
        # stderr is captured above, so include it in the error instead of losing it.
        raise ValueError('The NeuroLogic Decoding script exited with a non-zero status. Check output for additional details.\n'
                         + output.stderr.decode('utf-8', errors='replace'))

## Load the generated NeuroCounterfactuals and calculate metrics

In [20]:
def load_neurologic_generations(prompts_file, constraints_file, generations_file, original=None, target=None):
    """Combine prompts, constraint sets and generated texts into one DataFrame.

    Parameters
    ----------
    prompts_file : str
        UTF-8 text file with one prompt per line.
    constraints_file : str
        File with one JSON value per line (a list of word-form lists).
    generations_file : str
        UTF-8 text file of generations. Each line is stripped; a line that is
        empty after stripping is dropped together with the line following it.
    original, target
        Accepted but not used by this function.

    Returns
    -------
    pandas.DataFrame
        Columns ``prompt``, ``counterfactual``, ``constraints``,
        ``original_ending`` (the first form of every constraint, space-joined)
        and ``original`` (prompt + ' ' + original_ending). When
        ``prompts_file`` contains 'positive_prompts' or 'negative_prompts',
        ``original_label`` and ``target_label`` are added with opposite values.
    """
    with open(prompts_file, 'rb') as handle:
        prompt_lines = [raw.decode("utf-8").replace('\n', '') for raw in handle]

    with open(constraints_file, 'rb') as handle:
        constraint_sets = [json.loads(raw) for raw in handle]

    kept_generations = []
    drop_following = False
    with open(generations_file, 'rb') as handle:
        for raw in handle:
            text = raw.decode("utf-8").replace('\n', '').strip()
            if drop_following:
                # This line directly follows a blank one: discard it.
                drop_following = False
            elif text:
                kept_generations.append(text)
            else:
                drop_following = True

    # Assemble the aligned columns; all three lists must have the same length.
    generations = pd.DataFrame({'prompt' : prompt_lines,
                                'counterfactual' : kept_generations,
                                'constraints' : constraint_sets})
    first_forms = generations['constraints'].apply(lambda groups: ' '.join([group[0] for group in groups]))
    generations['original_ending'] = first_forms
    generations['original'] = generations['prompt'] + ' ' + generations['original_ending']

    # The prompt file name determines the source label; the target is its opposite.
    label_rules = (('positive_prompts', 'POSITIVE', 'NEGATIVE'),
                   ('negative_prompts', 'NEGATIVE', 'POSITIVE'))
    for marker, source_label, flipped_label in label_rules:
        if marker in prompts_file:
            generations['original_label'] = source_label
            generations['target_label'] = flipped_label
            break

    return generations

In [23]:
# Load the generations of both directions (positive -> negative and back).
generations = []
for p in polarities:
    # q is the polarity other than p; it names the generations file.
    q = list(set(polarities) - {p})[0]
    prompts_path = os.path.join(data_dir, p + '_prompts.txt')
    constraints_path = os.path.join(data_dir, p + '_constraints.json')
    generations_path = os.path.join(output_dir, p + '_to_' + q + '.txt')
    gens = load_neurologic_generations(prompts_file = prompts_path,
                                       constraints_file = constraints_path,
                                       generations_file = generations_path)
    generations.append(gens)

#### Save NeuroCounterfactuals

In [25]:
def _target_to_label(target_label):
    """Map 'POSITIVE' to 1 and any other value to -1."""
    if target_label == 'POSITIVE':
        return 1
    return -1


# Stack both directions, keep one row per distinct counterfactual text, and
# add the numeric label derived from the target polarity.
stacked_generations = pd.concat(generations)
counterfactuals = stacked_generations.drop_duplicates('counterfactual')
counterfactuals['label'] = counterfactuals['target_label'].apply(_target_to_label)

In [28]:
# Results go to output/<NeuroCFs_type>/ relative to the current working directory.
relative_output_dir = os.path.join('output', NeuroCFs_type)
ctf_output_dir = os.path.abspath(relative_output_dir)
Path(ctf_output_dir).mkdir(parents=True, exist_ok=True)

# Pickle the counterfactual table into that directory.
counterfactuals_pickle_path = os.path.join(ctf_output_dir, 'counterfactuals.pkl')
with open(counterfactuals_pickle_path, 'wb') as f:
    pickle.dump(counterfactuals, f)