# Stance Classification for wtwt
- read in the data and pre-process
- set up an LLM
- Investigate different prompting schemes
    - just context
    - context + few-shot learning
    - context + few-shot learning + reasoning

In [1]:
# Package installations to work on WIRE

! pip install transformers
! pip install langchain
! pip install accelerate
! pip install einops
! pip install deepspeed

Collecting transformers
  Using cached transformers-4.33.1-py3-none-any.whl (7.6 MB)
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Using cached huggingface_hub-0.17.1-py3-none-any.whl (294 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2023.8.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (771 kB)
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Using cached tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
Collecting safetensors>=0.3.1 (from transformers)
  Using cached safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Installing collected packages: tokenizers, safetensors, regex, huggingface-hub, transformers
Successfully installed huggingface-hub-0.17.1 regex-2023.8.8 safetensors-0.3.3 tokenizers-0.13.3 transformers-4.33.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m

In [2]:
import os, re, pandas as pd, numpy as np, ast, json
from pprint import pprint
from tqdm import tqdm  

import torch
from langchain import PromptTemplate, FewShotPromptTemplate, HuggingFacePipeline, LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import deepspeed

from sklearn.metrics import classification_report

from matplotlib import pyplot as plt
import seaborn as sns

2023-09-12 12:27:51.399661: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


[2023-09-12 12:27:53,700] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


# 1. Import and Preprocess Data

In [43]:
# Relative path wtwt/data_merged.csv, resolved against the current working directory.
file = os.path.join("wtwt","data_merged.csv")

In [44]:
# Load the CSV into `df`, the frame that the later cells read from and add prediction columns to.
df = pd.read_csv(file)

In [48]:
# Plain-English description for each merger code stored in the `event` column;
# these descriptions are later interpolated into the prompts.
event_descriptions = {
    'CVS_AET': 'CVS Health buying Aetna',
    'CI_ESRX': 'Cigna buying Express Scripts',
    'ANTM_CI': 'Anthem buying Cigna',
    'AET_HUM': 'Aetna buying Humana',
    'FOXA_DIS': 'Disney buying 21st Century Fox',
}

# Values that are not keys of the mapping are left unchanged by Series.replace.
df['event'] = df['event'].replace(event_descriptions)

In [51]:
# Show the first rows as a quick visual check of the loaded and remapped data.
df.head()

Unnamed: 0,tweet_id,stance,event,full_text
0,971761970117357568,agree,Cigna buying Express Scripts,Cigna and ESI set to merge. Here we go...
1,950934259371520000,neutral,Cigna buying Express Scripts,Express Scripts Closes Acquisition Of eviCore;...
2,973718376496357376,neutral,Cigna buying Express Scripts,RT @Arsyjgot7: เรื่องดีๆของวันนี้คืออัลบั้ม #E...
3,996772902006599680,agree,Cigna buying Express Scripts,Here's the just-released 400+ page merger prox...
4,979498827966279680,neutral,Cigna buying Express Scripts,Watch UNH CVS AET CI ESRX MOH ANTM on the $HUM...


In [52]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(32409, 4)

# 2. Connect to LLM
- for standing up a smaller huggingface model
```python
llm = HuggingFacePipeline.from_model_id(model_id="declare-lab/flan-alpaca-gpt4-xl", task = 'text2text-generation', device=0,
                                      model_kwargs={"max_length":500, "do_sample":False})
```
- for a mid-sized, more modern, huggingface model. You can use accelerate and change `device_map=0` to `device_map="auto"` to use multiple GPUs
```python
model = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map=0,
    max_length=200,
    do_sample=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

llm = HuggingFacePipeline(pipeline=pipe)
```


In [53]:
# Hugging Face checkpoint used for all experiments in this notebook.
model = "declare-lab/flan-alpaca-gpt4-xl"
tokenizer = AutoTokenizer.from_pretrained(model)

# Pipeline settings gathered in one place. do_sample=False turns sampling off;
# device_map=0 presumably places the model on the first GPU — TODO confirm.
pipeline_kwargs = dict(
    model=model,
    tokenizer=tokenizer,
    trust_remote_code=True,
    device_map=0,
    max_length=1000,
    do_sample=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
pipe = pipeline("text2text-generation", **pipeline_kwargs)

# LangChain wrapper around the pipeline; the chains below are all built on `llm`.
llm = HuggingFacePipeline(pipeline=pipe)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Look at a few examples.

In [20]:
# Smoke test of the raw LLM on a post that is critical of the Army.
# Prompt typos fixed: duplicated "of", mismatched quotes around AGAINST, and
# the question now ends with a question mark. The post itself is quoted verbatim.
question = '''What is the stance of the following social media post toward the U.S. Army? Give the stance as either 'FOR', 'AGAINST', or 'NEUTRAL'. Only return the stance:
"@vondeveen If the Army wants to actually recruit people, maybe stop breaking people and actually prosecute sexual assualt #nomorewar."
'''
llm(question)

'AGAINST'

In [21]:
# Smoke test of the raw LLM on a post that only mentions the Army in a hashtag.
# Prompt typos fixed: duplicated "of", mismatched quotes around AGAINST, and
# the question now ends with a question mark. The post itself is quoted verbatim.
question = '''What is the stance of the following social media post toward the U.S. Army? Give the stance as either 'FOR', 'AGAINST', or 'NEUTRAL'. Only return the stance:
"@artfulask I have never seen a pink-eared duck before. #Army"
'''
llm(question)

'NEUTRAL'

In [22]:
# Smoke test of the raw LLM on a post that speaks positively about the Army.
# Prompt typos fixed: duplicated "of", mismatched quotes around AGAINST, and
# the question now ends with a question mark. The post itself is quoted verbatim.
question = '''What is the stance of the following social media post toward the U.S. Army? Give the stance as either 'FOR', 'AGAINST', or 'NEUTRAL'. Only return the stance:
"I think the @Army helped me become disciplined. I would have surely flunked out of college chasing tail if I didn't get some discipline there. #SFL"
'''
llm(question)

'FOR'

# 3. Prompt Engineering for Stance Classification

## 3(a). Task-only prompt

In [133]:
# Task-only prompt: the model is given the label set and the statement, but no
# description of the event the statement refers to.
task_template = '''
Classify the statement as to whether it is 'FOR', 'AGAINST', 'NEUTRAL', or 'UNRELATED'. Only return the classification label for the statement, and no other text.

statement: {statement}
'''

# {statement} is the only placeholder filled in at run time.
task_prompt = PromptTemplate(template=task_template, input_variables=["statement"])

### Run an example

In [134]:
# Event of the row labelled 0, then the first 20 posts (and gold stances)
# belonging to that event, as a small sample for a quick check.
event = df.loc[0, 'event']
event_rows = df[df['event'] == event]
statements = event_rows['full_text'].iloc[:20].tolist()
y_true = event_rows['stance'].iloc[:20].tolist()

In [135]:
# Chain that fills the task-only prompt and sends it to the model.
llm_chain = LLMChain(llm=llm, prompt=task_prompt)

# One raw model answer per sample statement, in the same order as `statements`.
results = [llm_chain.run(statement=text) for text in statements]



In [136]:
# converting to given labels for comparison
# The dataset uses agree / disagree / neutral, the prompt uses FOR / AGAINST /
# NEUTRAL / UNRELATED; NEUTRAL and UNRELATED both collapse to 'neutral'.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_true = [label.lower() for label in y_true]
y_pred = []

for answer in results:
    found = label_pattern.search(answer.lower())
    # The first label named in the answer wins; an answer naming no label
    # falls back to 'neutral'.
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

# generating the classification report
# zero_division=0 reports 0.00 for a class with no true or no predicted samples
# instead of emitting an UndefinedMetricWarning for it.
report = classification_report(y_true, y_pred, zero_division=0)

print(report)

              precision    recall  f1-score   support

       agree       0.20      0.33      0.25         3
    disagree       0.00      0.00      0.00         0
     neutral       0.85      0.65      0.73        17

    accuracy                           0.60        20
   macro avg       0.35      0.33      0.33        20
weighted avg       0.75      0.60      0.66        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Run on all Data

In [137]:
# Running across the whole dataset
# The task-only prompt has a single {statement} placeholder, so only the post
# text is passed to the chain. Iterating the column (which has a length) lets
# tqdm show a percentage and ETA. Appending inside an explicit loop keeps the
# partial `results` if this long run is interrupted.

results = []
for text in tqdm(df['full_text'], total=len(df)):
    results.append(llm_chain.run(statement=text))

32409it [1:15:44,  7.13it/s]


In [138]:
# Distinct raw model answers and how often each one occurs.
np.unique(results, return_counts=True)

array(["'AGAINST'", "'FOR'", "'FOR' and 'AGAINST'.", "'NEUTRAL'",
       "'UNRELATED'", 'AGAINST', 'FOR', 'NEUTRAL', 'UNRELATED'],
      dtype='<U20')

In [139]:
# Map the raw answers onto the dataset's label vocabulary:
# FOR -> agree, AGAINST -> disagree, NEUTRAL / UNRELATED -> neutral.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_pred = []

for answer in results:
    found = label_pattern.search(answer.lower())
    # The first label named in the answer wins; an answer naming no label
    # falls back to 'neutral'.
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

# An interrupted run leaves `results` shorter than `df`; fail with a clear message.
if len(y_pred) != len(df):
    raise ValueError(f"expected {len(df)} predictions, got {len(y_pred)}")

df['task_preds'] = y_pred

In [140]:
# Distribution of the mapped task-only predictions.
np.unique(df['task_preds'], return_counts=True)

(array(['agree', 'disagree', 'neutral'], dtype=object),
 array([ 4711,  9058, 18640]))

In [141]:
# List any rows whose gold stance is missing before scoring against it.
df[df['stance'].isna()]

Unnamed: 0,tweet_id,stance,event,full_text,task_preds


In [142]:
# Per-class precision / recall / F1 of the task-only predictions against the gold stance.
report = classification_report(y_true=df['stance'], y_pred=df['task_preds'])
print(report)

              precision    recall  f1-score   support

       agree       0.21      0.23      0.22      4237
    disagree       0.27      0.78      0.40      3170
     neutral       0.81      0.60      0.69     25002

    accuracy                           0.57     32409
   macro avg       0.43      0.54      0.44     32409
weighted avg       0.68      0.57      0.60     32409



## 3(b). Context prompt

In [143]:
# Context prompt: in addition to the label set, the model is told that the
# statement is a social media post and which merger event to judge it against.
context_template = '''
The following statement is a social media post that may be commenting on a corporate merger. Classify the statement as to whether it is 'FOR', 'AGAINST', 'NEUTRAL', or 'UNRELATED' to the event "{event}" happening. Only return the classification label for the statement, and no other text.

statement: {statement}
'''

# {event} and {statement} are filled in at run time.
context_prompt = PromptTemplate(template=context_template, input_variables=["event", "statement"])

### Run an example

In [144]:
# Event of the row labelled 0, then the first 20 posts (and gold stances)
# belonging to that event, as a small sample for a quick check.
event = df.loc[0, 'event']
event_rows = df[df['event'] == event]
statements = event_rows['full_text'].iloc[:20].tolist()
y_true = event_rows['stance'].iloc[:20].tolist()

In [145]:
# Chain that fills the context prompt and sends it to the model.
llm_chain = LLMChain(llm=llm, prompt=context_prompt)

# One raw model answer per sample statement, all judged against the same event.
results = [llm_chain.run(event=event, statement=text) for text in statements]



In [146]:
# converting to given labels for comparison
# The dataset uses agree / disagree / neutral, the prompt uses FOR / AGAINST /
# NEUTRAL / UNRELATED; NEUTRAL and UNRELATED both collapse to 'neutral'.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_true = [label.lower() for label in y_true]
y_pred = []

for answer in results:
    found = label_pattern.search(answer.lower())
    # The first label named in the answer wins; an answer naming no label
    # falls back to 'neutral'.
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

# generating the classification report
# zero_division=0 reports 0.00 for a class with no true or no predicted samples
# instead of emitting an UndefinedMetricWarning for it.
report = classification_report(y_true, y_pred, zero_division=0)

print(report)

              precision    recall  f1-score   support

       agree       0.00      0.00      0.00         3
    disagree       0.00      0.00      0.00         0
     neutral       0.75      0.53      0.62        17

    accuracy                           0.45        20
   macro avg       0.25      0.18      0.21        20
weighted avg       0.64      0.45      0.53        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Run on all Data

In [147]:
# Running across the whole dataset
# zip over the two columns avoids building a Series per row (iterrows), and
# `total` lets tqdm show a percentage and ETA, which it cannot infer from a
# generator. Appending inside an explicit loop keeps the partial `results` if
# this long run is interrupted.

results = []
for event_name, text in tqdm(zip(df['event'], df['full_text']), total=len(df)):
    results.append(llm_chain.run(event=event_name, statement=text))

32409it [1:17:36,  6.96it/s]


In [148]:
# Distinct raw model answers and how often each one occurs.
np.unique(results, return_counts=True)

array(["'AGAINST'", "'FOR'", "'FOR' and 'AGAINST'.", "'NEUTRAL'",
       "'UNRELATED'", 'AGAINST', 'FOR', 'NEUTRAL', 'UNRELATED'],
      dtype='<U20')

In [149]:
# Map the raw answers onto the dataset's label vocabulary:
# FOR -> agree, AGAINST -> disagree, NEUTRAL / UNRELATED -> neutral.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_pred = []

for answer in results:
    found = label_pattern.search(answer.lower())
    # The first label named in the answer wins; an answer naming no label
    # falls back to 'neutral'.
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

# An interrupted run leaves `results` shorter than `df`; fail with a clear message.
if len(y_pred) != len(df):
    raise ValueError(f"expected {len(df)} predictions, got {len(y_pred)}")

df['context_preds'] = y_pred

In [150]:
# Distribution of the mapped context-prompt predictions.
np.unique(df['context_preds'], return_counts=True)

(array(['agree', 'disagree', 'neutral'], dtype=object),
 array([ 3231,  8978, 20200]))

In [151]:
# Per-class precision / recall / F1 of the context-prompt predictions against the gold stance.
report = classification_report(y_true=df['stance'], y_pred=df['context_preds'])
print(report)

              precision    recall  f1-score   support

       agree       0.28      0.21      0.24      4237
    disagree       0.29      0.82      0.43      3170
     neutral       0.82      0.66      0.73     25002

    accuracy                           0.62     32409
   macro avg       0.46      0.56      0.47     32409
weighted avg       0.70      0.62      0.64     32409



## 3(c). Context + FSP

In [152]:
# Layout used to render one worked example inside the few-shot prompt:
# the event, the post, and its stance label on separate lines.
example_template = '''
event: {event}
statement: {statement}
stance: {stance}
'''

# Each example dict must provide exactly these three keys.
example_prompt = PromptTemplate(
    template=example_template,
    input_variables=["event","statement", "stance"],
)

In [153]:
# Worked examples for the few-shot prompt, one per merger event,
# written as (event, statement, stance) rows.
_example_rows = [
    ("Aetna buying Humana",
     '''Talk of Aetna, Anthem acquisition moves reaches fever pitch http://t.co/lxjHa7eXWh''',
     'NEUTRAL'),
    ("Anthem buying Cigna",
     '''Sr. Mktg Advisor Aetna may acquire Humana or Cigna http://t.co/lTjwFi0Y8O #marketing #strategy''',
     'FOR'),
    ("Cigna buying Express Scripts",
     '''Following the lead of CVS's $CVS acquisition of Aetna $AET and Cigna's $CI acquisition of Express Scripts Holding $ESRX, Walmart $WMT and Humana Inc $HUM are in preliminary talks focusing on possible partnership, or Walmart acquisition of Humana https://t.co/H2FWYwFJYz''',
     'NEUTRAL'),
    ("CVS Health buying Aetna",
     '''@IngrahamAngle @realDonaldTrump He needs to block @cvshealth acquisition of @Aetna, and @Cigna acquisition of @ExpressScripts.  Patients will suffer.''',
     'AGAINST'),
    ("Disney buying 21st Century Fox",
     '''Finally saw #Fant4stic \nImmediately regretted it.''',
     'UNRELATED'),
]

# Expand the rows into the list of dicts that FewShotPromptTemplate consumes.
examples = [
    {'event': event_name, 'statement': text, 'stance': label}
    for event_name, text, label in _example_rows
]

In [154]:
# Text placed before the worked examples: introduces the task and the label set.
prefix = """
The following statements are social media posts that may be commenting on a corporate merger. The statements can either be 'FOR', 'AGAINST', 'NEUTRAL', or 'UNRELATED' toward the corporate merger happening.
"""

# Text placed after the worked examples: the actual query, with {event} and
# {statement} filled in at run time.
suffix = '''
Now, classify the following statement as to whether it is 'FOR', 'AGAINST','NEUTRAL', or 'UNRELATED' toward "{event}" happening. Only return the classification label for the statement, and no other text.

statement: {statement}
'''

# prefix + rendered examples + suffix, joined with a single newline.
few_shot_prompt = FewShotPromptTemplate(
    prefix=prefix,
    examples=examples,
    example_prompt=example_prompt,
    example_separator="\n",
    suffix=suffix,
    input_variables=["event", "statement"],
)

### Run an example

In [155]:
# Event of the row labelled 0, then the first 20 posts (and gold stances)
# belonging to that event, as a small sample for a quick check.
event = df.loc[0, 'event']
event_rows = df[df['event'] == event]
statements = event_rows['full_text'].iloc[:20].tolist()
y_true = event_rows['stance'].iloc[:20].tolist()

In [156]:
# Chain that fills the few-shot prompt and sends it to the model.
llm_chain = LLMChain(llm=llm, prompt=few_shot_prompt)

# One raw model answer per sample statement, all judged against the same event.
results = [llm_chain.run(event=event, statement=text) for text in statements]



In [157]:
# converting to given labels for comparison
# The dataset uses agree / disagree / neutral, the prompt uses FOR / AGAINST /
# NEUTRAL / UNRELATED; NEUTRAL and UNRELATED both collapse to 'neutral'.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_true = [label.lower() for label in y_true]
y_pred = []

for answer in results:
    found = label_pattern.search(answer.lower())
    # The first label named in the answer wins; an answer naming no label
    # falls back to 'neutral'.
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

# generating the classification report
# zero_division=0 reports 0.00 for a class with no true or no predicted samples
# instead of emitting an UndefinedMetricWarning for it.
report = classification_report(y_true, y_pred, zero_division=0)

print(report)

              precision    recall  f1-score   support

       agree       0.29      0.67      0.40         3
    disagree       0.00      0.00      0.00         0
     neutral       0.91      0.59      0.71        17

    accuracy                           0.60        20
   macro avg       0.40      0.42      0.37        20
weighted avg       0.82      0.60      0.67        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Run on all Data

In [None]:
# Running across the whole dataset
# zip over the two columns avoids building a Series per row (iterrows), and
# `total` lets tqdm show a percentage and ETA, which it cannot infer from a
# generator. Appending inside an explicit loop keeps the partial `results` if
# this long run is interrupted.

results = []
for event_name, text in tqdm(zip(df['event'], df['full_text']), total=len(df)):
    results.append(llm_chain.run(event=event_name, statement=text))

18992it [1:09:45,  4.65it/s]

In [None]:
# Distinct raw model answers and how often each one occurs.
np.unique(results, return_counts=True)

In [None]:
# Map the raw answers onto the dataset's label vocabulary:
# FOR -> agree, AGAINST -> disagree, NEUTRAL / UNRELATED -> neutral.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_pred = []

for answer in results:
    found = label_pattern.search(answer.lower())
    # The first label named in the answer wins; an answer naming no label
    # falls back to 'neutral'.
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

# An interrupted run leaves `results` shorter than `df`; fail with a clear
# message instead of assigning a column of the wrong length.
if len(y_pred) != len(df):
    raise ValueError(f"expected {len(df)} predictions, got {len(y_pred)}")

df['fsp_preds'] = y_pred

In [None]:
# Distribution of the mapped few-shot predictions.
np.unique(df['fsp_preds'], return_counts=True)

In [None]:
# Per-class precision / recall / F1 of the few-shot predictions against the gold stance.
report = classification_report(y_true=df['stance'], y_pred=df['fsp_preds'])
print(report)

## 3(d). Context + FSP + Reasoning

In [None]:
# Layout used to render one worked example inside the few-shot + reasoning
# prompt: event, post, stance label, and the justification for that label.
example_and_reason_template = '''
event: {event}
statement: {statement}
stance: {stance}
reason: {reason}
'''

# Each example dict must provide exactly these four keys.
example_and_reason_prompt = PromptTemplate(
    template=example_and_reason_template,
    input_variables=["event","statement", "stance", "reason"],
)

In [None]:
# Give some examples
# Worked examples for the few-shot + reasoning prompt, one per merger event.
# Every stance must be spelled exactly like the labels named in the prompt,
# because the model copies what it is shown ('NEUITRAL' typo fixed below).

examples = [
    {'event':"Aetna buying Humana",
     'statement':'''Talk of Aetna, Anthem acquisition moves reaches fever pitch http://t.co/lxjHa7eXWh''',
     'stance': 'NEUTRAL',
     'reason': "The statement is just a comment about the merger and so does not express a stance as for or against the merger."
    },
    # NOTE(review): this statement is about Aetna acquiring Humana or Cigna while
    # the event is Anthem buying Cigna — confirm the FOR label and reason are intended.
    {'event':"Anthem buying Cigna",
     'statement':'''Sr. Mktg Advisor Aetna may acquire Humana or Cigna http://t.co/lTjwFi0Y8O #marketing #strategy''',
     'stance': 'FOR',
     'reason': 'The statement mentions that Aetna may acquire Cigna, which supports the merger happening.'
    },
    {'event':"Cigna buying Express Scripts",
     'statement':'''Following the lead of CVS's $CVS acquisition of Aetna $AET and Cigna's $CI acquisition of Express Scripts Holding $ESRX, Walmart $WMT and Humana Inc $HUM are in preliminary talks focusing on possible partnership, or Walmart acquisition of Humana https://t.co/H2FWYwFJYz''',
     'stance': 'NEUTRAL',
     'reason': 'The statement mentions a number of possible mergers, and so is just a neutral comment on Cigna buying Express Scripts.'
    },
    {'event':"CVS Health buying Aetna",
     'statement':'''@IngrahamAngle @realDonaldTrump He needs to block @cvshealth acquisition of @Aetna, and @Cigna acquisition of @ExpressScripts.  Patients will suffer.''',
     'stance': 'AGAINST',
     'reason': 'the statement is asking prominent political figures to block CVS Health buying Aetna over concerns for patients, thus the statement is against the merger.'
    },
    {'event':"Disney buying 21st Century Fox",
     'statement':'''Finally saw #Fant4stic \nImmediately regretted it.''',
     'stance': 'UNRELATED',
     'reason': 'The statement mentioned a Disney film but does not comment on Disney buying 21st Century Fox, so it is unrelated to the merger.'
    },
]

In [None]:
# Text placed before the worked examples: introduces the task and the label set.
prefix = """
The following statements are social media posts that may be commenting on a corporate merger. The statements can either be 'FOR', 'AGAINST', 'NEUTRAL', or 'UNRELATED' toward the corporate merger happening.
"""

# Text placed after the worked examples: the actual query. It now offers the
# same four labels as the prefix and the examples (UNRELATED was missing), and
# asks for the answer as 'stance: STANCE, reason: REASON', the format the
# parsing cells below split on.
suffix = '''
Now, classify the following statement as to whether it is 'FOR', 'AGAINST', 'NEUTRAL', or 'UNRELATED' toward "{event}" happening, and give your reasoning. Only return the classification label for the statement and the reason for the classification label in the form of: 'stance: STANCE, reason: REASON'

statement: {statement}
'''

# prefix + rendered examples + suffix, joined with a single newline;
# {event} and {statement} are filled in at run time.
few_shot_and_reason_prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_and_reason_prompt,
    prefix=prefix,
    suffix=suffix,
    input_variables=["event", "statement"],
    example_separator="\n"
)

### Run an example

In [None]:
# Event of the row labelled 0, then the first 20 posts (and gold stances)
# belonging to that event, as a small sample for a quick check.
event = df.loc[0, 'event']
event_rows = df[df['event'] == event]
statements = event_rows['full_text'].iloc[:20].tolist()
y_true = event_rows['stance'].iloc[:20].tolist()

In [None]:
# Chain that fills the few-shot + reasoning prompt and sends it to the model.
llm_chain = LLMChain(llm=llm, prompt=few_shot_and_reason_prompt)

# One raw model answer per sample statement, all judged against the same event.
results = [llm_chain.run(event=event, statement=text) for text in statements]

In [None]:
# Parse answers of the form 'stance: STANCE, reason: REASON' into a dataset
# label (agree / disagree / neutral) and the free-text reason.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_pred = []
reasons = []

for result in results:
    # Split once, case-insensitively, on the first "reason:" marker, so that a
    # capitalised "Reason:" or a second "reason:" inside the explanation does
    # not cause the reason to be dropped.
    parts = re.split(r'reason:', result, maxsplit=1, flags=re.IGNORECASE)
    # No marker means the model did not give a reason.
    reasons.append(parts[1].strip() if len(parts) == 2 else '')

    # The first label named before the reason wins; an answer naming no label
    # falls back to 'neutral'.
    found = label_pattern.search(parts[0].lower())
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

y_true = [label.lower() for label in y_true]

# generating the classification report
# zero_division=0 reports 0.00 for a class with no true or no predicted samples
# instead of emitting an UndefinedMetricWarning for it.
report = classification_report(y_true, y_pred, zero_division=0)
print(report)

### Run on all Data

In [None]:
# Running across the whole dataset
# zip over the two columns avoids building a Series per row (iterrows), and
# `total` lets tqdm show a percentage and ETA, which it cannot infer from a
# generator. Appending inside an explicit loop keeps the partial `results` if
# this long run is interrupted.

results = []
for event_name, text in tqdm(zip(df['event'], df['full_text']), total=len(df)):
    results.append(llm_chain.run(event=event_name, statement=text))

In [None]:
# Parse answers of the form 'stance: STANCE, reason: REASON' into a dataset
# label (agree / disagree / neutral) and the free-text reason.
label_map = {'for': 'agree', 'against': 'disagree', 'neutral': 'neutral', 'unrelated': 'neutral'}
# Whole-word match, so "for" is not picked up inside words such as "information".
label_pattern = re.compile(r'\b(for|against|neutral|unrelated)\b')

y_pred = []
reasons = []

for result in results:
    # Split once, case-insensitively, on the first "reason:" marker, so that a
    # capitalised "Reason:" or a second "reason:" inside the explanation does
    # not cause the reason to be dropped.
    parts = re.split(r'reason:', result, maxsplit=1, flags=re.IGNORECASE)
    # No marker means the model did not give a reason.
    reasons.append(parts[1].strip() if len(parts) == 2 else '')

    # The first label named before the reason wins; an answer naming no label
    # falls back to 'neutral'.
    found = label_pattern.search(parts[0].lower())
    y_pred.append(label_map[found.group(1)] if found else 'neutral')

# An interrupted run leaves `results` shorter than `df`; fail with a clear message.
if len(y_pred) != len(df):
    raise ValueError(f"expected {len(df)} predictions, got {len(y_pred)}")

df['fsp_reason_preds'] = y_pred
df['fsp_reason_reasons'] = reasons

In [None]:
# Distribution of the mapped few-shot + reasoning predictions.
np.unique(df['fsp_reason_preds'], return_counts=True)

In [None]:
# Per-class precision / recall / F1 of the few-shot + reasoning predictions against the gold stance.
report = classification_report(y_true=df['stance'], y_pred=df['fsp_reason_preds'])
print(report)

In [None]:
# Write the frame, including the prediction columns added by the cells above,
# to a CSV file in the current working directory.
df.to_csv("wtwt_flan-alpaca-3B_results.csv")