# Stance Classification for PhemeRumors
- read in the data and pre-process
- set up an LLM
- Investigate different prompting schemes
    - just context
    - context + few-shot learning
    - context + few-shot learning + reasoning

In [170]:
# Package installations to work on WIRE

! pip install openai
! pip install langchain
! pip install accelerate
! pip install einops


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Collecting einops
  Using cached einops-0.6.1-py3-none-any.whl (42 kB)
Installing collected packages: einops
Successfully installed einops-0.6.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release 

In [204]:
import os, re, pandas as pd, numpy as np, openai, ast, json
from pprint import pprint
from tqdm import tqdm  

import torch
from langchain import OpenAI, PromptTemplate, FewShotPromptTemplate, HuggingFacePipeline, LLMChain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

from sklearn.metrics import classification_report

from matplotlib import pyplot as plt
import seaborn as sns

# 1. Import and Preprocess Data

In [2]:
# Relative path to data_merged.csv inside the phemerumours directory
data_dir, data_name = "phemerumours", "data_merged.csv"
file = os.path.join(data_dir, data_name)

In [3]:
# Load the CSV at `file` into the working DataFrame
df = pd.read_csv(filepath_or_buffer=file)

In [4]:
# Preview the first rows of the loaded frame
df.head()

Unnamed: 0,tweet_id,stance,event,full_text
0,576755174531862529,agree,putinmissing,Coup? RT @jimgeraghty: Rumors all Russian mili...
1,576319832800555008,agree,putinmissing,Hoppla! @L0gg0l: Swiss Rumors: Putin absence d...
2,576513463738109954,disagree,putinmissing,Putin reappears on TV amid claims he is unwell...
3,552783667052167168,agree,charliehebdo,France: 10 people dead after shooting at HQ of...
4,552793679082311680,agree,charliehebdo,"11 confirmed dead, Francois Hollande to visit ..."


In [5]:
# (rows, columns) of the loaded frame
df.shape

(2859, 4)

In [6]:
# Plain-English description of each rumour, keyed by the raw code found in the
# `event` column. The descriptions are the text later placed in the prompts.
event_dict = {
    'putinmissing': "Russian President Putin has gone missing",
    'charliehebdo': "there was a shooting event at Charlie Hebdo in Paris",
    'prince-toronto': "singer Prince will play a secret show in Toronto",
    'ferguson': "There is unrest in Ferguson, Missouri",
    'germanwings-crash': "A Germanwings plane crashed",
    'ottawashooting': "There was a shooting event in Ottawa",
    'sydneysiege': "There is a hostage situation in Sydney",
    'ebola-essien': "Michael Essien contracted Ebola"
}

# Swap the raw codes for their descriptions. `replace` (unlike `map`) leaves any
# value that is not a key of event_dict untouched, so re-running this cell on the
# already-translated column is a no-op instead of turning every event into NaN.
df['event'] = df['event'].replace(event_dict)

# 2. Connect to LLM
- for connecting to OpenAI
```python
with open("../aci_openai_credentials.txt") as f:
    open_api_base, open_api_key = f.read().split("\n")

openai.api_type = "azure"
openai.api_base = open_api_base
openai.api_version = "2023-03-15-preview"
openai.api_key = open_api_key
```
- for standing up a smaller huggingface model
```python
llm = HuggingFacePipeline.from_model_id(model_id="declare-lab/flan-alpaca-gpt4-xl", task = 'text2text-generation', device=0,
                                      model_kwargs={"max_length":500, "do_sample":False})
```
- for a mid-sized, more modern, huggingface model. You can use accelerate and change `device_map=0` to `device_map="auto"` to use multiple GPUs
```python
model = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map=0,
    max_length=200,
    do_sample=False,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

llm = HuggingFacePipeline(pipeline=pipe)
```


In [10]:
# Read the personal OpenAI API key from a text file in the parent directory.
# strip() removes surrounding whitespace such as the trailing newline most editors
# append; left in, it would become part of the key handed to the client.
with open("../personal_openai_key.txt") as f:
    open_api_key = f.read().strip()

openai.api_key = open_api_key

In [15]:
# Toy check of the connection: ask gpt-3.5-turbo for the stance of one tweet
# toward the U.S. Army.
question = '''What is the stance of the author of the following toward the U.S. Army. Give the stance as either 'FOR', "AGAINST', or 'NEUTRAL'. Only return the stance:
"@vondeveen If the Army wants to actually recruit people, maybe stop breaking people and actually prosecute sexual assualt #nomorewar."
'''

chat_request = dict(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": question}],
    temperature=0.1,
    max_tokens=50,
    stop=None,
)
response = openai.ChatCompletion.create(**chat_request)

# Display the text of the first returned choice
response.choices[0].message['content']

'AGAINST'

In [16]:
# Same toy check with a tweet that only mentions the Army in a hashtag.
question = '''What is the stance of the author of the following toward the U.S. Army. Give the stance only as either 'FOR', "AGAINST', or 'NEUTRAL'. Only return the stance:
"@artfulask I have never seen a pink-eared duck before. #Army"
'''

chat_request = dict(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": question}],
    temperature=0.1,
    max_tokens=50,
    stop=None,
)
response = openai.ChatCompletion.create(**chat_request)

# Display the text of the first returned choice
response.choices[0].message['content']

'NEUTRAL'

In [17]:
# Same toy check with a tweet that credits the Army with a personal benefit.
question = '''What is the stance of the author of the following toward the U.S. Army. Give the stance as either 'FOR', "AGAINST', or 'NEUTRAL'. Only return the stance:
"I think the @Army helped me become disciplined. I would have surely flunked out of college chasing tail if I didn't get some discipline there. #SFL"
'''

chat_request = dict(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": question}],
    temperature=0.1,
    max_tokens=50,
    stop=None,
)
response = openai.ChatCompletion.create(**chat_request)

# Display the text of the first returned choice
response.choices[0].message['content']

'FOR'

# 3. Prompt Engineering for Stance Classification

## 3(a). Context-only prompt
- prompt for single comment at a time
```python
context_template = '''
The following statement is a social media commenting on wether an event, "{event}", is true. Classify the statement as to wether it "AGREE", "DISAGREE", or is "NEUTRAL" toward event "{event}" being true. Only return the classification label of "AGREE", "DISAGREE", or "NEUTRAL" for the statement, and no other text.

statement: {statement}
classification:
'''

context_prompt = PromptTemplate(
    input_variables=["event","statement"],
    template=context_template
)
```

In [113]:
# Context-only prompt: names the event, then asks for one label per statement.
# {event} and {statements} are the two slots filled in at format() time.

context_template = '''
The following statements are from social media and are commenting on wether an event, "{event}", happened. Classify each of the statements as to wether they "AGREE", "DISAGREE", or are "NEUTRAL" toward event "{event}" happening. Only return the classification label for all of the statements, and no other text.

statements:
{statements}
'''

context_prompt = PromptTemplate(
    template=context_template,
    input_variables=["event", "statements"],
)

### Run an example

- To do this with openAI:
```python
response = openai.ChatCompletion.create(
  engine="ACI_GPT-4",
  messages = [{"role":"user","content":context_prompt.format(event = event, statements=statements)}],
  temperature=0.8,
  max_tokens=800,
  top_p=0.95,
  frequency_penalty=0,
  presence_penalty=0,
  stop=None)

y_pred = response.choices[0].message['content']
```
- To do this with a local model
```python
llm_chain = LLMChain(prompt=context_prompt, llm=llm)

results=[]
for statement in statements:
    results.append(llm_chain.run(event=event, statement=statement))
```

In [114]:
# Use the event of the row labelled 0, and the first 20 statements (plus their
# recorded stances) that belong to that event.
event = df['event'][0]
event_rows = df[df['event'] == event]
statements = event_rows['full_text'].iloc[:20].tolist()
y_true = event_rows['stance'].iloc[:20].tolist()

In [121]:
# Send the whole list of statements to GPT-4 in a single user message
user_message = {
    "role": "user",
    "content": context_prompt.format(event=event, statements=statements),
}
response = openai.ChatCompletion.create(model="gpt-4", messages=[user_message], stop=None)

# Raw text of the first returned choice
results = response.choices[0].message['content']

In [123]:
# Lowercase both label lists so the recorded and predicted labels are comparable.
# The reply text is parsed as a Python literal (a list of label strings).
y_true = list(map(str.lower, y_true))
predicted_labels = ast.literal_eval(results)
y_pred = [label.lower() for label in predicted_labels]

# Per-class precision / recall / F1 for the example batch
report = classification_report(y_true, y_pred)
print(report)

              precision    recall  f1-score   support

       agree       0.80      0.80      0.80         5
    disagree       0.50      1.00      0.67         1
     neutral       1.00      0.93      0.96        14

    accuracy                           0.90        20
   macro avg       0.77      0.91      0.81        20
weighted avg       0.93      0.90      0.91        20



### Run on all Data
- To do this with OpenAI and multiple statements per query:
```python
df['fsp_preds'] =''
num_statements_per_prompt = 20

for event in df['event'].unique():
    all_statements = df[df['event'] == event]['full_text'].tolist()
    preds = []
    print("event: {}, has {} statements".format(event, len(all_statements)))
    for statements in [all_statements[idx:idx+num_statements_per_prompt] for idx in range(0, len(all_statements), num_statements_per_prompt)]:

        response = openai.ChatCompletion.create(
          engine="ACI_GPT-4",
          messages = [{"role":"user","content":few_shot_prompt.format(event = event, statements=statements)}],
          temperature=0.8,
          max_tokens=800,
          top_p=0.95,
          frequency_penalty=0,
          presence_penalty=0,
          stop=None)
        
        results = [str.lower(i) for i in ast.literal_eval(response.choices[0].message['content'])][:num_statements_per_prompt]
        preds += results
    df.loc[df['event'] == event, 'fsp_preds'] = preds  
```
- Run on a single statement with a local model
```python
# Running across the whole dataset

results = []
for index, row in tqdm(df.iterrows()):
    results.append(llm_chain.run(event=row['event'], statement=row['full_text']))
```

In [124]:
# Classify every statement with the context-only prompt, one event at a time,
# sending the statements to GPT-4 in batches of num_statements_per_prompt.
df['context_preds'] =''
num_statements_per_prompt = 20

for event in df['event'].unique():
    all_statements = df[df['event'] == event]['full_text'].tolist()
    preds = []
    print("event: {}, has {} statements".format(event, len(all_statements)))
    for idx in range(0, len(all_statements), num_statements_per_prompt):
        statements = all_statements[idx:idx+num_statements_per_prompt]

        response = openai.ChatCompletion.create(
          model="gpt-4",
          messages = [{"role":"user","content":context_prompt.format(event = event, statements=statements)}],
          stop=None)

        # The reply is parsed as a Python literal: a list of label strings.
        results = [str.lower(i) for i in ast.literal_eval(response.choices[0].message['content'])]

        # Keep exactly one label per statement. Surplus labels are cut at the size of
        # THIS batch (the last batch of an event can be shorter than
        # num_statements_per_prompt). A reply with too few labels is rejected here:
        # appended as-is it would shift every later label of the event onto the wrong
        # row, and could go unnoticed if another batch happened to return a surplus.
        if len(results) < len(statements):
            raise ValueError(
                "event {!r}, statements {}-{}: expected {} labels, got {}".format(
                    event, idx, idx + len(statements) - 1, len(statements), len(results)))
        preds += results[:len(statements)]
    df.loc[df['event'] == event, 'context_preds'] = preds

event: Russian President Putin has gone missing, has 46 statements
event: there was a shooting event at Charlie Hebdo in Paris, has 711 statements
event: singer Prince will play a secret show in Toronto, has 78 statements
event: There is unrest in Ferguson, Missouri, has 694 statements
event: A Germanwings plane crashed, has 209 statements
event: There was a shooting event in Ottawa, has 482 statements
event: There is a hostage situation in Sydney, has 623 statements
event: Michael Essien contracted Ebola, has 16 statements


In [128]:
# Distinct predicted labels in `context_preds` and how often each occurs
np.unique(df['context_preds'], return_counts=True)

(array(['agree', 'disagree', 'neutral'], dtype=object),
 array([1369,   56, 1434]))

In [129]:
# Score the context-only predictions against the `stance` column
report = classification_report(y_true=df['stance'], y_pred=df['context_preds'])
print(report)

              precision    recall  f1-score   support

       agree       0.16      0.90      0.27       238
    disagree       0.04      0.22      0.06         9
     neutral       0.98      0.54      0.70      2612

    accuracy                           0.57      2859
   macro avg       0.39      0.56      0.34      2859
weighted avg       0.91      0.57      0.66      2859



## 3(b). Context + FSP

In [130]:
# Layout of one worked example in the few-shot prompt: the event, the statement
# about it, and the stance label assigned to that statement.

example_template = '''
event: {event}
statement: {statement}
stance: {stance}
'''

example_prompt = PromptTemplate(
    template=example_template,
    input_variables=["event", "statement", "stance"],
)

In [131]:
# Worked examples for the few-shot prompt.
# Each dict supplies the three fields of the example template: the event
# description, one statement about it, and the stance label for that statement.
# The five examples cover one DISAGREE, three NEUTRAL and one AGREE case.

examples = [
    {'event':"Putin has gone missing",
     'statement':"Putin reappears on TV amid claims he is unwell and under threat of coup http://t.co/YZln23EUx1 http://t.co/ZsAnBa5gz3",
     'stance': 'DISAGREE'},
    {'event':"Michael Essien contracted Ebola",
     'statement': '''What? "@FootballcomEN: Unconfirmed reports claim that Michael Essien has contracted Ebola. http://t.co/GsEizhwaV7"''',
     'stance': 'NEUTRAL'},
    {'event':"A Germanwings plane crashed",
     'statement': '''@thatjohn @planefinder why would they say urgence in lieu of mayday which is standard ?''',
     'stance': 'NEUTRAL'},
    {'event':"There is a hostage situation in Sydney",
     'statement': '''@KEEMSTARx dick head it's not confirmed its Jihadist extremists. Don't speculate''',
     'stance': 'NEUTRAL'},
    {'event':"singer Prince will play a secret show in Toronto",
     'statement': '''OMG. #Prince rumoured to be performing in Toronto today. Exciting!''',
     'stance': 'AGREE'}
]

In [135]:
# Few-shot prompt = prefix, then every entry of `examples` rendered through
# example_prompt, then the suffix carrying the actual classification request.
prefix = '''
The following statements are examples of the stance of a social media comments towards wether an event happened or not.
'''

suffix = '''
Now, classify each of the statements as to wether they "AGREE", "DISAGREE", or are "NEUTRAL" toward event "{event}" happening. Only return the classification label for all of the statements, and no other text.

statements:
{statements}
'''

few_shot_prompt = FewShotPromptTemplate(
    prefix=prefix,
    examples=examples,
    example_prompt=example_prompt,
    example_separator="\n",
    suffix=suffix,
    input_variables=["event", "statements"],
)

### Run an example

In [136]:
# Use the event of the row labelled 0, and the first 20 statements (plus their
# recorded stances) that belong to that event.
event = df['event'][0]
event_rows = df[df['event'] == event]
statements = event_rows['full_text'].iloc[:20].tolist()
y_true = event_rows['stance'].iloc[:20].tolist()

In [139]:
# Send the whole list of statements to GPT-4 with the few-shot prompt
user_message = {
    "role": "user",
    "content": few_shot_prompt.format(event=event, statements=statements),
}
response = openai.ChatCompletion.create(model="gpt-4", messages=[user_message], stop=None)

# Raw text of the first returned choice
results = response.choices[0].message['content']

In [140]:
# Inspect the raw reply text before it is parsed
results

"['AGREE', 'AGREE', 'DISAGREE', 'AGREE', 'AGREE', 'AGREE', 'DISAGREE', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL', 'NEUTRAL']"

In [142]:
# Lowercase both label lists so the recorded and predicted labels are comparable.
# The reply text is parsed as a Python literal (a list of label strings).
y_true = list(map(str.lower, y_true))
predicted_labels = ast.literal_eval(results)
y_pred = [label.lower() for label in predicted_labels]

# Per-class precision / recall / F1 for the example batch
report = classification_report(y_true, y_pred)
print(report)

              precision    recall  f1-score   support

       agree       0.80      0.80      0.80         5
    disagree       0.50      1.00      0.67         1
     neutral       1.00      0.93      0.96        14

    accuracy                           0.90        20
   macro avg       0.77      0.91      0.81        20
weighted avg       0.93      0.90      0.91        20



### Run on all Data

In [143]:
# Classify every statement with the few-shot prompt, one event at a time,
# sending the statements to GPT-4 in batches of num_statements_per_prompt.
df['fsp_preds'] =''
num_statements_per_prompt = 20

for event in df['event'].unique():
    all_statements = df[df['event'] == event]['full_text'].tolist()
    preds = []
    print("event: {}, has {} statements".format(event, len(all_statements)))
    for idx in range(0, len(all_statements), num_statements_per_prompt):
        statements = all_statements[idx:idx+num_statements_per_prompt]

        response = openai.ChatCompletion.create(
          model="gpt-4",
          messages = [{"role":"user","content":few_shot_prompt.format(event = event, statements=statements)}],
          stop=None)

        # The reply is parsed as a Python literal: a list of label strings.
        results = [str.lower(i) for i in ast.literal_eval(response.choices[0].message['content'])]

        # Keep exactly one label per statement. Surplus labels are cut at the size of
        # THIS batch (the last batch of an event can be shorter than
        # num_statements_per_prompt). A reply with too few labels is rejected here:
        # appended as-is it would shift every later label of the event onto the wrong
        # row, and could go unnoticed if another batch happened to return a surplus.
        if len(results) < len(statements):
            raise ValueError(
                "event {!r}, statements {}-{}: expected {} labels, got {}".format(
                    event, idx, idx + len(statements) - 1, len(statements), len(results)))
        preds += results[:len(statements)]
    df.loc[df['event'] == event, 'fsp_preds'] = preds

event: Russian President Putin has gone missing, has 46 statements
event: there was a shooting event at Charlie Hebdo in Paris, has 711 statements
event: singer Prince will play a secret show in Toronto, has 78 statements
event: There is unrest in Ferguson, Missouri, has 694 statements
event: A Germanwings plane crashed, has 209 statements
event: There was a shooting event in Ottawa, has 482 statements
event: There is a hostage situation in Sydney, has 623 statements
event: Michael Essien contracted Ebola, has 16 statements


In [144]:
# Distinct predicted labels in `fsp_preds` and how often each occurs
np.unique(df['fsp_preds'], return_counts=True)

(array(['agree', 'disagree', 'neutral'], dtype=object),
 array([1508,   54, 1297]))

In [145]:
# Score the few-shot predictions against the `stance` column
report = classification_report(y_true=df['stance'], y_pred=df['fsp_preds'])
print(report)

              precision    recall  f1-score   support

       agree       0.14      0.91      0.25       238
    disagree       0.04      0.22      0.06         9
     neutral       0.98      0.49      0.65      2612

    accuracy                           0.52      2859
   macro avg       0.39      0.54      0.32      2859
weighted avg       0.91      0.52      0.62      2859



## 3(c). Context + FSP + Reasoning

In [146]:
# Layout of one worked example when a justification is included: the event, the
# statement, the stance label, and the reason given for that label.

example_and_reason_template = '''
event: {event}
statement: {statement}
stance: {stance}
reason: {reason}
'''

example_and_reason_prompt = PromptTemplate(
    template=example_and_reason_template,
    input_variables=["event", "statement", "stance", "reason"],
)

In [147]:
# Worked examples for the few-shot + reasoning prompt.
# Each dict supplies the four fields of the example-and-reason template: event,
# statement, stance label, and a written reason for that label.
# NOTE(review): this rebinds the name `examples`, which an earlier cell also
# assigns (without the 'reason' key); any prompt built after this cell runs
# picks up this version of the list.

examples = [
    {'event':"Putin has gone missing",
     'statement':"Putin reappears on TV amid claims he is unwell and under threat of coup http://t.co/YZln23EUx1 http://t.co/ZsAnBa5gz3",
     'stance': 'DISAGREE',
     'reason': "the statement disagress with Putin having dissapeared as he was on TV and gives a reason for Putin's absence."
    },
    {'event':"Michael Essien contracted Ebola",
     'statement': '''What? "@FootballcomEN: Unconfirmed reports claim that Michael Essien has contracted Ebola. http://t.co/GsEizhwaV7"''',
     'stance': 'NEUTRAL',
     'reason': "the statement is neutral toward wether Michael Essen contracted Ebola, as it mostly just repeats the original post from @FootballcomEN askign for more information."
    },
    {'event':"A Germanwings plane crashed",
     'statement': '''@thatjohn @planefinder why would they say urgence in lieu of mayday which is standard ?''',
     'stance': 'NEUTRAL',
     'reason': "the statement is neutral toward wether a germanwings plane crashed as it is only asking for clarifiying details about what has been reported."
    },
    {'event':"There is a hostage situation in Sydney",
     'statement': '''@KEEMSTARx dick head it's not confirmed its Jihadist extremists. Don't speculate''',
     'stance': 'NEUTRAL',
     'reason': "the statement is neutral toward wether there is a hostage siutation happening in Sydney as it is admonishing someone for speculating on a detail of the situation."
    },
    {'event':"singer Prince will play a secret show in Toronto",
     'statement': '''OMG. #Prince rumoured to be performing in Toronto today. Exciting!''',
     'stance': 'AGREE',
     'reason': 'The statement agrees with singer Prince performing a show in Toronto, as it expresses excitment at the singer performing.'
    }
]

In [209]:
# Few-shot + reasoning prompt = prefix, then every entry of `examples` rendered
# through example_and_reason_prompt, then the suffix, which asks for two lists:
# the labels and the reasons.
prefix = '''
The following are examples of the stance of a social media comment towards an event happening. Each example has the stance that indicates wether the comment agrees that the event is happening and the reason for why that stance was given for that statement.
'''

suffix = '''
Now, classify each of the statements as to wether they "AGREE", "DISAGREE", or are "NEUTRAL" toward event "{event}" happening and your reasoning. Only return the classification label for all of the statements and the reasoning for each classification label in the form of:
[label, label, ...], [reason, reason, ...]

statements:
{statements}
'''

few_shot_and_reason_prompt = FewShotPromptTemplate(
    prefix=prefix,
    examples=examples,
    example_prompt=example_and_reason_prompt,
    example_separator="\n",
    suffix=suffix,
    input_variables=["event", "statements"],
)

### Run an example

In [210]:
# Use the event of the row labelled 0, and the first 20 statements (plus their
# recorded stances) that belong to that event.
event = df['event'][0]
event_rows = df[df['event'] == event]
statements = event_rows['full_text'].iloc[:20].tolist()
y_true = event_rows['stance'].iloc[:20].tolist()

In [211]:
# Send the whole list of statements to GPT-4 with the few-shot + reasoning prompt
user_message = {
    "role": "user",
    "content": few_shot_and_reason_prompt.format(event=event, statements=statements),
}
response = openai.ChatCompletion.create(model="gpt-4", messages=[user_message], stop=None)

# Raw text of the first returned choice
results = response.choices[0].message['content']

In [221]:
# Split the reply, requested in the form "[label, ...], [reason, ...]", into its
# two bracketed lists.
#  - The pattern is a raw string (the escaped brackets are invalid escape sequences
#    in a plain string literal) and matches "]" "," "[" with any whitespace,
#    including newlines, around the comma.
#  - maxsplit=1 stops at the first boundary, so a "], [" occurring inside a reason
#    cannot produce a third piece and break the two-way unpacking.
# NOTE: despite its name, statements_str holds the list of reasons.
stances_str, statements_str = re.split(r'\]\s*,\s*\[', results.strip(), maxsplit=1)

# Put back the bracket that the split consumed on each side
stances_str = stances_str + ']'
statements_str = '[' + statements_str

# Parse the label list and lowercase it to match the recorded stances
y_pred = [str.lower(i) for  i in ast.literal_eval(stances_str)]
#reasons = ast.literal_eval(statements_str)

y_true = [str.lower(i) for i in y_true]

# generating the classification report
report = classification_report(y_true, y_pred)
print(report)

              precision    recall  f1-score   support

       agree       0.25      1.00      0.40         5
    disagree       0.00      0.00      0.00         1
     neutral       0.00      0.00      0.00        14

    accuracy                           0.25        20
   macro avg       0.08      0.33      0.13        20
weighted avg       0.06      0.25      0.10        20



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Run on all Data

In [235]:
# Classify every statement with the few-shot + reasoning prompt, one event at a
# time, sending the statements to GPT-4 in batches of num_statements_per_prompt.
df['fsp_reason_preds'] =''
num_statements_per_prompt = 20

for event in df['event'].unique():
    all_statements = df[df['event'] == event]['full_text'].tolist()
    preds = []
    print("event: {}, has {} statements".format(event, len(all_statements)))
    for idx in range(0, len(all_statements), num_statements_per_prompt):
        statements = all_statements[idx:idx+num_statements_per_prompt]

        response = openai.ChatCompletion.create(
          model="gpt-4",
          messages = [{"role":"user","content":few_shot_and_reason_prompt.format(event = event, statements=statements)}],
          stop=None)
        results = response.choices[0].message['content']

        # Split "[label, ...], [reason, ...]" at the first "]" "," "[" boundary.
        # Raw-string pattern (the escaped brackets are invalid escapes in a plain
        # string); whitespace and newlines around the comma are accepted; maxsplit=1
        # keeps a "], [" inside a reason from breaking the two-way unpacking.
        # NOTE: despite its name, statements_str holds the list of reasons.
        stances_str, statements_str = re.split(r'\]\s*,\s*\[', results.strip(), maxsplit=1)

        # Put back the bracket that the split consumed on each side
        stances_str = stances_str + ']'
        statements_str = '[' + statements_str

        # Parse the label list and lowercase it to match the recorded stances
        batch_preds = [str.lower(i) for  i in ast.literal_eval(stances_str)]
        #reasons = ast.literal_eval(statements_str)

        # Keep exactly one label per statement. Surplus labels are cut at the size of
        # THIS batch (the last batch of an event can be shorter than
        # num_statements_per_prompt). A reply with too few labels is rejected here:
        # appended as-is it would shift every later label of the event onto the wrong
        # row, and could go unnoticed if another batch happened to return a surplus.
        if len(batch_preds) < len(statements):
            raise ValueError(
                "event {!r}, statements {}-{}: expected {} labels, got {}".format(
                    event, idx, idx + len(statements) - 1, len(statements), len(batch_preds)))
        preds += batch_preds[:len(statements)]

    df.loc[df['event'] == event, 'fsp_reason_preds'] = preds

event: Russian President Putin has gone missing, has 46 statements
event: there was a shooting event at Charlie Hebdo in Paris, has 711 statements
event: singer Prince will play a secret show in Toronto, has 78 statements
event: There is unrest in Ferguson, Missouri, has 694 statements
event: A Germanwings plane crashed, has 209 statements
event: There was a shooting event in Ottawa, has 482 statements
event: There is a hostage situation in Sydney, has 623 statements
event: Michael Essien contracted Ebola, has 16 statements


In [236]:
# Distinct predicted labels in `fsp_reason_preds` and how often each occurs
np.unique(df['fsp_reason_preds'], return_counts=True)

(array(['agree', 'disagree', 'neutral'], dtype=object),
 array([1491,   52, 1316]))

In [237]:
# Score the few-shot + reasoning predictions against the `stance` column
report = classification_report(y_true=df['stance'], y_pred=df['fsp_reason_preds'])
print(report)

              precision    recall  f1-score   support

       agree       0.14      0.86      0.24       238
    disagree       0.04      0.22      0.07         9
     neutral       0.97      0.49      0.65      2612

    accuracy                           0.52      2859
   macro avg       0.38      0.52      0.32      2859
weighted avg       0.90      0.52      0.62      2859



In [238]:
# Save the frame, including whatever prediction columns were added to it, as a CSV
# at this relative path (pandas writes the index as the first column by default).
df.to_csv("phemerumors_gpt4_results.csv")