# Commonsense Causal Reasoning

In [17]:
import torch
import numpy as np
from dataclasses import dataclass
from typing import Optional, Union
from transformers import AutoTokenizer
from datasets import load_dataset, load_metric
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy

## Load COPA Dataset

In [18]:
copa = load_dataset("super_glue", "copa")

Found cached dataset super_glue (C:/Users/lyh/.cache/huggingface/datasets/super_glue/copa/1.0.3/bb9675f958ebfee0d5d6dc5476fafe38c79123727a7258d515c450873dbdbbed)
100%|██████████| 3/3 [00:00<00:00, 601.65it/s]


In [19]:
# See one example

copa["train"][50]

{'premise': 'I drank from the water fountain.',
 'choice1': 'I was thirsty.',
 'choice2': 'I felt nauseous.',
 'question': 'cause',
 'idx': 50,
 'label': 0}

In [20]:
# Data size
print(len(copa["train"]))
print(len(copa["validation"]))
print(len(copa["test"]))

400
100
500


## Preprocessing

See [Multiple choice](https://huggingface.co/docs/transformers/tasks/multiple_choice).

Here, we use `AutoModelForMultipleChoice` for the baseline. The model receives one input sentence as question and several sentences as candidates. Then the model predicts the correct answer sentence by text classification. Here we use `premise` with `question` as query and choice_i as candidates.

Example 1:

```python
{'premise': 'My body cast a shadow over the grass.',
 'choice1': 'The sun was rising.',
 'choice2': 'The grass was cut.',
 'question': 'cause',
 'idx': 0,
 'label': 0}
```

- `query`: my body cast a shadow over the grass because
- `candidates1`: the sun was rising.
- `candidates2`: the grass was cut.

---

Example 2:

```python
{'premise': 'The elderly woman suffered a stroke.',
 'choice1': "The woman's daughter came over to clean her house.",
 'choice2': "The woman's daughter moved in to take care of her.",
 'question': 'effect',
 'idx': 11,
 'label': 1}
```

- `query`: the elderly woman suffered a stroke so
- `candidates1`: the woman's daughter came over to clean her house.
- `candidates2`: the woman's daughter moved in to take care of her.

## Few-shot Learning


## Preprocessing

See [Multiple choice](https://huggingface.co/docs/transformers/tasks/multiple_choice).

Here, we use `AutoModelForMultipleChoice` for the baseline. The model receives one input sentence as question and several sentences as candidates. Then the model predicts the correct answer sentence by text classification. Here we use `premise` with `question` as query and choice_i as candidates.

Example 1:

```python
{'premise': 'My body cast a shadow over the grass.',
 'choice1': 'The sun was rising.',
 'choice2': 'The grass was cut.',
 'question': 'cause',
 'idx': 0,
 'label': 0}
```

- `query`: my body cast a shadow over the grass because
- `candidates1`: the sun was rising.
- `candidates2`: the grass was cut.

---

Example 2:

```python
{'premise': 'The elderly woman suffered a stroke.',
 'choice1': "The woman's daughter came over to clean her house.",
 'choice2': "The woman's daughter moved in to take care of her.",
 'question': 'effect',
 'idx': 11,
 'label': 1}
```

- `query`: the elderly woman suffered a stroke so
- `candidates1`: the woman's daughter came over to clean her house.
- `candidates2`: the woman's daughter moved in to take care of her.

In [21]:
copa["train"][0]["label"]

0

In [22]:
import os
import openai

openai.api_key = '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'

In [23]:
def generate_prompt_direct(example):
    return """ Answer the Question of Premise.
    Premise:The teacher took roll.
    Question:What is the effect of Premise?
    Answer:She identified the students that were absent.
    
    Premise:The book was deemed inappropriate for children.
    Question:What is the effect of Premise?
    Answer:Schools banned it from its libraries.
    
    Premise:The child caught a life-threatening illness.
    Question:What is the cause of Premise?
    Answer:She didn't get vaccinated.
    
    Premise:The young woman was denied entrance into the bar.
    Question:What is the cause of Premise?
    Answer:She forgot her ID.
    
    Premise:The man had lipstick on his cheek.
    Question:What is the cause of Premise?
    Answer:The woman kissed him.
    
    Premise:The woman had a religious awakening.
    Question:What is the effect of Premise?
    Answer:She began going to church.
    
    Premise:The woman deleted the email.
    Question:What is the cause of Premise?
    Answer:The sender tried to solicit money from her.
    
    Premise:The girl wanted to make her mother happy.
    Question:What is the effect of Premise?
    Answer:The girl picked a flower for her mother.
    
    Premise:The girl applied the scissors to the paper.
    Question:What is the effect of Premise?
    Answer:The paper sliced apart.
    
    Premise:I got distracted from my conversation with the woman.
    Question:What is the cause of Premise?
    Answer:Everyone in the room was talking.
    
    Premise:The cat purred.
    Question:What is the cause of Premise?
    Answer:I petted it.
    
    Premise:The police officer pulled over the celebrity.
    Question:What is the effect of Premise?
    Answer:The celebrity offered the officer a bribe.
    
    Premise:My feet were blistered.
    Question:What is the cause of Premise?
    Answer:I went hiking.
    
    Premise:The shopper wondered about the cost of the item.
    Question:What is the effect of Premise?
    Answer:He checked its price tag.
    
    Premise:The woman wrote a check to the gas company.
    Question:What is the cause of Premise?
    Answer:She received her monthly bill.
    
    Premise:I applied pressure to the cut on my arm.
    Question:What is the effect of Premise?
    Answer:It stopped bleeding.
    
    Premise:The man needed coins to fill the parking meter.
    Question:What is the effect of Premise?
    Answer:He searched under his car seats for loose change.
    
    Premise:I lingered in bed upon awakening.
    Question:What is the cause of Premise?
    Answer:It was Saturday.
    
    Premise:I put ice cubes in the hot soup.
    Question:What is the effect of Premise?
    Answer:The soup cooled down.
    
    Premise:I ran the ice cube under warm water.
    Question:What is the effect of Premise?
    Answer:The ice cube vanished.2
    
    Premise:The patient underwent the risky medical procedure.
    Question:What is the cause of Premise?
    Answer:Specialists recommended the procedure.
    
    Premise: {}
    Question:What is the {} of Premise?
    Answer:
    
    """.format(
        example['premise'],
        example['question'],
    )  

In [24]:
copa["train"][360]

{'premise': 'The patient underwent the risky medical procedure.',
 'choice1': 'The procedure was costly.',
 'choice2': 'Specialists recommended the procedure.',
 'question': 'cause',
 'idx': 360,
 'label': 1}

In [25]:
def index(example):
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=generate_prompt_direct(example),
        temperature=0.7,
        max_tokens=100,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response

In [26]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate import meteor
from nltk import word_tokenize
import time
from torchmetrics.functional.text.rouge import rouge_score
from cider.cider import Cider

cider = Cider()


In [27]:
def generate_prediction(response):
    ans = response.choices[0].text
    ans = ans.strip()
    
    return ans

In [28]:
def test_direct(examples, sleep_time=1):
    #labels = np.array(examples['label'])
    premises = examples['premise']
    questions = examples['question']
    labels = []
    for i in range(len(examples["premise"])):
        if examples["label"][i] == 0:
            labels.append(examples["choice1"][i])
        else:
            labels.append(examples["choice2"][i])
    bleu_scores = []
    bleu_scores_1 = []
    bleu_scores_2 = []
    bleu_scores_3 = []
    bleu_scores_4 = []
    meteor_scores = []
    anss = []
    rouge_scores = [] 
    responses = []
    for i in range(len(premises)):
        example = { 
            'premise': premises[i],
            'question': questions[i],
        }
        res = index(example)
        ans = generate_prediction(res)
        time.sleep(sleep_time)
        responses.append(res)
        anss.append(ans)
        meteor_score = round(meteor([word_tokenize(ans)], word_tokenize(labels[i])))
        meteor_scores.append(meteor_score)
        bleu_score_4 = sentence_bleu(labels[i].split(), ans)#,weights=(1, 0, 0, 0))
        bleu_score_3 = sentence_bleu(labels[i].split(), ans,weights=(0, 0, 1, 0))
        bleu_score_2 = sentence_bleu(labels[i].split(), ans,weights=(0, 1, 0, 0))
        bleu_score_1 = sentence_bleu(labels[i].split(), ans,weights=(1, 0, 0, 0))
        bleu_score = np.exp(np.log(bleu_score_1) + 0.5*np.log(bleu_score_2) + 1/3*np.log(bleu_score_3)+0.25*np.log(bleu_score_4))
        bleu_scores.append(bleu_score)
        bleu_scores_1.append(bleu_score_1)
        bleu_scores_2.append(bleu_score_2)
        bleu_scores_3.append(bleu_score_3)
        bleu_scores_4.append(bleu_score_4)
        rouge_ = rouge_score(ans, labels[i])
        rouge_scores.append(rouge_)
    cider_score = cider.compute(anss, [labels])
    return bleu_scores,bleu_scores_1,bleu_scores_2,bleu_scores_3,bleu_scores_4, labels, responses, anss, rouge_scores, meteor_scores, cider_score# rouge_score# meteor_scores

In [29]:
bleu_scores,bleu_scores_1,bleu_scores_2,bleu_scores_3,bleu_scores_4, labels,responses, anss, rouge_scores, meteor_scores,cider_score = test_direct(copa['validation'][:100])

RateLimitError: You exceeded your current quota, please check your plan and billing details.

In [170]:
bleu_scores_average = np.mean(bleu_scores)

In [171]:
bleu_scores_average

0.07022913222912637

In [172]:
bleu_scores1_average = np.mean(bleu_scores_1)

In [173]:
bleu_scores1_average

0.3984325808408321

In [174]:
bleu_scores1_average = np.mean(bleu_scores_1)
bleu_scores2_average = np.mean(bleu_scores_2)
bleu_scores3_average = np.mean(bleu_scores_3)
bleu_scores4_average = np.mean(bleu_scores_4)
print(bleu_scores1_average)
print(bleu_scores2_average)
print(bleu_scores3_average)
print(bleu_scores4_average)

0.3984325808408321
0.23815132012019802
0.1468892222838526
0.1514231753999145


In [160]:
meteor_scores_average = np.mean(meteor_scores)

In [161]:
meteor_scores_average

0.27

In [180]:
rouge_list = []
for rouge_ in rouge_scores:
    rouge_list.append(rouge_['rouge2_recall'])
rouge_average = np.mean(rouge_list)
rouge_average

0.14061905

In [None]:
cider_score

In [145]:
def show_example_response(r, example, ans):
    print(example)
    #print(r.choices[0].text)
    print("Answer :", ans)

In [155]:
for i in range(20):
    show_example_response(responses[i], copa['validation'][i], anss[i])

{'premise': 'The man turned on the faucet.', 'choice1': 'The toilet filled with water.', 'choice2': 'Water flowed from the spout.', 'question': 'effect', 'idx': 0, 'label': 1}
Answer : Water began flowing out of the faucet.
{'premise': 'The girl found a bug in her cereal.', 'choice1': 'She poured milk in the bowl.', 'choice2': 'She lost her appetite.', 'question': 'effect', 'idx': 1, 'label': 1}
Answer : She threw the cereal away.
{'premise': 'The woman retired.', 'choice1': 'She received her pension.', 'choice2': 'She paid off her mortgage.', 'question': 'effect', 'idx': 2, 'label': 0}
Answer : She stopped working and began collecting her pension.
{'premise': 'I wanted to conserve energy.', 'choice1': 'I swept the floor in the unoccupied room.', 'choice2': 'I shut off the light in the unoccupied room.', 'question': 'effect', 'idx': 3, 'label': 1}
Answer : I turned off the lights when I left the room.
{'premise': 'The hamburger meat browned.', 'choice1': 'The cook froze it.', 'choice2'