# Commonsense Causal Reasoning

In [1]:
import torch
import numpy as np
from dataclasses import dataclass
from typing import Optional, Union
from transformers import AutoTokenizer
from datasets import load_dataset, load_metric
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
from transformers import AutoModelForMultipleChoice, TrainingArguments, Trainer
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy

  from .autonotebook import tqdm as notebook_tqdm


## Load COPA Dataset

In [2]:
copa = load_dataset("super_glue", "copa")

Found cached dataset super_glue (C:/Users/lyh/.cache/huggingface/datasets/super_glue/copa/1.0.3/bb9675f958ebfee0d5d6dc5476fafe38c79123727a7258d515c450873dbdbbed)
100%|██████████| 3/3 [00:00<00:00, 110.39it/s]


In [6]:
# See one example

copa["train"][50]

{'premise': 'I drank from the water fountain.',
 'choice1': 'I was thirsty.',
 'choice2': 'I felt nauseous.',
 'question': 'cause',
 'idx': 50,
 'label': 0}

In [3]:
# Data size
print(len(copa["train"]))
print(len(copa["validation"]))
print(len(copa["test"]))

400
100
500


## Preprocessing

See [Multiple choice](https://huggingface.co/docs/transformers/tasks/multiple_choice).

Here, we use `AutoModelForMultipleChoice` for the baseline. The model receives one input sentence as question and several sentences as candidates. Then the model predicts the correct answer sentence by text classification. Here we use `premise` with `question` as query and choice_i as candidates.

Example 1:

```python
{'premise': 'My body cast a shadow over the grass.',
 'choice1': 'The sun was rising.',
 'choice2': 'The grass was cut.',
 'question': 'cause',
 'idx': 0,
 'label': 0}
```

- `query`: my body cast a shadow over the grass because
- `candidates1`: the sun was rising.
- `candidates2`: the grass was cut.

---

Example 2:

```python
{'premise': 'The elderly woman suffered a stroke.',
 'choice1': "The woman's daughter came over to clean her house.",
 'choice2': "The woman's daughter moved in to take care of her.",
 'question': 'effect',
 'idx': 11,
 'label': 1}
```

- `query`: the elderly woman suffered a stroke so
- `candidates1`: the woman's daughter came over to clean her house.
- `candidates2`: the woman's daughter moved in to take care of her.

## Few-shot Learning


## Preprocessing

See [Multiple choice](https://huggingface.co/docs/transformers/tasks/multiple_choice).

Here, we use `AutoModelForMultipleChoice` for the baseline. The model receives one input sentence as question and several sentences as candidates. Then the model predicts the correct answer sentence by text classification. Here we use `premise` with `question` as query and choice_i as candidates.

Example 1:

```python
{'premise': 'My body cast a shadow over the grass.',
 'choice1': 'The sun was rising.',
 'choice2': 'The grass was cut.',
 'question': 'cause',
 'idx': 0,
 'label': 0}
```

- `query`: my body cast a shadow over the grass because
- `candidates1`: the sun was rising.
- `candidates2`: the grass was cut.

---

Example 2:

```python
{'premise': 'The elderly woman suffered a stroke.',
 'choice1': "The woman's daughter came over to clean her house.",
 'choice2': "The woman's daughter moved in to take care of her.",
 'question': 'effect',
 'idx': 11,
 'label': 1}
```

- `query`: the elderly woman suffered a stroke so
- `candidates1`: the woman's daughter came over to clean her house.
- `candidates2`: the woman's daughter moved in to take care of her.

In [3]:
copa["train"][0]["label"]

0

In [15]:
import os
import openai

openai.api_key = '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'

'''
prompt_create = generate_prompt()

response = openai.Completion.create(
  model="text-davinci-003",
  prompt=prompt_create,
  temperature=0.7,
  max_tokens=256,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0
)
'''

'\nprompt_create = generate_prompt()\n\nresponse = openai.Completion.create(\n  model="text-davinci-003",\n  prompt=prompt_create,\n  temperature=0.7,\n  max_tokens=256,\n  top_p=1,\n  frequency_penalty=0,\n  presence_penalty=0\n)\n'

In [5]:
def generate_prompt_direct(example):
    return """ Get Answer from Premise and Question.
    Premise:The teacher took roll.
    Question:What is the effect of it?
    Answer:She identified the students that were absent.
    Premise:The book was deemed inappropriate for children.
    Question:What is the effect of it?
    Answer:Schools banned it from its libraries.
    Premise:The child caught a life-threatening illness.
    Question:What is the cause of it?
    Answer:She didn't get vaccinated.
    Premise:The young woman was denied entrance into the bar.
    Question:What is the cause of it?
    Answer:She forgot her ID.
    Premise:The man had lipstick on his cheek.
    Question:What is the cause of it?
    Answer:The woman kissed him.
    Premise:The woman had a religious awakening.
    Question:What is the effect of it?
    Answer:She began going to church.
    Premise:The woman deleted the email.
    Question:What is the cause of it?
    Answer:The sender tried to solicit money from her.
    Premise:The girl wanted to make her mother happy.
    Question:What is the effect of it?
    Answer:The girl picked a flower for her mother.
    Premise:The girl applied the scissors to the paper.
    Question:What is the effect of it?
    Answer:The paper sliced apart.
    Premise:I got distracted from my conversation with the woman.
    Question:What is the cause of it?
    Answer:Everyone in the room was talking.
    Premise: {}
    Question: What is the {} of it?
    Answer:
    
    """.format(
        example['premise'],
        example['question'],
    )  

In [6]:
copa["train"][240]

{'premise': 'I got distracted from my conversation with the woman.',
 'choice1': 'Everyone in the room was talking.',
 'choice2': 'The woman was telling a funny story.',
 'question': 'cause',
 'idx': 240,
 'label': 0}

In [7]:
def index(example):
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=generate_prompt_direct(example),
        temperature=0.7,
        max_tokens=100,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response

In [None]:
import gensim
from sklearn.metrics.pairwise import cosine_similarity

# load word2vec
model = gensim.models.KeyedVectors.load_word2vec_format('path to word2vec e.g. GoogleNews-vectors-negative300.bin', binary=True)

# your inputs
first_sentence_list = ['driver', 'backs', 'into', 'stroller', 'with', 'child', ',', 'drives', 'off']
second_sentence_list = ['driver', 'backs', 'into', 'mom', ',', 'stroller', 'with', 'child', 'then', 'drives', 'off']

# remove oov
first = [word for word in first_sentence_list if word in model.key_to_index]
second = [word for word in second_sentence_list if word in model.key_to_index]

# average word embeddings to get sentence embeddings
first_sent_embedding = np.mean(model[first], axis=0)
second_sent_embedding = np.mean(model[second], axis=0)

# calculate similarities
result = cosine_similarity(first_sent_embedding.reshape(1,-1),first_sent_embedding.reshape(1,-1))

print(result)

In [19]:
from nltk.translate.bleu_score import sentence_bleu
nltk.translate import meteor
import time
from rouge import Rouge

rouge_model = Rouge()
meteor = evaluate.load('meteor')


NameError: name 'evaluate' is not defined

In [9]:
def generate_prediction(response):
    ans = response.choices[0].text
    ans = ans.strip()
    
    return ans

In [16]:
def test_direct(examples, sleep_time=1.2):
    #labels = np.array(examples['label'])
    premises = examples['premise']
    questions = examples['question']
    labels = []
    for i in range(len(examples["premise"])):
        if examples["label"][i] == 0:
            labels.append(examples["choice1"][i])
        else:
            labels.append(examples["choice2"][i])
    bleu_scores = []
    meteor_scores = []
    anss = []
    responses = []
    for i in range(len(premises)):
        example = { 
            'premise': premises[i],
            'question': questions[i],
            #'label': labels[i]
        }
        res = index(example)
        ans = generate_prediction(res)
        time.sleep(sleep_time)
        responses.append(res)
        anss.append(ans)
        meteor_score = round(meteor([word_tokenize(ans)], [word_tokenize(labels[i])]))
        meteor_scores.append()
        bleu_score_1 = sentence_bleu(labels[i].split(), ans,weights=(1, 0, 0, 0))
        #bleu_score_2 = sentence_bleu(labels[i].split(), ans,weights=(0, 1, 0, 0))
        #bleu_score = np.exp(np.log(bleu_score_1) + 0.5*np.log(bleu_score_2))
        bleu_scores.append(bleu_score_1)
        rouge_score = rouge_model(ans, labels, avg = True)
    return bleu_scores, labels, responses, anss, rouge_score, meteor_scores

In [17]:
bleu_scores, labels, responses, anss, rouge_score, meteor_scores = test_direct(copa['validation'][:20])

APIError: The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 00cf52643f837e2ce16dfdcfe02d2b0b in your message.) {
  "error": {
    "message": "The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 00cf52643f837e2ce16dfdcfe02d2b0b in your message.)",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'The server had an error while processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if the error persists. (Please include the request ID 00cf52643f837e2ce16dfdcfe02d2b0b in your message.)', 'type': 'server_error', 'param': None, 'code': None}} {'Date': 'Tue, 06 Dec 2022 20:24:43 GMT', 'Content-Type': 'application/json', 'Content-Length': '366', 'Connection': 'keep-alive', 'Access-Control-Allow-Origin': '*', 'Openai-Organization': 'epfl-146', 'Openai-Processing-Ms': '30742', 'Openai-Version': '2020-10-01', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains', 'X-Request-Id': '00cf52643f837e2ce16dfdcfe02d2b0b'}

In [None]:
bleu_scores_average = np.mean(bleu_scores)

In [None]:
bleu_scores_average

In [None]:
meteor_scores_average = np.mean(meteor_scores)

In [None]:
meteor_scores_average

In [None]:
rouge_score

In [17]:
def show_example_response(r, example, ans):
    print(example)
    #print(r.choices[0].text)
    print("Answer :", ans)

In [18]:
for i in range(20):
    show_example_response(responses[i], copa['validation'][i], anss[i])

{'premise': 'The man turned on the faucet.', 'choice1': 'The toilet filled with water.', 'choice2': 'Water flowed from the spout.', 'question': 'effect', 'idx': 0, 'label': 1}
Answer : Water began to flow from the faucet.
{'premise': 'The girl found a bug in her cereal.', 'choice1': 'She poured milk in the bowl.', 'choice2': 'She lost her appetite.', 'question': 'effect', 'idx': 1, 'label': 1}
Answer : She was disgusted and threw out the cereal.
{'premise': 'The woman retired.', 'choice1': 'She received her pension.', 'choice2': 'She paid off her mortgage.', 'question': 'effect', 'idx': 2, 'label': 0}
Answer : She stopped working and began to enjoy her free time.
{'premise': 'I wanted to conserve energy.', 'choice1': 'I swept the floor in the unoccupied room.', 'choice2': 'I shut off the light in the unoccupied room.', 'question': 'effect', 'idx': 3, 'label': 1}
Answer : I turned off the lights when I left the room.
{'premise': 'The hamburger meat browned.', 'choice1': 'The cook froze 