# Commonsense Causal Reasoning Using Prompts

In [172]:
import numpy as np
from datasets import load_dataset, load_metric

import time
import os
import openai
from sklearn.metrics import classification_report

# Tell the openai client to read the API key from a local file instead of
# embedding the key in the notebook.
openai.api_key_path="api_key.txt"

## Load COPA Dataset

In [2]:
# Load the "copa" configuration of the "super_glue" dataset; the returned
# object is indexed by split name ("train", "validation", "test") below.
copa = load_dataset("super_glue", "copa")

Found cached dataset super_glue (C:/Users/gazer/.cache/huggingface/datasets/super_glue/copa/1.0.3/bb9675f958ebfee0d5d6dc5476fafe38c79123727a7258d515c450873dbdbbed)
100%|██████████| 3/3 [00:00<00:00, 601.59it/s]


In [3]:
# See one example: the first record of the training split, to check which
# fields a record carries.
copa["train"][0]

{'premise': 'My body cast a shadow over the grass.',
 'choice1': 'The sun was rising.',
 'choice2': 'The grass was cut.',
 'question': 'cause',
 'idx': 0,
 'label': 0}

In [4]:
# Data size: number of examples in each split, in train / validation / test order
for split_name in ("train", "validation", "test"):
    print(len(copa[split_name]))

400
100
500


## Baseline

Use every field of an example (premise, both choices, and the question type) directly as the prompt.

In [11]:
def generate_prompt_base(example):
    """Build the baseline prompt for a single COPA example.

    The premise, the two candidate sentences and the question type are
    substituted, in that order, into a fixed instruction template that ends
    with an open ``Answer:`` slot.

    Args:
        example: Mapping with the keys 'premise', 'choice1', 'choice2' and
            'question'.

    Returns:
        The prompt text as a single string.
    """
    # The template is spelled out line by line so its exact whitespace
    # (leading space, four-space indents, trailing indent) stays visible.
    template = (
        " Identify the correct response from two sentences.\n"
        "    \n"
        "    Premise: {}\n"
        "    Choice1: {}\n"
        "    Choice2: {}\n"
        "    Question: {}\n"
        "    Answer:\n"
        "    "
    )
    field_order = ('premise', 'choice1', 'choice2', 'question')
    return template.format(*(example[field] for field in field_order))

In [104]:
def index(example, model="text-davinci-003", temperature=0.7, max_tokens=256):
    """Send the baseline prompt for one example to the OpenAI completion API.

    The defaults reproduce the settings this notebook was originally run
    with; they are exposed as parameters so a different completion model or
    sampling configuration can be used without editing the function.

    Args:
        example: Mapping with the keys 'premise', 'choice1', 'choice2' and
            'question'; it is turned into a prompt by generate_prompt_base.
        model: Name of the completion model to query.
        temperature: Sampling temperature passed to the API. With the
            default of 0.7 the completions are sampled, so repeated runs
            may give different answers.
        max_tokens: Upper bound on the length of the completion.

    Returns:
        The response object returned by ``openai.Completion.create``.
    """
    return openai.Completion.create(
        model=model,
        prompt=generate_prompt_base(example),
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

In [153]:
def generate_pred_label(choices, response):
    """Map a completion onto the index of the choice it names.

    The completion text is cleaned by removing the literal prefixes
    "Choice1:" / "Choice2:" and stripping surrounding whitespace. The cleaned
    text is then attributed to a choice using progressively more tolerant
    rules, so that harmless formatting differences in the model output are
    not scored as wrong answers:

    1. verbatim equality with a choice;
    2. equality ignoring case, repeated whitespace and trailing ``. ! ?``;
    3. the answer contains exactly one of the (normalised) choices, e.g.
       "Choice 2: <sentence>" or "The answer is <sentence>";
    4. the completion is nothing but a label such as "Choice2" or
       "choice 1:".

    Args:
        choices: Sequence whose first two items are the candidate sentences.
        response: Completion response; only ``response.choices[0].text`` is
            read.

    Returns:
        A ``(label, ans)`` tuple. ``label`` is 0 or 1 for the matched choice
        and -1 when the completion cannot be attributed to exactly one
        choice. ``ans`` is the cleaned completion text.
    """
    def _canon(text):
        # Collapse whitespace, drop trailing sentence punctuation, ignore case.
        return " ".join(text.split()).rstrip(" .!?").casefold()

    raw = response.choices[0].text
    ans = raw.replace("Choice1:", "").replace("Choice2:", "").strip()

    # Rule 1: verbatim match.
    if ans == choices[0]:
        return 0, ans
    if ans == choices[1]:
        return 1, ans

    canon_ans = _canon(ans)
    canon_choices = [_canon(choice) for choice in choices[:2]]
    if canon_ans:
        # Rule 2: equal up to case / whitespace / final punctuation.
        same = [i for i, c in enumerate(canon_choices) if c == canon_ans]
        if len(same) == 1:
            return same[0], ans
        # Rule 3: the answer quotes exactly one choice. Quoting both (or
        # neither) is ambiguous and falls through.
        inside = [i for i, c in enumerate(canon_choices) if c and c in canon_ans]
        if len(inside) == 1:
            return inside[0], ans

    # Rule 4: label-only reply. This looks at the raw text because the
    # cleaning step above erases "Choice1:" / "Choice2:" entirely.
    bare = "".join(raw.split()).rstrip(":.").casefold()
    if bare in ("choice1", "choice2"):
        return int(bare[-1]) - 1, ans

    return -1, ans

In [165]:
def test(examples, sleep_time=0.5):
    """Run the baseline prompt over a batch of examples.

    Each example is sent to the completion API through ``index`` and the
    reply is converted to a label with ``generate_pred_label``. The function
    pauses ``sleep_time`` seconds after every request.

    Args:
        examples: Column-oriented batch, i.e. a mapping from the field names
            'label', 'premise', 'choice1', 'choice2' and 'question' to
            equally long sequences.
        sleep_time: Seconds to wait after each request.

    Returns:
        A tuple ``(y_pred, labels, responses, anss)``: the predicted labels
        (float array; -1 marks an unparsed reply), the gold labels as an
        array, the raw API responses and the parsed answer strings.
    """
    labels = np.array(examples['label'])
    n_examples = len(labels)

    # Pull the prompt columns out once; the dict order fixes the field order
    # of each per-example record built below.
    columns = {
        field: examples[field]
        for field in ('premise', 'choice1', 'choice2', 'question')
    }

    y_pred = np.zeros(n_examples)
    responses, anss = [], []
    for row in range(n_examples):
        example = {field: column[row] for field, column in columns.items()}
        res = index(example)
        candidates = [example['choice1'], example['choice2']]
        label, ans = generate_pred_label(candidates, res)
        y_pred[row] = label
        time.sleep(sleep_time)
        responses.append(res)
        anss.append(ans)

    return y_pred, labels, responses, anss



In [154]:
# Try the pipeline on the first five validation examples before running it
# over the full split.
y_pred, y_true, responses, anss= test(copa['validation'][:5])

In [158]:
def show_example_response(r, example, ans):
    """Print an example, the raw completion text, and the parsed answer.

    Args:
        r: Completion response; ``r.choices[0].text`` is printed.
        example: The dataset record the response belongs to.
        ans: The answer string extracted from the response.
    """
    print(example)
    first_completion = r.choices[0]
    print(first_completion.text)
    print("Answer :", ans)

In [159]:
# Show each of the five smoke-test examples next to the raw completion and
# the answer parsed from it.
for example_idx in range(5):
    show_example_response(
        responses[example_idx],
        copa['validation'][example_idx],
        anss[example_idx],
    )
    print('\n\n')

{'premise': 'The man turned on the faucet.', 'choice1': 'The toilet filled with water.', 'choice2': 'Water flowed from the spout.', 'question': 'effect', 'idx': 0, 'label': 1}

Choice2: Water flowed from the spout.
Answer : Water flowed from the spout.



{'premise': 'The girl found a bug in her cereal.', 'choice1': 'She poured milk in the bowl.', 'choice2': 'She lost her appetite.', 'question': 'effect', 'idx': 1, 'label': 1}
 She lost her appetite.
Answer : She lost her appetite.



{'premise': 'The woman retired.', 'choice1': 'She received her pension.', 'choice2': 'She paid off her mortgage.', 'question': 'effect', 'idx': 2, 'label': 0}
 She received her pension.
Answer : She received her pension.



{'premise': 'I wanted to conserve energy.', 'choice1': 'I swept the floor in the unoccupied room.', 'choice2': 'I shut off the light in the unoccupied room.', 'question': 'effect', 'idx': 3, 'label': 1}
 I shut off the light in the unoccupied room.
Answer : I shut off the light in the 

### Baseline on the validation set

In [170]:
# Evaluate the baseline on the whole validation split in batches, pausing
# between batches (presumably to stay under the API rate limit).
BATCH_SIZE = 20
n_val = len(copa['validation'])

val_pred = np.zeros(n_val)
val_true = np.zeros(n_val)
val_responses = []  # one list of raw responses per batch
val_ans = []        # one list of parsed answers per batch

# The number of batches is derived from the split size (ceiling division)
# so that every example is scored even when the size is not a multiple of
# BATCH_SIZE; unfilled slots would otherwise stay 0 and be read as class 0.
for i in range((n_val + BATCH_SIZE - 1) // BATCH_SIZE):
    a = i*BATCH_SIZE
    b = min(a + BATCH_SIZE, n_val)
    # NOTE: this rebinds the notebook-level `responses` and `anss` that the
    # five-example display cell above reads; re-running that cell after this
    # one would pair the examples with the wrong responses.
    pred, true, responses, anss= test(copa['validation'][a:b], sleep_time = 1)
    val_pred[a:b] = pred
    val_true[a:b] = true
    val_responses.append(responses)
    val_ans.append(anss)
    # No need to wait once the last batch has been processed.
    if b < n_val:
        time.sleep(10)

In [171]:
# Per-class precision / recall / F1 of the baseline on the validation split.
# A completion that generate_pred_label could not match to either choice is
# stored as -1 and would appear here as a class of its own.
print(classification_report(val_true, val_pred))

              precision    recall  f1-score   support

         0.0       0.90      0.96      0.93        55
         1.0       0.95      0.87      0.91        45

    accuracy                           0.92       100
   macro avg       0.92      0.92      0.92       100
weighted avg       0.92      0.92      0.92       100

