In [None]:
!pip install google-generativeai



In [None]:
import pathlib
import random
import textwrap

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown


def to_markdown(text):
    """Wrap ``text`` in a Markdown block quote for notebook display.

    Bullet characters ('•') are first turned into indented Markdown list
    markers, then every line (blank ones included) is prefixed with '> '.
    """
    bulleted = text.replace('•', '  *')
    quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
    return Markdown(quoted)

In [None]:
# Mount Google Drive at /content/drive; the dataset and responses paths used
# later in this notebook are located under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Configure Gemini API
# The key is fetched through google.colab.userdata under the name 'gemini',
# so it is not written into the notebook itself.
from google.colab import userdata
GOOGLE_API_KEY = userdata.get('gemini')
genai.configure(api_key=GOOGLE_API_KEY)
# Model handle that is later passed to evaluate_accuracy_with_gemini.
gemini_model = genai.GenerativeModel('gemini-pro')

In [None]:
# Run number; it is embedded in the responses file name, so changing it makes
# the evaluation read/write a different responses file.
run = 1

In [None]:
import json
import re
import google.generativeai as genai

# One record per graded response; is_response_correct appends to this list and
# the evaluation cell later writes its contents to the responses file.
saved_responses = []

# Function to normalize text for comparison
def normalize_text(text):
    """Return ``text`` lower-cased, without punctuation and single-spaced.

    Every character that is neither a word character nor whitespace is
    removed, each run of whitespace becomes one space, and leading/trailing
    whitespace is stripped.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text.lower())
    single_spaced = re.sub(r'\s+', ' ', without_punctuation)
    return single_spaced.strip()

# Function to determine if the response is correct
def is_response_correct(response, item):
    """Decide whether the model's ``response`` selects the correct hypothesis.

    Side effect: a record describing the item and the normalized response is
    appended to the module-level ``saved_responses`` list on every call.

    Grading order:
      1. The normalized response equals the normalized correct hypothesis.
      2. The response names a hypothesis number ("hypothesis 1/2/3"); the
         first number mentioned is taken as the model's choice.
      3. Word-overlap fallback: more than half of the correct hypothesis'
         words occur (as whole words) in the response, and neither of the
         other choices is matched verbatim or overlaps more strongly.

    Args:
        response: Raw text returned by the model.
        item: Dataset entry with the keys 'index', 'context', 'ask-for',
            'choice_id: 0', 'choice_id: 1', 'choice_id: 2' and a 0-based
            integer 'label'.

    Returns:
        True when the response is graded as correct, False otherwise.
    """
    label = item['label']
    correct_hypothesis = item['choice_id: ' + str(label)]
    response_text = normalize_text(response)
    correct_hypothesis_normalized = normalize_text(correct_hypothesis)

    saved_responses.append({
        'index': item['index'],
        'premise': item['context'],
        'task': item['ask-for'],
        'hypothesis1': item['choice_id: 0'],
        'hypothesis2': item['choice_id: 1'],
        'hypothesis3': item['choice_id: 2'],
        'label': label,
        'generated_response': response_text
    })

    if response_text == correct_hypothesis_normalized:
        return True

    # Use the FIRST hypothesis number the model mentions: an answer such as
    # "Hypothesis 2, not hypothesis 1" picks 2, not the lowest number present.
    mention = re.search(r'\bhypothesis ([123])\b', response_text)
    if mention:
        return int(mention.group(1)) == label + 1

    distractors = [
        normalize_text(item['choice_id: ' + str(choice)])
        for choice in range(3)
        if choice != label
    ]
    # A verbatim wrong choice is wrong, however many words it shares with the
    # correct one.
    if response_text in distractors:
        return False

    # Compare whole words; substring tests would let short words such as "a"
    # or "in" match inside unrelated words.
    response_words = set(response_text.split())

    def overlap(hypothesis_normalized):
        words = hypothesis_normalized.split()
        if not words:
            # Nothing to match against (also avoids a division by zero).
            return 0.0
        return sum(word in response_words for word in words) / len(words)

    correct_overlap = overlap(correct_hypothesis_normalized)
    return correct_overlap > 0.5 and all(
        correct_overlap >= overlap(distractor) for distractor in distractors
    )

# Function to load the dataset from a JSON file
def load_copa_data(file_path, sample_size=100000, seed=None):
    """Load the dataset from a JSON file, optionally drawing a random subset.

    Args:
        file_path: Path of a UTF-8 encoded JSON file.
        sample_size: Maximum number of entries to return.
        seed: Optional seed for a reproducible draw. When omitted, the
            module-level ``random`` generator is used.

    Returns:
        The parsed JSON content unchanged when it has at most ``sample_size``
        entries; otherwise a list of ``sample_size`` entries drawn without
        replacement.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    if sample_size >= len(data):
        return data

    # A dedicated generator makes a seeded draw reproducible without touching
    # the global random state; unseeded calls keep using the global generator.
    sampler = random if seed is None else random.Random(seed)
    return sampler.sample(data, sample_size)

def format_copa_query_for_gemini(item):
    """Build the three-way multiple-choice prompt for one dataset item.

    The prompt states the premise, asks for its cause or effect (anything
    other than 'cause' in ``item['ask-for']`` is treated as 'effect'), lists
    the three hypotheses and tells the model to pick one without explaining.
    """
    premise = item['context']
    hypothesis1, hypothesis2, hypothesis3 = (
        item['choice_id: ' + str(position)] for position in range(3)
    )
    if item['ask-for'] == "cause":
        task = "cause"
    else:
        task = "effect"
    return f"Given the premise: {premise}; what is the {task}? Is it hypothesis 1: {hypothesis1}; OR hypothesis 2: {hypothesis2}; OR hypothesis 3: {hypothesis3}? Pick one and do not explain."

# Function to query the Gemini model
def query_gemini(query, model):
    """Send ``query`` to ``model`` in a new chat and return the reply text.

    A chat with an empty history is started on every call, so no earlier
    exchange is passed along with the query.
    """
    return model.start_chat(history=[]).send_message(query).text

# Load the COPA data
# No sample_size is passed, so the default of 100000 applies and a file with
# fewer entries than that is returned whole.
copa_data = load_copa_data('/content/drive/MyDrive/Datasets/causalnetcd.json')

In [None]:
import random
import time
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


def compute_metrics(y_true, y_pred):
    """Return ``[accuracy, f1, precision, recall]`` for the given labels.

    Accuracy is computed directly; F1, precision and recall use
    ``average='weighted'``.
    """
    weighted_scorers = (f1_score, precision_score, recall_score)
    metrics = [accuracy_score(y_true, y_pred)]
    for scorer in weighted_scorers:
        metrics.append(scorer(y_true, y_pred, average='weighted'))
    return metrics


# Responses file for the current run number: read and written as JSON Lines
# (one JSON object per line) by the evaluation code in this cell.
responses_file = '/content/drive/MyDrive/Datasets/causalnetcd_responses_' + str(run) + '.jsonl'
def _item_from_saved_record(record):
    """Rebuild the dataset-item layout that is_response_correct reads from a saved record."""
    return {
        'index': record['index'],
        'context': record['premise'],
        'ask-for': record['task'],
        'choice_id: 0': record['hypothesis1'],
        'choice_id: 1': record['hypothesis2'],
        'choice_id: 2': record['hypothesis3'],
        'label': record['label'],
    }


def _predicted_label(response, item, is_correct):
    """Return the 0-based label to record as the model's prediction."""
    label = item['label']
    if is_correct:
        return label
    # Prefer a hypothesis the model actually named, as long as it is a wrong
    # one (the answer has already been graded as incorrect).
    for number in re.findall(r'\bhypothesis ([123])\b', normalize_text(response)):
        if int(number) - 1 != label:
            return int(number) - 1
    # Nothing usable in the response: fall back to an arbitrary wrong label.
    return 0 if label else 1


def _score_response(response, item):
    """Grade one response, record the (truth, prediction) pair and return the verdict."""
    is_correct = is_response_correct(response, item)
    prediction = _predicted_label(response, item, is_correct)
    # Both lists are extended together, and only after grading succeeded, so
    # they always have the same length.
    acts.append(item['label'])
    preds.append(prediction)
    return is_correct


# Function to evaluate accuracy with Gemini
def evaluate_accuracy_with_gemini(data, model):
    """Return the model's accuracy on ``data``, reusing saved responses if present.

    When ``responses_file`` exists and holds records, those responses are
    re-graded instead of querying the model; otherwise every item in ``data``
    is sent to ``model``. Items whose processing raises are reported, skipped
    and excluded from the accuracy.

    Side effects on module-level state:
      * ``saved_responses`` is emptied first and then refilled (through
        ``is_response_correct``) with the records of this call.
      * ``acts`` / ``preds`` receive one ground-truth / predicted label per
        graded item.

    Args:
        data: Iterable of dataset items (see ``is_response_correct``).
        model: Model object handed to ``query_gemini``.

    Returns:
        Fraction of graded items answered correctly (0 when none were graded).
    """
    # Re-running the evaluation must not duplicate records left by a previous call.
    saved_responses.clear()

    try:
        with open(responses_file, 'r', encoding='utf-8') as file:
            cached_records = [json.loads(line) for line in file if line.strip()]
    except FileNotFoundError:
        cached_records = []

    correct_count = 0
    skipped_items = 0

    if cached_records:
        print("Loaded saved responses.")
        total_items = len(cached_records)
        # Re-grade the stored responses so the accuracy, acts/preds and
        # saved_responses are all available without calling the model again.
        for record in cached_records:
            item = _item_from_saved_record(record)
            if _score_response(record['generated_response'], item):
                correct_count += 1
    else:
        total_items = len(data)
        for index, item in enumerate(data):
            try:
                query = format_copa_query_for_gemini(item)
                print(f"\nProcessing item {index + 1}/{total_items}...")

                response = query_gemini(query, model)
                print(f"Generated Response: {response}")

                correct_hypothesis = item['choice_id: ' + str(item['label'])]
                print(f"Correct Hypothesis: Hypothesis: {item['label'] + 1}, {correct_hypothesis}")

                if _score_response(response, item):
                    print(f"Item {index + 1}: Correct")
                    correct_count += 1
                else:
                    print(f"Item {index + 1}: Incorrect")

            except Exception as e:
                # Best effort: report what actually went wrong, skip the item
                # and pause before continuing with the next one.
                print(f"Item {index + 1} skipped due to {type(e).__name__}: {e}")
                skipped_items += 1
                time.sleep(5)

    # Adjust total_items to exclude skipped items for accuracy calculation
    total_items -= skipped_items
    accuracy = correct_count / total_items if total_items > 0 else 0
    print(f"\nProcessed Items: {total_items}, Skipped Items: {skipped_items}")
    print(f"Accuracy: {accuracy * 100:.2f}%")
    return accuracy


# Evaluate accuracy with Gemini
acts, preds = [], []  # acts: ground truth
accuracy = evaluate_accuracy_with_gemini(copa_data, gemini_model)
print(accuracy)
print(f"Accuracy with Gemini: {accuracy * 100:.3f}%")

# Persist only when something was collected: opening the file for writing with
# an empty list would truncate whatever responses it already holds.
if saved_responses:
    with open(responses_file, 'w', encoding='utf-8') as file:
        for response in saved_responses:
            file.write(json.dumps(response) + '\n')
    print("Responses saved.")
else:
    print("No responses collected; responses file left untouched.")

# The sklearn metrics are only meaningful when at least one item was scored.
if acts:
    sklearn_accuracy, f1val, precisionval, recallval = compute_metrics(acts, preds)
    print(f"Accuracy: {accuracy * 100:.3f}%")
    print(f"sklearn Accuracy: {sklearn_accuracy * 100:.3f}%")
    print(f"F1: {f1val * 100:.3f}%")
    print(f"Precision: {precisionval * 100:.3f}%")
    print(f"Recall: {recallval * 100:.3f}%")
else:
    print("No scored items; skipping sklearn metrics.")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Correct Hypothesis: Hypothesis: 1, The recycling program might have inadvertently encouraged more consumption, leading to more waste.
Item 6: Correct

Processing item 7/1000...
Generated Response: Hypothesis 3: The financial security provided by UBI encouraged entrepreneurial ventures.
Correct Hypothesis: Hypothesis: 3, The financial security provided by UBI encouraged entrepreneurial ventures.
Item 7: Correct

Processing item 8/1000...
Generated Response: Hypothesis 3: The increase in green spaces indirectly reduced pollution by promoting a cleaner urban environment.
Correct Hypothesis: Hypothesis: 3, The increase in green spaces indirectly reduced pollution by promoting a cleaner urban environment.
Item 8: Correct

Processing item 9/1000...
Generated Response: Hypothesis 3: Renewable energy projects created new job opportunities, contributing to lower unemployment.
Correct Hypothesis: Hypothesis: 3, Renewable energy pro