In [None]:
!pip install -q cohere gdown tqdm

In [None]:
from cohere import Client, CohereAPIError
import os
import pandas as pd
from tqdm.notebook import tqdm

# Read the key from the COHERE_API_KEY environment variable so the real secret never
# has to be stored in the notebook; the '...' placeholder remains the fallback value.
api_key = os.environ.get('COHERE_API_KEY', '...')
client = Client(api_key)

In [None]:
# CSV the notebook reads its input rows from (fetched with gdown when it is absent).
RAW_DATA_PATH = "SE2024/train_gpt-4_raw_RAG.csv"
# CSV the frame, including the added summary column, is written to at the end.
RESULT_CSV_PATH = 'SE2024/train_gpt-4_RAG_summarized.csv'


# Cohere API rate-limit safety

In [None]:
from time import sleep

class SafeCohere:
    """Wrap a Cohere client so rate-limited calls wait and retry instead of failing.

    Only errors reporting HTTP 429 are retried. Any other ``CohereAPIError``
    (invalid key, malformed request, ...) is re-raised immediately, because
    repeating such a request cannot succeed and would otherwise loop forever.

    Args:
        client: Object exposing ``chat`` and ``summarize`` methods.
        wait_seconds: Pause, in seconds, before retrying after a rate-limit error.
    """

    # HTTP status code that signals "too many requests".
    RATE_LIMIT_STATUS = 429

    def __init__(self, client, wait_seconds=60):
        self.co = client
        self.wait_seconds = wait_seconds

    def _call_with_retry(self, method, *args, **kw_args):
        """Call ``method`` until it returns, sleeping between rate-limited attempts.

        Returns:
            Whatever ``method`` returns on its first successful call.

        Raises:
            CohereAPIError: If the API reports an error other than a rate limit.
        """
        while True:
            try:
                return method(*args, **kw_args)
            except CohereAPIError as e:
                # getattr keeps this safe if the error object carries no status code;
                # such errors are treated as non-retryable.
                if getattr(e, "http_status", None) != self.RATE_LIMIT_STATUS:
                    raise
                print(f"Rate limit reached, waiting for {self.wait_seconds} seconds: {e}")
                sleep(self.wait_seconds)

    def chat(self, prompt, **kw_args):
        """Forward ``prompt`` and keyword options to the client's ``chat`` with retry."""
        return self._call_with_retry(self.co.chat, prompt, **kw_args)

    def summarize(self, text, **kw_args):
        """Forward ``text`` and keyword options to the client's ``summarize`` with retry."""
        return self._call_with_retry(self.co.summarize, text, **kw_args)
                
# Retry-aware wrapper used for every Cohere call in the cells below.
co = SafeCohere(client=client)

# Prepare data

In [None]:
# Create the parent folder of each configured CSV path, so the directory always
# follows RAW_DATA_PATH / RESULT_CSV_PATH instead of a separately hard-coded name.
for _csv_path in (RAW_DATA_PATH, RESULT_CSV_PATH):
    _parent_dir = os.path.dirname(_csv_path)
    if _parent_dir:  # a bare file name has no folder to create
        os.makedirs(_parent_dir, exist_ok=True)

In [None]:
if os.path.exists(RAW_DATA_PATH):
    print('Data file already exists')
else:
    print("Data doesn't exist, start download from the google drive...")
    !gdown 15VK8MaOEg2gF8iwmI4bummXt8whZF9Bq -O $RAW_DATA_PATH

In [None]:
# Load the raw CSV into the frame that the summarization loop fills in.
data = pd.read_csv(filepath_or_buffer=RAW_DATA_PATH)

# Generate logical relation summary

In [None]:
# Start with an empty result column; it is filled one row at a time below.
data["in summary"] = None

# Options passed unchanged to every summarize request.
summarize_options = dict(
    model='command',
    length='short',
    extractiveness='high',
    temperature=0.0,
)

for idx, record in tqdm(data.iterrows(), total=len(data)):
    # Summarize this row's HYPOTHESIS text and store the result under the same index label.
    response = co.summarize(record['HYPOTHESIS'], **summarize_options)
    data.loc[idx, "in summary"] = response.summary
    

# Save result

In [None]:
# Write the frame, now including the "in summary" column, without the index column.
data.to_csv(path_or_buf=RESULT_CSV_PATH, index=False)