In [1]:
import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import os
from tqdm import tqdm

In [2]:
def setup_flan_t5(device=None, model_id="google/flan-t5-large"):
    """Load a FLAN-T5 checkpoint and its tokenizer onto a compute device.

    Args:
        device: Torch device string such as "cuda:0" or "cpu". When None
            (the default), "cuda:0" is used if CUDA is available and "cpu"
            otherwise, so the notebook also runs on machines without a GPU.
        model_id: Hugging Face model identifier to load.

    Returns:
        A ``(model, tokenizer, device)`` tuple. ``device`` is the string the
        model was moved to, so callers can move tokenized inputs to the same
        place.
    """
    if device is None:
        # Imported lazily: torch is only needed here to probe for a GPU.
        import torch

        device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return model, tokenizer, device

In [3]:
# Load the model once; get_completion() reads these three names as globals.
model, tokenizer, device = setup_flan_t5()



In [4]:
# Smoke test: push one fixed prompt through the model and print the decoded text.
inputs = tokenizer(
    "A step by step recipe to make bolognese pasta:",
    return_tensors="pt",
).to(device)
outputs = model.generate(max_length=120, **inputs)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

Toss the pasta with the sauce, then add the meat and toss again.


In [5]:
def get_completion(prompt, max_length=120):
    """Generate a completion for ``prompt`` with the notebook's loaded model.

    Relies on the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        prompt: Input text for the model.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
            Defaults to 120, the value that used to be hard-coded here.

    Returns:
        The first decoded sequence, with special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    
# Quick check of get_completion; print a notice instead if it returns nothing.
prompt = "do you know incontext learning?"
output_sentence = get_completion(prompt)
print(output_sentence or "Prompt was skipped or an error occurred.")


# from openai import OpenAI
# client = OpenAI()

# completion = client.chat.completions.create(
#   model="gpt-4-turbo-2024-04-09",
#   messages=[
#     {"role": "system", "content": "You are a helpful assistant."},
#     {"role": "user", "content": "Hello!"}
#   ]
# )

# print(completion.choices[0].message)

context learning


In [6]:
def load_examples(file_path, encoding="utf-8"):
    """Read the few-shot examples file and return its text.

    Args:
        file_path: Path to the text file holding the prompt examples.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            rather than the platform default, so the same file reads
            identically on every operating system.

    Returns:
        The file contents with leading and trailing whitespace removed.
    """
    with open(file_path, 'r', encoding=encoding) as file:
        return file.read().strip()

In [23]:
# def generate_data(csv_input, csv_output, examples_file):
#     # Load the examples for few-shot learning
#     examples = load_examples(examples_file)
#     # Load the CSV file
#     goldstandard = pd.read_csv(csv_input)


#     # ------------------------------test
#     # goldstandard = goldstandard.head(5) 

    
#     # Initialize the output DataFrame
#     generated_data = []

#     # Process each row in the DataFrame
#     for idx, row in tqdm(goldstandard.iterrows(), total=goldstandard.shape[0], desc="Generating Data"):
#         prompt = f"{examples}\n\n" \
#                  f"INSTRUCTION: Write a sentence with the semantic meaning the same as [{row['#CUE_COLUMN']}], " \
#                  f"use the template [{row['FRAGMENT_COLUMN']}], the sentence should be euphemistic not explicit, " \
#                  f"the sentence should be [{row['LABEL_COLUMN']}]." \
#                  f"\n\n OUTPUT:"
#         generated_sentence = get_completion(prompt)
#         generated_data.append({
#             '#CUE_COLUMN': row['#CUE_COLUMN'],
#             'FRAGMENT_COLUMN': row['FRAGMENT_COLUMN'],
#             'INSTANCE_COLUMN': generated_sentence,
#             'LABEL_COLUMN': row['LABEL_COLUMN']
#         })

#     # Create a DataFrame from the generated data
#     new_df = pd.DataFrame(generated_data)
#     new_df.to_csv(output_csv, index=False)
#     print("Data generation complete and saved to", csv_output)



# Canonical dataset labels, keyed by the upper-cased raw answer from the model.
_LABEL_ALIASES = {
    "ABUSE": "ABUSE",
    "NONE": "NONE ABUSE",
    "NONE ABUSE": "NONE ABUSE",
}


def _build_cot_prompt(examples, text):
    """Return the few-shot chain-of-thought prompt for one sentence."""
    return (
        f"{examples}\n\n"
        "Please follow the INSTRUCTION and think step by step: given a SENTENCE, "
        "first output an explanation about whether you think the given SENTENCE "
        "is labeled as abuse, then output the label PREDICTION\n\n"
        "INSTRUCTION: Think step by step: given the following SENTENCE, "
        "first output an explanation about whether you think the given SENTENCE "
        "is labeled as abuse, then output the label PREDICTION\n"
        f"SENTENCE: {text}\nEXPLANATION: \nPREDICTION: "
    )


def _parse_cot_completion(completion):
    """Split a model completion into ``(explanation, prediction)``."""
    # When a marker is absent, str.split returns the whole string, so a bare
    # answer such as "ABUSE" ends up in both fields.
    explanation = completion.split("EXPLANATION: ")[-1].split("PREDICTION:")[0].strip()
    prediction = completion.split("PREDICTION:")[-1].strip()

    # Map case, spacing and trailing-punctuation variants ("none", "Abuse.")
    # onto the dataset's labels; anything unrecognised is kept verbatim.
    key = " ".join(prediction.rstrip(".!").split()).upper()
    return explanation, _LABEL_ALIASES.get(key, prediction)


def generate_data(dataset_csv, examples_file, output_csv):
    """Label every sentence in a CSV with the model and save the results.

    For each value in the ``text`` column, builds a few-shot prompt, calls
    ``get_completion`` and parses the reply into an explanation and a
    prediction. Predictions that match a known label up to case, spacing or
    trailing punctuation are normalised to "ABUSE" or "NONE ABUSE" (a bare
    "NONE" counts as "NONE ABUSE").

    Args:
        dataset_csv: Path of the input CSV; it must contain a ``text`` column.
        examples_file: Path of the few-shot examples text, read with
            ``load_examples`` and prepended to every prompt.
        output_csv: Path the augmented frame is written to (without the index).

    Returns:
        None. The frame, with added ``explanations`` and ``predictions``
        columns, is shown with ``display`` and written to ``output_csv``.
    """
    df = pd.read_csv(dataset_csv)
    examples = load_examples(examples_file)

    explanations = []
    predictions = []
    for text in tqdm(df['text'], total=len(df), desc="Generating predictions"):
        completion = get_completion(_build_cot_prompt(examples, text))
        explanation, prediction = _parse_cot_completion(completion)
        explanations.append(explanation)
        predictions.append(prediction)

    df['explanations'] = explanations
    df['predictions'] = predictions
    # `display` is not imported here; this assumes an IPython/Jupyter kernel.
    display(df)
    df.to_csv(output_csv, index=False)

In [24]:
# Paths passed to generate_data.
dataset_csv = '../../dataset/train_dev/test_set.csv'  # input rows; generate_data reads the 'text' column
# Alternative output file (disabled):
# output_csv = './flant5_large_cot_prediction_test.csv'
output_csv = './flant5_large_cot_prediction.csv'  # CSV that generate_data writes
examples_file = 'cot_FewShot_prompt.txt'  # few-shot examples prepended to every prompt

In [25]:
# Run the prediction pass over dataset_csv; displays the frame and writes output_csv.
generate_data(dataset_csv, examples_file, output_csv)

Generating predictions:   0%|          | 0/360 [00:00<?, ?it/s]Generating predictions:   0%|          | 1/360 [00:00<01:28,  4.08it/s]Generating predictions:   1%|          | 2/360 [00:00<01:23,  4.30it/s]Generating predictions:   1%|          | 3/360 [00:00<01:21,  4.36it/s]Generating predictions:   1%|          | 4/360 [00:00<01:21,  4.36it/s]Generating predictions:   1%|▏         | 5/360 [00:01<01:20,  4.41it/s]Generating predictions:   2%|▏         | 6/360 [00:01<01:19,  4.43it/s]Generating predictions:   2%|▏         | 7/360 [00:01<01:22,  4.28it/s]Generating predictions:   2%|▏         | 8/360 [00:01<01:21,  4.34it/s]Generating predictions:   2%|▎         | 9/360 [00:02<01:26,  4.07it/s]Generating predictions:   3%|▎         | 10/360 [00:02<01:23,  4.19it/s]Generating predictions:   3%|▎         | 11/360 [00:02<01:21,  4.27it/s]Generating predictions:   3%|▎         | 12/360 [00:02<01:20,  4.32it/s]Generating predictions:   4%|▎         | 13/360 [00:03<01:19,  4.36it

Unnamed: 0,inst_id,text,labels,explanations,predictions
0,1,You take things that don't belong to you.,ABUSE,ABUSE,ABUSE
1,2,I take it you don't want to be characterised i...,NONE ABUSE,NONE,NONE ABUSE
2,3,You are so imaginative at what could go wrong.,NONE ABUSE,NONE,NONE ABUSE
3,4,You need to act your age.,ABUSE,NONE,NONE ABUSE
4,5,I think you might be challenged vertically.,ABUSE,NONE,NONE ABUSE
...,...,...,...,...,...
355,356,"You really carry on talking, regardless of the...",NONE ABUSE,NONE,NONE ABUSE
356,357,You are the reason I have decided to spend mor...,NONE ABUSE,NONE,NONE ABUSE
357,358,You have always given me amazing vibes.,NONE ABUSE,NONE,NONE ABUSE
358,359,It's great that no one notices you when you sh...,NONE ABUSE,NONE,NONE ABUSE


In [26]:
from sklearn.metrics import classification_report

def evaluate(predictions_path):
    """Print weighted-average precision, recall and F1 for a predictions CSV.

    Args:
        predictions_path: Path of a CSV with a ``labels`` column (gold
            labels) and a ``predictions`` column (model outputs).

    Returns:
        None. The three metrics are printed to stdout with four decimals.
    """
    predictions_df = pd.read_csv(predictions_path)

    # An empty prediction is read back from CSV as NaN. Casting both columns
    # to str keeps classification_report from seeing mixed str/float labels.
    true_labels = predictions_df['labels'].fillna("").astype(str)
    predicted_labels = predictions_df['predictions'].fillna("").astype(str)

    # zero_division=0 gives the same scores as the default for classes with no
    # predicted or true samples, but without the UndefinedMetricWarning.
    report = classification_report(
        true_labels, predicted_labels, output_dict=True, zero_division=0
    )

    # Support-weighted averages across classes, not per-class scores.
    weighted = report['weighted avg']

    print("Evaluation Metrics:")
    print(f"Precision: {weighted['precision']:.4f}")
    print(f"Recall: {weighted['recall']:.4f}")
    print(f"F1 Score: {weighted['f1-score']:.4f}")

# Score the file generate_data wrote, instead of a second hard-coded copy of its path.
evaluate(output_csv)

Evaluation Metrics:
Precision: 0.6356
Recall: 0.6278
F1 Score: 0.6161


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
