In [1]:
import json
import ollama
import numpy as np
from sklearn.metrics import accuracy_score
import os
import pandas as pd
from evo_prompt import *

In [2]:
# Locate the dataset folder relative to the notebook's working directory.
path = os.getcwd()
dataset_dir = os.path.join(path, "COVID_tweets_dataset")
test_set_path = os.path.join(dataset_dir, "test_set.csv")

# Build the balanced evaluation subset (100 tweets per sentiment class) only
# when it is missing, so the notebook runs from a fresh checkout while an
# existing subset is never overwritten. The fixed random_state keeps the
# sample identical whenever it has to be regenerated.
if not os.path.exists(test_set_path):
    data = pd.read_csv(os.path.join(dataset_dir, "Corona_balanced.csv"))
    pd.concat([
        data[data['Sentiment_float'] == sentiment].sample(n=100, random_state=42)
        for sentiment in [1, 2, 3, 4, 5]
    ]).to_csv(test_set_path, index=False)

# Always read the subset back from disk so `test_set` has the same dtypes and
# index whether it was just created or already existed.
test_set = pd.read_csv(test_set_path)

In [3]:
# Text labels the model may answer with, mapped onto the 1-5 sentiment scale.
_SENTIMENT_LABELS = {
    'Negative': 2,
    'Neutral': 3,
    'Positive': 4,
    'Extremely Negative': 1,
    'Extremely Positive': 5
}


def _parse_sentiment(raw_response):
    """Convert a raw model reply into a sentiment score in 1..5, or None if unusable."""
    text = raw_response.strip()
    if text in _SENTIMENT_LABELS:
        return _SENTIMENT_LABELS[text]
    try:
        score = int(text)
    except ValueError:
        # Neither a known label nor an integer.
        return None
    return score if 1 <= score <= 5 else None


def evaluation_function(prompt, dev_set, generated_prompt=False, return_invalid_samples=False):
    """Score a prompt by the accuracy of the model's sentiment predictions.

    Each tweet in ``dev_set`` is substituted into ``prompt``, sent to the
    ``llama3.2`` model through ``ollama.chat`` and the reply is parsed into a
    class between 1 and 5.

    Parameters
    ----------
    prompt : str
        Prompt template containing the tweet placeholder.
    dev_set : pandas.DataFrame
        Must provide the columns ``'OriginalTweet'`` (inserted into the
        prompt) and ``'Sentiment_float'`` (the ground-truth labels).
    generated_prompt : bool, default False
        When True the placeholder replaced is ``[INSERT TWEET HERE]``;
        otherwise it is ``[TWEET]``.
    return_invalid_samples : bool, default False
        When True, also return how many replies could not be parsed.

    Returns
    -------
    float or tuple
        The accuracy from ``accuracy_score``, or ``(accuracy, invalid_samples)``
        when ``return_invalid_samples`` is True.

    Notes
    -----
    Replies that cannot be parsed are counted as invalid and replaced by a
    uniformly random class so that ``y_pred`` keeps the same length as the
    labels.
    """
    placeholder = "[INSERT TWEET HERE]" if generated_prompt else "[TWEET]"
    y_true = dev_set['Sentiment_float']
    y_pred = []
    invalid_samples = 0

    for tweet in dev_set['OriginalTweet']:
        prompt_cont = prompt.replace(placeholder, tweet)

        model_response = ollama.chat(model="llama3.2", messages=[
            {
                'role': 'user',
                'content': prompt_cont,
            },
        ], options={
            'temperature': 0.7,
            'num_predict': 1,
            'repeat_penalty': 1,
            'top_k': 10,
            'top_p': 0.9
        })['message']['content']

        score = _parse_sentiment(model_response)
        if score is None:
            invalid_samples += 1
            # randint's upper bound is exclusive: high=6 makes class 5 reachable.
            score = np.random.randint(low=1, high=6)
        y_pred.append(score)

    accuracy = accuracy_score(y_true, y_pred)
    if return_invalid_samples:
        return accuracy, invalid_samples
    return accuracy

In [4]:
# EvoPrompt run seeded with the hand-written prompts

json_path = "prompts_examples.json"
with open(json_path, "r") as prompts_file:
    prompt_data = json.load(prompts_file)

# The hand-written prompts are read from the keys prompt_1 .. prompt_4.
written_prompts = prompt_data["written_prompts"]
prompts = [written_prompts["prompt_" + str(number)] for number in range(1, 5)]
population_size = len(prompts)

# Evolve the initial population with the genetic-algorithm variant, scoring
# every candidate with evaluation_function on test_set.
evoprompt = EvoPrompt(
    initial_prompts=prompts,
    dev_set=test_set,
    evaluation_function=evaluation_function,
    max_iterations=10,
    evolution_method='GA',
)
best_prompt = evoprompt.optimize(prompts_source="written")

print("Best prompt:", best_prompt)

For parents
: You are such a clever model in tasks like classification. Can you please classify this tweet: [TWEET] - based on its sentiment, into one of those 5 categories:

Extremely Negative
Negative
Neutral
Positive
Extremely Positive
Your response must be a single number between 1 and 5. Do not include any additional text.
Based on the given tweet that I provide you below, please classify the Sentiment of it on the scale of 1 (Extremely Negative) to 5 (Extremely Positive):
[TWEET]
Your response must be a single number between 1 and 5. Do not include any additional text.
Created child: prompt
Saved logs to logs_results_evoprompt/written_prompts_log_v3.txt
For parents
: Classify the sentiment of the following tweet into one of the 5 categories: 1 - Extremely Negative, 2 - Negative, 3 - Neutral, 4 - Positive, 5 - Extremely Positive. Tweet: [TWEET]
Your response must be a single number between 1 and 5. Do not include any additional text.
prompt
Created child: Classify the sentiment of

KeyboardInterrupt: 