In [54]:
# load dataset
import pandas as pd
# load from txt file
datapath = 'dataset/irony'
df = pd.read_csv(datapath+'/SemEval2018-T3-train-taskA.txt', sep='\t', header=None)
print(df.head())
labels = df[1]
texts = df[2]
# remove the first row in labels and texts
labels = labels[1:]
texts = texts[1:]
print(labels.head())
print(texts.head())

             0      1                                                  2
0  Tweet index  Label                                         Tweet text
1            1      1  Sweet United Nations video. Just in time for C...
2            2      1  @mrdahl87 We are rumored to have talked to Erv...
3            3      1  Hey there! Nice to see you Minnesota/ND Winter...
4            4      0                3 episodes left I'm dying over here
1    1
2    1
3    1
4    0
5    1
Name: 1, dtype: object
1    Sweet United Nations video. Just in time for C...
2    @mrdahl87 We are rumored to have talked to Erv...
3    Hey there! Nice to see you Minnesota/ND Winter...
4                  3 episodes left I'm dying over here
5    I can't breathe! was chosen as the most notabl...
Name: 2, dtype: object


In [58]:
import ollama

# Function to interact with the Ollama API
def chat_with_context(history):
    response = ollama.chat(model='llama3', messages=history)
    return response['message']['content']

# Initialize the chat history
chat_history = []

# Step 1: Generate Correctly Classified and Misclassified Samples
num_samples = 5
task_description = "We need to predict whether a given tweet is ironic or not. The labels are 'ironic' and 'not ironic'."
instruction = "Classify each tweet as either 'ironic' or 'not ironic'."

generate_samples_request = (
    f"As an advanced language model you should create {num_samples} samples for the task outlined below.\n"
    "Generate samples that are likely to be correctly classified as 'ironic' or 'not ironic' and samples that might be misclassified according to the task instructions.\n\n"
    f"### Task Description:\n{task_description}\n\n"
    f"### Task Instructions:\n{instruction}\n\n"
    "### Requirements for Samples:\n"
    "1. Each sample must present a unique and intricate challenge.\n"
    "2. The complexity of the samples should be such that simply applying the given task instruction would likely lead to incorrect or incomplete results for some samples.\n"
    "3. The samples should cover a diverse range of scenarios within the scope of the task, avoiding repetition and predictability.\n"
    "4. Ensure that the samples, while challenging, remain realistic and pertinent to the task's context.\n"
    "Generate the samples keeping these requirements in mind.\n###"
)

chat_history.append({'role': 'user', 'content': task_description})
chat_history.append({'role': 'user', 'content': generate_samples_request})

samples_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': samples_response})
print("Generated Samples:")
print(samples_response)

# Step 2: Analyze Samples with Chain of Thought
analyze_samples_request = (
    f"Here are some samples: {samples_response}\nUsing chain of thought, analyze these samples "
    "and conclude a procedure for predicting whether a tweet is 'ironic' or 'not ironic'. Identify key characteristics of both correctly and incorrectly classified samples, capture the mistakes from failed cases, and conclude under what circumstances we should predict each label."
)

chat_history.append({'role': 'user', 'content': analyze_samples_request})

analysis_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': analysis_response})
print("Analysis of Samples:")
print(analysis_response)

# Step 3: Generate Optimized Prompt
generate_prompt_request = (
    f"Based on the following analysis: {analysis_response}\nGenerate an optimized prompt for predicting "
    "whether a tweet is 'ironic' or 'not ironic'. Ensure the model responds only with 'ironic' or 'not ironic'.\n\n"
    "### Requirements for Optimized Prompt:\n"
    "1. The prompt must include a clear description of the task and the labels.\n"
    "2. It should provide criteria for classifying tweets as 'ironic' or 'not ironic' based on the analysis.\n"
    "3. The prompt must ensure that the model responds strictly with 'ironic' or 'not ironic'.\n"
    "4. The prompt should help the model avoid common pitfalls and misclassifications identified during the analysis.\n"
    "5. Ensure the language is unambiguous and tailored to maximize the model's prediction accuracy."
)

chat_history.append({'role': 'user', 'content': generate_prompt_request})

optimized_prompt_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': optimized_prompt_response})
print("Optimized Prompt:")
print(optimized_prompt_response)


Generated Samples:
Here are five sample tweets that aim to challenge the task of predicting irony:

**Sample 1:**
"Just got my first speeding ticket ever! #blessed" (Ironic)
This tweet appears to be a straightforward statement, but the phrase "#blessed" is often used to express good fortune or happiness. The irony lies in the fact that getting a speeding ticket is not typically considered a blessing.

**Sample 2:**
"I'm so excited for my new job as an accountant! Numbers and spreadsheets all day long #dreamjob" (Not Ironic)
This tweet seems genuine, with the speaker expressing enthusiasm for their new role. The use of hashtags like "#dreamjob" reinforces this sentiment. However, the statement is not ironic, as it's a straightforward expression of joy.

**Sample 3:**
"Just won an award for 'Best Use of Excel' #winning" (Ironic)
This tweet might be misclassified at first glance, but the irony lies in the fact that winning an award for "Best Use of Excel" is not typically considered a pre

In [66]:

def get_prediction(text):
    prompt = (
        f"{optimized_prompt_response}\n\n"
        f"Tweet: {text}\n\n"
        "### Requirements:\n"
        "1. Respond with only a single-digit (0 for not ironic, 1 for ironic).\n"
        "2. Do not provide any additional text or explanation.\n"
        "Respond with only '0' or '1':"
    )
    response = ollama.generate(model='llama3', prompt=prompt)
    # Ensure the response is either '0' or '1'
    response_text = response['response'].strip()
    if '1' in response_text:
        return '1'
    else:
        return '0'

predictions = []
for i, text in enumerate(texts):
    if i % 100 == 0:
        print(f"Predicting tweet {i+1} out of {len(texts)}")
    # if i == 1000:
    #     break
    prediction = get_prediction(text)
    print(f"Prediction for tweet {i+1}: {prediction}")
    predictions.append(prediction)

# Step 6: Evaluate Model
from sklearn.metrics import accuracy_score
predictions = ["1" if "1" in p else "0" for p in predictions]
accuracy = accuracy_score(labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicting tweet 1 out of 3817
Prediction for tweet 1: 1
Prediction for tweet 2: 1
Prediction for tweet 3: 0
Prediction for tweet 4: 1
Prediction for tweet 5: 1
Prediction for tweet 6: 1
Prediction for tweet 7: 1
Prediction for tweet 8: 1
Prediction for tweet 9: 1
Prediction for tweet 10: 1
Prediction for tweet 11: 1
Prediction for tweet 12: 1
Prediction for tweet 13: 1
Prediction for tweet 14: 1
Prediction for tweet 15: 1
Prediction for tweet 16: 1
Prediction for tweet 17: 0
Prediction for tweet 18: 0
Prediction for tweet 19: 1
Prediction for tweet 20: 1
Prediction for tweet 21: 1
Prediction for tweet 22: 1
Prediction for tweet 23: 1
Prediction for tweet 24: 1
Prediction for tweet 25: 0
Prediction for tweet 26: 1
Prediction for tweet 27: 1
Prediction for tweet 28: 1
Prediction for tweet 29: 1
Prediction for tweet 30: 1
Prediction for tweet 31: 1
Prediction for tweet 32: 0
Prediction for tweet 33: 1
Prediction for tweet 34: 1
Prediction for tweet 35: 1
Prediction for tweet 36: 1
Predic

In [10]:
def get_prediction(text):
    response = ollama.generate(model='llama3', prompt=f"Predict if the tweet text is ironic or not: {text}. make sure to respond with only the prediction value (0 or 1)")
    return response['response']

predictions = []
for i, text in enumerate(texts):
    if i % 100 == 0:
        print(f"Predicting tweet {i+1} out of {len(texts)}")
    prediction = get_prediction(text)
    predictions.append(prediction)

# Step 6: Evaluate Model
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicting tweet 1 out of 3818
Predicting tweet 101 out of 3818
Predicting tweet 201 out of 3818
Predicting tweet 301 out of 3818
Predicting tweet 401 out of 3818
Predicting tweet 501 out of 3818
Predicting tweet 601 out of 3818
Predicting tweet 701 out of 3818
Predicting tweet 801 out of 3818
Predicting tweet 901 out of 3818
Predicting tweet 1001 out of 3818
Predicting tweet 1101 out of 3818
Predicting tweet 1201 out of 3818
Predicting tweet 1301 out of 3818
Predicting tweet 1401 out of 3818
Predicting tweet 1501 out of 3818
Predicting tweet 1601 out of 3818
Predicting tweet 1701 out of 3818
Predicting tweet 1801 out of 3818
Predicting tweet 1901 out of 3818
Predicting tweet 2001 out of 3818
Predicting tweet 2101 out of 3818
Predicting tweet 2201 out of 3818
Predicting tweet 2301 out of 3818
Predicting tweet 2401 out of 3818
Predicting tweet 2501 out of 3818
Predicting tweet 2601 out of 3818
Predicting tweet 2701 out of 3818
Predicting tweet 2801 out of 3818
Predicting tweet 2901 out 