In [54]:
# load dataset
import pandas as pd
# Read the tab-separated SemEval-2018 Task 3 (subtask A) training file.
# header=None keeps the file's own header line as data row 0, so columns are
# addressed by position: 1 holds the label, 2 holds the tweet text.
datapath = 'dataset/irony'
df = pd.read_csv(datapath+'/SemEval2018-T3-train-taskA.txt', sep='\t', header=None)
print(df.head())

# Select each column and slice off row 0 (the header text) in one step.
# Because the header text shares the column, the labels remain strings.
labels, texts = df[1][1:], df[2][1:]

# Preview both series; printing them together yields the same two dumps.
print(labels.head(), texts.head(), sep='\n')

             0      1                                                  2
0  Tweet index  Label                                         Tweet text
1            1      1  Sweet United Nations video. Just in time for C...
2            2      1  @mrdahl87 We are rumored to have talked to Erv...
3            3      1  Hey there! Nice to see you Minnesota/ND Winter...
4            4      0                3 episodes left I'm dying over here
1    1
2    1
3    1
4    0
5    1
Name: 1, dtype: object
1    Sweet United Nations video. Just in time for C...
2    @mrdahl87 We are rumored to have talked to Erv...
3    Hey there! Nice to see you Minnesota/ND Winter...
4                  3 episodes left I'm dying over here
5    I can't breathe! was chosen as the most notabl...
Name: 2, dtype: object


# Learn from various samples (correctly classified and misclassified)

In [58]:
import ollama

# Function to interact with the Ollama API
def chat_with_context(history, model='llama3'):
    """Send a chat transcript to Ollama and return the text of the reply.

    Args:
        history: List of message dicts (each with 'role' and 'content' keys),
            passed unchanged as the ``messages`` argument of ``ollama.chat``.
        model: Name of the Ollama model to query. Defaults to 'llama3', the
            value that used to be hard-coded, so existing calls are unaffected.

    Returns:
        The ``content`` field of the ``message`` entry of the response.
    """
    response = ollama.chat(model=model, messages=history)
    return response['message']['content']

# Running transcript of the conversation. Every request and every model reply
# is appended, so each later call is answered with the earlier turns as context.
chat_history = []

# Step 1: Generate Correctly Classified and Misclassified Samples
num_samples = 5
task_description = "We need to predict whether a given tweet is ironic or not. The labels are 'ironic' and 'not ironic'."
instruction = "Classify each tweet as either 'ironic' or 'not ironic'."

generate_samples_request = (
    f"As an advanced language model you should create {num_samples} samples for the task outlined below.\n"
    "Generate samples that are likely to be correctly classified as 'ironic' or 'not ironic' and samples that might be misclassified according to the task instructions.\n\n"
    f"### Task Description:\n{task_description}\n\n"
    f"### Task Instructions:\n{instruction}\n\n"
    "### Requirements for Samples:\n"
    "1. Each sample must present a unique and intricate challenge.\n"
    "2. The complexity of the samples should be such that simply applying the given task instruction would likely lead to incorrect or incomplete results for some samples.\n"
    "3. The samples should cover a diverse range of scenarios within the scope of the task, avoiding repetition and predictability.\n"
    "4. Ensure that the samples, while challenging, remain realistic and pertinent to the task's context.\n"
    "Generate the samples keeping these requirements in mind.\n###"
)

# The task description is sent as its own user turn, directly followed by the
# sample-generation request (which embeds the same description a second time).
chat_history.extend([
    {'role': 'user', 'content': task_description},
    {'role': 'user', 'content': generate_samples_request},
])

samples_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': samples_response})
print("Generated Samples:", samples_response, sep="\n")

# Step 2: Analyze Samples with Chain of Thought
# NOTE(review): the generated samples are quoted inside this request although
# they are already part of chat_history, so the model receives them twice.
analyze_samples_request = (
    f"Here are some samples: {samples_response}\nUsing chain of thought, analyze these samples "
    "and conclude a procedure for predicting whether a tweet is 'ironic' or 'not ironic'. Identify key characteristics of both correctly and incorrectly classified samples, capture the mistakes from failed cases, and conclude under what circumstances we should predict each label."
)
chat_history.append({'role': 'user', 'content': analyze_samples_request})

analysis_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': analysis_response})
print("Analysis of Samples:", analysis_response, sep="\n")

# Step 3: Generate Optimized Prompt
# The reply to this request is what the prediction cell later prepends to
# every tweet as `optimized_prompt_response`.
generate_prompt_request = (
    f"Based on the following analysis: {analysis_response}\nGenerate an optimized prompt for predicting "
    "whether a tweet is 'ironic' or 'not ironic'. Ensure the model responds only with 'ironic' or 'not ironic'.\n\n"
    "### Requirements for Optimized Prompt:\n"
    "1. The prompt must include a clear description of the task and the labels.\n"
    "2. It should provide criteria for classifying tweets as 'ironic' or 'not ironic' based on the analysis.\n"
    "3. The prompt must ensure that the model responds strictly with 'ironic' or 'not ironic'.\n"
    "4. The prompt should help the model avoid common pitfalls and misclassifications identified during the analysis.\n"
    "5. Ensure the language is unambiguous and tailored to maximize the model's prediction accuracy."
)
chat_history.append({'role': 'user', 'content': generate_prompt_request})

optimized_prompt_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': optimized_prompt_response})
print("Optimized Prompt:", optimized_prompt_response, sep="\n")


Generated Samples:
Here are five sample tweets that aim to challenge the task of predicting irony:

**Sample 1:**
"Just got my first speeding ticket ever! #blessed" (Ironic)
This tweet appears to be a straightforward statement, but the phrase "#blessed" is often used to express good fortune or happiness. The irony lies in the fact that getting a speeding ticket is not typically considered a blessing.

**Sample 2:**
"I'm so excited for my new job as an accountant! Numbers and spreadsheets all day long #dreamjob" (Not Ironic)
This tweet seems genuine, with the speaker expressing enthusiasm for their new role. The use of hashtags like "#dreamjob" reinforces this sentiment. However, the statement is not ironic, as it's a straightforward expression of joy.

**Sample 3:**
"Just won an award for 'Best Use of Excel' #winning" (Ironic)
This tweet might be misclassified at first glance, but the irony lies in the fact that winning an award for "Best Use of Excel" is not typically considered a pre

In [66]:
def get_prediction(text):
    """Classify one tweet with the optimized prompt and return '0' or '1'.

    The prompt is the module-level ``optimized_prompt_response`` followed by
    the tweet and the output-format requirements.

    Args:
        text: The tweet to classify.

    Returns:
        '1' (ironic) or '0' (not ironic). A reply containing neither a
        standalone 0/1 digit nor a word label falls back to '0'.
    """
    import re  # local import: the cell must not rely on another cell's imports

    prompt = (
        f"{optimized_prompt_response}\n\n"
        f"Tweet: {text}\n\n"
        "### Requirements:\n"
        "1. Respond with only a single-digit (0 for not ironic, 1 for ironic).\n"
        "2. Do not provide any additional text or explanation.\n"
        "Respond with only '0' or '1':"
    )
    response = ollama.generate(model='llama3', prompt=prompt)
    response_text = response['response'].strip()

    # Accept only a standalone 0/1 token. A plain substring test
    # ('1' in response_text) also fires on "10" or "0 (confidence 0.91)".
    digit = re.search(r"(?<!\w)(?<!\d\.)[01](?!\w)(?!\.\d)", response_text)
    if digit:
        return digit.group(0)

    # The optimized prompt was asked to make the model answer with the words
    # 'ironic' / 'not ironic', so accept those when no digit is present.
    lowered = response_text.lower()
    if re.search(r"\b(?:not|non)[\s-]*ironic", lowered):
        return '0'
    if 'ironic' in lowered:
        return '1'

    # Unparseable reply: keep the historical default of "not ironic".
    return '0'

# Query the model once per tweet, announcing progress on every 1000th tweet.
predictions = []
for i, text in enumerate(texts):
    if not i % 1000:
        print(f"Predicting tweet {i+1} out of {len(texts)}")
    predictions.append(get_prediction(text))

# Step 6: Evaluate Model
from sklearn.metrics import accuracy_score
# Collapse each stored reply to "1" when it contains the character '1',
# otherwise to "0", then score against the string labels.
predictions = list(map(lambda p: "1" if "1" in p else "0", predictions))
accuracy = accuracy_score(labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicting tweet 1 out of 3817
Prediction for tweet 1: 1
Prediction for tweet 2: 1
Prediction for tweet 3: 0
Prediction for tweet 4: 1
Prediction for tweet 5: 1
Prediction for tweet 6: 1
Prediction for tweet 7: 1
Prediction for tweet 8: 1
Prediction for tweet 9: 1
Prediction for tweet 10: 1
Prediction for tweet 11: 1
Prediction for tweet 12: 1
Prediction for tweet 13: 1
Prediction for tweet 14: 1
Prediction for tweet 15: 1
Prediction for tweet 16: 1
Prediction for tweet 17: 0
Prediction for tweet 18: 0
Prediction for tweet 19: 1
Prediction for tweet 20: 1
Prediction for tweet 21: 1
Prediction for tweet 22: 1
Prediction for tweet 23: 1
Prediction for tweet 24: 1
Prediction for tweet 25: 0
Prediction for tweet 26: 1
Prediction for tweet 27: 1
Prediction for tweet 28: 1
Prediction for tweet 29: 1
Prediction for tweet 30: 1
Prediction for tweet 31: 1
Prediction for tweet 32: 0
Prediction for tweet 33: 1
Prediction for tweet 34: 1
Prediction for tweet 35: 1
Prediction for tweet 36: 1
Predic

# With zero-shot CoT (chain of thought)

In [77]:
def get_prediction(text):
    """Classify one tweet with a zero-shot "think step by step" prompt.

    Args:
        text: The tweet to classify.

    Returns:
        '1' (ironic) or '0' (not ironic). A reply containing neither a
        standalone 0/1 digit nor a word label falls back to '0'.
    """
    import re  # local import: the cell must not rely on another cell's imports

    # NOTE(review): the prompt asks the model to think step by step and also
    # forbids any text besides the digit; confirm that this is intended.
    prompt = (
        "Predict whether the following tweet is ironic or not:\n\n"
        # Terminated with a blank line; previously this ran straight into
        # "Tweet:" and produced "...step by stepTweet: <text>".
        "Let's think step by step.\n\n"
        f"Tweet: {text}\n\n"
        "### Requirements:\n"
        "1. Respond with only a single-digit (0 for not ironic, 1 for ironic).\n"
        "2. Do not provide any additional text or explanation.\n"
        "Respond with only '0' or '1':"
    )
    response = ollama.generate(model='llama3', prompt=prompt)
    response_text = response['response'].strip()

    # Accept only a standalone 0/1 token. A plain substring test
    # ('1' in response_text) also fires on "10" or "0 (confidence 0.91)".
    digit = re.search(r"(?<!\w)(?<!\d\.)[01](?!\w)(?!\.\d)", response_text)
    if digit:
        return digit.group(0)

    # Fall back to a word label if the model answered in words.
    lowered = response_text.lower()
    if re.search(r"\b(?:not|non)[\s-]*ironic", lowered):
        return '0'
    if 'ironic' in lowered:
        return '1'

    # Unparseable reply: keep the historical default of "not ironic".
    return '0'

# Query the model once per tweet, announcing progress on every 1000th tweet.
predictions = []
for i, text in enumerate(texts):
    if not i % 1000:
        print(f"Predicting tweet {i+1} out of {len(texts)}")
    predictions.append(get_prediction(text))

# Step 6: Evaluate Model
from sklearn.metrics import accuracy_score
# Collapse each stored reply to "1" when it contains the character '1',
# otherwise to "0", then score against the string labels.
predictions = list(map(lambda p: "1" if "1" in p else "0", predictions))
accuracy = accuracy_score(labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicting tweet 1 out of 3817
Prediction for tweet 1: 1
Prediction for tweet 2: 1
Prediction for tweet 3: 0
Prediction for tweet 4: 1
Prediction for tweet 5: 1
Prediction for tweet 6: 1
Prediction for tweet 7: 1
Prediction for tweet 8: 1
Prediction for tweet 9: 1
Prediction for tweet 10: 1
Prediction for tweet 11: 1
Prediction for tweet 12: 1
Prediction for tweet 13: 1
Prediction for tweet 14: 1
Prediction for tweet 15: 1
Prediction for tweet 16: 1
Prediction for tweet 17: 1
Prediction for tweet 18: 1
Prediction for tweet 19: 1
Prediction for tweet 20: 1
Prediction for tweet 21: 1
Prediction for tweet 22: 1
Prediction for tweet 23: 1
Prediction for tweet 24: 1
Prediction for tweet 25: 1
Prediction for tweet 26: 1
Prediction for tweet 27: 1
Prediction for tweet 28: 1
Prediction for tweet 29: 1
Prediction for tweet 30: 1
Prediction for tweet 31: 1
Prediction for tweet 32: 1
Prediction for tweet 33: 1
Prediction for tweet 34: 1
Prediction for tweet 35: 1
Prediction for tweet 36: 1
Predic

# Learn only from negative (misleading) samples

In [73]:
import ollama
from sklearn.metrics import accuracy_score

# Function to interact with the Ollama API
def chat_with_context(history, model='llama3'):
    """Send a chat transcript to Ollama and return the text of the reply.

    Args:
        history: List of message dicts (each with 'role' and 'content' keys),
            passed unchanged as the ``messages`` argument of ``ollama.chat``.
        model: Name of the Ollama model to query. Defaults to 'llama3', the
            value that used to be hard-coded, so existing calls are unaffected.

    Returns:
        The ``content`` field of the ``message`` entry of the response.
    """
    response = ollama.chat(model=model, messages=history)
    return response['message']['content']

# Fresh transcript for this experiment. Every request and every model reply
# is appended, so each later call is answered with the earlier turns as context.
chat_history = []

# Step 1: Define Task Description
task_description = "We need to predict whether a given tweet is ironic or not. The labels are 'ironic' and 'not ironic'."
chat_history += [{'role': 'user', 'content': task_description}]

# Step 2: Generate Misleading Samples
num_samples = 5
generate_misleading_samples_request = (
    f"As an advanced language model, generate {num_samples} misleading samples for the task outlined below.\n"
    "For each sample, provide the likely mispredicted label ('ironic' or 'not ironic').\n\n"
    f"### Task Description:\n{task_description}\n\n"
    "### Requirements for Misleading Samples:\n"
    "1. Each sample must be challenging and likely to cause mispredictions.\n"
    "2. Include a likely mispredicted label for each sample.\n"
    "3. Ensure the samples are realistic and relevant to the task.\n"
    "Generate the misleading samples keeping these requirements in mind.\n###"
)
# This is a second consecutive user turn; the task description above has not
# been answered by the model yet.
chat_history += [{'role': 'user', 'content': generate_misleading_samples_request}]

misleading_samples_response = chat_with_context(chat_history)
chat_history += [{'role': 'assistant', 'content': misleading_samples_response}]
print("Generated Misleading Samples:", misleading_samples_response, sep="\n")

# Step 3: Analyze Misleading Samples
# NOTE(review): the samples are quoted inside this request although they are
# already part of chat_history, so the model receives them twice.
analyze_misleading_samples_request = (
    f"Here are some misleading samples: {misleading_samples_response}\nUsing a step-by-step approach, analyze these samples "
    "to understand why the mispredictions occur. Conclude the findings in a few bullet points."
)
chat_history += [{'role': 'user', 'content': analyze_misleading_samples_request}]

analysis_response = chat_with_context(chat_history)
chat_history += [{'role': 'assistant', 'content': analysis_response}]
print("Analysis of Misleading Samples:", analysis_response, sep="\n")

# Step 4: Generate Optimized Prompt
# The reply to this request overwrites `optimized_prompt_response`, which the
# prediction cell prepends to every tweet.
generate_prompt_request = (
    f"Based on the following analysis: {analysis_response}\nGenerate an optimized prompt for predicting "
    "whether a tweet is 'ironic' or 'not ironic'. Ensure the model responds only with 'ironic' or 'not ironic'.\n\n"
    "### Requirements for Optimized Prompt:\n"
    "1. The prompt must include a clear description of the task and the labels.\n"
    "2. It should provide criteria for classifying tweets as 'ironic' or 'not ironic' based on the analysis.\n"
    "3. The prompt must ensure that the model responds strictly with 'ironic' or 'not ironic'.\n"
    "4. The prompt should help the model avoid common pitfalls and misclassifications identified during the analysis.\n"
    "5. Ensure the language is unambiguous and tailored to maximize the model's prediction accuracy."
)
chat_history += [{'role': 'user', 'content': generate_prompt_request}]

optimized_prompt_response = chat_with_context(chat_history)
chat_history += [{'role': 'assistant', 'content': optimized_prompt_response}]
print("Optimized Prompt:", optimized_prompt_response, sep="\n")

Generated Misleading Samples:
Here are 5 misleading samples for the task:

**Sample 1:**
"I'm so excited to be stuck in this traffic jam on my way home from work! #commuterlife"
Likely mispredicted label: **'ironic'** (but it's actually just frustrated)

This sample appears ironic because of the juxtaposition of excitement and a negative situation, but it's actually just a frustrated tweet.

**Sample 2:**
"Just got back from the most amazing vacation in Hawaii! The sun, sand, and surf were incredible. Can't wait to go back again next year #paradisefound"
Likely mispredicted label: **'ironic'** (but it's actually just a genuine review)

This sample seems ironic because of its over-the-top enthusiasm, but it's actually just a genuine review of a wonderful vacation.

**Sample 3:**
"Just tried the new vegan restaurant downtown and it was a total disaster. The 'meatball' tasted like cardboard. #veganfail"
Likely mispredicted label: **'ironic'** (but it's actually just a negative review)

Th

In [76]:
# Example of using the optimized prompt for prediction
def get_prediction(text):
    """Classify one tweet with the optimized prompt and return '0' or '1'.

    The prompt is the module-level ``optimized_prompt_response`` followed by
    the tweet and the output-format requirements.

    Args:
        text: The tweet to classify.

    Returns:
        '1' (ironic) or '0' (not ironic). A reply containing neither a
        standalone 0/1 digit nor a word label falls back to '0'.
    """
    import re  # local import: the cell must not rely on another cell's imports

    prompt = (
        f"{optimized_prompt_response}\n\n"
        f"Tweet: {text}\n\n"
        "### Requirements:\n"
        "1. Respond with only a single-digit (0 for not ironic, 1 for ironic).\n"
        "2. Do not provide any additional text or explanation.\n"
        "Respond with only '0' or '1':"
    )
    response = ollama.generate(model='llama3', prompt=prompt)
    response_text = response['response'].strip()

    # Accept only a standalone 0/1 token. A plain substring test
    # ('1' in response_text) also fires on "10" or "0 (confidence 0.91)".
    digit = re.search(r"(?<!\w)(?<!\d\.)[01](?!\w)(?!\.\d)", response_text)
    if digit:
        return digit.group(0)

    # The optimized prompt was asked to make the model answer with the words
    # 'ironic' / 'not ironic', so accept those when no digit is present.
    lowered = response_text.lower()
    if re.search(r"\b(?:not|non)[\s-]*ironic", lowered):
        return '0'
    if 'ironic' in lowered:
        return '1'

    # Unparseable reply: keep the historical default of "not ironic".
    return '0'

# Query the model once per tweet, announcing progress on every 1000th tweet.
predictions = []
for i, text in enumerate(texts):
    if not i % 1000:
        print(f"Predicting text {i+1} out of {len(texts)}")
    predictions.append(get_prediction(text))

# Evaluate Model
# Collapse each stored reply to "1" when it contains the character '1',
# otherwise to "0", then score against the string labels.
predictions = list(map(lambda p: "1" if "1" in p else "0", predictions))
accuracy = accuracy_score(labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicting text 1 out of 3817
Prediction for text 1: 1
Prediction for text 2: 0
Prediction for text 3: 0
Prediction for text 4: 0
Prediction for text 5: 0
Prediction for text 6: 1
Prediction for text 7: 1
Prediction for text 8: 0
Prediction for text 9: 0
Prediction for text 10: 0
Prediction for text 11: 1
Prediction for text 12: 1
Prediction for text 13: 0
Prediction for text 14: 0
Prediction for text 15: 1
Prediction for text 16: 1
Prediction for text 17: 0
Prediction for text 18: 0
Prediction for text 19: 0
Prediction for text 20: 1
Prediction for text 21: 1
Prediction for text 22: 1
Prediction for text 23: 0
Prediction for text 24: 1
Prediction for text 25: 0
Prediction for text 26: 1
Prediction for text 27: 1
Prediction for text 28: 1
Prediction for text 29: 0
Prediction for text 30: 1
Prediction for text 31: 0
Prediction for text 32: 0
Prediction for text 33: 0
Prediction for text 34: 0
Prediction for text 35: 1
Prediction for text 36: 1
Prediction for text 37: 0
Prediction for te

# With basic prompt

In [10]:
def get_prediction(text):
    """Classify one tweet with the basic one-line prompt and return '0' or '1'.

    The raw model reply used to be returned untouched, so a reply such as
    "1\n" or a full sentence could never equal a label during scoring. The
    reply is now normalized the same way as in the other prediction cells.

    Args:
        text: The tweet to classify.

    Returns:
        '1' or '0'. A reply containing neither a standalone 0/1 digit nor a
        word label falls back to '0'.
    """
    import re  # local import: the cell must not rely on another cell's imports

    # NOTE(review): this baseline prompt never says which digit means "ironic";
    # the parsing below assumes 1 = ironic, as the other cells do.
    response = ollama.generate(model='llama3', prompt=f"Predict if the tweet text is ironic or not: {text}. make sure to respond with only the prediction value (0 or 1)")
    response_text = response['response'].strip()

    # Accept only a standalone 0/1 token, not a digit buried in a number.
    digit = re.search(r"(?<!\w)(?<!\d\.)[01](?!\w)(?!\.\d)", response_text)
    if digit:
        return digit.group(0)

    # Fall back to a word label if the model answered in words.
    lowered = response_text.lower()
    if re.search(r"\b(?:not|non)[\s-]*ironic", lowered):
        return '0'
    if 'ironic' in lowered:
        return '1'

    # Unparseable reply: default to "not ironic", as in the other cells.
    return '0'

# Query the model once per tweet, announcing progress on every 1000th tweet.
predictions = []
for i, text in enumerate(texts):
    if not i % 1000:
        print(f"Predicting tweet {i+1} out of {len(texts)}")
    predictions.append(get_prediction(text))

# Step 6: Evaluate Model
from sklearn.metrics import accuracy_score
# The predictions are scored exactly as get_prediction returned them.
accuracy = accuracy_score(labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicting tweet 1 out of 3818
Predicting tweet 101 out of 3818
Predicting tweet 201 out of 3818
Predicting tweet 301 out of 3818
Predicting tweet 401 out of 3818
Predicting tweet 501 out of 3818
Predicting tweet 601 out of 3818
Predicting tweet 701 out of 3818
Predicting tweet 801 out of 3818
Predicting tweet 901 out of 3818
Predicting tweet 1001 out of 3818
Predicting tweet 1101 out of 3818
Predicting tweet 1201 out of 3818
Predicting tweet 1301 out of 3818
Predicting tweet 1401 out of 3818
Predicting tweet 1501 out of 3818
Predicting tweet 1601 out of 3818
Predicting tweet 1701 out of 3818
Predicting tweet 1801 out of 3818
Predicting tweet 1901 out of 3818
Predicting tweet 2001 out of 3818
Predicting tweet 2101 out of 3818
Predicting tweet 2201 out of 3818
Predicting tweet 2301 out of 3818
Predicting tweet 2401 out of 3818
Predicting tweet 2501 out of 3818
Predicting tweet 2601 out of 3818
Predicting tweet 2701 out of 3818
Predicting tweet 2801 out of 3818
Predicting tweet 2901 out 

# Few-shot prompt

In [84]:
import ollama
import pandas as pd
from sklearn.metrics import accuracy_score

# Re-read the training file so that this cell does not depend on earlier ones.
datapath = 'dataset/irony'
df = pd.read_csv(datapath+'/SemEval2018-T3-train-taskA.txt', sep='\t', header=None)

# header=None leaves the file's header text in row 0, so both columns
# (1 = label, 2 = tweet text) are taken from row 1 onwards.
labels, texts = (df[column].iloc[1:] for column in (1, 2))

# The first three tweets and their labels become the few-shot examples.
few_shot_text, few_shot_labels = texts.iloc[:3], labels.iloc[:3]

# Few-shot prompt header: three labelled example tweets, the instruction, and
# the output-format requirements. It contains no tweet to classify.
prompt = (
    "Here are the first few tweets in the dataset:\n\n"
    f"1. {few_shot_text.iloc[0]} label:{few_shot_labels.iloc[0]}\n"
    f"2. {few_shot_text.iloc[1]} label:{few_shot_labels.iloc[1]}\n"
    f"3. {few_shot_text.iloc[2]} label:{few_shot_labels.iloc[2]}\n\n"
    # The blank line keeps the instruction from running straight into the
    # "### Requirements:" header (previously "...ironic or not:### Requirements:").
    "Based on these examples, predict whether the following tweet is ironic or not:\n\n"
    "### Requirements:\n"
    "1. Respond with only a single-digit (0 for not ironic, 1 for ironic).\n"
    "2. Do not provide any additional text or explanation.\n"
    "Respond with only '0' or '1':"
)

# Function to interact with the Ollama API
def get_prediction(text):
    """Classify one tweet with the few-shot prompt and return '0' or '1'.

    The module-level ``prompt`` holds only the examples and the instructions.
    Previously it was sent as-is and ``text`` was ignored, so every call asked
    the model the identical, tweet-less question. The tweet is now appended
    on each call.

    Args:
        text: The tweet to classify.

    Returns:
        '1' (ironic) or '0' (not ironic). A reply containing neither a
        standalone 0/1 digit nor a word label falls back to '0'.
    """
    import re  # local import: the cell must not rely on another cell's imports

    # Append the tweet under classification to the shared few-shot header.
    full_prompt = f"{prompt}\n\nTweet: {text}\n\nLabel:"
    response = ollama.generate(model='llama3', prompt=full_prompt)
    response_text = response['response'].strip()

    # Accept only a standalone 0/1 token. A plain substring test
    # ('1' in response_text) also fires on "10" or "0 (confidence 0.91)".
    digit = re.search(r"(?<!\w)(?<!\d\.)[01](?!\w)(?!\.\d)", response_text)
    if digit:
        return digit.group(0)

    # Fall back to a word label if the model answered in words.
    lowered = response_text.lower()
    if re.search(r"\b(?:not|non)[\s-]*ironic", lowered):
        return '0'
    if 'ironic' in lowered:
        return '1'

    # Unparseable reply: keep the historical default of "not ironic".
    return '0'
    
# Predict every tweet except the first three, which serve as the few-shot
# examples inside the prompt.
predictions = []
for i, text in enumerate(texts):
    if i >= 3:
        if not i % 1000:
            print(f"Predicting tweet {i+1} out of {len(texts)}")
        predictions.append(get_prediction(text))

# Step 6: Evaluate Model
from sklearn.metrics import accuracy_score
# Drop the three example labels so both sequences cover the same tweets.
accuracy = accuracy_score(labels[3:], predictions)
print(f"Model Accuracy: {accuracy}")



Predicting tweet 101 out of 3817
Predicting tweet 201 out of 3817
Predicting tweet 301 out of 3817
Predicting tweet 401 out of 3817
Predicting tweet 501 out of 3817
Predicting tweet 601 out of 3817
Predicting tweet 701 out of 3817
Predicting tweet 801 out of 3817
Predicting tweet 901 out of 3817
Predicting tweet 1001 out of 3817
Predicting tweet 1101 out of 3817
Predicting tweet 1201 out of 3817
Predicting tweet 1301 out of 3817
Predicting tweet 1401 out of 3817
Predicting tweet 1501 out of 3817
Predicting tweet 1601 out of 3817
Predicting tweet 1701 out of 3817
Predicting tweet 1801 out of 3817
Predicting tweet 1901 out of 3817
Predicting tweet 2001 out of 3817
Predicting tweet 2101 out of 3817
Predicting tweet 2201 out of 3817
Predicting tweet 2301 out of 3817
Predicting tweet 2401 out of 3817
Predicting tweet 2501 out of 3817
Predicting tweet 2601 out of 3817
Predicting tweet 2701 out of 3817
Predicting tweet 2801 out of 3817
Predicting tweet 2901 out of 3817
Predicting tweet 3001 o