In [56]:
from collections import Counter
import random

import numpy as np
from datasets import load_dataset
import ollama

# Fixed seed so both shuffles below (and therefore the evaluation subset) are reproducible.
RANDOM_SEED = 42
# Integer class id used by the dataset -> label name used in prompts and metrics.
mapping = {0: "World", 1: "Sports", 2: "Business", 3: "Sci/Tech"}
# Number of articles kept per class; with the four classes in `mapping` this gives 10000 samples.
samples_per_label = 2500

dataset = load_dataset("ag_news")
texts = dataset["train"]["text"]
labels = dataset["train"]["label"]

# shuffle the data (texts and labels are permuted with the same index order)
random.seed(RANDOM_SEED)
shuffled_indices = list(range(len(texts)))
random.shuffle(shuffled_indices)
texts = [texts[i] for i in shuffled_indices]
labels = [labels[i] for i in shuffled_indices]

# create a balanced dataset: the first `samples_per_label` shuffled articles of each class
labels_array = np.array(labels)  # built once; it does not change inside the loop
balanced_texts = []
balanced_labels = []
for label in mapping:
    label_indices = np.where(labels_array == label)[0][:samples_per_label]
    balanced_texts.extend(texts[i] for i in label_indices)
    balanced_labels.extend(labels[i] for i in label_indices)

# shuffle the balanced dataset so the classes are interleaved rather than grouped
shuffled_indices = list(range(len(balanced_texts)))
random.shuffle(shuffled_indices)
balanced_texts = [balanced_texts[i] for i in shuffled_indices]
balanced_labels = [balanced_labels[i] for i in shuffled_indices]
texts = balanced_texts
labels = balanced_labels

# verify the balance: fail loudly if any class ended up with fewer samples than requested
counter = Counter(balanced_labels)
assert all(counter[label] == samples_per_label for label in mapping), counter


In [106]:
# Show the first three articles, then their integer labels, one list per line.
for preview in (texts[:3], labels[:3]):
    print(preview)

['Hewlett-Packard takes a new approach to software The company extends a deal with open-source Java software maker JBoss in an effort to compete with IBM.', 'Hurricanes Likely To Cause Insurers #39; Rates To Rise MIAMI -- Four hurricanes in just six weeks are likely going to force Florida #39;s top two insurers to raise rates or reduce their business in the state.', 'Three pops and Thomson goes out Atlanta Braves starting pitcher John Thomson, center, is pulled from the game against the Houston Astros during the first inning of Game 3 of the National League Division Series in Houston, on Saturday.']
[3, 2, 1]


In [109]:
import ollama
from sklearn.metrics import accuracy_score
# Function to interact with the Ollama API
def chat_with_context(history, model='llama3'):
    """Send a chat transcript to Ollama and return the text of the reply.

    Args:
        history: List of message dicts (``{'role': ..., 'content': ...}``) passed
            unchanged to ``ollama.chat`` as ``messages``. It is not modified here;
            callers append the reply themselves.
        model: Name of the Ollama model to query. Defaults to ``'llama3'``.

    Returns:
        The ``['message']['content']`` field of the chat response.
    """
    response = ollama.chat(model=model, messages=history)
    return response['message']['content']

# Initialize the chat history (shared by the three prompt-optimization steps in this cell)
chat_history = []
# Number of synthetic articles the model is asked to invent in step 1.
num_samples = 5

# Step 1: Define Task Description for AG News Dataset
task_description = "We need to predict the category of a given news article. The labels are 'World', 'Sports', 'Business', and 'Sci/Tech'."

# Adjacent string literals are concatenated, so every line must carry its own
# trailing "\n", and the closing instruction must appear exactly once.
generate_samples_request = (
    f"As an advanced language model exposed to diverse datasets, you are expected to create {num_samples} samples for the task outlined below.\n"
    "Generate samples that are likely to be correctly classified as 'World', 'Sports', 'Business', or 'Sci/Tech', as well as samples that might challenge the classification accuracy according to the task instructions.\n\n"
    f"### Task Description:\n{task_description}\n\n"
    "### Requirements for Samples:\n"
    "1. Each sample must present a unique and intricate challenge.\n"
    "2. The complexity of the samples should be such that simply applying the given task instruction would likely lead to incorrect or incomplete results for some samples.\n"
    "3. The samples should cover a diverse range of scenarios within the scope of the task, avoiding repetition and predictability.\n"
    "4. Ensure that the samples, while challenging, remain realistic and pertinent to the task's context.\n"
    "Generate the samples keeping these requirements in mind.\n###"
)

# The task description is sent as its own user turn, followed by the generation request.
chat_history.append({'role': 'user', 'content': task_description})
chat_history.append({'role': 'user', 'content': generate_samples_request})

# Record the reply in the history so steps 2 and 3 see it as prior context.
samples_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': samples_response})
print("Generated Samples:")
print(samples_response)

# Step 2: Analyze Samples with Chain of Thought
# The request embeds the generated samples and asks for example-agnostic
# classification guidelines; the pieces are joined without any separator.
analyze_samples_request = "".join([
    f"Based on the following samples: {samples_response}\n",
    "Think step by step and derive general principles for classifying news articles into categories 'World', 'Sports', 'Business', or 'Sci/Tech'.\n",
    "Avoid focusing on specific details of the provided samples. Instead, develop broader, example-agnostic guidelines that can be applied universally to classify any news article. Conclude your analysis with clear, concise bullet points outlining:\n",
    "- The general characteristics that typically define each category.\n",
    "- Common mistakes that might lead to misclassifications and how to avoid them.\n",
    "- Guidelines under which circumstances each label should be predicted.\n",
    "These principles should help in accurately predicting the category of a news article based on its content without additional context.",
])

# Send the request on top of the existing history, then record both turns.
chat_history += [{'role': 'user', 'content': analyze_samples_request}]
analysis_response = chat_with_context(chat_history)
chat_history += [{'role': 'assistant', 'content': analysis_response}]

print("Analysis of Samples:", analysis_response, sep="\n")

# Step 3: Generate Optimized Prompt
# Adjacent string literals are concatenated with no separator, so each numbered
# requirement needs its own trailing "\n" to stay a separate list item.
generate_prompt_request = (
    f"Based on the following analysis: {analysis_response}\nGenerate an optimized prompt for predicting "
    "whether a news article is 'World', 'Sports', 'Business', or 'Sci/Tech'.\n\n"
    "### Requirements for Optimized Prompt:\n"
    "1. The prompt must include a clear description of the task and the labels.\n"
    "2. It should provide a comprehensive criteria for classifying news articles as 'World', 'Sports', 'Business', or 'Sci/Tech' based on the analysis.\n"
    "3. The prompt must ensure that the model responds strictly with 'World', 'Sports', 'Business', or 'Sci/Tech'.\n"
    "4. The prompt should help the model avoid common pitfalls and misclassifications identified during the analysis.\n"
    "5. Ensure the language is unambiguous and tailored to maximize the model's prediction accuracy.\n"
    "6. Encourage the model to think step by step.\n"
    "Respond with no other explanation but only the content of the prompt that is ready for the model to predict\n"
    "Prompt:"
)

chat_history.append({'role': 'user', 'content': generate_prompt_request})

# The reply is the prompt text that the prediction cell embeds verbatim.
optimized_prompt_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': optimized_prompt_response})
print("Optimized Prompt:")
print(optimized_prompt_response)

Generated Samples:
Here are five sample news articles that challenge the classification accuracy:

**Sample 1:**
Title: "NATO Leaders Meet to Discuss Global Security Threats"
Text: "Leaders from NATO member countries gathered in Brussels yesterday to discuss the growing threat of cyber attacks on global infrastructure. The meeting came after a series of high-profile hacks targeted major financial institutions and government agencies. While the focus was on cybersecurity, some delegates also touched on issues related to climate change and sustainable energy."

Category Challenge: This article blends international relations (World) with technology and security (Sci/Tech), making it difficult to categorize solely as one label.

**Sample 2:**
Title: "Serena Williams Dominates at Australian Open, Eyes Grand Slam Record"
Text: "Tennis superstar Serena Williams continued her remarkable comeback yesterday, defeating her opponent in straight sets to move closer to the record for most Grand Slam

In [118]:
# Function to predict the label of a given news article
def get_prediction(text, max_retries=5):
    """Classify one news article with the optimized prompt.

    The prompt embeds the module-level ``optimized_prompt_response`` followed by
    the article and the answer-format requirements. The reply is stripped of
    surrounding whitespace, ``**`` markers and quote characters, and accepted
    only if it then equals one of the four label names exactly.

    Args:
        text: The article text to classify.
        max_retries: How many additional times to query the model when the
            reply is not a valid label. The retry count is bounded so that a
            model which keeps answering in another format cannot cause
            unbounded recursion or an endless stream of requests.

    Returns:
        One of 'World', 'Sports', 'Business', 'Sci/Tech', or the sentinel
        'Invalid Prediction' when no attempt produced a valid label.
    """
    valid_labels = ('World', 'Sports', 'Business', 'Sci/Tech')
    prompt = (
        "Follow the guidelines of the prompt step by step:\n"
        f"{optimized_prompt_response}\n\n"
        f"article:{text}\n"
        "### Requirements:\n"
        "1. Respond with only the label name ('World', 'Sports', 'Business', or 'Sci/Tech').\n"
        "2. Do not provide any additional text or explanation.\n"
        "Respond with only the label name:"
    )

    # One initial attempt plus up to `max_retries` retries with the same prompt.
    for _ in range(max_retries + 1):
        response = ollama.generate(model='llama3', prompt=prompt)
        # Remove markdown bold markers and quotes the model tends to wrap the label in.
        prediction = response['response'].strip().replace("**", "").replace("'", "").replace('"', '')
        if prediction in valid_labels:
            return prediction
    return "Invalid Prediction"

# Upper bound on the number of articles evaluated in this cell.
max_samples = 10000

predictions = []
# Make predictions. Slicing up front means no model call is spent on an article
# beyond the cap. The progress line still reports len(texts), the full list size.
for i, text in enumerate(texts[:max_samples]):
    prediction = get_prediction(text)
    if i < 3:
        print(f"Predicted Label: {prediction}\n")
    if i % 1000 == 0:
        print(f"Predicted {i+1} samples out of {len(texts)}")
    predictions.append(prediction)

# Step 4: Evaluate the Model
# Integer labels are mapped to their names so they are comparable with the predictions.
true_labels = [mapping[label] for label in labels[:len(predictions)]]
accuracy = accuracy_score(true_labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicted Label: Business

Predicted 1 samples out of 10000
Predicted Label: World

Predicted Label: Sports

Predicted 1001 samples out of 10000
Predicted 2001 samples out of 10000
Predicted 3001 samples out of 10000
Predicted 4001 samples out of 10000
Predicted 5001 samples out of 10000
Predicted 6001 samples out of 10000
Predicted 7001 samples out of 10000
Predicted 8001 samples out of 10000
Predicted 9001 samples out of 10000
Model Accuracy: 0.7843


# Zero-shot CoT Prompt

In [68]:
# Function to predict the label of a given news article
def get_prediction(text):
    """Classify one news article with a zero-shot "think step by step" prompt.

    Args:
        text: The article text to classify.

    Returns:
        'World', 'Sports', 'Business' or 'Sci/Tech' when the cleaned reply is
        exactly one of those names, otherwise 'Invalid Prediction'.
    """
    allowed = ('World', 'Sports', 'Business', 'Sci/Tech')
    prompt_parts = [
        "Predict the news article into one of following categories: ('World', 'Sports', 'Business', or 'Sci/Tech')\n\n",
        "Let's think step by step\n",
        f"article:{text}\n",
        "### Requirements:\n",
        "1. Respond with only the label name ('World', 'Sports', 'Business', or 'Sci/Tech').\n",
        "2. Do not provide any additional text or explanation.\n",
        "Respond with only the label name:",
    ]
    reply = ollama.generate(model='llama3', prompt="".join(prompt_parts))

    # Trim whitespace, then drop bold markers and quotes around the label.
    cleaned = reply['response'].strip()
    for marker in ("**", "'", '"'):
        cleaned = cleaned.replace(marker, "")

    # Anything that is not exactly a label name is reported via the sentinel.
    return cleaned if cleaned in allowed else "Invalid Prediction"

# Upper bound on the number of articles evaluated in this cell.
max_samples = 10000

predictions = []
# Make predictions. Slicing up front means no model call is spent on an article
# beyond the cap. The progress line still reports len(texts), the full list size.
for i, text in enumerate(texts[:max_samples]):
    prediction = get_prediction(text)
    if i < 3:
        print(f"Predicted Label: {prediction}\n")
    if i % 1000 == 0:
        print(f"Predicted {i+1} samples out of {len(texts)}")
    predictions.append(prediction)

# Step 4: Evaluate the Model
# "Invalid Prediction" entries never equal a label name, so they count as errors.
true_labels = [mapping[label] for label in labels[:len(predictions)]]
accuracy = accuracy_score(true_labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicted Label: Business

Predicted 1 samples out of 10000
Predicted Label: Business

Predicted Label: Sports

Predicted 1001 samples out of 10000
Predicted 2001 samples out of 10000
Predicted 3001 samples out of 10000
Predicted 4001 samples out of 10000
Predicted 5001 samples out of 10000
Predicted 6001 samples out of 10000
Predicted 7001 samples out of 10000
Predicted 8001 samples out of 10000
Predicted 9001 samples out of 10000
Model Accuracy: 0.7592


# Few-shot Prompt

In [119]:
# Take the few-shot examples as a three-row slice so only those rows are read,
# instead of pulling the whole "text" and "label" columns and slicing them.
# NOTE(review): these are the first three rows of the train split, the same split
# the evaluation texts are sampled from, so they may also appear in the evaluation
# set, and they are not guaranteed to cover all four classes. Confirm this is intended.
few_shot_rows = dataset["train"][:3]
few_shot_texts = few_shot_rows["text"]
few_shot_labels = few_shot_rows["label"]

# Function to predict the label of a given news article
def get_prediction(text):
    """Classify one news article with a three-example few-shot prompt.

    The examples come from the module-level ``few_shot_texts`` and
    ``few_shot_labels`` (indices 0-2); label ids are rendered through ``mapping``.

    Args:
        text: The article text to classify.

    Returns:
        'World', 'Sports', 'Business' or 'Sci/Tech' when the cleaned reply is
        exactly one of those names, otherwise 'Invalid Prediction'.
    """
    # Numbered "<n>. <article> - <label name>" lines, one per example.
    examples = "".join(
        f"{n}. {few_shot_texts[n - 1]} - {mapping[few_shot_labels[n - 1]]}\n"
        for n in (1, 2, 3)
    )
    prompt = (
        "Here are few news articles and their categories:\n\n"
        + examples
        + "\n"
        + "Based on these examples, predict the category of the following news article into one of the following categories: ('World', 'Sports', 'Business', or 'Sci/Tech')\n\n"
        + f"article:{text}\n"
        + "### Requirements:\n"
        + "1. Respond with only the label name ('World', 'Sports', 'Business', or 'Sci/Tech').\n"
        + "2. Do not provide any additional text or explanation.\n"
        + "Respond with only the label name:"
    )
    raw = ollama.generate(model='llama3', prompt=prompt)['response']

    # Trim whitespace, then drop bold markers and quotes around the label.
    label = raw.strip().replace("**", "").replace("'", "").replace('"', '')
    if label in ('World', 'Sports', 'Business', 'Sci/Tech'):
        return label
    return "Invalid Prediction"

# Upper bound on the number of articles evaluated in this cell.
max_samples = 10000

predictions = []
# Make predictions. Slicing up front means no model call is spent on an article
# beyond the cap. The progress line still reports len(texts), the full list size.
for i, text in enumerate(texts[:max_samples]):
    prediction = get_prediction(text)
    if i < 3:
        print(f"Predicted Label: {prediction}\n")
    if i % 1000 == 0:
        print(f"Predicted {i+1} samples out of {len(texts)}")
    predictions.append(prediction)

# Step 4: Evaluate the Model
# "Invalid Prediction" entries never equal a label name, so they count as errors.
true_labels = [mapping[label] for label in labels[:len(predictions)]]
accuracy = accuracy_score(true_labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicted Label: Business

Predicted 1 samples out of 10000
Predicted Label: Business

Predicted Label: Sports

Predicted 1001 samples out of 10000
Predicted 2001 samples out of 10000
Predicted 3001 samples out of 10000
Predicted 4001 samples out of 10000
Predicted 5001 samples out of 10000
Predicted 6001 samples out of 10000
Predicted 7001 samples out of 10000
Predicted 8001 samples out of 10000
Predicted 9001 samples out of 10000
Model Accuracy: 0.7677


# Basic Prompt

In [70]:
# Function to predict the label of a given news article
def get_prediction(text):
    """Classify one news article with the plain baseline prompt.

    Args:
        text: The article text to classify.

    Returns:
        'World', 'Sports', 'Business' or 'Sci/Tech' when the cleaned reply is
        exactly one of those names, otherwise 'Invalid Prediction'.
    """
    instruction = "Predict the news article into one of following categories: ('World', 'Sports', 'Business', or 'Sci/Tech')\n\n"
    requirements = (
        "### Requirements:\n"
        "1. Respond with only the label name ('World', 'Sports', 'Business', or 'Sci/Tech').\n"
        "2. Do not provide any additional text or explanation.\n"
        "Respond with only the label name:"
    )
    result = ollama.generate(
        model='llama3',
        prompt=f"{instruction}article:{text}\n{requirements}",
    )

    # Trim whitespace, then drop bold markers and quotes around the label.
    answer = result['response'].strip().replace("**", "").replace("'", "").replace('"', '')

    # Guard clause: only an exact label name is accepted.
    if answer in {'World', 'Sports', 'Business', 'Sci/Tech'}:
        return answer
    return "Invalid Prediction"

# Upper bound on the number of articles evaluated in this cell.
max_samples = 10000

predictions = []
# Make predictions. Slicing up front means no model call is spent on an article
# beyond the cap. The progress line still reports len(texts), the full list size.
for i, text in enumerate(texts[:max_samples]):
    prediction = get_prediction(text)
    if i < 3:
        print(f"Predicted Label: {prediction}\n")
    if i % 1000 == 0:
        print(f"Predicted {i+1} samples out of {len(texts)}")
    predictions.append(prediction)

# Step 4: Evaluate the Model
# "Invalid Prediction" entries never equal a label name, so they count as errors.
true_labels = [mapping[label] for label in labels[:len(predictions)]]
accuracy = accuracy_score(true_labels, predictions)
print(f"Model Accuracy: {accuracy}")

Predicted Label: Business

Predicted 1 samples out of 10000
Predicted Label: Business

Predicted Label: Sports

Predicted 1001 samples out of 10000
Predicted 2001 samples out of 10000
Predicted 3001 samples out of 10000
Predicted 4001 samples out of 10000
Predicted 5001 samples out of 10000
Predicted 6001 samples out of 10000
Predicted 7001 samples out of 10000
Predicted 8001 samples out of 10000
Predicted 9001 samples out of 10000
Model Accuracy: 0.7646


# Two-step answer retrieval

In [42]:
import ollama
from sklearn.metrics import accuracy_score
# Function to interact with the Ollama API
def chat_with_context(history, model='llama3'):
    """Send a chat transcript to Ollama and return the text of the reply.

    Args:
        history: List of message dicts (``{'role': ..., 'content': ...}``) passed
            unchanged to ``ollama.chat`` as ``messages``. It is not modified here;
            callers append the reply themselves.
        model: Name of the Ollama model to query. Defaults to ``'llama3'``.

    Returns:
        The ``['message']['content']`` field of the chat response.
    """
    response = ollama.chat(model=model, messages=history)
    return response['message']['content']

# Step 1: Define Task Description for AG News Dataset
# How many synthetic articles the model is asked to invent.
num_samples = 5
task_description = "We need to predict the category of a given news article. The labels are 'World', 'Sports', 'Business', and 'Sci/Tech'."
instruction = "Classify each news article as 'World', 'Sports', 'Business', or 'Sci/Tech'."

# The pieces are joined without any separator; each carries its own newlines.
generate_samples_request = "".join([
    f"As an advanced language model you should create {num_samples} samples for the task outlined below.\n",
    "Generate samples that are likely to be correctly classified as 'World', 'Sports', 'Business', or 'Sci/Tech' and samples that might be misclassified according to the task instructions.\n\n",
    f"### Task Description:\n{task_description}\n\n",
    f"### Task Instructions:\n{instruction}\n\n",
    "### Requirements for Samples:\n",
    "1. Each sample must present a unique and intricate challenge.\n",
    "2. The complexity of the samples should be such that simply applying the given task instruction would likely lead to incorrect or incomplete results for some samples.\n",
    "3. The samples should cover a diverse range of scenarios within the scope of the task, avoiding repetition and predictability.\n",
    "4. Ensure that the samples, while challenging, remain realistic and pertinent to the task's context.\n",
    "Generate the samples keeping these requirements in mind.\n###",
])

# Start a fresh chat history: the task description as its own user turn,
# followed by the generation request.
chat_history = [
    {'role': 'user', 'content': task_description},
    {'role': 'user', 'content': generate_samples_request},
]

# Record the reply so the later steps see it as prior context.
samples_response = chat_with_context(chat_history)
chat_history += [{'role': 'assistant', 'content': samples_response}]

print("Generated Samples:", samples_response, sep="\n")

# Step 2: Analyze Samples with Chain of Thought
# The request embeds the generated samples and asks for example-agnostic
# classification guidelines; the pieces are joined without any separator.
analyze_samples_request = "".join([
    f"Based on the following samples: {samples_response}\n",
    "Think step by step and derive general principles for classifying news articles into categories 'World', 'Sports', 'Business', or 'Sci/Tech'.\n",
    "Avoid focusing on specific details of the provided samples. Instead, develop broader, example-agnostic guidelines that can be applied universally to classify any news article. Conclude your analysis with clear, concise bullet points outlining:\n",
    "- The general characteristics that typically define each category.\n",
    "- Common mistakes that might lead to misclassifications and how to avoid them.\n",
    "- Guidelines under which circumstances each label should be predicted.\n",
    "These principles should help in accurately predicting the category of a news article based on its content without additional context.",
])

# Send the request on top of the existing history, then record both turns.
chat_history += [{'role': 'user', 'content': analyze_samples_request}]
analysis_response = chat_with_context(chat_history)
chat_history += [{'role': 'assistant', 'content': analysis_response}]

print("Analysis of Samples:", analysis_response, sep="\n")

# Step 3: Generate Optimized Prompt
# Adjacent string literals are concatenated with no separator, so each numbered
# requirement needs its own trailing "\n" to stay a separate list item.
generate_prompt_request = (
    f"Based on the following analysis: {analysis_response}\nGenerate an optimized prompt for predicting "
    "whether a news article is 'World', 'Sports', 'Business', or 'Sci/Tech'. Ensure the model responds only with 'World', 'Sports', 'Business', or 'Sci/Tech'.\n\n"
    "### Requirements for Optimized Prompt:\n"
    "1. The prompt must include a clear description of the task and the labels.\n"
    "2. It should provide a comprehensive criteria for classifying news articles as 'World', 'Sports', 'Business', or 'Sci/Tech' based on the analysis.\n"
    "3. The prompt should help the model avoid common pitfalls and misclassifications identified during the analysis.\n"
    "4. Ensure the language is unambiguous and tailored to maximize the model's prediction accuracy.\n"
    "5. Encourage the model to think step by step.\n"
    "Respond with no other explanation but only the content of the prompt that is ready for the model to predict\n"
    "Prompt:"
)

chat_history.append({'role': 'user', 'content': generate_prompt_request})

# The reply is the prompt text that the two-step prediction cell embeds verbatim.
optimized_prompt_response = chat_with_context(chat_history)
chat_history.append({'role': 'assistant', 'content': optimized_prompt_response})
print("Optimized Prompt:")
print(optimized_prompt_response)

Generated Samples:
Here are five news article samples that present unique challenges for predicting their categories:

**Sample 1:**
Title: "New Study Reveals Impact of Climate Change on Global Food Supply"
Text: A recent study published in a leading scientific journal has found that climate change is having a significant impact on global food production, with some regions experiencing up to a 50% decline in crop yields. The findings suggest that urgent action is needed to mitigate the effects of climate change on agriculture.

Category Prediction Challenge: This sample challenges predictive models because it combines environmental science (Sci/Tech) with global implications (World), making it difficult to categorize solely as 'World' or 'Sci/Tech'.

**Sample 2:**
Title: "Lionel Messi Sets New Record for Most Goals Scored in International Competition"
Text: Argentine football star Lionel Messi has set a new record by scoring his 80th goal in international competition, surpassing the pr

In [44]:
# Function to predict the label of a given news article
def get_prediction(text, max_retries=5):
    """Classify one news article in two model calls: analyse, then extract the label.

    The first call sends the module-level ``optimized_prompt_response`` together
    with the article and asks for a detailed analysis. The second call feeds that
    analysis back and asks for the bare label name. The reply is stripped of
    surrounding whitespace, ``**`` markers and quote characters, and accepted only
    if it then equals one of the four label names exactly.

    Args:
        text: The article text to classify.
        max_retries: How many additional times to repeat the whole two-call
            sequence when the final reply is not a valid label. The retry count
            is bounded so that a model which keeps answering in another format
            cannot cause unbounded recursion or an endless stream of requests.

    Returns:
        One of 'World', 'Sports', 'Business', 'Sci/Tech', or the sentinel
        'Invalid Prediction' when no attempt produced a valid label.
    """
    valid_labels = ('World', 'Sports', 'Business', 'Sci/Tech')

    # First part: ask for a detailed response following the optimized prompt.
    # This prompt does not depend on the attempt, so it is built once.
    initial_prompt = (
        "Follow the guidelines of the prompt:\n"
        f"{optimized_prompt_response}\n\n"
        f"Article: {text}\n"
        "### Initial Analysis:\n"
        "Provide your detailed analysis and suggest a category based on the content of the article."
    )

    # One initial attempt plus up to `max_retries` retries; each attempt is two model calls.
    for _ in range(max_retries + 1):
        initial_response = ollama.generate(model='llama3', prompt=initial_prompt)
        detailed_analysis = initial_response['response'].strip()

        # Second part: narrow the analysis down to just the predicted label.
        final_prompt = (
            "Based on the detailed analysis, respond with only the category name:\n"
            f"{detailed_analysis}\n"
            "### Requirements:\n"
            "1. Respond with only the label name ('World', 'Sports', 'Business', or 'Sci/Tech').\n"
            "2. Do not provide any additional text or explanation.\n"
            "Respond with only the label name:"
        )
        final_response = ollama.generate(model='llama3', prompt=final_prompt)
        # Remove markdown bold markers and quotes the model tends to wrap the label in.
        prediction = final_response['response'].strip().replace("**", "").replace("'", "").replace('"', '')
        if prediction in valid_labels:
            return prediction
    return "Invalid Prediction"
# Upper bound on the number of articles evaluated in this cell.
max_samples = 10000

# Label names for every article; sliced below to match the predictions made so far.
true_labels = [mapping[label] for label in labels]
predictions = []
# Make predictions. Slicing up front caps the run at exactly `max_samples` articles.
# The progress line still reports len(texts), the full list size.
for i, text in enumerate(texts[:max_samples]):
    prediction = get_prediction(text)
    predictions.append(prediction)
    if i < 3:
        print(f"Predicted Label: {prediction}\n")
    if i % 100 == 0:
        # Running accuracy over everything predicted so far.
        print(f"Predicted {i+1} samples out of {len(texts)}")
        acc = accuracy_score(true_labels[:len(predictions)], predictions)
        print(f"Accuracy after {i+1} samples: {acc}")

# Step 4: Evaluate the Model
accuracy = accuracy_score(true_labels[:len(predictions)], predictions)
print(f"Model Accuracy: {accuracy:.2f}")

Predicted Label: World

Predicted 1 samples out of 120000
Accuracy after 1 samples: 1.0
Predicted Label: Sci/Tech

Predicted Label: World

Predicted 101 samples out of 120000
Accuracy after 101 samples: 0.8217821782178217
Predicted 201 samples out of 120000
Accuracy after 201 samples: 0.8109452736318408
Predicted 301 samples out of 120000
Accuracy after 301 samples: 0.7940199335548173
Predicted 401 samples out of 120000
Accuracy after 401 samples: 0.770573566084788
Predicted 501 samples out of 120000
Accuracy after 501 samples: 0.7504990019960079
Predicted 601 samples out of 120000
Accuracy after 601 samples: 0.7554076539101497
Predicted 701 samples out of 120000
Accuracy after 701 samples: 0.7631954350927247
Predicted 801 samples out of 120000
Accuracy after 801 samples: 0.7715355805243446
Predicted 901 samples out of 120000
Accuracy after 901 samples: 0.7702552719200888
Predicted 1001 samples out of 120000
Accuracy after 1001 samples: 0.7752247752247752
Predicted 1101 samples out of 

KeyboardInterrupt: 