b77 zs flat entity

In [2]:
import pandas as pd
import openai
import json
import re
from tqdm import tqdm
from typing import List, Dict
from collections import Counter

# Set OpenAI API Key
# NOTE: the value assigned here is an empty placeholder string; supply a real
# key before running this cell and avoid committing it with the notebook.
openai.api_key =  ""

# 1. Prompt Builder: Entity Extraction → Intent Reasoning
def build_prompt(user_input: str, topn_labels: List[str]) -> str:
    """Assemble the zero-shot "extract entities, then pick an intent" prompt.

    Args:
        user_input: The query text, inserted after ``QUERY:``.
        topn_labels: Candidate intent labels. Underscores are shown to the
            model as spaces, one bullet per label, under ``OPTIONS:``.

    Returns:
        The full prompt text, ending with the ENTITIES / EXPLANATION / LABEL
        response template and a trailing newline.
    """
    bullets = []
    for raw_label in topn_labels:
        bullets.append("- " + raw_label.replace("_", " "))
    options_block = "\n".join(bullets)

    # One list item per prompt line; the trailing "" yields the final newline.
    prompt_lines = [
        "Extract the key terms or entities in the sentence below that are important for intent understanding. ",
        "Do not assign specific roles—just list the important words or phrases.",
        "",
        "Then, based on these key terms, reason about which intent best matches the user query.",
        "",
        "OPTIONS:",
        options_block,
        "",
        f"QUERY: {user_input}",
        "",
        "Respond in the following format:",
        "ENTITIES: ...",
        "EXPLANATION: ...",
        "LABEL: ... (must be in snake_case)",
        "",
    ]
    return "\n".join(prompt_lines)

# 2. Query OpenAI GPT model
def query_model(prompt: str) -> str:
    """Send ``prompt`` as a single user turn and return the reply text.

    The request goes to ``gpt-3.5-turbo`` with ``temperature=0`` and a
    500-token completion cap, preceded by a fixed system message.

    Returns:
        The ``message.content`` of the first returned choice.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant for understanding and classifying intent.",
    }
    user_turn = {"role": "user", "content": prompt}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, user_turn],
        temperature=0,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# 3. Parse the GPT response
def parse_response(response: str) -> Dict[str, str]:
    """Split a model reply into its ENTITIES / EXPLANATION / LABEL fields.

    Each field runs from its header up to the next header (or the end of the
    reply), so bullet lists and multi-line explanations are kept whole
    instead of being cut at the first newline.

    Args:
        response: Raw reply text from the model.

    Returns:
        A dict with keys ``"entities"``, ``"explanation"`` and
        ``"predicted_label"``. Missing fields fall back to ``"N/A"``
        (entities, explanation) or ``"Unknown"`` (label).
    """
    fallback = {"entities": "N/A", "explanation": "N/A", "predicted_label": "Unknown"}

    # re.search raises TypeError on a non-string (e.g. None); report it and
    # return the defaults, as the previous catch-all handler did.
    if not isinstance(response, str):
        print("⚠️ Parsing error:", f"expected a string response, got {type(response).__name__}")
        return fallback

    flags = re.IGNORECASE | re.DOTALL
    # Lookaheads end a field at the next header or at end-of-text without
    # consuming it, so a field may span several lines.
    entities = re.search(r"ENTITIES:\s*(.*?)\s*(?=EXPLANATION:|LABEL:|\Z)", response, flags)
    explanation = re.search(r"EXPLANATION:\s*(.*?)\s*(?=LABEL:|\Z)", response, flags)
    # The label is the remainder of the first line that follows "LABEL:".
    label = re.search(r"LABEL:\s*(.+)", response, re.IGNORECASE)

    predicted = "Unknown"
    if label:
        # Drop quotes / backticks / markdown emphasis wrapped around the label.
        predicted = label.group(1).strip().strip("\"'`*").strip() or "Unknown"

    return {
        "entities": entities.group(1).strip() if entities else "N/A",
        "explanation": explanation.group(1).strip() if explanation else "N/A",
        "predicted_label": predicted,
    }

# 4. Full Pipeline
def run_flat_entity_ablation(input_csv: str, output_json: str):
    """Classify every row of ``input_csv`` and write the records to ``output_json``.

    For each row the "text", "label", "id" and "top_k_predictions" columns
    are read, a prompt is built from the text and candidate labels, the model
    is queried, and the parsed reply is stored alongside the row data.

    Args:
        input_csv: Path of the CSV to read.
        output_json: Path the list of result records is written to as JSON.

    Returns:
        The list of per-row result dicts (also written to ``output_json``).

    Raises:
        ValueError, SyntaxError: If a ``top_k_predictions`` cell is not a
            valid Python literal.
    """
    import ast  # local import so this block does not depend on the file header

    df = pd.read_csv(input_csv)  # append .iloc[:10] for a quick smoke run
    results = []

    for _, row in tqdm(df.iterrows(), total=len(df)):
        text = row["text"]
        true_label = row["label"]
        # The column stores the candidate list as its string form. literal_eval
        # only accepts Python literals, so a malformed or hostile cell cannot
        # execute code the way eval() would.
        top_k_predictions = ast.literal_eval(row["top_k_predictions"])

        prompt = build_prompt(text, top_k_predictions)
        response = query_model(prompt)
        parsed = parse_response(response)

        results.append({
            "id": int(row["id"]),
            "text": text,
            "true_label": true_label,
            "top_k_predictions": top_k_predictions,
            "entities": parsed["entities"],
            "explanation": parsed["explanation"],
            "predicted_label": parsed["predicted_label"]
        })

    # Save results
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"✅ Results saved to {output_json}")
    return results

# 5. Accuracy Evaluation
def evaluate_accuracy(results: List[Dict], mistake_file: str = "flat_entity_ablation_mistakes.json"):
    """Print exact-match accuracy and save the misclassified records.

    A record counts as correct when its ``"true_label"`` and
    ``"predicted_label"`` are equal after stripping surrounding whitespace.

    Args:
        results: Records carrying ``"true_label"`` and ``"predicted_label"``.
        mistake_file: Where the incorrect records are written as JSON. The
            default is the path this function previously hard-coded. Nothing
            is written when there are no mistakes.

    Returns:
        The accuracy as a fraction in [0, 1]; ``0`` for an empty ``results``.
    """
    total = len(results)
    # One pass: everything that is not an exact match is a mistake.
    mistakes = [
        r for r in results
        if r["true_label"].strip() != r["predicted_label"].strip()
    ]
    correct = total - len(mistakes)
    accuracy = correct / total if total else 0
    print("\n📊 Accuracy:", f"{accuracy*100:.2f}%")

    if mistakes:
        with open(mistake_file, "w", encoding="utf-8") as f:
            json.dump(mistakes, f, indent=2, ensure_ascii=False)
        print(f"🔍 Mistakes saved: {len(mistakes)}")

    return accuracy

# 6. Entry point
if __name__ == "__main__":
    # CSV with the candidate labels per query; the pipeline reads its "id",
    # "text", "label" and "top_k_predictions" columns.
    input_csv = "/home/xrspace/Llama/Thesis/E2i/maccot_topk_formatted.csv"  # <--- Replace with your file path
    # Destination for the per-query prediction records (JSON list).
    output_json = "b77_zs_ablition_flat_entity_gpt.json"

    # Run the model over every row, then report accuracy and save the mistakes.
    results = run_flat_entity_ablation(input_csv, output_json)
    evaluate_accuracy(results)

100%|██████████| 308/308 [04:51<00:00,  1.06it/s]

✅ Results saved to b77_zs_ablition_flat_entity_gpt.json

📊 Accuracy: 45.78%
🔍 Mistakes saved: 167





fs b77 flat ner

In [4]:
import pandas as pd
import openai
import json
import re
from tqdm import tqdm
from typing import List, Dict
from collections import defaultdict

# Load few-shot support file (flat entity version)
# The JSON is iterated below as a sequence of entries, each with a
# "true_label" key; build_prompt additionally reads "text", "entities" and
# "explanation" from the entries it uses.
with open("/home/xrspace/Llama/Thesis/E2i/ablition/ner_flat_b77_fs_sample.json", "r", encoding="utf-8") as f:
    FEWSHOT_DB = json.load(f)

# Organize few-shot examples by label: label -> list of entries, in file order
label_to_example = defaultdict(list)
for entry in FEWSHOT_DB:
    label_to_example[entry["true_label"]].append(entry)

# Set OpenAI API key
# NOTE: "key" is a placeholder; avoid committing a real key with the notebook.
openai.api_key = "key"  # Replace with your actual key

# Build prompt using dynamic few-shot samples (flat entity style)
def build_prompt(user_input: str, topn_labels: List[str]) -> str:
    """Assemble the few-shot "extract entities, then pick an intent" prompt.

    For every candidate label that has entries in the module-level
    ``label_to_example`` mapping, the first entry is rendered as a worked
    example (query, entities, explanation, label) ahead of the options.

    Args:
        user_input: The query text, inserted after ``QUERY:``.
        topn_labels: Candidate intent labels. Underscores are shown as spaces
            in the ``OPTIONS:`` bullets; the examples keep the raw label.

    Returns:
        The full prompt text, ending with the ENTITIES / EXPLANATION / LABEL
        response template and a trailing newline.
    """
    options_block = "\n".join("- " + name.replace("_", " ") for name in topn_labels)

    demo_blocks = []
    for name in topn_labels:
        if name not in label_to_example:
            continue
        shot = label_to_example[name][0]  # only the first stored example is used
        demo_lines = [
            "Example:",
            f"Query: \"{shot['text']}\"",
            f"ENTITIES: {shot['entities']}",
            f"EXPLANATION: {shot['explanation']}",
            f"LABEL: {name}",
            "---",
        ]
        demo_blocks.append("\n".join(demo_lines))
    few_shot_block = "\n".join(demo_blocks)

    # One list item per prompt line; the trailing "" yields the final newline.
    prompt_lines = [
        "Extract the key terms or entities in the sentence below that are important for intent understanding. ",
        "Do not assign specific roles—just list the important words or phrases.",
        "",
        "Then, based on these key terms, reason about which intent best matches the user query.",
        "",
        few_shot_block,
        "",
        "OPTIONS:",
        options_block,
        "",
        f"QUERY: {user_input}",
        "",
        "Respond in the following format:",
        "ENTITIES: ...",
        "EXPLANATION: ...",
        "LABEL: ... (must be in snake_case)",
        "",
    ]
    return "\n".join(prompt_lines)

# Parse response
def parse_response(response: str) -> Dict[str, str]:
    """Split a model reply into its ENTITIES / EXPLANATION / LABEL fields.

    Each field runs from its header up to the next header (or the end of the
    reply), so bullet lists and multi-line explanations are kept whole
    instead of being cut at the first newline.

    Args:
        response: Raw reply text from the model.

    Returns:
        A dict with keys ``"entities"``, ``"explanation"`` and
        ``"predicted_label"``. Missing fields fall back to ``"N/A"``
        (entities, explanation) or ``"Unknown"`` (label).
    """
    fallback = {"entities": "N/A", "explanation": "N/A", "predicted_label": "Unknown"}

    # re.search raises TypeError on a non-string (e.g. None); report it and
    # return the defaults, as the previous catch-all handler did.
    if not isinstance(response, str):
        print("Parsing error:", f"expected a string response, got {type(response).__name__}")
        return fallback

    flags = re.IGNORECASE | re.DOTALL
    # Lookaheads end a field at the next header or at end-of-text without
    # consuming it, so a field may span several lines.
    entities = re.search(r"ENTITIES:\s*(.*?)\s*(?=EXPLANATION:|LABEL:|\Z)", response, flags)
    explanation = re.search(r"EXPLANATION:\s*(.*?)\s*(?=LABEL:|\Z)", response, flags)
    # The label is the remainder of the first line that follows "LABEL:".
    label = re.search(r"LABEL:\s*(.+)", response, re.IGNORECASE)

    predicted = "Unknown"
    if label:
        # Drop quotes / backticks / markdown emphasis wrapped around the label.
        predicted = label.group(1).strip().strip("\"'`*").strip() or "Unknown"

    return {
        "entities": entities.group(1).strip() if entities else "N/A",
        "explanation": explanation.group(1).strip() if explanation else "N/A",
        "predicted_label": predicted,
    }

# Query GPT model
def query_model(prompt: str) -> str:
    """Send ``prompt`` as a single user turn and return the reply text.

    The request goes to ``gpt-3.5-turbo`` with ``temperature=0`` and a
    600-token completion cap, preceded by a fixed system message.

    Returns:
        The ``message.content`` of the first returned choice.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful AI assistant for intent classification.",
    }
    user_turn = {"role": "user", "content": prompt}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, user_turn],
        temperature=0,
        max_tokens=600,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Run classification
def run_experiment(input_csv: str, output_json: str) -> List[Dict]:
    """Classify every row of ``input_csv`` and write the records to ``output_json``.

    For each row the "text", "label", "id" and "top_k_predictions" columns
    are read, a few-shot prompt is built from the text and candidate labels,
    the model is queried, and the parsed reply is stored with the row data.

    Args:
        input_csv: Path of the CSV to read.
        output_json: Path the list of result records is written to as JSON.

    Returns:
        The list of per-row result dicts (also written to ``output_json``).

    Raises:
        ValueError, SyntaxError: If a ``top_k_predictions`` cell is not a
            valid Python literal.
    """
    import ast  # local import so this block does not depend on the file header

    df = pd.read_csv(input_csv)  # append .iloc[:10] for a quick smoke run
    results = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        text = row["text"]
        true_label = row["label"]
        # The column stores the candidate list as its string form. literal_eval
        # only accepts Python literals, so a malformed or hostile cell cannot
        # execute code the way eval() would.
        topk_predictions = ast.literal_eval(row["top_k_predictions"])
        prompt = build_prompt(text, topk_predictions)
        response = query_model(prompt)
        parsed = parse_response(response)
        results.append({
            "id": int(row["id"]),
            "text": text,
            "true_label": true_label,
            "top_k_predictions": topk_predictions,
            "entities": parsed["entities"],
            "explanation": parsed["explanation"],
            "predicted_label": parsed["predicted_label"]
        })
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"✅ Results saved to {output_json}")
    return results

# Accuracy check
def evaluate_accuracy(results: List[Dict], mistake_file: str = "b77_fs_ablition_mistake_flat_entity_gpt.json"):
    """Print exact-match accuracy and save the misclassified records.

    A record is a hit when its ``"true_label"`` and ``"predicted_label"``
    are equal after stripping surrounding whitespace. Misses are written to
    ``mistake_file`` as JSON; nothing is written when there are none.

    Returns:
        The accuracy as a fraction in [0, 1]; ``0`` for an empty ``results``.
    """
    total = len(results)

    correct = 0
    mistakes = []
    for record in results:
        if record["true_label"].strip() == record["predicted_label"].strip():
            correct += 1
        else:
            mistakes.append(record)

    if total:
        accuracy = correct / total
    else:
        accuracy = 0

    if mistakes:
        with open(mistake_file, "w", encoding="utf-8") as handle:
            json.dump(mistakes, handle, indent=2, ensure_ascii=False)
        print(f"🔍 Mistakes saved to: {mistake_file}")

    print(f"📊 Accuracy: {accuracy*100:.2f}%")
    return accuracy

# Main
if __name__ == "__main__":
    # CSV with the candidate labels per query; the pipeline reads its "id",
    # "text", "label" and "top_k_predictions" columns.
    input_csv = "/home/xrspace/Llama/Thesis/E2i/maccot_topk_formatted.csv"  # Replace with your CSV path
    # Destination for the per-query prediction records (JSON list).
    output_json = "b77_fs_ablition_predict_flat_entity_gpt.json"
    # Destination for the records whose prediction did not match the true label.
    mistake_file = "b77_fs_ablition_mistake_flat_entity_gpt.json"
    # Run the model over every row, then report accuracy and save the mistakes.
    results = run_experiment(input_csv, output_json)
    evaluate_accuracy(results, mistake_file)

  0%|          | 0/308 [00:00<?, ?it/s]

100%|██████████| 308/308 [15:34<00:00,  3.04s/it]   

✅ Results saved to b77_fs_ablition_predict_flat_entity_gpt.json
🔍 Mistakes saved to: b77_fs_ablition_mistake_flat_entity_gpt.json
📊 Accuracy: 51.62%





liu fs flat ner

In [14]:
import pandas as pd
import openai
import json
import re
from tqdm import tqdm
from typing import List, Dict
from collections import defaultdict

# Load few-shot support file (flat entity version)
# The JSON is iterated below as a sequence of entries, each with a
# "true_label" key; build_prompt additionally reads "text", "entities" and
# "explanation" from the entries it uses.
with open("/home/xrspace/Llama/Thesis/E2i/ablition/liu_flat_fs_enttiy_sample.json", "r", encoding="utf-8") as f:
    FEWSHOT_DB = json.load(f)

# Organize few-shot examples by label: label -> list of entries, in file order
label_to_example = defaultdict(list)
for entry in FEWSHOT_DB:
    label_to_example[entry["true_label"]].append(entry)

# Set OpenAI API key
# NOTE: "key" is a placeholder; avoid committing a real key with the notebook.
openai.api_key = "key"  # Replace with your actual key

# Build prompt using dynamic few-shot samples (flat entity style)
def build_prompt(user_input: str, topn_labels: List[str]) -> str:
    """Assemble the few-shot "extract entities, then pick an intent" prompt.

    For every candidate label that has entries in the module-level
    ``label_to_example`` mapping, the first entry is rendered as a worked
    example (query, entities, explanation, label) ahead of the options.

    Args:
        user_input: The query text, inserted after ``QUERY:``.
        topn_labels: Candidate intent labels. Underscores are shown as spaces
            in the ``OPTIONS:`` bullets; the examples keep the raw label.

    Returns:
        The full prompt text, ending with the ENTITIES / EXPLANATION / LABEL
        response template and a trailing newline.
    """
    options_block = "\n".join("- " + name.replace("_", " ") for name in topn_labels)

    demo_blocks = []
    for name in topn_labels:
        if name not in label_to_example:
            continue
        shot = label_to_example[name][0]  # only the first stored example is used
        demo_lines = [
            "Example:",
            f"Query: \"{shot['text']}\"",
            f"ENTITIES: {shot['entities']}",
            f"EXPLANATION: {shot['explanation']}",
            f"LABEL: {name}",
            "---",
        ]
        demo_blocks.append("\n".join(demo_lines))
    few_shot_block = "\n".join(demo_blocks)

    # One list item per prompt line; the trailing "" yields the final newline.
    prompt_lines = [
        "Extract the key terms or entities in the sentence below that are important for intent understanding. ",
        "Do not assign specific roles—just list the important words or phrases.",
        "",
        "Then, based on these key terms, reason about which intent best matches the user query.",
        "",
        few_shot_block,
        "",
        "OPTIONS:",
        options_block,
        "",
        f"QUERY: {user_input}",
        "",
        "Respond in the following format:",
        "ENTITIES: ...",
        "EXPLANATION: ...",
        "LABEL: ... (must be in snake_case)",
        "",
    ]
    return "\n".join(prompt_lines)

# Parse response
def parse_response(response: str) -> Dict[str, str]:
    """Split a model reply into its ENTITIES / EXPLANATION / LABEL fields.

    Each field runs from its header up to the next header (or the end of the
    reply), so bullet lists and multi-line explanations are kept whole
    instead of being cut at the first newline.

    Args:
        response: Raw reply text from the model.

    Returns:
        A dict with keys ``"entities"``, ``"explanation"`` and
        ``"predicted_label"``. Missing fields fall back to ``"N/A"``
        (entities, explanation) or ``"Unknown"`` (label).
    """
    fallback = {"entities": "N/A", "explanation": "N/A", "predicted_label": "Unknown"}

    # re.search raises TypeError on a non-string (e.g. None); report it and
    # return the defaults, as the previous catch-all handler did.
    if not isinstance(response, str):
        print("Parsing error:", f"expected a string response, got {type(response).__name__}")
        return fallback

    flags = re.IGNORECASE | re.DOTALL
    # Lookaheads end a field at the next header or at end-of-text without
    # consuming it, so a field may span several lines.
    entities = re.search(r"ENTITIES:\s*(.*?)\s*(?=EXPLANATION:|LABEL:|\Z)", response, flags)
    explanation = re.search(r"EXPLANATION:\s*(.*?)\s*(?=LABEL:|\Z)", response, flags)
    # The label is the remainder of the first line that follows "LABEL:".
    label = re.search(r"LABEL:\s*(.+)", response, re.IGNORECASE)

    predicted = "Unknown"
    if label:
        # Drop quotes / backticks / markdown emphasis wrapped around the label.
        predicted = label.group(1).strip().strip("\"'`*").strip() or "Unknown"

    return {
        "entities": entities.group(1).strip() if entities else "N/A",
        "explanation": explanation.group(1).strip() if explanation else "N/A",
        "predicted_label": predicted,
    }

# Query GPT model
def query_model(prompt: str) -> str:
    """Send ``prompt`` as a single user turn and return the reply text.

    The request goes to ``gpt-3.5-turbo`` with ``temperature=0`` and a
    600-token completion cap, preceded by a fixed system message.

    Returns:
        The ``message.content`` of the first returned choice.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful AI assistant for intent classification.",
    }
    user_turn = {"role": "user", "content": prompt}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, user_turn],
        temperature=0,
        max_tokens=600,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Run classification
def run_experiment(input_csv: str, output_json: str) -> List[Dict]:
    """Classify every row of ``input_csv`` and write the records to ``output_json``.

    For each row the "text", "label", "id" and "top_k_predictions" columns
    are read, a few-shot prompt is built from the text and candidate labels,
    the model is queried, and the parsed reply is stored with the row data.

    Args:
        input_csv: Path of the CSV to read.
        output_json: Path the list of result records is written to as JSON.

    Returns:
        The list of per-row result dicts (also written to ``output_json``).

    Raises:
        ValueError, SyntaxError: If a ``top_k_predictions`` cell is not a
            valid Python literal.
    """
    import ast  # local import so this block does not depend on the file header

    df = pd.read_csv(input_csv)  # append .iloc[:10] for a quick smoke run
    results = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        text = row["text"]
        true_label = row["label"]
        # The column stores the candidate list as its string form. literal_eval
        # only accepts Python literals, so a malformed or hostile cell cannot
        # execute code the way eval() would.
        topk_predictions = ast.literal_eval(row["top_k_predictions"])
        prompt = build_prompt(text, topk_predictions)
        response = query_model(prompt)
        parsed = parse_response(response)
        results.append({
            "id": int(row["id"]),
            "text": text,
            "true_label": true_label,
            "top_k_predictions": topk_predictions,
            "entities": parsed["entities"],
            "explanation": parsed["explanation"],
            "predicted_label": parsed["predicted_label"]
        })
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"✅ Results saved to {output_json}")
    return results

# Accuracy check
def evaluate_accuracy(results: List[Dict], mistake_file: str = "liu_fs_ablition_mistake_flat_entity_gpt.json"):
    """Print exact-match accuracy and save the misclassified records.

    A record is a hit when its ``"true_label"`` and ``"predicted_label"``
    are equal after stripping surrounding whitespace. Misses are written to
    ``mistake_file`` as JSON; nothing is written when there are none.

    Returns:
        The accuracy as a fraction in [0, 1]; ``0`` for an empty ``results``.
    """
    total = len(results)

    correct = 0
    mistakes = []
    for record in results:
        if record["true_label"].strip() == record["predicted_label"].strip():
            correct += 1
        else:
            mistakes.append(record)

    if total:
        accuracy = correct / total
    else:
        accuracy = 0

    if mistakes:
        with open(mistake_file, "w", encoding="utf-8") as handle:
            json.dump(mistakes, handle, indent=2, ensure_ascii=False)
        print(f"🔍 Mistakes saved to: {mistake_file}")

    print(f"📊 Accuracy: {accuracy*100:.2f}%")
    return accuracy

# Main
if __name__ == "__main__":
    # CSV with the candidate labels per query; the pipeline reads its "id",
    # "text", "label" and "top_k_predictions" columns.
    input_csv = "/home/xrspace/Llama/Thesis/E2i/maccot_topk_formatted_liu.csv"  # Replace with your CSV path
    # Destination for the per-query prediction records (JSON list).
    output_json = "liu_fs_ablition_predict_flat_entity_gpt.json"
    # Destination for the records whose prediction did not match the true label.
    mistake_file = "liu_fs_ablition_mistake_flat_entity_gpt.json"
    # Run the model over every row, then report accuracy and save the mistakes.
    results = run_experiment(input_csv, output_json)
    evaluate_accuracy(results, mistake_file)

100%|██████████| 254/254 [03:36<00:00,  1.17it/s]

✅ Results saved to liu_fs_ablition_predict_flat_entity_gpt.json
🔍 Mistakes saved to: liu_fs_ablition_mistake_flat_entity_gpt.json
📊 Accuracy: 41.73%





clinc zs

In [10]:
import pandas as pd
import openai
import json
import re
from tqdm import tqdm
from typing import List, Dict
from collections import Counter

# Set OpenAI API Key
# NOTE: "key" is a placeholder; supply a real key before running this cell
# and avoid committing it with the notebook.
openai.api_key =  "key"

# 1. Prompt Builder: Entity Extraction → Intent Reasoning
def build_prompt(user_input: str, topn_labels: List[str]) -> str:
    """Assemble the zero-shot "extract entities, then pick an intent" prompt.

    Args:
        user_input: The query text, inserted after ``QUERY:``.
        topn_labels: Candidate intent labels. Underscores are shown to the
            model as spaces, one bullet per label, under ``OPTIONS:``.

    Returns:
        The full prompt text, ending with the ENTITIES / EXPLANATION / LABEL
        response template and a trailing newline.
    """
    bullets = []
    for raw_label in topn_labels:
        bullets.append("- " + raw_label.replace("_", " "))
    options_block = "\n".join(bullets)

    # One list item per prompt line; the trailing "" yields the final newline.
    prompt_lines = [
        "Extract the key terms or entities in the sentence below that are important for intent understanding. ",
        "Do not assign specific roles—just list the important words or phrases.",
        "",
        "Then, based on these key terms, reason about which intent best matches the user query.",
        "",
        "OPTIONS:",
        options_block,
        "",
        f"QUERY: {user_input}",
        "",
        "Respond in the following format:",
        "ENTITIES: ...",
        "EXPLANATION: ...",
        "LABEL: ... (must be in snake_case)",
        "",
    ]
    return "\n".join(prompt_lines)

# 2. Query OpenAI GPT model
def query_model(prompt: str) -> str:
    """Send ``prompt`` as a single user turn and return the reply text.

    The request goes to ``gpt-3.5-turbo`` with ``temperature=0`` and a
    500-token completion cap, preceded by a fixed system message.

    Returns:
        The ``message.content`` of the first returned choice.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant for understanding and classifying intent.",
    }
    user_turn = {"role": "user", "content": prompt}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, user_turn],
        temperature=0,
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# 3. Parse the GPT response
def parse_response(response: str) -> Dict[str, str]:
    """Split a model reply into its ENTITIES / EXPLANATION / LABEL fields.

    Each field runs from its header up to the next header (or the end of the
    reply), so bullet lists and multi-line explanations are kept whole
    instead of being cut at the first newline.

    Args:
        response: Raw reply text from the model.

    Returns:
        A dict with keys ``"entities"``, ``"explanation"`` and
        ``"predicted_label"``. Missing fields fall back to ``"N/A"``
        (entities, explanation) or ``"Unknown"`` (label).
    """
    fallback = {"entities": "N/A", "explanation": "N/A", "predicted_label": "Unknown"}

    # re.search raises TypeError on a non-string (e.g. None); report it and
    # return the defaults, as the previous catch-all handler did.
    if not isinstance(response, str):
        print("⚠️ Parsing error:", f"expected a string response, got {type(response).__name__}")
        return fallback

    flags = re.IGNORECASE | re.DOTALL
    # Lookaheads end a field at the next header or at end-of-text without
    # consuming it, so a field may span several lines.
    entities = re.search(r"ENTITIES:\s*(.*?)\s*(?=EXPLANATION:|LABEL:|\Z)", response, flags)
    explanation = re.search(r"EXPLANATION:\s*(.*?)\s*(?=LABEL:|\Z)", response, flags)
    # The label is the remainder of the first line that follows "LABEL:".
    label = re.search(r"LABEL:\s*(.+)", response, re.IGNORECASE)

    predicted = "Unknown"
    if label:
        # Drop quotes / backticks / markdown emphasis wrapped around the label.
        predicted = label.group(1).strip().strip("\"'`*").strip() or "Unknown"

    return {
        "entities": entities.group(1).strip() if entities else "N/A",
        "explanation": explanation.group(1).strip() if explanation else "N/A",
        "predicted_label": predicted,
    }

# 4. Full Pipeline
def run_flat_entity_ablation(input_csv: str, output_json: str):
    """Classify every row of ``input_csv`` and write the records to ``output_json``.

    For each row the "text", "label", "id" and "top_k_predictions" columns
    are read, a prompt is built from the text and candidate labels, the model
    is queried, and the parsed reply is stored alongside the row data.

    Args:
        input_csv: Path of the CSV to read.
        output_json: Path the list of result records is written to as JSON.

    Returns:
        The list of per-row result dicts (also written to ``output_json``).

    Raises:
        ValueError, SyntaxError: If a ``top_k_predictions`` cell is not a
            valid Python literal.
    """
    import ast  # local import so this block does not depend on the file header

    df = pd.read_csv(input_csv)  # append .iloc[:10] for a quick smoke run
    results = []

    for _, row in tqdm(df.iterrows(), total=len(df)):
        text = row["text"]
        true_label = row["label"]
        # The column stores the candidate list as its string form. literal_eval
        # only accepts Python literals, so a malformed or hostile cell cannot
        # execute code the way eval() would.
        top_k_predictions = ast.literal_eval(row["top_k_predictions"])

        prompt = build_prompt(text, top_k_predictions)
        response = query_model(prompt)
        parsed = parse_response(response)

        results.append({
            "id": int(row["id"]),
            "text": text,
            "true_label": true_label,
            "top_k_predictions": top_k_predictions,
            "entities": parsed["entities"],
            "explanation": parsed["explanation"],
            "predicted_label": parsed["predicted_label"]
        })

    # Save results
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"✅ Results saved to {output_json}")
    return results

# 5. Accuracy Evaluation
def evaluate_accuracy(results: List[Dict], mistake_file: str = "clinc_zs_ablition_flat_entity_mistake_gpt.json"):
    """Print exact-match accuracy and save the misclassified records.

    A record counts as correct when its ``"true_label"`` and
    ``"predicted_label"`` are equal after stripping surrounding whitespace.

    Args:
        results: Records carrying ``"true_label"`` and ``"predicted_label"``.
        mistake_file: Where the incorrect records are written as JSON. The
            default is the path this function previously hard-coded. Nothing
            is written when there are no mistakes.

    Returns:
        The accuracy as a fraction in [0, 1]; ``0`` for an empty ``results``.
    """
    total = len(results)
    # One pass: everything that is not an exact match is a mistake.
    mistakes = [
        r for r in results
        if r["true_label"].strip() != r["predicted_label"].strip()
    ]
    correct = total - len(mistakes)
    accuracy = correct / total if total else 0
    print("\n📊 Accuracy:", f"{accuracy*100:.2f}%")

    if mistakes:
        with open(mistake_file, "w", encoding="utf-8") as f:
            json.dump(mistakes, f, indent=2, ensure_ascii=False)
        print(f"🔍 Mistakes saved: {len(mistakes)}")

    return accuracy

# 6. Entry point
if __name__ == "__main__":
    # CSV with the candidate labels per query; the pipeline reads its "id",
    # "text", "label" and "top_k_predictions" columns.
    input_csv = "/home/xrspace/Llama/Thesis/E2i/maccot_topk_formatted_clinc.csv"  # <--- Replace with your file path
    # Destination for the per-query prediction records (JSON list).
    output_json = "clinc_zs_ablition_flat_entity_prediction_gpt.json"

    # Run the model over every row, then report accuracy and save the mistakes.
    results = run_flat_entity_ablation(input_csv, output_json)
    evaluate_accuracy(results)

100%|██████████| 225/225 [03:21<00:00,  1.12it/s]

✅ Results saved to clinc_zs_ablition_flat_entity_prediction_gpt.json

📊 Accuracy: 72.00%
🔍 Mistakes saved: 63





clinc fs NER

In [13]:
import pandas as pd
import openai
import json
import re
from tqdm import tqdm
from typing import List, Dict
from collections import defaultdict

# Load few-shot support file (flat entity version)
# The JSON is iterated below as a sequence of entries, each with a
# "true_label" key; build_prompt additionally reads "text", "entities" and
# "explanation" from the entries it uses.
with open("/home/xrspace/Llama/Thesis/E2i/ablition/clinic_flat_fs_enttiy_sample.json", "r", encoding="utf-8") as f:
    FEWSHOT_DB = json.load(f)

# Organize few-shot examples by label: label -> list of entries, in file order
label_to_example = defaultdict(list)
for entry in FEWSHOT_DB:
    label_to_example[entry["true_label"]].append(entry)

# Set OpenAI API key
# NOTE: "key" is a placeholder; avoid committing a real key with the notebook.
openai.api_key = "key"  # Replace with your actual key

# Build prompt using dynamic few-shot samples (flat entity style)
def build_prompt(user_input: str, topn_labels: List[str]) -> str:
    """Assemble the few-shot "extract entities, then pick an intent" prompt.

    For every candidate label that has entries in the module-level
    ``label_to_example`` mapping, the first entry is rendered as a worked
    example (query, entities, explanation, label) ahead of the options.

    Args:
        user_input: The query text, inserted after ``QUERY:``.
        topn_labels: Candidate intent labels. Underscores are shown as spaces
            in the ``OPTIONS:`` bullets; the examples keep the raw label.

    Returns:
        The full prompt text, ending with the ENTITIES / EXPLANATION / LABEL
        response template and a trailing newline.
    """
    options_block = "\n".join("- " + name.replace("_", " ") for name in topn_labels)

    demo_blocks = []
    for name in topn_labels:
        if name not in label_to_example:
            continue
        shot = label_to_example[name][0]  # only the first stored example is used
        demo_lines = [
            "Example:",
            f"Query: \"{shot['text']}\"",
            f"ENTITIES: {shot['entities']}",
            f"EXPLANATION: {shot['explanation']}",
            f"LABEL: {name}",
            "---",
        ]
        demo_blocks.append("\n".join(demo_lines))
    few_shot_block = "\n".join(demo_blocks)

    # One list item per prompt line; the trailing "" yields the final newline.
    prompt_lines = [
        "Extract the key terms or entities in the sentence below that are important for intent understanding. ",
        "Do not assign specific roles—just list the important words or phrases.",
        "",
        "Then, based on these key terms, reason about which intent best matches the user query.",
        "",
        few_shot_block,
        "",
        "OPTIONS:",
        options_block,
        "",
        f"QUERY: {user_input}",
        "",
        "Respond in the following format:",
        "ENTITIES: ...",
        "EXPLANATION: ...",
        "LABEL: ... (must be in snake_case)",
        "",
    ]
    return "\n".join(prompt_lines)

# Parse response
def parse_response(response: str) -> Dict[str, str]:
    """Split a model reply into its ENTITIES / EXPLANATION / LABEL fields.

    Each field runs from its header up to the next header (or the end of the
    reply), so bullet lists and multi-line explanations are kept whole
    instead of being cut at the first newline.

    Args:
        response: Raw reply text from the model.

    Returns:
        A dict with keys ``"entities"``, ``"explanation"`` and
        ``"predicted_label"``. Missing fields fall back to ``"N/A"``
        (entities, explanation) or ``"Unknown"`` (label).
    """
    fallback = {"entities": "N/A", "explanation": "N/A", "predicted_label": "Unknown"}

    # re.search raises TypeError on a non-string (e.g. None); report it and
    # return the defaults, as the previous catch-all handler did.
    if not isinstance(response, str):
        print("Parsing error:", f"expected a string response, got {type(response).__name__}")
        return fallback

    flags = re.IGNORECASE | re.DOTALL
    # Lookaheads end a field at the next header or at end-of-text without
    # consuming it, so a field may span several lines.
    entities = re.search(r"ENTITIES:\s*(.*?)\s*(?=EXPLANATION:|LABEL:|\Z)", response, flags)
    explanation = re.search(r"EXPLANATION:\s*(.*?)\s*(?=LABEL:|\Z)", response, flags)
    # The label is the remainder of the first line that follows "LABEL:".
    label = re.search(r"LABEL:\s*(.+)", response, re.IGNORECASE)

    predicted = "Unknown"
    if label:
        # Drop quotes / backticks / markdown emphasis wrapped around the label.
        predicted = label.group(1).strip().strip("\"'`*").strip() or "Unknown"

    return {
        "entities": entities.group(1).strip() if entities else "N/A",
        "explanation": explanation.group(1).strip() if explanation else "N/A",
        "predicted_label": predicted,
    }

# Query GPT model
def query_model(prompt: str) -> str:
    """Send ``prompt`` as a single user turn and return the reply text.

    The request goes to ``gpt-3.5-turbo`` with ``temperature=0`` and a
    600-token completion cap, preceded by a fixed system message.

    Returns:
        The ``message.content`` of the first returned choice.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful AI assistant for intent classification.",
    }
    user_turn = {"role": "user", "content": prompt}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, user_turn],
        temperature=0,
        max_tokens=600,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Run classification
def run_experiment(input_csv: str, output_json: str) -> List[Dict]:
    """Classify every row of ``input_csv`` and write the records to ``output_json``.

    For each row the "text", "label", "id" and "top_k_predictions" columns
    are read, a few-shot prompt is built from the text and candidate labels,
    the model is queried, and the parsed reply is stored with the row data.

    Args:
        input_csv: Path of the CSV to read.
        output_json: Path the list of result records is written to as JSON.

    Returns:
        The list of per-row result dicts (also written to ``output_json``).

    Raises:
        ValueError, SyntaxError: If a ``top_k_predictions`` cell is not a
            valid Python literal.
    """
    import ast  # local import so this block does not depend on the file header

    df = pd.read_csv(input_csv)  # append .iloc[:10] for a quick smoke run
    results = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        text = row["text"]
        true_label = row["label"]
        # The column stores the candidate list as its string form. literal_eval
        # only accepts Python literals, so a malformed or hostile cell cannot
        # execute code the way eval() would.
        topk_predictions = ast.literal_eval(row["top_k_predictions"])
        prompt = build_prompt(text, topk_predictions)
        response = query_model(prompt)
        parsed = parse_response(response)
        results.append({
            "id": int(row["id"]),
            "text": text,
            "true_label": true_label,
            "top_k_predictions": topk_predictions,
            "entities": parsed["entities"],
            "explanation": parsed["explanation"],
            "predicted_label": parsed["predicted_label"]
        })
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
    print(f"✅ Results saved to {output_json}")
    return results

# Accuracy check
def evaluate_accuracy(results: List[Dict], mistake_file: str = "clinc_fs_ablition_mistake_flat_entity_gpt.json"):
    """Print exact-match accuracy and save the misclassified records.

    A record is a hit when its ``"true_label"`` and ``"predicted_label"``
    are equal after stripping surrounding whitespace. Misses are written to
    ``mistake_file`` as JSON; nothing is written when there are none.

    Returns:
        The accuracy as a fraction in [0, 1]; ``0`` for an empty ``results``.
    """
    total = len(results)

    correct = 0
    mistakes = []
    for record in results:
        if record["true_label"].strip() == record["predicted_label"].strip():
            correct += 1
        else:
            mistakes.append(record)

    if total:
        accuracy = correct / total
    else:
        accuracy = 0

    if mistakes:
        with open(mistake_file, "w", encoding="utf-8") as handle:
            json.dump(mistakes, handle, indent=2, ensure_ascii=False)
        print(f"🔍 Mistakes saved to: {mistake_file}")

    print(f"📊 Accuracy: {accuracy*100:.2f}%")
    return accuracy

# Main
if __name__ == "__main__":
    # CSV with the candidate labels per query; the pipeline reads its "id",
    # "text", "label" and "top_k_predictions" columns.
    input_csv = "/home/xrspace/Llama/Thesis/E2i/maccot_topk_formatted_clinc.csv"  # Replace with your CSV path
    # Destination for the per-query prediction records (JSON list).
    output_json = "clinc_fs_ablition_predict_flat_entity_gpt.json"
    # Destination for the records whose prediction did not match the true label.
    mistake_file = "clinc_fs_ablition_mistake_flat_entity_gpt.json"
    # Run the model over every row, then report accuracy and save the mistakes.
    results = run_experiment(input_csv, output_json)
    evaluate_accuracy(results, mistake_file)

100%|██████████| 225/225 [03:12<00:00,  1.17it/s]

✅ Results saved to clinc_fs_ablition_predict_flat_entity_gpt.json
🔍 Mistakes saved to: clinc_fs_ablition_mistake_flat_entity_gpt.json
📊 Accuracy: 86.22%



