In [None]:
import pandas as pd
from sklearn.metrics import classification_report, precision_recall_fscore_support

# Configuration: input workbook to classify and workbook to write results to.
input_path = "366_ARPs_for_extracting_Issue_Solution_Pairs.xlsx"
output_path = "DECA_PD_SP_results.xlsx"

# Keyword lists for the heuristic rules. The classification functions lower-case
# both the keyword and the sentence, so the casing used here is irrelevant.
problem_keywords = [
    # Interrogatives and the question mark.
    "What",
    "When",
    "Who",
    "Which",
    "How",
    "?",
    # Phrases stating what the author is doing or asking for.
    "I am trying to build",
    "I want to design",
    "How to architecture",
    "I am evaluating",
    "I am building",
    "The user should",
    "I need help",
    "I am developing",
    "Advise on",
    # NOTE(review): "desing" looks like a misspelling of "design"; kept
    # unchanged, presumably to match misspelled posts -- confirm.
    "I want to desing",
    # Symptom words.
    "crash",
    "error",
    "bug",
    "problem",
    "issue",
    "wrong",
    "not working",
    "cannot",
    "unable",
]

solution_keywords = [
    # Advice and recommendation phrases.
    "the best practice",
    "you should",
    "I am using",
    "you don't have to do",
    "In order to",
    "it is critical",
    # NOTE(review): duplicates "you should" above once lower-cased.
    "You should",
    "It is recommended",
    "A good approach is",
    "I suggest",
    "I propose",
    # Remediation words.
    "fix",
    "I recommend",
    "refactor",
]

#Classification functions
def is_problem_discovery(sentence: str) -> bool:
    """Return True when *sentence* contains any problem-discovery keyword.

    The check is a case-insensitive substring search against the
    module-level ``problem_keywords`` list. Anything that is not a ``str``
    (for example ``None`` or a float NaN) is reported as no match.
    """
    if isinstance(sentence, str):
        haystack = sentence.lower()
        for keyword in problem_keywords:
            # NOTE(review): plain substring test, so a keyword also matches
            # inside a longer word (e.g. "how" inside "show").
            if keyword.lower() in haystack:
                return True
    return False

def is_solution_proposal(sentence: str) -> bool:
    """Return True when *sentence* contains any solution-proposal keyword.

    The check is a case-insensitive substring search against the
    module-level ``solution_keywords`` list. Anything that is not a ``str``
    (for example ``None`` or a float NaN) is reported as no match.
    """
    if isinstance(sentence, str):
        haystack = sentence.lower()
        for keyword in solution_keywords:
            # NOTE(review): plain substring test, so a keyword also matches
            # inside a longer word (e.g. "fix" inside "prefix").
            if keyword.lower() in haystack:
                return True
    return False

def classify_sentence(sentence: str) -> str:
    """Label *sentence* as "Problem Discovery", "Solution Proposal" or "Other".

    The problem rule is evaluated first and wins when both rules would
    match; the solution rule is only consulted when the problem rule fails.
    """
    if is_problem_discovery(sentence):
        return "Problem Discovery"
    return "Solution Proposal" if is_solution_proposal(sentence) else "Other"

# Load the input workbook. Sheet "Sheet1" must provide the
# "Question_body_cleaned" and "Answer_body_cleaned" columns classified below.
data = pd.read_excel(input_path, sheet_name="Sheet1")

# Apply classification on question & answer bodies
data["Question_pred"] = data["Question_body_cleaned"].apply(classify_sentence)
data["Answer_pred"] = data["Answer_body_cleaned"].apply(classify_sentence)

# === Evaluation against gold labels ===
# Gold labels are optional; when present they are read from "Question_gold"
# and "Answer_gold" and scored against the predictions above.
eval_labels = ["Problem Discovery", "Solution Proposal", "Other"]
has_gold = "Question_gold" in data.columns and "Answer_gold" in data.columns
y_true, y_pred = [], []
eval_df = None

if has_gold:
    # Pool question and answer rows into a single evaluation set.
    gold_all = list(data["Question_gold"]) + list(data["Answer_gold"])
    pred_all = list(data["Question_pred"]) + list(data["Answer_pred"])

    # Empty gold cells load as NaN, which is not a valid class label; score
    # only the rows that were actually annotated.
    labelled = [(g, p) for g, p in zip(gold_all, pred_all) if pd.notna(g)]
    y_true = [g for g, _ in labelled]
    y_pred = [p for _, p in labelled]

    skipped = len(gold_all) - len(labelled)
    if skipped:
        print(f"Ignoring {skipped} rows without a gold label.")

if y_true:
    print("\n=== Evaluation Report ===")
    # Same label set and zero_division policy as the exported table below, so
    # the printed report and the Excel sheet always agree.
    print(
        classification_report(
            y_true, y_pred, labels=eval_labels, digits=3, zero_division=0
        )
    )

    # Per-label metrics for the Excel export.
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, labels=eval_labels, zero_division=0
    )

    eval_df = pd.DataFrame({
        "Label": eval_labels,
        "Precision": precision,
        "Recall": recall,
        "F1-score": f1,
        "Support": support
    })
elif has_gold:
    print("Gold label columns contain no labels. Skipping evaluation step.")
else:
    print( "Gold labels not found in the dataset. Skipping evaluation step.")

# Always persist the predictions; the evaluation sheet is added only when an
# evaluation was actually computed.
with pd.ExcelWriter(output_path, engine="openpyxl", mode="w") as writer:
    data.to_excel(writer, sheet_name="Predictions", index=False)
    if eval_df is not None:
        eval_df.to_excel(writer, sheet_name="Evaluation", index=False)

if eval_df is not None:
    print(f"Saved predictions + evaluation results to {output_path}")
else:
    print(f"Saved predictions to {output_path}")



In [None]:
Mean Precision, Recall, F1 Scores for Questions:
Question_precision    0.682
Question_recall       0.556
Question_f1           0.540
dtype: float64

Mean Precision, Recall, F1 Scores for Answers:
Answer_precision    0.650
Answer_recall       0.611
Answer_f1           0.571
dtype: float64