In [1]:
import pandas as pd
import re
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy
from rapidfuzz import fuzz

In [2]:
# Load English NLP model
nlp = spacy.load("en_core_web_sm")

In [3]:
# ---------- Similarity Functions ----------
def jaccard_similarity(a, b):
    a_set, b_set = set(a.split()), set(b.split())
    if not a_set or not b_set:
        return 0
    return len(a_set & b_set) / len(a_set | b_set)

def cosine_sim(a, b):
    vectorizer = CountVectorizer().fit([a, b])
    vectors = vectorizer.transform([a, b])
    return cosine_similarity(vectors)[0][1]

In [None]:
# ---------- Main Rule-Based Scoring ----------
def classify_response(response, correct_answer):
    # Ensure both are strings
    response = str(response).strip().lower()
    correct_answer = str(correct_answer).strip().lower()

    if response == "" or response in ["idk", "i don't know", "i dunno"]:
        return -1

    fuzzy_ratio = fuzz.token_sort_ratio(response, correct_answer) / 100
    jaccard = jaccard_similarity(response, correct_answer)
    cosine = cosine_sim(response, correct_answer)

    try: 
        semantic = nlp(response).similarity(nlp(correct_answer)) 
    except Exception: 
        semantic = 0 # fallback

    resp_nums = re.findall(r"\d+", response)
    corr_nums = re.findall(r"\d+", correct_answer)
    num_match = 1 if resp_nums and resp_nums == corr_nums else 0

    # Weighted score
    total_score = (
        0.65 * fuzzy_ratio +
        0.2 * jaccard +
        # 0.15 * semantic +
        0.15 * cosine 
        # 0.05 * num_match
    )

    if total_score > 0.60:
        return 1
    elif total_score > 0.59:
        return 0
    else:
        return -1

In [5]:
df = pd.read_csv("train_cleaned.csv", encoding="latin1")

In [6]:
# Apply classification
df["Predicted_Label"] = df.apply(
    lambda row: classify_response(row["Response"], row["CorrectAnswer"]),
    axis=1
)

  semantic = nlp(response).similarity(nlp(correct_answer))


In [7]:
# ---------- Evaluation ----------
if "label" in df.columns:
    total_rows = len(df)
    correct_predictions = (df["Predicted_Label"] == df["label"]).sum()
    accuracy = correct_predictions / total_rows * 100

    print(f"Correct predictions: {correct_predictions}/{total_rows}")
    print(f"Accuracy: {accuracy:.2f}%")

    # --- Precision, Recall, F1 ---
    # Positive class = 1
    TP = ((df["Predicted_Label"] == 1) & (df["label"] == 1)).sum()
    FP = ((df["Predicted_Label"] == 1) & (df["label"] != 1)).sum()
    FN = ((df["Predicted_Label"] != 1) & (df["label"] == 1)).sum()

    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0

    # F1-score = harmonic mean of precision and recall
    if precision + recall > 0:
        f1 = 2 * (precision * recall) / (precision + recall)
    else:
        f1 = 0

    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")


Correct predictions: 1662/2250
Accuracy: 73.87%
Precision: 0.93
Recall: 0.60
F1 Score: 0.73


In [8]:
# Save to a new CSV
df.to_csv("insert_name.csv", index=False)
print("Results saved to 'insert_name.csv'.")

Results saved to 'insert_name.csv'.
