In [1]:
import pandas as pd
import re
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy
from rapidfuzz import fuzz
import os
import numpy as np

In [2]:
# Load English NLP model
nlp = spacy.load("en_core_web_sm")

In [3]:
# ---------- Similarity Functions ----------
def jaccard_similarity(a, b):
    a_set, b_set = set(a.split()), set(b.split())
    if not a_set or not b_set:
        return 0
    return len(a_set & b_set) / len(a_set | b_set)

def cosine_sim(a, b):
    vectorizer = CountVectorizer().fit([a, b])
    vectors = vectorizer.transform([a, b])
    return cosine_similarity(vectors)[0][1]

In [4]:
# ---------- Main Rule-Based Scoring ----------
def classify_response(response, correct_answer, high_threshold, low_threshold):
    response = str(response).strip().lower()
    correct_answer = str(correct_answer).strip().lower()

    if response == "" or response in ["idk", "i don't know", "i dunno"]:
        return -1

    fuzzy_ratio = fuzz.token_sort_ratio(response, correct_answer) / 100
    jaccard = jaccard_similarity(response, correct_answer)
    cosine = cosine_sim(response, correct_answer)

    try: 
        semantic = nlp(response).similarity(nlp(correct_answer)) 
    except Exception: 
        semantic = 0 # fallback

    resp_nums = re.findall(r"\d+", response)
    corr_nums = re.findall(r"\d+", correct_answer)
    num_match = 1 if resp_nums and resp_nums == corr_nums else 0

    # Weighted total score
    total_score = (
        fuzzy_ratio
    )

    # Threshold-based classification
    if total_score > high_threshold:
        return 1
    elif total_score > low_threshold:
        return 0
    else:
        return -1


In [5]:
# ---------- Main Loop ----------
df = pd.read_csv("train_cleaned.csv", encoding="latin1")
results = []

for high_threshold in [round(x, 2) for x in np.arange(0.60, 0.651, 0.01)]:
    for low_threshold in [round(x, 2) for x in np.arange(0.55, high_threshold, 0.01)]:
        temp_df = df.copy()

        temp_df["Predicted_Label"] = temp_df.apply(
            lambda row: classify_response(row["Response"], row["CorrectAnswer"], high_threshold, low_threshold),
            axis=1
        )

        # Evaluate accuracy
        if "label" in temp_df.columns:
            total_rows = len(temp_df)
            correct_predictions = (temp_df["Predicted_Label"] == temp_df["label"]).sum()
            accuracy = correct_predictions / total_rows * 100
        else:
            accuracy = 0

        print(f"Tested: high={high_threshold}, low={low_threshold} ‚Üí Accuracy={accuracy:.2f}%")

        results.append({
            "result_1_limit": high_threshold,
            "result_0_limit": low_threshold,
            "accuracy": accuracy
        })

        temp_df.to_csv("train_responses.csv", index=False)

        # Delete temp file
        if os.path.exists("train_responses.csv"):
            os.remove("train_responses.csv")

  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.6, low=0.55 ‚Üí Accuracy=72.67%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.6, low=0.56 ‚Üí Accuracy=73.20%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.6, low=0.57 ‚Üí Accuracy=73.42%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.6, low=0.58 ‚Üí Accuracy=73.60%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.6, low=0.59 ‚Üí Accuracy=73.87%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.61, low=0.55 ‚Üí Accuracy=72.22%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.61, low=0.56 ‚Üí Accuracy=72.76%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.61, low=0.57 ‚Üí Accuracy=72.98%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.61, low=0.58 ‚Üí Accuracy=73.16%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.61, low=0.59 ‚Üí Accuracy=73.42%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.61, low=0.6 ‚Üí Accuracy=73.69%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.62, low=0.55 ‚Üí Accuracy=71.96%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.62, low=0.56 ‚Üí Accuracy=72.49%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.62, low=0.57 ‚Üí Accuracy=72.71%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.62, low=0.58 ‚Üí Accuracy=72.89%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.62, low=0.59 ‚Üí Accuracy=73.16%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.62, low=0.6 ‚Üí Accuracy=73.42%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.62, low=0.61 ‚Üí Accuracy=73.47%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.55 ‚Üí Accuracy=71.20%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.56 ‚Üí Accuracy=71.73%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.57 ‚Üí Accuracy=71.96%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.58 ‚Üí Accuracy=72.13%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.59 ‚Üí Accuracy=72.40%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.6 ‚Üí Accuracy=72.67%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.61 ‚Üí Accuracy=72.71%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.63, low=0.62 ‚Üí Accuracy=72.80%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.55 ‚Üí Accuracy=70.89%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.56 ‚Üí Accuracy=71.42%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.57 ‚Üí Accuracy=71.64%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.58 ‚Üí Accuracy=71.82%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.59 ‚Üí Accuracy=72.09%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.6 ‚Üí Accuracy=72.36%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.61 ‚Üí Accuracy=72.40%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.62 ‚Üí Accuracy=72.49%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.64, low=0.63 ‚Üí Accuracy=72.58%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.55 ‚Üí Accuracy=70.40%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.56 ‚Üí Accuracy=70.93%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.57 ‚Üí Accuracy=71.16%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.58 ‚Üí Accuracy=71.33%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.59 ‚Üí Accuracy=71.60%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.6 ‚Üí Accuracy=71.87%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.61 ‚Üí Accuracy=71.91%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.62 ‚Üí Accuracy=72.00%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.63 ‚Üí Accuracy=72.09%


  semantic = nlp(response).similarity(nlp(correct_answer))


Tested: high=0.65, low=0.64 ‚Üí Accuracy=72.27%


In [6]:
# ---------- Save Final Summary ----------
results_df = pd.DataFrame(results)
results_df.to_csv("threshold_results_train.csv", index=False)
print("\n‚úÖ All threshold combinations tested.")
print("üìÑ Results saved to 'threshold_results.csv'.")


‚úÖ All threshold combinations tested.
üìÑ Results saved to 'threshold_results.csv'.


In [7]:
# ---------- Print Best Thresholds ----------
best = results_df.loc[results_df['accuracy'].idxmax()]
print(f"\nüèÜ Best thresholds found:")
print(f"Result=1 limit: {best['result_1_limit']}")
print(f"Result=0 limit: {best['result_0_limit']}")
print(f"Accuracy: {best['accuracy']:.2f}%")


üèÜ Best thresholds found:
Result=1 limit: 0.6
Result=0 limit: 0.59
Accuracy: 73.87%
