In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast
import torch
from langdetect import detect
# from huggingface_hub import snapshot_download
# from transformers import AutoTokenizer, AutoModelForCausalLM
from googletrans import Translator
from thefuzz import fuzz

In [2]:
# More Accurate Translator (Recommended to use in the main version of this project)
# def load_translator_model():
#     model_id = "scb10x/Typhoon-translate-4b"
#     model_snap_shot = snapshot_download(model_id)
#     # load tokenizer and model from the local path
#     tokenizer = AutoTokenizer.from_pretrained(model_snap_shot)
#     model = AutoModelForCausalLM.from_pretrained(
#         model_snap_shot,
#         torch_dtype=torch.bfloat16,
#         device_map="auto",
#     )
#     return (tokenizer, model)

# def translate_eng_to_thai(tokenizer, model, text):
#     messages = [
#     {"role": "system", "content": "Translate the following text into Thai."},
#     {"role": "user", "content": f"{text}"},
#     ]

#     input_ids = tokenizer.apply_chat_template(
#         messages,
#         add_generation_prompt=True,
#         return_tensors="pt",
#     ).to(model.device)

#     outputs = model.generate(
#         input_ids,
#         max_new_tokens=8192,
#         temperature=0.2,
#     )
#     response = outputs[0][input_ids.shape[-1]:]
#     return tokenizer.decode(response, skip_special_tokens=True)

# (tokenizer, model) = load_translator_model()

## Data Preparation Steps (Load, Extract, Clean, Transform)

In [3]:
df = pd.read_excel('./Data/[CONFIDENTIAL] AI symptom picker data (Agnos candidate assignment).xlsx', sheet_name=['ai_symptom_picker'])
df = df['ai_symptom_picker']
df

Unnamed: 0,gender,age,summary,search_term
0,male,28,"{""diseases"": [], ""procedures"": [], ""no_symptom...","มีเสมหะ, ไอ"
1,male,27,"{""diseases"": [], ""procedures"": [], ""no_symptom...","ไอ, น้ำมูกไหล"
2,female,26,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ปวดท้อง
3,male,42,"{""diseases"": [], ""procedures"": [], ""no_symptom...",น้ำมูกไหล
4,female,40,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ตาแห้ง
...,...,...,...,...
995,male,38,"{""diseases"": [], ""procedures"": [], ""no_symptom...","บวม, ปวดข้อ"
996,male,33,"{""diseases"": [], ""procedures"": [], ""no_symptom...",เจ็บคอ
997,male,45,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ไข้
998,female,73,"{""diseases"": [], ""procedures"": [], ""no_symptom...",ไข้


In [4]:
def load_translator_model():
    return Translator()

def translate_eng_to_thai(translator, text):
    return translator.translate(text, src='en', dest='th')

def extracted_relevant_symptoms(translator, summary_dict):
    summary_dict = ast.literal_eval(summary_dict)
    yes_symptoms = summary_dict['yes_symptoms']
    # print(yes_symptoms)
    yes_symptoms_list = []
    lang_flag = 0
    for i in yes_symptoms:
        if detect(i['text']) != 'th':
            # i['text'] = translate_eng_to_thai(translator, i['text'])
            lang_flag = 1
            break
        if i['text'] == 'การรักษาก่อนหน้า' or ('treatment' in i['text']) or ('History' in i['text']) or ('ประวัติ' in i['text']):
            continue
        yes_symptoms_list.append(i['text'])
    
    if lang_flag == 1:
        eng_symptoms_list = []
        translated_symptoms_list = []
        for j in yes_symptoms:
            if j['text'] == 'การรักษาก่อนหน้า' or ('treatment' in j['text']) or ('History' in j['text']) or ('ประวัติ' in j['text']):
                continue
            eng_symptoms_list.append(j['text'])
        translated_symptoms_list = translate_eng_to_thai(translator, eng_symptoms_list)
        translated_symptoms_list = [k.text for k in translated_symptoms_list]
        # print(translated_symptoms_list)
        return translated_symptoms_list
            
    return yes_symptoms_list

    
def extracted_symptoms_with_description(): # Phase 2
    pass

translator = load_translator_model()
df['yes_symptoms'] = np.vectorize(extracted_relevant_symptoms, otypes=['object'])(translator, df['summary'])

In [5]:
def formatted_search_term(translator, search_term):
    if detect(search_term) != 'th':
        search_term = translate_eng_to_thai(translator, search_term).text
    search_term_list = [s.strip() for s in search_term.split(",") if s.strip()]
    return search_term_list

df['search_term'] = np.vectorize(formatted_search_term, otypes=['object'])(translator, df['search_term'])

In [6]:
df

Unnamed: 0,gender,age,summary,search_term,yes_symptoms
0,male,28,"{""diseases"": [], ""procedures"": [], ""no_symptom...","[มีเสมหะ, ไอ]","[เสมหะ, ไอ]"
1,male,27,"{""diseases"": [], ""procedures"": [], ""no_symptom...","[ไอ, น้ำมูกไหล]","[ไอ, น้ำมูกไหล]"
2,female,26,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดท้อง],[ปวดท้อง]
3,male,42,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[น้ำมูกไหล],[น้ำมูกไหล]
4,female,40,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ตาแห้ง],[ตาแห้ง]
...,...,...,...,...,...
995,male,38,"{""diseases"": [], ""procedures"": [], ""no_symptom...","[บวม, ปวดข้อ]","[บวมตามร่างกาย, ปวดข้อ, ปวดขา, ปวดขา]"
996,male,33,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[เจ็บคอ],[เจ็บคอ]
997,male,45,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ไข้],[เป็นไข้]
998,female,73,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ไข้],[เป็นไข้]


In [7]:
def build_symptom_vocab(yes_symptom):
    normalised_list = []
    for i in yes_symptom:
        if ('อาการ' in i):
            i = i[5:]
        if (i.startswith("การ")):
            i = i[3:]
        if i == 'ไม่คงที่การสูญเสียความสมดุล':
            i = 'เดินเซ ทรงตัวไม่ได้'
        if i == 'ไข้':
            i = 'เป็นไข้'
        if i not in symptom_vocab:
            symptom_vocab.add(i)
        normalised_list.append(i)
    return normalised_list

symptom_vocab = set()
df['yes_symptoms'] = np.vectorize(build_symptom_vocab, otypes=['object'])(df['yes_symptoms'])
# symptom_vocab

In [8]:
def build_symptom_vocab_and_alias(symptom_vocab): # We can add more aliases for the official symptoms here. This is only the example of how we can scale up our application to make it more robust.
    symptom_vocab_alias = dict()
    for i in symptom_vocab:
        symptom_tmp_list = []
        symptom_tmp_list.append(i)
        if i == 'เจ็บคอ': 
            symptom_tmp_list.append('คันคอ')
        if i == 'ท้องเสีย':
            symptom_tmp_list.append('ถ่ายเหลว')
        symptom_vocab_alias[i] = symptom_tmp_list
    return symptom_vocab_alias

symptom_vocab_with_aliases = build_symptom_vocab_and_alias(symptom_vocab)
# symptom_vocab_with_aliases

## Recommender Model

In [9]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity

def cosine_similarity_model(df):
    relevant_symptoms = df['yes_symptoms']

    mlb = MultiLabelBinarizer()
    patient_symptom_matrix = pd.DataFrame(
        mlb.fit_transform(relevant_symptoms),
        columns=mlb.classes_
    )

    symptom_similarity_matrix = cosine_similarity(patient_symptom_matrix.T)

    symptom_similarity_df = pd.DataFrame(
        symptom_similarity_matrix,
        index=patient_symptom_matrix.columns,
        columns=patient_symptom_matrix.columns
    )

    return symptom_similarity_df

## Full Pipeline of the Recommeder System (NLU + Recommender Model)

In [10]:
def boosting_examples():
    # Gender Boosting
    print('Gender Boosting Example, The Gender Boosting value that we get from for this specific symptom(ปวดท้อง)')
    display(df.loc[df['summary'].str.contains('ปวดท้อง')].value_counts(subset=['gender']))
    
    # Age Boosting
    print('Age Boosting Example, The Age Boosting value that we get from for this specific symptom(ปวดหลัง)')
    display(df.loc[(df['summary'].str.contains('ปวดหลัง'))])
    display(df.loc[(df['summary'].str.contains('ปวดหลัง')) & (df['age'] > 40)])

boosting_examples()

Gender Boosting Example, The Gender Boosting value that we get from for this specific symptom(ปวดท้อง)


gender
female    50
male      20
Name: count, dtype: int64

Age Boosting Example, The Age Boosting value that we get from for this specific symptom(ปวดหลัง)


Unnamed: 0,gender,age,summary,search_term,yes_symptoms
27,female,44,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
43,female,38,"{""diseases"": [], ""procedures"": [], ""no_symptom...","[ปวดบั้นเอว, ปวดหลัง]","[ปวดเอว, ปวดหลัง]"
99,male,26,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
157,male,34,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
274,female,47,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
489,female,43,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
572,male,47,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
646,female,55,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
715,female,25,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
855,female,60,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]


Unnamed: 0,gender,age,summary,search_term,yes_symptoms
27,female,44,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
274,female,47,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
489,female,43,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
572,male,47,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
646,female,55,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
855,female,60,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
900,male,60,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]
908,female,42,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดหลัง],[ปวดหลัง]


In [11]:
ALIAS_TO_OFFICIAL_MAP = {
    alias.lower(): official_symptom
    for official_symptom, aliases in symptom_vocab_with_aliases.items()
    for alias in aliases
}

# We can add more Post-Filtering rules here in the main project.
GENDER_RULES = {
    "male": ["ปวดประจำเดือน"],
    "female": ["อัณฑะ"]
}

GENDER_BOOST_RULES = {
    "ปวดท้อง": {'gender': 'female', "boost_factor": 2.5},
}

AGE_BOOST_RULES = {
    "ปวดหลัง": {"min_age": 40, "boost_factor": round(14 / 8, 1)},
}

def extract_symptoms_from_search_terms(raw_text, knowledge_base, confidence_threshold=70):
    if not isinstance(raw_text, str):
        return []
    
    clean_text = raw_text.strip().lower()
    raw_text_split = raw_text.split(', ')
    for i in raw_text_split:
        if i in ALIAS_TO_OFFICIAL_MAP:
            return [ALIAS_TO_OFFICIAL_MAP[i]]

    found_symptoms = set()
    for official_symptom, aliases in knowledge_base.items():
        sorted_aliases = sorted(aliases, key=len, reverse=True)
        for alias in sorted_aliases:
            score = fuzz.partial_ratio(alias.lower(), clean_text)
            if score >= confidence_threshold:
                found_symptoms.add(official_symptom)
                break
    found_symptoms = sorted(found_symptoms, reverse=True)
    return list(found_symptoms)


def filter_and_rerank(recommendations, user): 
    # We have two approaches, Pre-Filter (find a small group of patients who are very similar to our current user (e.g., females aged 30-40)) and Post-Filter. 
    # In this case, I decided to use Post-Filter because we have small-sized datasets/samples.
    final_recs = {}
    for symptom, score in recommendations.items():
        
        # Gender Filtering
        symptoms_to_remove = GENDER_RULES.get(user["gender"], [])
        if symptom in symptoms_to_remove:
            continue
        
        # Gender Boosting
        if symptom in GENDER_BOOST_RULES:
            rule = GENDER_BOOST_RULES[symptom]
            if user["gender"] == rule.get("gender"):
                score *= rule.get("boost_factor")
        
            
        # Age Boosting
        if symptom in AGE_BOOST_RULES:
            rule = AGE_BOOST_RULES[symptom]
            if user["age"] >= rule.get("min_age"):
                score *= rule.get("boost_factor")
        
        final_recs[symptom] = score
        
    sorted_recs = dict(sorted(final_recs.items(), key=lambda item: item[1], reverse=True))
    return sorted_recs

def get_symtom_recommendations(search_term, similarity_df, symptom_vocab_with_aliases, age=None, gender=None, top_n=5):
    # Normalised user_search_term to Official Symptoms
    user_search_term_sypmtom = extract_symptoms_from_search_terms(search_term, symptom_vocab_with_aliases)
    print(f"1. NLU Extraction: '{search_term}' -> {user_search_term_sypmtom}")
    
    # Recommend Next Official Symptoms to user
    final_scores = {}
    for symptom in user_search_term_sypmtom:
        if symptom in similarity_df.columns:
            similar_scores = similarity_df[symptom]
            for rec_symptom, score in similar_scores.items():
                if rec_symptom not in user_search_term_sypmtom:
                    final_scores[rec_symptom] = final_scores.get(rec_symptom, 0) + score
    
    # Average the scores
    for symptom in final_scores:
        final_scores[symptom] /= len(user_search_term_sypmtom)
        
    candidate_recs = dict(sorted(final_scores.items(), key=lambda item: item[1], reverse=True))
    print(f"2. Next Symptoms Generation -> {list(candidate_recs.keys())[:top_n]}")

    # User's Personalised Next Official Symptoms
    if (age != None) and (gender != None):
        user_profile = {"age": age, "gender": gender}
        personalized_recs = filter_and_rerank(candidate_recs, user_profile)
        print(f"3. Personalisation -> {list(personalized_recs.keys())[:top_n]}")
        return {
            'initial_user_search_term': search_term,
            'official_user_symptom_from_search_term': user_search_term_sypmtom,
            'next_symptom_recommendations': list(personalized_recs.keys())[:top_n]
        }
    
    return {
        'initial_user_search_term': search_term,
        'official_user_symptom_from_search_term': user_search_term_sypmtom,
        'next_symptom_recommendations': list(candidate_recs.keys())[:top_n]
    }
    

# ---- Main ----
similarity_df = cosine_similarity_model(df)
recommendations = get_symtom_recommendations('จุกแน่น แน่นท้อง', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('ปวดหัว', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('ไอ, มีเสมหะน้ำมูกไหล', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('ไอ, น้ำมูกไหล', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('ไอ', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('คันคอ', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('คันคอ, เจ็บคอ', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('ปวดท้อง', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('ท้องอืด', similarity_df, symptom_vocab_with_aliases)
recommendations = get_symtom_recommendations('ท้องอืด', similarity_df, symptom_vocab_with_aliases, gender='female', age=47)
recommendations = get_symtom_recommendations('ท้องอืด', similarity_df, symptom_vocab_with_aliases, gender='male', age=47)
recommendations = get_symtom_recommendations('มีเสมหะน้ำมูกไหล', similarity_df, symptom_vocab_with_aliases)

# recommendations

1. NLU Extraction: 'จุกแน่น แน่นท้อง' -> ['ปวดท้อง', 'ท้องอืด', 'ท้องผูก']
2. Next Symptoms Generation -> ['คลื่นไส้', 'อาเจียน', 'ถ่ายเป็นเลือด', 'ท้องเสีย', 'ปวดท้องน้อย']
1. NLU Extraction: 'ปวดหัว' -> ['ปวดหัว']
2. Next Symptoms Generation -> ['ปวดบริเวณใบหน้า', 'ชา', 'เดินเซ ทรงตัวไม่ได้', 'เป็นไข้', 'เวียนศีรษะ บ้านหมุน']
1. NLU Extraction: 'ไอ, มีเสมหะน้ำมูกไหล' -> ['ไอ']
2. Next Symptoms Generation -> ['เสมหะ', 'น้ำมูกไหล', 'เจ็บคอ', 'จมูกน้ำมูกไหล', 'เสียงฮืด ๆ']
1. NLU Extraction: 'ไอ, น้ำมูกไหล' -> ['ไอ']
2. Next Symptoms Generation -> ['เสมหะ', 'น้ำมูกไหล', 'เจ็บคอ', 'จมูกน้ำมูกไหล', 'เสียงฮืด ๆ']
1. NLU Extraction: 'ไอ' -> ['ไอ']
2. Next Symptoms Generation -> ['เสมหะ', 'น้ำมูกไหล', 'เจ็บคอ', 'จมูกน้ำมูกไหล', 'เสียงฮืด ๆ']
1. NLU Extraction: 'คันคอ' -> ['เจ็บคอ']
2. Next Symptoms Generation -> ['เสมหะ', 'ไอ', 'เป็นไข้', 'คำพูดที่เบลอ', 'ปวดหู']
1. NLU Extraction: 'คันคอ, เจ็บคอ' -> ['เจ็บคอ']
2. Next Symptoms Generation -> ['เสมหะ', 'ไอ', 'เป็นไข้', 'คำพูดที่เบลอ', 'ปวดหู'

In [None]:
## Save model's artefacts to make it reproducable when creating the API Interface for developers.
import json

all_configs = {
    "symptom_knowledge_base": symptom_vocab_with_aliases,
    "alias_to_official": ALIAS_TO_OFFICIAL_MAP,
    "gender_rules": GENDER_RULES,
    "gender_boost_rules": GENDER_BOOST_RULES,
    "age_boost_rules": AGE_BOOST_RULES
}

similarity_df.to_csv('./Model_Artefacts/symptom_similarity_model.csv')
with open("./Model_Artefacts/model_configs.json", "w", encoding="utf-8") as file:
    json.dump(all_configs, file, ensure_ascii=False, indent=4)


## Recommender Model Evaluation

In [16]:
from sklearn.model_selection import train_test_split

    
def get_symtom_without_print_recommendations(search_term, similarity_df, symptom_vocab_with_aliases, age=None, gender=None, top_n=5):
    # Normalised user_search_term to Official Symptoms
    user_search_term_sypmtom = extract_symptoms_from_search_terms(search_term, symptom_vocab_with_aliases)
    # print(f"1. NLU Extraction: '{search_term}' -> {user_search_term_sypmtom}")
    
    # Recommend Next Official Symptoms to user
    final_scores = {}
    for symptom in user_search_term_sypmtom:
        if symptom in similarity_df.columns:
            similar_scores = similarity_df[symptom]
            for rec_symptom, score in similar_scores.items():
                if rec_symptom not in user_search_term_sypmtom:
                    final_scores[rec_symptom] = final_scores.get(rec_symptom, 0) + score
    
    # Average the scores
    for symptom in final_scores:
        final_scores[symptom] /= len(user_search_term_sypmtom)
        
    candidate_recs = dict(sorted(final_scores.items(), key=lambda item: item[1], reverse=True))
    # print(f"2. Next Symptoms Generation -> {list(candidate_recs.keys())[:top_n]}")

    # User's Personalised Next Official Symptoms
    if (age != None) and (gender != None):
        user_profile = {"age": age, "gender": gender}
        personalized_recs = filter_and_rerank(candidate_recs, user_profile)
        # print(f"3. Personalisation -> {list(personalized_recs.keys())[:top_n]}")
        return {
            'initial_user_search_term': search_term,
            'official_user_symptom_from_search_term': user_search_term_sypmtom,
            'next_symptom_recommendations': list(personalized_recs.keys())[:top_n]
        }
    
    return {
        'initial_user_search_term': search_term,
        'official_user_symptom_from_search_term': user_search_term_sypmtom,
        'next_symptom_recommendations': list(candidate_recs.keys())[:top_n]
    }



def evaluate_recommender_system(all_yes_symptoms, symptom_vocab_with_aliases, k=5):
    
    # Split the entire dataset into training and testing sets of patients
    train_yes_symptoms, test_yes_symptoms = train_test_split(all_yes_symptoms, test_size=0.2, random_state=42)

    # Train the similarity model ONLY on the training data
    print(f"Training model on {len(train_yes_symptoms)} patients.")
    mlb = MultiLabelBinarizer()
    train_matrix = mlb.fit_transform(train_yes_symptoms)
    
    # Ensure all symptoms from the test set are in the model's columns, even if with all zeros
    all_symptoms = set(s for sublist in all_yes_symptoms for s in sublist)
    mlb.classes_ = np.array(sorted(list(all_symptoms)))
    
    # Re-transform with the full symptom vocabulary
    train_matrix = mlb.fit_transform(train_yes_symptoms)
    train_df = pd.DataFrame(train_matrix, columns=mlb.classes_)

    similarity_matrix_train_only = cosine_similarity(train_df.T)
    similarity_df_train_only = pd.DataFrame(
        similarity_matrix_train_only,
        index=train_df.columns,
        columns=train_df.columns
    )
    print("Model training complete.\n")

    # Evaluate ONLY on the test data
    print(f"Evaluating model on {len(test_yes_symptoms)} unseen patients.")
    all_hits = []
    all_precisions = []
    all_recalls = []
    mrr = []
    all_test_set_patients = 0

    for symptom_list in test_yes_symptoms:
        if len(symptom_list) < 2: # We can only evaluate if there are enough symptoms to split
            continue
        
        input_set, hold_out_set = train_test_split(symptom_list, test_size=0.5, random_state=42) # Split the data into what the model sees (input) and what it should predict (hold-out)
        if not input_set or not hold_out_set: 
            continue
        
        all_test_set_patients += 1
        input_set = ', '.join(input_set)
        # print(input_set)
        # print(hold_out_set)
        # Use the model trained ONLY on training data to make predictions (similarity_df_train_only)
        recommendations = get_symtom_without_print_recommendations(input_set, similarity_df_train_only, symptom_vocab_with_aliases, top_n=k)['next_symptom_recommendations']

        # Compare recommendations to the hold-out set to find hits
        hits = len(set(recommendations) & set(hold_out_set))
        all_hits.append(hits)
        all_precisions.append(hits / k)
        all_recalls.append(hits / len(hold_out_set))
        try:
            mrr.append(1 / (recommendations.index(hold_out_set[0]) + 1))
        except:
            mrr.append(0)

    # Calculate average metrics across all patients
    hit_rate = np.mean([1 if h > 0 else 0 for h in all_hits])
    mrr_rate = np.mean(mrr)
    avg_recall = np.mean(all_recalls) if all_recalls else 0
    
    return {"hit_rate_at_k": hit_rate , "recall_at_k": avg_recall, 'mrr_rate': mrr_rate}


# --- Main Evaluation ---
K_VALUE = 5
results = evaluate_recommender_system(df['yes_symptoms'], symptom_vocab_with_aliases, k=K_VALUE)

print(f"\n--- Evaluation Results (Top {K_VALUE} Recommendations) ---")
print(f"Hit Rate@{K_VALUE}: {results['hit_rate_at_k']:.4f}")
print(f"Average Recall@{K_VALUE}: {results['recall_at_k']:.4f}")
print(f"Mean Reciprocal Rate(MRR)@{K_VALUE}: {results['mrr_rate']:.4f}") # Rank of first relevant item -> In this case, it means that 

Training model on 800 patients.
Model training complete.

Evaluating model on 200 unseen patients.

--- Evaluation Results (Top 5 Recommendations) ---
Hit Rate@5: 0.7778
Average Recall@5: 0.7611
Mean Reciprocal Rate(MRR)@5: 0.5009


## Evaluate on the Full Pipeline (NLU + Recommender System)

In [18]:
df

Unnamed: 0,gender,age,summary,search_term,yes_symptoms
0,male,28,"{""diseases"": [], ""procedures"": [], ""no_symptom...","[มีเสมหะ, ไอ]","[เสมหะ, ไอ]"
1,male,27,"{""diseases"": [], ""procedures"": [], ""no_symptom...","[ไอ, น้ำมูกไหล]","[ไอ, น้ำมูกไหล]"
2,female,26,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ปวดท้อง],[ปวดท้อง]
3,male,42,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[น้ำมูกไหล],[น้ำมูกไหล]
4,female,40,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ตาแห้ง],[ตาแห้ง]
...,...,...,...,...,...
995,male,38,"{""diseases"": [], ""procedures"": [], ""no_symptom...","[บวม, ปวดข้อ]","[บวมตามร่างกาย, ปวดข้อ, ปวดขา, ปวดขา]"
996,male,33,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[เจ็บคอ],[เจ็บคอ]
997,male,45,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ไข้],[เป็นไข้]
998,female,73,"{""diseases"": [], ""procedures"": [], ""no_symptom...",[ไข้],[เป็นไข้]


In [17]:
def evaluate_full_recommender_system(full_df, symptom_vocab_with_aliases, k=5):
    # display(full_df)
    # Split the entire dataset into training and testing sets of patients
    train_df, test_df = train_test_split(full_df, test_size=0.2, random_state=42)

    # Train the similarity model ONLY on the training data
    print(f"Training model on {len(train_df)} patients.")
    mlb = MultiLabelBinarizer()
    train_matrix = mlb.fit_transform(train_df['yes_symptoms'])
    
    # Ensure all symptoms from the test set are in the model's columns, even if with all zeros
    all_symptoms = set(s for sublist in full_df['yes_symptoms'] for s in sublist)
    mlb.classes_ = np.array(sorted(list(all_symptoms)))
    
    # Re-transform with the full symptom vocabulary
    train_matrix = mlb.fit_transform(train_df['yes_symptoms'])
    train_df = pd.DataFrame(train_matrix, columns=mlb.classes_)

    similarity_matrix_train_only = cosine_similarity(train_df.T)
    similarity_df_train_only = pd.DataFrame(
        similarity_matrix_train_only,
        index=train_df.columns,
        columns=train_df.columns
    )
    print("Model training complete.\n")

    # Evaluate ONLY on the test data
    print(f"Evaluating model on {len(test_df)} unseen patients.")
    all_hits = []
    all_precisions = []
    all_recalls = []
    mrr = []
    all_test_set_patients = 0
    for _ , patient in test_df.iterrows():
        search_term_input = patient['search_term']
        true_symptoms_set = set(patient['yes_symptoms'])
        # print(1, search_term_input)
        # print(2, true_symptoms_set)
        nlu_extracted_symptoms = set(extract_symptoms_from_search_terms(', '.join(search_term_input), symptom_vocab_with_aliases))
        # print(extract_symptoms_from_search_terms(', '.join(search_term_input), symptom_vocab_with_aliases))
        # print(3, nlu_extracted_symptoms)
        hold_out_set = set(true_symptoms_set) - nlu_extracted_symptoms
        # print(4, hold_out_set)
        if not hold_out_set:
            continue
        
        all_test_set_patients += 1
        input_set = ', '.join(search_term_input)
        # print(input_set)
        # print(hold_out_set)
        # Use the model trained ONLY on training data to make predictions (similarity_df_train_only)
        recommendations = get_symtom_without_print_recommendations(input_set, similarity_df_train_only, symptom_vocab_with_aliases, top_n=k)['next_symptom_recommendations']


        # Compare recommendations to the hold-out set to find hits
        hits = len(set(recommendations) & set(hold_out_set))
        all_hits.append(hits)
        all_precisions.append(hits / k)
        all_recalls.append(hits / len(hold_out_set))
        try:
            mrr.append(1 / (recommendations.index(list(hold_out_set)[0]) + 1))
        except:
            mrr.append(0)

    # Calculate average metrics across all patients
    hit_rate = np.mean([1 if h > 0 else 0 for h in all_hits])
    mrr_rate = np.mean(mrr)
    avg_recall = np.mean(all_recalls) if all_recalls else 0
    
    return {"hit_rate_at_k": hit_rate , "recall_at_k": avg_recall, 'mrr_rate': mrr_rate}


# --- Main Evaluation ---
K_VALUE = 5
results = evaluate_full_recommender_system(df, symptom_vocab_with_aliases, k=K_VALUE)

print(f"\n--- Evaluation Results (Top {K_VALUE} Recommendations) ---")
print(f"Hit Rate@{K_VALUE}: {results['hit_rate_at_k']:.4f}")
print(f"Average Recall@{K_VALUE}: {results['recall_at_k']:.4f}")
print(f"Mean Reciprocal Rate(MRR)@{K_VALUE}: {results['mrr_rate']:.4f}") # Rank of first relevant item

Training model on 800 patients.
Model training complete.

Evaluating model on 200 unseen patients.

--- Evaluation Results (Top 5 Recommendations) ---
Hit Rate@5: 0.6915
Average Recall@5: 0.6613
Mean Reciprocal Rate(MRR)@5: 0.4853
