<a href="https://colab.research.google.com/github/Ajogeorge29/100-Days-of-Python_in_MedImages/blob/main/experiemnets_Dental_quiz_chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ====================================== #
# STEP 0: Mount Google Drive & Load Data #
# ====================================== #
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import json, random
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher

# Path to your files (adjust folder if needed)
base_path = "/content/drive/MyDrive/IIT_ropar_submission/dental_quiz_dataset/"

files = {
    "pages": "pedodontics_pages_181_185.json",
    "mcq": "pedodontics_mcq_101_118.json",  # This file is referenced but not provided
    "concepts": "pedodontics_concepts_keywords_101_118.json",
    "combined": "pedodontics_combined_all_pages.json"
}

# Load all datasets with error handling
datasets = {}
for key, fname in files.items():
    try:
        with open(base_path + fname, "r") as f:
            datasets[key] = json.load(f)
        print(f"Successfully loaded {key}: {len(datasets[key])} entries")
    except FileNotFoundError:
        print(f"Warning: {fname} not found. Creating empty dataset for {key}.")
        datasets[key] = []
    except Exception as e:
        print(f"Error loading {fname}: {e}")
        datasets[key] = []

pages_data = datasets["pages"]
mcq_data = datasets["mcq"]
concepts_data = datasets["concepts"]
combined_data = datasets["combined"]

print("\nDataset Summary:")
print(f"Pages: {len(pages_data)} | MCQs: {len(mcq_data)} | Concepts: {len(concepts_data)} | Combined: {len(combined_data)}")

Successfully loaded pages: 65 entries
Successfully loaded mcq: 23 entries
Successfully loaded concepts: 79 entries
Successfully loaded combined: 88 entries

Dataset Summary:
Pages: 65 | MCQs: 23 | Concepts: 79 | Combined: 88


In [4]:

# ======================
# STEP 1: Build Knowledge Base
# ======================
knowledge_base = []
for entry in combined_data:
    kb_entry = {
        "concept": entry.get("concept", "Unknown concept"),
        "explanation": entry.get("explanation", "No explanation available"),
        "keywords": entry.get("keywords", []),
        "id": entry.get("id", "unknown"),
        "questions": []  # Will be filled in next step
    }
    knowledge_base.append(kb_entry)

print(f"\nBuilt knowledge base with {len(knowledge_base)} entries")
if knowledge_base:
    print("Sample KB entry:", knowledge_base[0])


Built knowledge base with 88 entries
Sample KB entry: {'concept': 'Fones method for mixed/young adult dentition', 'explanation': 'According to Finn, the Fones technique is the best method of brushing for mixed and young adult dentitions.', 'keywords': ['Fones method', 'tooth brushing', 'mixed dentition', 'young adult'], 'id': 26, 'questions': []}


In [5]:


# ======================
# STEP 2: Rule-based Question Templates
# ======================
question_templates = [
    "What is {concept}?",
    "Which of the following is true about {concept}?",
    "Describe {concept}.",
    "What are the key features of {concept}?",
    "How would you define {concept}?",
    "What do you know about {concept}?"
]

def generate_questions(concept_entry):
    """Generate questions for a concept"""
    concept = concept_entry["concept"]
    explanation = concept_entry["explanation"]

    # Generate answer from first sentence of explanation
    answer = explanation.split('.')[0].strip()
    if not answer:
        answer = explanation[:100] + "..." if len(explanation) > 100 else explanation

    questions = []
    for template in question_templates[:3]:  # Use first 3 templates
        question = template.format(concept=concept)
        questions.append({
            "q": question,
            "a": answer,
            "full_explanation": explanation
        })

    return questions

# Attach generated questions to knowledge base
for entry in knowledge_base:
    entry["questions"] = generate_questions(entry)

print(f"Generated questions for knowledge base")
if knowledge_base and knowledge_base[0]["questions"]:
    print("Sample question:", knowledge_base[0]["questions"][0])

Generated questions for knowledge base
Sample question: {'q': 'What is Fones method for mixed/young adult dentition?', 'a': 'According to Finn, the Fones technique is the best method of brushing for mixed and young adult dentitions', 'full_explanation': 'According to Finn, the Fones technique is the best method of brushing for mixed and young adult dentitions.'}


In [6]:
# ======================
# STEP 3: NLP Explanation Retrieval (TF-IDF)
# ======================
if knowledge_base:
    explanations = [entry["explanation"] for entry in knowledge_base]
    vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)

    try:
        X = vectorizer.fit_transform(explanations)

        def get_best_explanation(query, top_k=1):
            """Get best matching explanation for a query"""
            try:
                query_vec = vectorizer.transform([query])
                sims = cosine_similarity(query_vec, X).flatten()
                best_indices = sims.argsort()[-top_k:][::-1]

                if sims[best_indices[0]] > 0.1:  # Minimum similarity threshold
                    return explanations[best_indices[0]]
                else:
                    return "No relevant explanation found."
            except Exception as e:
                print(f"Error in explanation retrieval: {e}")
                return "Explanation retrieval failed."

        print("TF-IDF vectorizer initialized successfully")
        print("Example explanation retrieval:", get_best_explanation("What is dental caries?")[:100] + "...")

    except Exception as e:
        print(f"Error initializing TF-IDF: {e}")
        def get_best_explanation(query, top_k=1):
            return "Explanation system not available."
else:
    def get_best_explanation(query, top_k=1):
        return "No knowledge base available."


TF-IDF vectorizer initialized successfully
Example explanation retrieval: Caries proceeds in episodes of demineralization and re‑deposition rather than a continuous process....


In [7]:
# ======================
# STEP 4: Adaptive Question Selection
# ======================
student_history = {}
if knowledge_base:
    student_history = {entry["concept"]: {"correct": 0, "wrong": 0} for entry in knowledge_base}

def select_next_question():
    """Select next question based on student performance"""
    if not knowledge_base:
        return None, None

    # Calculate difficulty scores (higher = needs more practice)
    scores = {}
    for concept, history in student_history.items():
        total_attempts = history["correct"] + history["wrong"]
        if total_attempts == 0:
            scores[concept] = 1  # New concepts get priority
        else:
            # Score based on wrong/total ratio
            scores[concept] = history["wrong"] / total_attempts

    # Select concept with highest score
    if not scores:
        worst_concept = random.choice(list(student_history.keys()))
    else:
        worst_concept = max(scores, key=scores.get)

    # Find the concept entry
    concept_entry = next((item for item in knowledge_base if item["concept"] == worst_concept), None)

    if concept_entry and concept_entry["questions"]:
        selected_question = random.choice(concept_entry["questions"])
        return selected_question, worst_concept
    else:
        # Fallback to random selection
        random_entry = random.choice(knowledge_base)
        if random_entry["questions"]:
            return random.choice(random_entry["questions"]), random_entry["concept"]

    return None, None

# ======================
# Helper Functions
# ======================
def is_correct(student_ans, correct_ans, threshold=0.6):
    """Check if student answer is sufficiently close to correct answer"""
    if not student_ans or not correct_ans:
        return False

    student_clean = student_ans.strip().lower()
    correct_clean = correct_ans.strip().lower()

    # Exact match
    if student_clean == correct_clean:
        return True

    # Fuzzy matching
    similarity = SequenceMatcher(None, student_clean, correct_clean).ratio()
    return similarity >= threshold

def safe_input(prompt, default=""):
    """Safe input function with error handling"""
    try:
        response = input(prompt)
        return response if response.strip() else default
    except (KeyboardInterrupt, EOFError):
        print("\nInput interrupted. Using default.")
        return default
    except Exception as e:
        print(f"Input error: {e}. Using default.")
        return default


In [22]:
# ======================
# STEP 5: Enhanced Quiz Engine
# ======================
quiz_logs = []

def run_quiz(rounds=5, use_mcq=False):
    """Run the quiz with improved error handling and user experience"""
    print(f"\n{'='*50}")
    print(f"Starting {'MCQ' if use_mcq else 'Rule-based'} Quiz - {rounds} questions")
    print(f"{'='*50}")

    if use_mcq and not mcq_data:
        print("No MCQ data available. Switching to rule-based mode.")
        use_mcq = False

    if not use_mcq and not knowledge_base:
        print("No knowledge base available. Cannot run quiz.")
        return

    for i in range(rounds):
        print(f"\n--- Question {i+1} of {rounds} ---")

        try:
            if use_mcq:
                success = run_mcq_question(i)
            else:
                success = run_rule_based_question(i)

            if not success:
                print("Skipping question due to error.")
                continue

        except KeyboardInterrupt:
            print("\nQuiz interrupted by user.")
            break
        except Exception as e:
            print(f"Error in question {i+1}: {e}")
            continue

    print(f"\n{'='*50}")
    print("Quiz completed!")
    display_quiz_summary()

def run_mcq_question(question_num):
    """Run a single MCQ question"""
    if not mcq_data:
        return False

    q = random.choice(mcq_data)
    concept = q.get("topic", "general")

    print(f"Topic: {concept}")
    print(f"Question: {q['question']}")

    options = q.get("options", [])
    if not options:
        print("Error: No options available for this question.")
        return False

    for idx, option in enumerate(options, start=1):
        print(f"{idx}. {option}")

    # Get correct answer
    answer_key = q.get("answer", "A").upper()
    try:
        correct_idx = ord(answer_key) - ord('A')
        if 0 <= correct_idx < len(options):
            correct_ans = options[correct_idx]
        else:
            correct_ans = options[0]
            print("Warning: Invalid answer key, using first option.")
    except:
        correct_ans = options[0]
        print("Warning: Error processing answer key, using first option.")

    # Get student answer
    while True:
        try:
            ans_input = safe_input(f"Your answer (1-{len(options)}): ")
            if ans_input.lower() in ['quit', 'exit']:
                return False

            ans = int(ans_input)
            if 1 <= ans <= len(options):
                student_choice = options[ans - 1]
                break
            else:
                print(f"Please enter a number between 1 and {len(options)}")
        except ValueError:
            print("Please enter a valid number.")

    # Check answer
    correct = (student_choice == correct_ans)

    # Log the result
    quiz_logs.append({
        "question_num": question_num + 1,
        "type": "MCQ",
        "question": q["question"],
        "student_answer": student_choice,
        "correct_answer": correct_ans,
        "correct": correct,
        "concept": concept
    })

    # Provide feedback
    if correct:
        print("✓ Correct!")
    else:
        print(f"✗ Wrong! The correct answer is: {correct_ans}")
        explanation = q.get("explanation") or get_best_explanation(q["question"])
        if explanation:
            print(f"Explanation: {explanation}")

    return True

def run_rule_based_question(question_num):
    """Run a single rule-based question"""
    q_data, concept = select_next_question()
    if not q_data or not concept:
        print("No questions available.")
        return False

    print(f"Topic: {concept}")
    print(f"Question: {q_data['q']}")

    # Get student answer
    ans = safe_input("Your answer: ")
    if ans.lower() in ['quit', 'exit']:
        return False

    # Check answer using the new answer_correct function
    correct = answer_correct(ans, q_data["a"])

    # Update student history
    if concept in student_history:
        if correct:
            student_history[concept]["correct"] += 1
        else:
            student_history[concept]["wrong"] += 1

    # Log the result
    quiz_logs.append({
        "question_num": question_num + 1,
        "type": "Rule-based",
        "question": q_data["q"],
        "student_answer": ans,
        "correct_answer": q_data["a"],
        "correct": correct,
        "concept": concept
    })

    # Provide feedback
    if correct:
        print("✓ Correct!")
    else:
        print(f"✗ Wrong! Expected answer: {q_data['a']}")
        explanation = q_data.get("full_explanation") or get_best_explanation(q_data["q"])
        if explanation:
            print(f"Explanation: {explanation}")

    return True

def display_quiz_summary():
    """Display quiz results summary"""
    if not quiz_logs:
        print("No quiz data to summarize.")
        return

    df = pd.DataFrame(quiz_logs)

    print("\nQuiz Summary:")
    print(f"Total questions: {len(quiz_logs)}")

    if len(quiz_logs) > 0:
        correct_count = df['correct'].sum()
        total_count = len(quiz_logs)
        percentage = (correct_count / total_count) * 100

        print(f"Correct answers: {correct_count}/{total_count} ({percentage:.1f}%)")

        # Performance by concept
        concept_performance = df.groupby('concept')['correct'].agg(['sum', 'count']).reset_index()
        concept_performance['percentage'] = (concept_performance['sum'] / concept_performance['count']) * 100

        print("\nPerformance by concept:")
        for _, row in concept_performance.iterrows():
            print(f"  {row['concept']}: {row['sum']}/{row['count']} ({row['percentage']:.1f}%)")

        # Detailed log
        print("\nDetailed Results:")
        for log in quiz_logs:
            status = "✓" if log['correct'] else "✗"
            print(f"Q{log['question_num']} [{log['type']}] {status} {log['concept']}")

In [9]:
# ======================
# STEP 6: Run Quiz Demo
# ======================

if __name__ == "__main__":
    print("Dental Quiz System Initialized!")

    # Run demonstration
    print("\nRunning demo quiz...")

    # Check available data
    if knowledge_base:
        print(f"Knowledge base loaded with {len(knowledge_base)} concepts")
        run_quiz(3, use_mcq=False)
    else:
        print("No knowledge base available for rule-based questions.")

    if mcq_data:
        print(f"MCQ database loaded with {len(mcq_data)} questions")
        run_quiz(3, use_mcq=True)
    else:
        print("No MCQ data available.")

    # Show final performance
    if student_history:
        print("\nFinal Student Performance Summary:")
        for concept, perf in student_history.items():
            total = perf['correct'] + perf['wrong']
            if total > 0:
                accuracy = (perf['correct'] / total) * 100
                print(f"{concept}: {perf['correct']}/{total} ({accuracy:.1f}%)")

Dental Quiz System Initialized!

Running demo quiz...
Knowledge base loaded with 88 concepts

Starting Rule-based Quiz - 3 questions

--- Question 1 of 3 ---
Topic: Fones method for mixed/young adult dentition
Question: Describe Fones method for mixed/young adult dentition.
Your answer: brushing technique
✗ Wrong! Expected answer: According to Finn, the Fones technique is the best method of brushing for mixed and young adult dentitions
Explanation: According to Finn, the Fones technique is the best method of brushing for mixed and young adult dentitions.

--- Question 2 of 3 ---
Topic: Fones method for mixed/young adult dentition
Question: What is Fones method for mixed/young adult dentition?
Your answer: brushing
✗ Wrong! Expected answer: According to Finn, the Fones technique is the best method of brushing for mixed and young adult dentitions
Explanation: According to Finn, the Fones technique is the best method of brushing for mixed and young adult dentitions.

--- Question 3 of 3 -

In [14]:
# ======================
# STEP 7: Improved Gradio Interface
# ======================
try:
    import gradio as gr

    # Global state for Gradio session
    quiz_state = {
        "round": 0,
        "use_mcq": False,
        "current_question": None,
        "current_concept": None,
        "current_options": [],
        "current_correct_answer": "",
        "total_rounds": 10,
        "score": 0,
        "session_logs": []
    }

    def start_quiz_gradio(mode, total_rounds=10):
        """Initialize Gradio quiz session"""
        try:
            quiz_state["round"] = 1
            quiz_state["use_mcq"] = (mode == "MCQ")
            quiz_state["total_rounds"] = int(total_rounds)
            quiz_state["score"] = 0
            quiz_state["session_logs"] = []

            return get_next_question()

        except Exception as e:
            return f"Error starting quiz: {e}", [], f"Score: 0/0", "", gr.update(visible=False)

    def get_next_question():
        """Get the next question for Gradio interface"""
        try:
            if quiz_state["round"] > quiz_state["total_rounds"]:
                return finish_quiz()

            if quiz_state["use_mcq"] and mcq_data:
                return get_mcq_question()
            elif not quiz_state["use_mcq"] and knowledge_base:
                return get_rule_based_question()
            else:
                return "No questions available", [], f"Score: {quiz_state['score']}/{quiz_state['round']-1}", "", gr.update(visible=False)

        except Exception as e:
            return f"Error getting question: {e}", [], f"Score: {quiz_state['score']}/{quiz_state['round']-1}", "", gr.update(visible=False)

    def get_mcq_question():
        """Get MCQ question for Gradio"""
        q = random.choice(mcq_data)
        quiz_state["current_question"] = q
        quiz_state["current_concept"] = q.get("topic", "general")
        quiz_state["current_options"] = q.get("options", [])

        # Determine correct answer
        answer_key = q.get("answer", "A").upper()
        try:
            correct_idx = ord(answer_key) - ord('A')
            quiz_state["current_correct_answer"] = quiz_state["current_options"][correct_idx]
        except:
            quiz_state["current_correct_answer"] = quiz_state["current_options"][0] if quiz_state["current_options"] else ""

        question_text = f"Q{quiz_state['round']}: {q['question']}"
        score_text = f"Score: {quiz_state['score']}/{quiz_state['round']-1}"

        return question_text, gr.update(choices=quiz_state["current_options"], visible=True), score_text, "", gr.update(visible=True)

    def get_rule_based_question():
        """Get rule-based question for Gradio"""
        q_data, concept = select_next_question()
        if not q_data:
            return "No rule-based questions available", [], f"Score: {quiz_state['score']}/{quiz_state['round']-1}", "", gr.update(visible=False)

        quiz_state["current_question"] = q_data
        quiz_state["current_concept"] = concept
        quiz_state["current_options"] = []
        quiz_state["current_correct_answer"] = q_data["a"]

        question_text = f"Q{quiz_state['round']}: {q_data['q']}"
        score_text = f"Score: {quiz_state['score']}/{quiz_state['round']-1}"

        return question_text, gr.update(choices=[], visible=False), score_text, "", gr.update(visible=True)

    def submit_answer_gradio(selected_option, text_answer):
        """Process submitted answer in Gradio"""
        try:
            # Determine student's answer
            if quiz_state["use_mcq"]:
                student_answer = selected_option
                correct = (student_answer == quiz_state["current_correct_answer"])
            else:
                student_answer = text_answer.strip()
                correct = is_correct(student_answer, quiz_state["current_correct_answer"])

                # Update student history for rule-based questions
                concept = quiz_state["current_concept"]
                if concept in student_history:
                    if correct:
                        student_history[concept]["correct"] += 1
                    else:
                        student_history[concept]["wrong"] += 1

            # Update score
            if correct:
                quiz_state["score"] += 1

            # Log the result
            quiz_state["session_logs"].append({
                "round": quiz_state["round"],
                "question": quiz_state["current_question"]["question"] if quiz_state["use_mcq"] else quiz_state["current_question"]["q"],
                "student_answer": student_answer,
                "correct_answer": quiz_state["current_correct_answer"],
                "correct": correct,
                "concept": quiz_state["current_concept"]
            })

            # Prepare feedback
            if correct:
                feedback = "✓ Correct!"
            else:
                feedback = f"✗ Wrong! Correct answer: {quiz_state['current_correct_answer']}"

                # Add explanation
                if quiz_state["use_mcq"]:
                    explanation = quiz_state["current_question"].get("explanation", "")
                    if not explanation:
                        explanation = get_best_explanation(quiz_state["current_question"]["question"])
                else:
                    explanation = quiz_state["current_question"].get("full_explanation", "")
                    if not explanation:
                        explanation = get_best_explanation(quiz_state["current_question"]["q"])

                if explanation:
                    feedback += f"\n\nExplanation: {explanation}"

            # Move to next question
            quiz_state["round"] += 1

            # Get next question
            next_q, next_options, score_display, _, submit_visible = get_next_question()

            return next_q, next_options, score_display, feedback, submit_visible, "", ""  # Clear inputs

        except Exception as e:
            error_msg = f"Error processing answer: {e}"
            return quiz_state.get("current_question", "Error"), [], f"Score: {quiz_state['score']}/{quiz_state['round']-1}", error_msg, gr.update(visible=True), "", ""

    def finish_quiz():
        """Finish the quiz and show results"""
        if quiz_state["session_logs"]:
            correct_count = sum(1 for log in quiz_state["session_logs"] if log["correct"])
            total_count = len(quiz_state["session_logs"])
            percentage = (correct_count / total_count) * 100 if total_count > 0 else 0

            results = f"Quiz Complete!\n\nFinal Score: {correct_count}/{total_count} ({percentage:.1f}%)\n\n"

            # Add concept breakdown
            concept_stats = {}
            for log in quiz_state["session_logs"]:
                concept = log["concept"]
                if concept not in concept_stats:
                    concept_stats[concept] = {"correct": 0, "total": 0}
                concept_stats[concept]["total"] += 1
                if log["correct"]:
                    concept_stats[concept]["correct"] += 1

            results += "Performance by Topic:\n"
            for concept, stats in concept_stats.items():
                pct = (stats["correct"] / stats["total"]) * 100
                results += f"• {concept}: {stats['correct']}/{stats['total']} ({pct:.1f}%)\n"
        else:
            results = "Quiz completed with no logged results."

        return results, [], f"Final Score: {quiz_state['score']}/{quiz_state['total_rounds']}", "", gr.update(visible=False)

    # Create Gradio Interface
    with gr.Blocks(title="Dental Quiz System", theme=gr.themes.Soft()) as quiz_interface:
        gr.Markdown("# 🦷 Adaptive Dental Quiz System")
        gr.Markdown("Test your knowledge of pediatric dentistry with adaptive questioning!")

        with gr.Row():
            mode_radio = gr.Radio(
                choices=["Rule-based", "MCQ"],
                label="Quiz Mode",
                value="Rule-based",
                info="Rule-based: Adaptive text questions | MCQ: Multiple choice questions"
            )
            rounds_slider = gr.Slider(
                minimum=5,
                maximum=20,
                step=1,
                label="Number of Questions",
                value=10
            )

        start_btn = gr.Button("🚀 Start Quiz", variant="primary", size="lg")

        with gr.Row():
            with gr.Column(scale=2):
                question_display = gr.Textbox(
                    label="Question",
                    interactive=False,
                    lines=3
                )

                mcq_options = gr.Radio(
                    choices=[],
                    label="Select your answer:",
                    visible=False
                )

                text_answer = gr.Textbox(
                    label="Type your answer:",
                    placeholder="Enter your answer here...",
                    visible=False
                )

                submit_btn = gr.Button("Submit Answer", variant="secondary", visible=False)

            with gr.Column(scale=1):
                score_display = gr.Textbox(
                    label="Score",
                    interactive=False,
                    value="Score: 0/0"
                )

                feedback_display = gr.Textbox(
                    label="Feedback",
                    interactive=False,
                    lines=5
                )

        # Event handlers
        start_btn.click(
            fn=start_quiz_gradio,
            inputs=[mode_radio, rounds_slider],
            outputs=[question_display, mcq_options, score_display, feedback_display, submit_btn]
        )

        submit_btn.click(
            fn=submit_answer_gradio,
            inputs=[mcq_options, text_answer],
            outputs=[question_display, mcq_options, score_display, feedback_display, submit_btn, mcq_options, text_answer]
        )

        # Show appropriate input based on quiz mode
        def update_interface_mode(mode):
            if mode == "MCQ":
                return gr.update(visible=True), gr.update(visible=False)
            else:
                return gr.update(visible=False), gr.update(visible=True)

        mode_radio.change(
            fn=update_interface_mode,
            inputs=[mode_radio],
            outputs=[mcq_options, text_answer]
        )

    print("\nGradio interface created successfully!")
    print("To launch the interface, run: quiz_interface.launch()")

except ImportError:
    print("Gradio not available. Install with: !pip install gradio")
    quiz_interface = None

print("\n" + "="*60)
print("DENTAL QUIZ SYSTEM READY!")
print("="*60)
print("Features fixed:")
print("✓ Improved error handling and data validation")
print("✓ Better question selection algorithm")
print("✓ Enhanced user feedback and explanations")
print("✓ Robust input validation")
print("✓ Comprehensive logging and performance tracking")
print("✓ Graceful handling of missing data files")
print("✓ Improved Gradio interface with better UX")
print("="*60)


Gradio interface created successfully!
To launch the interface, run: quiz_interface.launch()

DENTAL QUIZ SYSTEM READY!
Features fixed:
✓ Improved error handling and data validation
✓ Better question selection algorithm
✓ Enhanced user feedback and explanations
✓ Robust input validation
✓ Comprehensive logging and performance tracking
✓ Graceful handling of missing data files
✓ Improved Gradio interface with better UX


In [15]:
quiz_interface.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://dbfade965d2eb60357.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [16]:
!pip install -q sentence-transformers rapidfuzz scikit-learn

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━[0m [32m1.8/3.3 MB[0m [31m81.4 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.3/3.3 MB[0m [31m41.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.3/3.3 MB[0m [31m41.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [17]:
QA_BANK = [
    {"q": "What is pericoronitis?", "a": "Inflammation of the soft tissues around a partially erupted tooth, often mandibular third molars."},
    {"q": "List clinical signs of pericoronitis.", "a": "Pain, swelling, trismus, erythema, and sometimes purulent discharge."},
    # ... add all your items
]

In [18]:
from typing import List, Dict, Tuple
import numpy as np

# Try transformer embeddings
try:
    from sentence_transformers import SentenceTransformer
    EMB_MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")  # ~80MB, fast CPU inference
    use_embeddings = True
except Exception as e:
    print("Embedding model unavailable, will use TF-IDF fallback.", e)
    use_embeddings = False

# Build indices
QUESTIONS = [x["q"] for x in QA_BANK]
ANSWERS   = [x["a"] for x in QA_BANK]

if use_embeddings:
    Q_EMB = EMB_MODEL.encode(QUESTIONS, batch_size=64, show_progress_bar=False, normalize_embeddings=True)
else:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    VECT = TfidfVectorizer(ngram_range=(1,2), min_df=1, max_df=0.95)
    Q_TFIDF = VECT.fit_transform(QUESTIONS)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [19]:
from sklearn.metrics.pairwise import cosine_similarity
def retrieve_question(user_text: str, k: int = 3) -> List[Tuple[int, float]]:
    if use_embeddings:
        qv = EMB_MODEL.encode([user_text], normalize_embeddings=True)
        sims = (Q_EMB @ qv.T).ravel()  # cosine because normalized
    else:
        qv = VECT.transform([user_text])
        sims = cosine_similarity(Q_TFIDF, qv).ravel()
    idx = np.argsort(-sims)[:k]
    return [(int(i), float(sims[i])) for i in idx]

In [20]:
# Example of using the retrieve_question function
user_query = "What are the symptoms of pericoronitis?"
similar_questions = retrieve_question(user_query)

print(f"User Query: {user_query}\n")
print("Similar Questions found in QA_BANK:")
for index, similarity_score in similar_questions:
    print(f"- Question: {QUESTIONS[index]}")
    print(f"  Answer: {ANSWERS[index]}")
    print(f"  Similarity: {similarity_score:.4f}")

User Query: What are the symptoms of pericoronitis?

Similar Questions found in QA_BANK:
- Question: List clinical signs of pericoronitis.
  Answer: Pain, swelling, trismus, erythema, and sometimes purulent discharge.
  Similarity: 0.8926
- Question: What is pericoronitis?
  Answer: Inflammation of the soft tissues around a partially erupted tooth, often mandibular third molars.
  Similarity: 0.6995


In [21]:
import re
from rapidfuzz.distance import Levenshtein

def normalize(s: str) -> str:
    return re.sub(r'\W+', ' ', s.lower()).strip()

def token_overlap(a: str, b: str) -> float:
    A, B = set(normalize(a).split()), set(normalize(b).split())
    if not A or not B: return 0.0
    return len(A & B) / len(A | B)

def answer_correct(user_ans: str, gold_ans: str, overlap_thr: float=0.45, lev_thr: float=0.30) -> bool:
    ua, ga = normalize(user_ans), normalize(gold_ans)
    # token Jaccard
    j = token_overlap(ua, ga)
    if j >= overlap_thr:
        return True
    # Levenshtein ratio (1 - normalized distance)
    max_len = max(len(ua), len(ga)) or 1
    lev_ratio = 1 - (Levenshtein.distance(ua, ga) / max_len)
    return lev_ratio >= (1 - lev_thr)

In [23]:
# Define a simple sanity test set
sanity_tests = [
    ("What are the symptoms of pericoronitis?", "Pain, swelling, trismus"),
    ("define pericoronitis", "Inflammation of the soft tissues around a partially erupted tooth"),
    ("tell me about dental caries", "Caries proceeds in episodes of demineralization and re-deposition"),
]

print("Running Sanity Tests:")
print("======================")

for query, expected_answer in sanity_tests:
    print(f"\nQuery: {query}")

    # 1. Test Question Retrieval
    similar_questions = retrieve_question(query, k=1)

    if similar_questions:
        best_match_index, similarity_score = similar_questions[0]
        matched_question = QUESTIONS[best_match_index]
        actual_answer_in_qa_bank = ANSWERS[best_match_index]

        print(f"  Best matched QA_BANK question: {matched_question} (Similarity: {similarity_score:.4f})")

        # 2. Test Answer Correctness using the provided expected answer
        is_ans_correct = answer_correct(expected_answer, actual_answer_in_qa_bank)
        print(f"  Is provided expected answer '{expected_answer}' correct for matched answer '{actual_answer_in_qa_bank}'? {is_ans_correct}")

    else:
        print("  No similar question found in QA_BANK.")

print("\n======================")
print("Sanity Tests Complete.")

Running Sanity Tests:

Query: What are the symptoms of pericoronitis?
  Best matched QA_BANK question: List clinical signs of pericoronitis. (Similarity: 0.8926)
  Is provided expected answer 'Pain, swelling, trismus' correct for matched answer 'Pain, swelling, trismus, erythema, and sometimes purulent discharge.'? False

Query: define pericoronitis
  Best matched QA_BANK question: What is pericoronitis? (Similarity: 0.9267)
  Is provided expected answer 'Inflammation of the soft tissues around a partially erupted tooth' correct for matched answer 'Inflammation of the soft tissues around a partially erupted tooth, often mandibular third molars.'? True

Query: tell me about dental caries
  Best matched QA_BANK question: What is pericoronitis? (Similarity: 0.2347)
  Is provided expected answer 'Caries proceeds in episodes of demineralization and re-deposition' correct for matched answer 'Inflammation of the soft tissues around a partially erupted tooth, often mandibular third molars.'? F

In [24]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def plot_quiz_performance_by_concept(quiz_logs):
    """Plots quiz performance by concept."""
    if not quiz_logs:
        print("No quiz logs available to plot.")
        return

    df = pd.DataFrame(quiz_logs)

    if df.empty:
        print("Quiz logs DataFrame is empty.")
        return

    # Calculate performance by concept
    concept_performance = df.groupby('concept')['correct'].agg(['sum', 'count']).reset_index()
    concept_performance['percentage'] = (concept_performance['sum'] / concept_performance['count']) * 100

    if concept_performance.empty:
        print("No concept performance data to plot.")
        return

    # Sort by percentage for better visualization
    concept_performance = concept_performance.sort_values('percentage', ascending=False)

    plt.figure(figsize=(10, 6))
    sns.barplot(x='percentage', y='concept', data=concept_performance, palette='viridis')
    plt.xlabel("Accuracy (%)")
    plt.ylabel("Concept")
    plt.title("Quiz Performance by Concept")
    plt.xlim(0, 100)
    plt.tight_layout()
    plt.show()

# Example Usage (assuming you have run a quiz and quiz_logs is populated):
# plot_quiz_performance_by_concept(quiz_logs)