In [2]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from pymongo import MongoClient
from PIL import Image
import numpy as np
import pytesseract
import fitz
import os
import io
from docx import Document
import regex as re
import ollama
import json
client = MongoClient("mongodb+srv://daktrboys05_db_user:gdgclubproject@to-do-list.qmqixqe.mongodb.net/")
db = client["tries_db"]
questions_collection = db["questions"]

  from .autonotebook import tqdm as notebook_tqdm


FOR UPDATING DB WITH ANSWER KEY

In [3]:
file_path = "test.pdf"
answer_file_path = "test_answer.pdf"

In [4]:
def extract_text_from_docx(docx_path):
    doc = Document(docx_path)
    collected = []

    # Body paragraphs
    for para in doc.paragraphs:
        text = para.text.strip()
        if text:
            collected.append(text)

    # Tables
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                cell_text = cell.text.strip()
                if cell_text:
                    collected.append(cell_text)

    # Headers & footers
    for section in doc.sections:
        header = section.header
        footer = section.footer

        for para in header.paragraphs:
            if para.text.strip():
                collected.append(para.text.strip())

        for para in footer.paragraphs:
            if para.text.strip():
                collected.append(para.text.strip())

    # Embedded images → OCR
    for rel in doc.part.rels.values():
        if "image" in rel.target_ref:
            image_bytes = rel.target_part.blob
            img = Image.open(io.BytesIO(image_bytes))
            ocr_text = pytesseract.image_to_string(
                img,
                lang="eng",
                config="--psm 6"
            )
            if ocr_text.strip():
                collected.append(ocr_text.strip())

    return "\n".join(collected)

In [5]:
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    texts = []

    for page_num, page in enumerate(doc):
        # Digital text
        page_text = page.get_text().strip()
        if page_text:
            texts.append(page_text)

        # OCR embedded images
        images = page.get_images(full=True)
        for img in images:
            xref = img[0]
            base_image = doc.extract_image(xref)
            image_bytes = base_image["image"]

            img_pil = Image.open(io.BytesIO(image_bytes))
            ocr_text = pytesseract.image_to_string(
                img_pil,
                lang="eng",
                config="--psm 6"
            ).strip()

            if ocr_text:
                texts.append(ocr_text)

        # Full-page OCR fallback
        if not page_text and not images:
            pix = page.get_pixmap(dpi=300)
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            ocr_text = pytesseract.image_to_string(img, lang="eng").strip()

            if ocr_text:
                texts.append(ocr_text)

    return texts


In [6]:
def extract_text_from_image(image_path):
    img = Image.open(image_path)
    return pytesseract.image_to_string(img).strip()

SPLITTING FOR TEXT FROM ANSWER KEY

In [7]:
def split_by_questions(text: str) -> list[str]:
    
    pattern = (
        r"(\[\d+\s*marks?\]\s*Question:.*?)(?=\[\d+\s*marks?\]\s*Question:|$)"
    )
    matches = re.findall(pattern, text, flags=re.IGNORECASE | re.DOTALL)

    question_blocks = []
    for idx, block in enumerate(matches, start=1):
        question_blocks.append(
            f"[QUESTION_{idx}_START]\n{block.strip()}"
        )
    return question_blocks

SPLITTING FOR TEXT FROM ANSWERS BY STUDENTS

In [8]:
def split_by_answers(text: str) -> list[str]:
    pattern = r"(Question\s+\d+[\s\S]*?)(?=Question\s+\d+|$)"
    matches = re.findall(pattern, text, flags=re.IGNORECASE)

    answer_blocks = []
    for idx, block in enumerate(matches, start=1):
        answer_blocks.append(
            f"[ANSWER_{idx}_START]\n{block.strip()}"
        )

    return answer_blocks

FUNCTION TO EXTRACT TEXT FROM ANSWER KEY

In [9]:
def extract_text(file_path):
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".pdf":
        pdf_texts = extract_text_from_pdf(file_path)
        full_text = "\n".join(pdf_texts)
        return split_by_questions(full_text)
    
    elif ext == ".docx":
        return (list(extract_text_from_docx(file_path)))

    elif ext in [".png", ".jpg", ".jpeg"]:
        return (list(extract_text_from_image(file_path)))

    else:
        raise ValueError("Unsupported file type")

FUNCTION TO EXTRACT TEXT FROM STUDENT ANSWERS

In [10]:
def extract_student_text(answer_file_path):
    ext = os.path.splitext(answer_file_path)[1].lower()

    if ext == ".pdf":
        pdf_texts = extract_text_from_pdf(answer_file_path)
        full_text = "\n".join(pdf_texts)
        return split_by_answers(full_text)

    elif ext == ".docx":
        full_text = extract_text_from_docx(answer_file_path)
        return split_by_answers(full_text)

    elif ext in [".png", ".jpg", ".jpeg"]:
        full_text = extract_text_from_image(answer_file_path)
        return split_by_answers(full_text)

    else:
        raise ValueError("Unsupported file type")


CHUNKING THE EXTRACTED TEXT 

In [11]:
class embeddingManager:
  def __init__(self,model_name : str = "all-MiniLM-L6-v2"):
    #hugging face model for sentence embedding
    self.model_name = model_name
    self.model = None
    self._load_model()

  def _load_model(self):
    try:
      print(f"Loading embedding model: {self.model_name}")
      self.model = SentenceTransformer(self.model_name)
      print(f"Embedding model loaded successfully.Embedding dimension: {self.model.get_sentence_embedding_dimension()}")
    except Exception as e:
      print(f"Error loading embedding model: {e}")

  def generate_embeddings(self,texts:list[str]) -> np.ndarray:#returns numpy array
    if self.model is None:
      self._load_model()
    print(f"Generating embedding for {len(texts)} texts....")
    embeddings = self.model.encode(texts, show_progress_bar = True)
    print("Embedding generated successfully.")
    return embeddings

EMBEDDING FOR ANSWER KEY

In [12]:
embedding_manager = embeddingManager()
texts = extract_text(file_path)
print("number of records: ",len(texts))
print("extracted texts:", texts)
for text in texts:
    print("starts:",text)
    print("\n")
embedding_manager.generate_embeddings(texts)

Loading embedding model: all-MiniLM-L6-v2
Embedding model loaded successfully.Embedding dimension: 384
number of records:  6
extracted texts: ['[QUESTION_1_START]\n[10 marks]\nQuestion: Explain the concept of ACID properties in database transactions. Discuss how each property ensures data\nconsistency and provide real-world scenarios where violation of these properties could lead to problems.\nEvaluation Rubric:\nTrait\nWeight\nDescription\nConcept Coverage\n40%\nComprehensive explanation of all 4 ACID properties (Atomicity, Consistency, Isolation\nReal-World Application\n30%\nClear examples of transactions and scenarios where violations cause problems\nLogical Flow\n20%\nWell-organized answer with clear connections between properties\nClarity & Language\n10%\nClear writing, appropriate terminology usage\nQuestion 2', '[QUESTION_2_START]\n[8 marks]\nQuestion: Write a SQL query to find the top 5 departments by average salary, excluding departments with fewer than\n10 employees. Include 

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.70it/s]

Embedding generated successfully.





array([[ 0.01815865,  0.03243175, -0.08225077, ...,  0.04441799,
         0.02949115, -0.05169402],
       [ 0.01402685,  0.04991702,  0.10901149, ..., -0.08241593,
        -0.01403602,  0.08955504],
       [-0.00900713, -0.00058494, -0.02288687, ...,  0.06090765,
        -0.02194507,  0.0628505 ],
       [-0.0082739 ,  0.03229178, -0.04765171, ..., -0.06478573,
        -0.04062334,  0.03412547],
       [ 0.05345527,  0.08826678, -0.02615643, ...,  0.0467669 ,
        -0.07550272,  0.01124337],
       [ 0.01976677,  0.10287796,  0.00367258, ..., -0.1330917 ,
        -0.05959409, -0.00589298]], shape=(6, 384), dtype=float32)

EMBEDDING FOR ANSWERS FROM STUDENT

In [13]:
embedding_manager = embeddingManager()

Loading embedding model: all-MiniLM-L6-v2
Embedding model loaded successfully.Embedding dimension: 384


GRADING TECHNICAL QUESTION

In [14]:
def semantic_fallback(question, student_answer, embedding_manager) -> int:
    """
    Fallback scoring using semantic similarity
    when rubric config or LLM grading fails.
    """

    if not student_answer.strip():
        return 0

    student_emb = embedding_manager.generate_embeddings([student_answer])
    anchor_emb = embedding_manager.generate_embeddings([question["question_text"]])

    similarity = cosine_similarity(student_emb, anchor_emb)[0][0]

    max_marks = question["max_marks"]

    # Convert similarity → marks
    score = int(similarity * max_marks)

    # Safety floor so good answers never get 0
    if score == 0:
        score = max(1, int(0.3 * max_marks))

    return score

In [15]:
def keyword_score(student_answer: str, keywords: list[str]) -> float:
    if not keywords:
        return 0.0

    text = student_answer.lower()
    hits = sum(1 for kw in keywords if kw.lower() in text)
    return hits / len(keywords)


In [16]:
def solution_chunk_score(
    student_answer: str,
    solution_chunks: list[str],
    embedding_manager
) -> float:

    if not solution_chunks:
        return 0.0

    student_emb = embedding_manager.generate_embeddings([student_answer])[0]

    matched = 0
    for chunk in solution_chunks:
        chunk_emb = embedding_manager.generate_embeddings([chunk])[0]
        sim = cosine_similarity([student_emb], [chunk_emb])[0][0]

        if sim >= 0.65:   # semantic threshold
            matched += 1

    return matched / len(solution_chunks)



In [17]:
def numeric_rule_score(student_answer: str, numeric_rules: dict) -> float:
    if not numeric_rules:
        return 0.0

    expected_numbers = numeric_rules.get("expected_numbers", [])

    if not expected_numbers:
        return 1.0   # nothing expected → full marks

    text = student_answer.lower()
    matched = 0

    for num in expected_numbers:
        if str(num).lower() in text:
            matched += 1

    return matched / len(expected_numbers)


In [18]:
def semantic_similarity_score(
    student_answer: str,
    model_answer: str,
    embedding_manager
) -> float:

    student_emb = embedding_manager.generate_embeddings([student_answer])
    model_emb = embedding_manager.generate_embeddings([model_answer])

    return cosine_similarity(student_emb, model_emb)[0][0]


GRADING WITH LLM

In [19]:
def grade_technical_question(
    student_answer: str,
    technical_config: dict,
    embedding_manager,
    max_marks: int
) -> int:

    # 1. Semantic similarity
    semantic_score = semantic_similarity_score(
        student_answer,
        technical_config["model_answer"],
        embedding_manager
    )

    # 2. Solution chunk coverage
    chunk_score = solution_chunk_score(
        student_answer,
        technical_config.get("solution_chunks", []),
        embedding_manager
    )

    # 3. Keyword coverage
    kw_score = keyword_score(
        student_answer,
        technical_config.get("keywords", [])
    )

    # 4. Numeric / rule-based score
    numeric_score = numeric_rule_score(
        student_answer,
        technical_config.get("numeric_rules", {})
    )

    # Final weighted score
    final_score = (
        0.4 * semantic_score +
        0.3 * chunk_score +
        0.2 * kw_score +
        0.1 * numeric_score
    ) * max_marks

    return round(final_score)


In [20]:
def grade_descriptive_question(
    question: dict,
    student_answer: str,
    embedding_manager
) -> int:

    print("Grading descriptive question...")

    max_marks = question["max_marks"]

    descriptive_cfg = question.get("descriptive_config")
    if descriptive_cfg is None:
        print("⚠️ descriptive_config missing → semantic fallback")
        return semantic_fallback(question, student_answer, embedding_manager)

    rubric = descriptive_cfg.get("rubric")
    if not rubric:
        print("⚠️ rubric missing → semantic fallback")
        return semantic_fallback(question, student_answer, embedding_manager)

    if not student_answer.strip():
        return 0

    rubric_prompt = ""
    trait_max_map = {}

    for r in rubric:
        trait = r["trait"]
        trait_marks = round(r["weight"] * max_marks)
        trait_max_map[trait] = trait_marks

        rubric_prompt += f"""
Trait: {trait}
Max Marks: {trait_marks}
Description: {r['description']}
"""

    prompt = f"""
You are an experienced university examiner.

Evaluate the student answer STRICTLY using the rubric below.

Question:
{question['question_text']}

Rubric (use ONLY these traits and max marks):
{rubric_prompt}

Student Answer:
{student_answer}

SCORING RULES:
- Assign INTEGER marks only.
- Score each trait independently.
- Use values from 0 up to Max Marks.
- Partial credit is allowed.
- Do NOT invent traits.
- If the answer meaningfully addresses a trait, award non-zero marks.

OUTPUT FORMAT (STRICT JSON ONLY):
{{
  "scores": {{
    "<trait_name>": <integer_marks>
  }}
}}
"""

    try:
        response = ollama.generate(
            model="llama3:latest",
            prompt=prompt.strip()
        )

        data = json.loads(response["response"])
        scores = data.get("scores", {})

        total = 0
        for trait, max_trait_marks in trait_max_map.items():
            awarded = int(scores.get(trait, 0))
            awarded = max(0, min(awarded, max_trait_marks))
            total += awarded

        return min(total, max_marks)

    except Exception as e:
        print("⚠️ LLM failed:", e)
        return semantic_fallback(question, student_answer, embedding_manager)


FETCHING FROM BACKEND

In [21]:
def fetch_question(exam_id: str, question_number: int):
    question = questions_collection.find_one(
        {
            "exam_id": exam_id,
            "question_number": question_number
        },
        {
            "_id": 0
        }
    )
    return question


In [22]:
exam_id = "CS_ADV_2025"
final_results = []

answer_texts = extract_student_text(answer_file_path)
print("Extracted student answers:", answer_texts)
print(f"Total answers extracted: {len(answer_texts)}") 

for i, student_answer in enumerate(answer_texts):
    question_number = i + 1

    question = fetch_question(exam_id, question_number)
    print(
        f"Q{question_number} question_type raw = {repr(question['question_type'])}"
    )
    if question is None:
        continue

    marks = 0
    
    if question["question_type"] == "TECHNICAL":
        tech = question["technical_config"]
        marks = grade_technical_question(
            student_answer=student_answer,
            technical_config=tech,
            embedding_manager=embedding_manager,
            max_marks=question["max_marks"]
    )


    elif question["question_type"] == "DESCRIPTIVE":
        marks = grade_descriptive_question(question, student_answer, embedding_manager)

    final_results.append({
        "question_number": question_number,
        "marks_awarded": marks,
        "max_marks": question["max_marks"]
    })
for i in final_results:
    print(final_results)

Extracted student answers: ['[ANSWER_1_START]\nQuestion 1\nAnswer:\nDatabase transactions are governed by the ACID properties to ensure reliability and data integrity.\nThese properties are Atomicity, Consistency, Isolation, and Durability.\n1. Atomicity: This property ensures that a transaction is treated as a single unit of work. Either all\nsteps in the transaction complete successfully, or none of them do. If a failure occurs during the\ntransaction, the database is rolled back to its state before the transaction started.\nReal-world violation: Consider a bank transfer where money is deducted from Account A but the system\ncrashes before adding it to Account B. Without atomicity, the money simply vanishes. Atomicity\nensures that if the credit fails, the debit is rolled back.\n2. Consistency: This ensures that a transaction brings the database from one valid state to another,\nmaintaining all defined rules, constraints, and cascades.\nReal-world violation: If a database requires ev

Batches: 100%|██████████| 1/1 [00:00<00:00,  2.15it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.38it/s]


Embedding generated successfully.
Q2 question_type raw = 'TECHNICAL'
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.40it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.42it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.63it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  5.69it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  6.45it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 14.98it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.50it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 11.17it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  7.10it/s]


Embedding generated successfully.
Q3 question_type raw = 'DESCRIPTIVE'
Grading descriptive question...
⚠️ descriptive_config missing → semantic fallback
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.22it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.47it/s]


Embedding generated successfully.
Q4 question_type raw = 'TECHNICAL'
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.46it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.66it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.66it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  5.05it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.85it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.94it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.37it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.67it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.94it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.85it/s]


Embedding generated successfully.
Q5 question_type raw = 'DESCRIPTIVE'
Grading descriptive question...
⚠️ LLM failed: Expecting value: line 1 column 1 (char 0)
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  2.09it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.07it/s]


Embedding generated successfully.
Q6 question_type raw = 'TECHNICAL'
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.32it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  1.71it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.53it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.02it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  5.31it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  5.80it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  6.55it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00, 15.11it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  5.03it/s]


Embedding generated successfully.
Generating embedding for 1 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  3.25it/s]

Embedding generated successfully.
[{'question_number': 1, 'marks_awarded': 4, 'max_marks': 10}, {'question_number': 2, 'marks_awarded': 5, 'max_marks': 8}, {'question_number': 3, 'marks_awarded': 7, 'max_marks': 12}, {'question_number': 4, 'marks_awarded': 4, 'max_marks': 7}, {'question_number': 5, 'marks_awarded': 5, 'max_marks': 9}, {'question_number': 6, 'marks_awarded': 3, 'max_marks': 6}]
[{'question_number': 1, 'marks_awarded': 4, 'max_marks': 10}, {'question_number': 2, 'marks_awarded': 5, 'max_marks': 8}, {'question_number': 3, 'marks_awarded': 7, 'max_marks': 12}, {'question_number': 4, 'marks_awarded': 4, 'max_marks': 7}, {'question_number': 5, 'marks_awarded': 5, 'max_marks': 9}, {'question_number': 6, 'marks_awarded': 3, 'max_marks': 6}]
[{'question_number': 1, 'marks_awarded': 4, 'max_marks': 10}, {'question_number': 2, 'marks_awarded': 5, 'max_marks': 8}, {'question_number': 3, 'marks_awarded': 7, 'max_marks': 12}, {'question_number': 4, 'marks_awarded': 4, 'max_marks': 7




COSINE SIMILARITY FOR STUDENT ANSWER

In [23]:
def compute_cosine_similarity(
    student_embeddings: np.ndarray,
    reference_embeddings: np.ndarray
) -> np.ndarray:
    return cosine_similarity(student_embeddings, reference_embeddings)

GRADING ANSWERS

In [24]:
def grade_answers(
    similarity_matrix: np.ndarray,
    max_marks: int = 10
) -> list[int]:

    scores = []

    for i in range(len(similarity_matrix)):
        sim = similarity_matrix[i][i] 

        if sim >= 0.85:
            marks = max_marks
        elif sim >= 0.70:
            marks = int(0.7 * max_marks)
        elif sim >= 0.50:
            marks = int(0.4 * max_marks)
        else:
            marks = 0

        scores.append(marks)

    return scores


SAMPLE IP

In [25]:
# Student answers (from your extractor)
student_answers = extract_student_text(answer_file_path)

# Reference answers (from answer key / MongoDB / file)
reference_answers = [
    "ACID properties ensure atomicity, consistency, isolation, and durability...",
    "SELECT department, AVG(salary) FROM employees GROUP BY department ORDER BY AVG(salary) DESC LIMIT 5;",
    "Supervised learning uses labeled data while unsupervised learning does not...",
    "Use a hash set to find the longest subarray in O(n) time...",
    "Network latency affects throughput and response time...",
    "db.orders.aggregate([...]) groups and filters documents..."
]

embedding_manager = embeddingManager()

# Generate embeddings
student_embeddings = embedding_manager.generate_embeddings(student_answers)
reference_embeddings = embedding_manager.generate_embeddings(reference_answers)

# Compute similarity
similarity_matrix = compute_cosine_similarity(
    student_embeddings,
    reference_embeddings
)

# Grade
scores = grade_answers(similarity_matrix)

print("Similarity matrix:\n", similarity_matrix)
print("Final scores:", scores)


Loading embedding model: all-MiniLM-L6-v2
Embedding model loaded successfully.Embedding dimension: 384
Generating embedding for 6 texts....


Batches: 100%|██████████| 1/1 [00:01<00:00,  1.35s/it]


Embedding generated successfully.
Generating embedding for 6 texts....


Batches: 100%|██████████| 1/1 [00:00<00:00,  4.62it/s]

Embedding generated successfully.
Similarity matrix:
 [[ 3.9327002e-01 -4.2582285e-03  4.3389931e-02 -6.7810401e-02
   7.1838066e-02  8.7313883e-02]
 [-8.2793981e-02  8.1464076e-01 -3.4929711e-02  4.7338054e-02
   4.8571780e-02  2.6686987e-01]
 [ 1.6020734e-02 -1.4208573e-01  6.6754258e-01 -1.7946105e-02
  -9.4776250e-02  3.8414530e-02]
 [-2.0837912e-02 -6.9741458e-02  3.5219837e-02  5.3178853e-01
   7.7400036e-02  8.5538082e-02]
 [ 1.5591764e-01 -5.4917157e-02 -6.9770478e-02 -2.7748611e-02
   5.8341110e-01 -2.7397433e-02]
 [-7.3646575e-02  2.7191347e-01  1.7034076e-04  9.5546991e-02
  -3.4243561e-02  6.2526649e-01]]
Final scores: [0, 7, 4, 4, 4, 4]



