In [14]:
from joblib import load
from sentence_transformers import SentenceTransformer

In [15]:
# Notebook configuration: input locations and the embedding model.
# Both paths are relative, so they resolve against the kernel's working directory.
# JSON Lines file; each record is read below for its "query" field.
queries_path = '../../fau_rag_opt/knowledgebase/extracted_queries.jsonl'
# Serialized classifier, loaded with joblib in the next cell.
model_path = '../../fau_rag_opt/classifiers/semantic_classifier_mlp.joblib'
# Sentence embedding model used to turn query text into feature vectors.
# NOTE(review): presumably the same embedder the classifier was trained with — confirm.
embedder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

In [16]:
# Restore the trained classifier; it is used below via .predict() on query embeddings.
# joblib.load unpickles the file, which can execute arbitrary code, so only
# point model_path at artifacts from a trusted source.
# NOTE(review): the filename suggests an MLP classifier — confirm the estimator type.
classifier = load(model_path)

In [17]:
import random
import json

# Read one query string per JSON Lines record.
# encoding='utf-8' is explicit because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode non-ASCII query text.
# Blank lines (e.g. a trailing empty line at end of file) are skipped,
# since json.loads() raises JSONDecodeError on empty input.
with open(queries_path, 'r', encoding='utf-8') as file:
    queries = [json.loads(line)["query"] for line in file if line.strip()]

In [18]:
# Draw a small, reproducible sample of queries to spot-check the classifier.
SAMPLE_SIZE = 10
SAMPLE_SEED = 42  # fixed seed so a fresh "Restart & Run All" shows the same queries

# A dedicated Random instance keeps the seed from affecting the global RNG.
# min() avoids the ValueError random.sample raises when fewer than
# SAMPLE_SIZE queries are available.
sampled_queries = random.Random(SAMPLE_SEED).sample(
    queries, min(SAMPLE_SIZE, len(queries))
)

In [19]:
# Embed the sampled queries (with a progress bar), then have the classifier
# produce one prediction per embedding.
X_sampled = embedder.encode(
    sampled_queries,
    show_progress_bar=True,
)
predictions = classifier.predict(X_sampled)

Batches: 100%|██████████| 1/1 [00:01<00:00,  1.21s/it]


In [20]:
# Human-readable names for the classifier's integer outputs.
# NOTE(review): presumably mirrors the label encoding used at training time — confirm.
label_map = {
    0: "dense",
    1: "sparse",
    2: "hybrid",
}

# Pair every sampled query with the name of its predicted label;
# an unmapped prediction raises KeyError.
results = list(zip(sampled_queries, map(label_map.__getitem__, predictions)))

In [21]:
# Print one line per sampled query showing the label the classifier assigned.
for query, label in results:
    message = f"🧠 \"{query}\" → predicted: {label}"
    print(message)

🧠 "What kind of courses does FAU offer for its students?" → predicted: dense
🧠 "I've heard about natural skincare products causing allergies. Can you tell me more about this?" → predicted: dense
🧠 "I've heard about FAU and its research projects. Can you tell me more about the research areas and projects at FAU?" → predicted: dense
🧠 "I'm considering an international study or internship experience. What are some of the key things to keep in mind when preparing for such an opportunity?" → predicted: dense
🧠 "I'm interested in learning more about the elective seminars offered in International Economic Studies. Can you tell me about the different topics and what students can expect to learn?" → predicted: dense
🧠 "What kind of support does FAU offer for multimedia-related questions?" → predicted: hybrid
🧠 "I've been wondering about the role of startups in the job market. Do they provide employment opportunities for disadvantaged workers?" → predicted: dense
🧠 "I heard there's a big event c