In [None]:
import os
import random
import re
from time import sleep

import faiss
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics import classification_report, accuracy_score
from tqdm import tqdm

# Credentials come from the environment (GROQ_API_KEY) so that no secret is
# ever stored in the notebook or leaked through a shared copy of it.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Retrieval corpus. `texts` / `labels` are looked up by the row ids that the
# FAISS index returns, so rows must not be dropped or reordered here.
# NOTE(review): assumes faiss_index.bin was built from exactly these rows in
# this order -- confirm against the notebook that created the index.
index = faiss.read_index("faiss_index.bin")
df = pd.read_excel("train_split_new.xlsx")
texts = df["text"].tolist()
labels = df["label"].tolist()
model = SentenceTransformer('all-MiniLM-L6-v2')

# Label vocabulary shown to the LLM. Missing labels are skipped and everything
# is cast to str so sorting and joining cannot fail on NaN / mixed-type cells.
categories = sorted(df['label'].dropna().astype(str).unique())
categories_text = ", ".join(categories)

In [3]:
def get_similar_examples(query, k=3):
    """Retrieve the training examples closest to `query` from the FAISS index.

    Args:
        query: Text to embed and search for.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A tuple `(top_examples, top_labels, distances)` holding the retrieved
        texts, their labels, and the matching distances as reported by the
        index. Fewer than `k` entries are returned when the index cannot
        supply `k` neighbours.
    """
    query_emb = model.encode([query], convert_to_numpy=True).astype("float32")

    distances, indices = index.search(query_emb, k)

    # FAISS pads the result with id -1 when fewer than k neighbours exist.
    # Indexing a Python list with -1 would silently return the *last* training
    # row, so those padding slots are dropped before the lookup.
    hit_ids = indices[0]
    found = hit_ids >= 0
    hit_ids = hit_ids[found]

    top_examples = [texts[i] for i in hit_ids]
    top_labels = [labels[i] for i in hit_ids]

    return top_examples, top_labels, distances[0][found]

In [4]:
def create_few_shot_prompt_structure(n_examples=5, random_state=42):
    """Build the in-context-learning block from randomly sampled training rows.

    Each sampled row of the global `df` becomes an
    ``Input Text: ... / Output: ...`` pair. The returned string ends with an
    open ``Input Text: `` stub, mirroring the few-shot prompt structure of the
    original notebook.

    Args:
        n_examples: Number of rows to sample. Clamped to the size of `df` so
            small frames do not raise.
        random_state: Seed forwarded to `DataFrame.sample` so the prompt is
            reproducible across kernel restarts. Pass None for a fresh random
            draw on every call.

    Returns:
        The few-shot prompt block as a single string.
    """
    n_rows = max(0, min(n_examples, len(df)))
    sampled = df.sample(n=n_rows, random_state=random_state)

    # f-strings (rather than `+`) keep this working when a cell is not a str.
    example_blocks = [
        f"Input Text: {row['text']}\nOutput: {row['label']}\n"
        for _, row in sampled.iterrows()
    ]

    return "".join(example_blocks) + "Input Text: "

# Build the shared few-shot block once; hybrid_classify reuses it for every query.
prompt_examples_icl = create_few_shot_prompt_structure()

# Show the generated block so the sampled examples can be inspected.
print("Few-Shot Prompt Structure Created:", prompt_examples_icl, sep="\n")


Few-Shot Prompt Structure Created:
Input Text: لتطوير مهارات الطلاب يجب ان نستخدم interactive platforms خلال online courses لتحقيق افضل النتايج
Output: education
Input Text: هل لاحظت كيف ان economic indicators توثر علي investment decisions في الاسواق العالميه
Output: finance
Input Text: من المهم ان تقارن بين الاسعار في shopping malls المختلفه للحصول علي افضل discounts علي المنتجات
Output: shopping
Input Text: هل تعتقد ان personalized medicine ستغير طريقه تعاملنا مع العلاجات في cardiology خلال السنوات القادمه
Output: medical
Input Text: عند اختيار grocery shopping حاول استخدام mobile apps لتوفير المال والوقت
Output: shopping
Input Text: 


In [5]:
def create_hybrid_prompt(query, rag_examples, rag_labels, few_shot_examples):
    """Assemble the classification prompt from few-shot and retrieved examples.

    Args:
        query: Text to classify.
        rag_examples: Retrieved example texts, paired with `rag_labels`.
        rag_labels: Labels of the retrieved examples.
        few_shot_examples: Pre-built few-shot block. A trailing, unanswered
            ``Input Text:`` stub is removed because in this prompt the block
            is followed by the retrieved examples, not by the query.

    Returns:
        The full prompt string, ending with ``Output: `` for the model to
        complete.
    """
    few_shot_block = few_shot_examples
    if isinstance(few_shot_block, str):
        trimmed = few_shot_block.rstrip()
        open_stub = "Input Text:"
        # The stub only makes sense when the query is appended directly after
        # it; left in place it shows the model an empty, unanswered example.
        if trimmed.endswith(open_stub):
            few_shot_block = trimmed[: -len(open_stub)].rstrip()

    rag_section = "\n=== Similar Examples from Database ===\n"
    for i, (example, label) in enumerate(zip(rag_examples, rag_labels)):
        rag_section += f"Similar Example {i+1}:\nText: {example}\nLabel: {label}\n\n"

    prompt = f"""
Classify the following Combined Arabic and English text into one of these categories: {categories_text}.

Your output should only be one of these exact category names like in the examples below.

{few_shot_block}

{rag_section}

Now classify this text:
"{query}"

Output: """

    return prompt

In [14]:
def hybrid_classify(query, k=3, llm_model="llama-3.3-70b-versatile",
                    temperature=0.1, max_tokens=50):
    """Classify `query` with retrieved neighbours plus the few-shot block.

    Args:
        query: Text to classify.
        k: Number of similar training examples to retrieve for the prompt.
        llm_model: Chat model name sent to the Groq client.
        temperature: Sampling temperature for the completion.
        max_tokens: Upper bound on generated tokens.

    Returns:
        A tuple `(predicted_label, rag_examples, rag_labels, prompt)`. Note
        that the last element is the prompt that was sent, not the distances.
    """
    # Distances are not used for prompting, only the examples and labels.
    rag_examples, rag_labels, _distances = get_similar_examples(query, k)

    prompt = create_hybrid_prompt(query, rag_examples, rag_labels, prompt_examples_icl)

    response = client.chat.completions.create(
        model=llm_model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
        max_tokens=max_tokens,
    )

    # The completion may come back without choices or with `content=None`;
    # treat both as an empty answer instead of crashing on `.strip()`.
    content = response.choices[0].message.content if response.choices else None
    model_response = (content or "").strip()

    predicted_label = post_process_response(model_response)
    return predicted_label, rag_examples, rag_labels, prompt

def post_process_response(output_string):
    """Map the raw LLM answer onto one of the known `categories`.

    Matching is case-insensitive and substring based, so answers such as
    "Technology" still resolve to "tech". When several categories occur in the
    answer, one that is merely a fragment of another matched category (e.g.
    "health" inside "healthcare") is skipped in favour of the longer one;
    otherwise the first match in `categories` order wins.

    Args:
        output_string: Raw text returned by the model (may be empty or None).

    Returns:
        The matching category as it appears in `categories`; the first token
        of the answer when nothing matches; 'unknown' for an empty answer.
    """
    cleaned = (output_string or "").strip()
    if not cleaned:
        return 'unknown'

    lowered = cleaned.lower()

    # (lower-cased key, original category) for every category found in the
    # answer; empty keys are ignored because "" is a substring of everything.
    hits = []
    for category in categories:
        key = str(category).lower()
        if key and key in lowered:
            hits.append((key, category))

    for key, category in hits:
        shadowed = any(key != other and key in other for other, _ in hits)
        if not shadowed:
            return category

    return cleaned.split()[0]

In [None]:
# Smoke test: classify one code-switched sentence and show what was retrieved.
test_text = "اذا كنت ترغب في تحسين mental health يجب عليك ممارسه mindfulness بانتظام"

predicted_label, rag_examples, rag_labels, used_prompt = hybrid_classify(test_text)

# Header, input and prediction, followed by a blank line and the RAG heading.
print(
    "=== HYBRID RAG + FEW-SHOT CLASSIFICATION ===",
    f"Input Text: {test_text}",
    f"Predicted Label: {predicted_label}",
    "\nRAG Retrieved Examples:",
    sep="\n",
)

# One numbered line per retrieved neighbour, with its label after the colon.
for position, (example, label) in enumerate(zip(rag_examples, rag_labels), start=1):
    print(f"  {position}. {example} :{label}")

=== HYBRID RAG + FEW-SHOT CLASSIFICATION ===
Input Text: اذا كنت ترغب في تحسين mental health يجب عليك ممارسه mindfulness بانتظام
Predicted Label: health

RAG Retrieved Examples:
  1. اذا كنت ترغب في تحسين mental health يمكنك ممارسه تقنيات mindfulness بانتظام :health
  2. اذا كنت ترغب في تحسين mental health يجب ان تمارس mindfulness بانتظام وتناول غذاء صحي :health
  3. اذا كنت ترغب في تحسين mental health يجب ان تمارس mindfulness بانتظام وتتناول طعاما صحيا :health


In [None]:
def evaluate_hybrid_approach(test_data_path, sample_size=50, delay=1, random_state=42):
    """Run `hybrid_classify` over a test spreadsheet and report the metrics.

    Args:
        test_data_path: Path to an Excel file with `text` and `label` columns.
        sample_size: Number of rows to evaluate. Clamped to the size of the
            test set; a falsy value (None / 0) evaluates every row.
        delay: Seconds to pause after each request to stay under the API rate
            limit. A falsy value disables the pause.
        random_state: Seed used when sub-sampling the test set.

    Returns:
        A tuple `(results_df, accuracy)`: a per-row DataFrame with the text,
        true and predicted labels, a correctness flag and the number of
        retrieved examples, plus the overall accuracy.
    """
    test_df = pd.read_excel(test_data_path)
    if sample_size:
        # Asking for more rows than exist makes DataFrame.sample raise, so a
        # small test set is evaluated in full instead.
        n_rows = min(sample_size, len(test_df))
        test_df = test_df.sample(n=n_rows, random_state=random_state)

    texts_test = test_df["text"].astype(str).tolist()
    true_labels = test_df["label"].astype(str).tolist()

    predicted_labels = []
    all_rag_examples = []

    print("Evaluating Hybrid RAG + Few-Shot approach...")
    for text in tqdm(texts_test):
        # hybrid_classify returns (label, examples, labels, prompt); only the
        # prediction and the retrieved examples are needed here.
        predicted_label, rag_examples, _rag_labels, _prompt = hybrid_classify(text)
        # Cast to str so predictions compare like-for-like with true_labels.
        predicted_labels.append(str(predicted_label))
        all_rag_examples.append(rag_examples)

        # Avoid rate limiting
        if delay:
            sleep(delay)

    # Calculate metrics
    accuracy = accuracy_score(true_labels, predicted_labels)
    report = classification_report(true_labels, predicted_labels)

    print(f"\n=== EVALUATION RESULTS ===")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"\nClassification Report:")
    print(report)

    results_df = pd.DataFrame({
        'text': texts_test,
        'true_label': true_labels,
        'predicted_label': predicted_labels,
        'correct': [true == pred for true, pred in zip(true_labels, predicted_labels)],
        'rag_examples_count': [len(examples) for examples in all_rag_examples]
    })

    return results_df, accuracy

In [18]:
# Evaluate on a 50-row sample of the held-out split and report the headline metric.
results_df, accuracy = evaluate_hybrid_approach(
    test_data_path="test_split_new.xlsx",
    sample_size=50,
)
print("Overall Accuracy: {:.4f}".format(accuracy))

Evaluating Hybrid RAG + Few-Shot approach...


100%|██████████| 50/50 [01:47<00:00,  2.14s/it]


=== EVALUATION RESULTS ===
Accuracy: 0.9800

Classification Report:
              precision    recall  f1-score   support

    business       1.00      0.80      0.89         5
   education       1.00      1.00      1.00         5
     finance       1.00      1.00      1.00         4
      health       1.00      1.00      1.00         4
     medical       1.00      1.00      1.00         6
    shopping       1.00      1.00      1.00         5
      social       1.00      1.00      1.00         7
      sports       1.00      1.00      1.00         8
        tech       0.86      1.00      0.92         6

    accuracy                           0.98        50
   macro avg       0.98      0.98      0.98        50
weighted avg       0.98      0.98      0.98        50

Overall Accuracy: 0.9800



