In [None]:
import os

import faiss
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer
from sklearn.metrics import accuracy_score, classification_report

# Read the Groq key from the environment so the secret never lands in the
# notebook file; a missing variable fails here instead of on the first request.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Pre-built FAISS index used for nearest-neighbour retrieval.
# NOTE(review): classify_text assumes position i in this index corresponds to
# row i of train_split_new.xlsx, and that the index was built with the same
# embedding model loaded below — confirm against the code that created it.
index = faiss.read_index("faiss_index.bin")

# Training examples and labels that are shown to the LLM as few-shot context.
df = pd.read_excel("train_split_new.xlsx")
texts = df["text"].tolist()
labels = df["label"].tolist()

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer('all-MiniLM-L6-v2')


  from .autonotebook import tqdm as notebook_tqdm





In [2]:
def classify_text(query, k=3):
    """Predict a label for ``query`` by few-shot prompting the Groq chat model.

    The query is embedded with the module-level ``model``, its nearest
    neighbours are looked up in the module-level FAISS ``index``, and the
    matching entries of ``texts`` / ``labels`` are presented to the LLM as
    labeled examples.

    Args:
        query: The text to classify.
        k: Number of nearest neighbours to request from the index.

    Returns:
        The model's reply with surrounding whitespace removed, or an empty
        string when the reply carries no content. The reply is not validated
        against the set of known labels.
    """
    query_emb = model.encode([query], convert_to_numpy=True).astype("float32")
    _, indices = index.search(query_emb, k)

    # FAISS fills missing neighbours with -1 when the index holds fewer than k
    # vectors. A negative position would silently wrap around to the last
    # training row, so those slots are dropped instead of being used.
    neighbour_ids = [int(i) for i in indices[0] if i >= 0]

    context = "\n".join(
        f"Example {rank}:\nText: {texts[i]}\nLabel: {labels[i]}"
        for rank, i in enumerate(neighbour_ids, start=1)
    )

    prompt = f"""
You are a text classification assistant. 
Based on the following labeled examples, determine the most likely label for the new text.

Labeled examples:
{context}

Now classify this new text:
"{query}"

Respond only with the most appropriate label.
"""

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
        # Greedy decoding: classification should not vary between runs.
        temperature=0,
    )

    # `content` is optional in the chat-completion schema; avoid None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()


In [3]:
# Evaluate the classifier on a fixed random sample of the held-out split.
# Note: this rebinds `df` (previously the training frame); classify_text is
# unaffected because it reads the `texts` / `labels` lists extracted earlier.
df = pd.read_excel("test_split_new.xlsx")

# Cap the sample size so a test split with fewer than 100 rows does not raise;
# the fixed random_state keeps the sample identical between runs.
sample_df = df.sample(n=min(100, len(df)), random_state=42).reset_index(drop=True)

# Cast both columns to str so ground truth is comparable with the LLM's text reply.
texts_test = sample_df["text"].astype(str).tolist()
true_labels = sample_df["label"].astype(str).tolist()

# One LLM request per sampled text.
predicted_labels = [classify_text(query_text) for query_text in texts_test]

accuracy = accuracy_score(true_labels, predicted_labels)
print(f"{accuracy:.2f}")
print("Classification Report:")
print(classification_report(true_labels, predicted_labels))




0.99
Classification Report:
              precision    recall  f1-score   support

    business       1.00      0.93      0.96        14
   education       1.00      1.00      1.00         8
     finance       1.00      1.00      1.00         9
      health       1.00      1.00      1.00         9
     medical       1.00      1.00      1.00        13
    shopping       1.00      1.00      1.00         9
      social       1.00      1.00      1.00        16
      sports       1.00      1.00      1.00        14
        tech       0.89      1.00      0.94         8

    accuracy                           0.99       100
   macro avg       0.99      0.99      0.99       100
weighted avg       0.99      0.99      0.99       100

