In [None]:
import os

import faiss
import numpy as np
import pandas as pd
from groq import Groq
from sentence_transformers import SentenceTransformer

# Read the Groq API key from the environment instead of embedding it in the
# notebook, so the secret is never saved or shared together with the file.
# Export GROQ_API_KEY before starting the kernel.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
def chat_completion(prompt, model_name="llama-3.3-70b-versatile"):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client``.

    Args:
        prompt: Text placed in the ``content`` of the one user message.
        model_name: Model identifier passed to the chat completions call.
            Defaults to the model this notebook was written against.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content


  from .autonotebook import tqdm as notebook_tqdm





In [7]:
# Load the labelled training split; the cells below rely on its "text" and
# "label" columns.
df = pd.read_excel("train_split.xlsx").assign(
    # Every column rendered as a string and joined row-wise with single spaces.
    combined=lambda frame: frame.astype(str).agg(" ".join, axis=1)
)

# Parallel lists, one entry per row, in the frame's row order.
texts, labels = (df[column].astype(str).tolist() for column in ("text", "label"))

print(f"Loaded {len(df)} samples.")

Loaded 75976 samples.


In [12]:

# Sentence encoder; the same object embeds the corpus here and each query
# inside classify_text.
model = SentenceTransformer('all-MiniLM-L6-v2')

# One float32 row per training text.
embeddings = model.encode(texts, convert_to_numpy=True).astype("float32")

# Flat L2 index whose dimensionality is taken from the embedding width.
dimension = embeddings.shape[-1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print(f"FAISS index built with {index.ntotal} vectors.")



FAISS index built with 75976 vectors.


In [13]:
# Single source of truth for the on-disk location, so the file that is written
# and the path reported in the message cannot drift apart.
INDEX_PATH = "faiss_index.bin"

faiss.write_index(index, INDEX_PATH)
print(f"FAISS index saved to {INDEX_PATH}")

FAISS index saved to faiss_index.bin


In [18]:
def classify_text(query, k=3):
    """Classify ``query`` by few-shot prompting with its nearest neighbours.

    The query is embedded with the module-level ``model``, its ``k`` nearest
    training rows are looked up in the module-level FAISS ``index``, and those
    rows (from ``texts`` / ``labels``) are placed in a prompt that is sent
    through ``chat_completion``. The neighbour labels and the predicted label
    are also printed.

    Args:
        query: Text to classify.
        k: Number of nearest labelled examples to include in the prompt.
            Must be at least 1. If the index holds fewer than ``k`` vectors,
            only the neighbours that actually exist are used.

    Returns:
        The chat model's reply with surrounding whitespace stripped
        (an empty string if the reply had no content).

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    query_emb = model.encode([query], convert_to_numpy=True).astype("float32")
    _, indices = index.search(query_emb, k)

    # FAISS fills result slots it cannot satisfy with -1. Indexing a Python
    # list with -1 would silently pick the *last* training row, so drop them.
    neighbor_ids = [int(i) for i in indices[0] if i >= 0]
    top_examples = [texts[i] for i in neighbor_ids]
    top_labels = [labels[i] for i in neighbor_ids]

    context = "\n".join(
        f"Example {n}:\nText: {example}\nLabel: {label}"
        for n, (example, label) in enumerate(zip(top_examples, top_labels), start=1)
    )

    prompt = f"""
You are a text classification assistant. 
Based on the following labeled examples, determine the most likely label for the new text.

Labeled examples:
{context}

Now classify this new text:
"{query}"

Respond only with the most appropriate label.
"""

    # Reuse the shared helper instead of repeating the chat-completions call;
    # `or ""` guards against a reply whose content is None.
    model_label = (chat_completion(prompt) or "").strip()

    print(f"Nearest neighbor labels: {top_labels}")
    print(f"Groq model predicted label: {model_label}")

    return model_label


In [None]:
# Smoke test with a code-switched sample: Arabic text containing the English
# phrases "seasonal sales" and "fashion stores".
query_text = "اذا كنت تبحث عن ملابس جديده تحقق دايما من seasonal sales في fashion stores المحليه"

# Uses classify_text's default number of neighbours.
predicted_label = classify_text(query=query_text)
print("Final predicted label:", predicted_label)


Nearest neighbor labels: ['shopping', 'shopping', 'shopping']
Groq model predicted label: shopping
✅ Final predicted label: shopping
