In [2]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import os
import sys



In [18]:
# Extra import location used by this notebook (what is imported from it is not
# visible in this cell). Guarded so that re-running the cell does not keep
# appending duplicate entries to sys.path.
EXTRA_MODULE_DIR = r"data/test"
if EXTRA_MODULE_DIR not in sys.path:
    sys.path.append(EXTRA_MODULE_DIR)

# Step 1: Load the CSV data
DATA_PATH = 'data.csv'  # Replace with your CSV file path
data = pd.read_csv(DATA_PATH)

# A row with a missing example cannot be embedded and a row with a missing
# label cannot contribute to any prototype, so both are excluded here.
# `data` itself is left untouched; only the derived lists are filtered, and
# they stay aligned with each other row for row.
labelled_rows = data.dropna(subset=['Examples', 'Labels'])
sentences = labelled_rows['Examples'].tolist()  # List of text examples
labels = labelled_rows['Labels'].tolist()        # List of corresponding labels

# Step 2: Generate embeddings
MODEL_NAME = 'sentence-transformers/all-MiniLM-L12-v2'
model = SentenceTransformer(MODEL_NAME)
# The result is later indexed row-wise with a mask built from `labels`, so it
# must hold one embedding per entry of `sentences`, in the same order.
embeddings = model.encode(sentences)

# Step 3: Create prototypes for each label
# A prototype is the element-wise mean of the embeddings of every example
# that carries the label.
labels_array = np.array(labels)  # built once instead of once per label
label_prototypes = {}
# dict.fromkeys de-duplicates while keeping first-seen order, so the print
# order and the dict order (which decides ties in classify_example) are the
# same on every kernel start; iterating a set of strings is not.
for label in dict.fromkeys(labels):
    # Boolean mask selecting the embedding rows that belong to this label
    label_mask = labels_array == label
    if not np.any(label_mask):
        # No row matched (e.g. a NaN label never compares equal to itself).
        # Averaging zero rows would store an all-NaN prototype, so skip it.
        continue
    print(label)
    # Average the embeddings to get the prototype
    label_prototypes[label] = np.mean(embeddings[label_mask], axis=0)

# Step 4: Classify new examples
def classify_example(new_example):
    """Return the label whose prototype is most similar to ``new_example``.

    The text is embedded with the notebook-level ``model`` and scored, by
    cosine similarity, against every entry of ``label_prototypes``.

    Args:
        new_example: Text to classify; handed to ``model.encode`` as a
            one-item list.

    Returns:
        The key of ``label_prototypes`` with the highest similarity score.
        If several labels share the top score, the one that comes first in
        the dict's iteration order is returned.

    Raises:
        ValueError: Raised by ``max`` when ``label_prototypes`` is empty.
    """
    query_vector = model.encode([new_example])

    scored_labels = []
    for candidate, centroid in label_prototypes.items():
        # Prototypes are reshaped to a single row so both arguments are 2-D.
        centroid_row = centroid.reshape(1, -1)
        score = cosine_similarity(query_vector, centroid_row)[0][0]
        scored_labels.append((candidate, score))

    # max keeps the first of equal scores, i.e. dict iteration order wins ties.
    best_label, _best_score = max(scored_labels, key=lambda pair: pair[1])
    return best_label

# Example usage: classify a single query with classify_example and print the
# label it returns.
new_example = "who is elon musk?"
predicted_label = classify_example(new_example)
print(f'The predicted label for the new example is: {predicted_label}')




[ True  True False ...  True False  True]
general
[False False  True ... False  True False]
realtime
The predicted label for the new example is: realtime


In [16]:
# Second spot check. This cell only calls classify_example, so the cell that
# defines it (together with the model and prototypes it reads) must have been
# run first on a fresh kernel. Rebinds new_example and predicted_label.
new_example = "do you love me "
predicted_label = classify_example(new_example)
print(f'The predicted label for the new example is: {predicted_label}')

The predicted label for the new example is: general
