In [57]:
import json

In [58]:
import pandas as pd

In [59]:
from openai import OpenAI
# Read the OpenAI key from the environment so the secret never lives in the
# notebook; falls back to the previous empty-string placeholder when unset.
import os
api_key = os.environ.get("OPENAI_API_KEY", "")

In [60]:
# Training examples; later cells read the 'text' and 'reasoning' columns plus
# one column per entry of `emotions` from this frame.
train_df = pd.read_csv(filepath_or_buffer='reasoning.csv')

In [61]:
import pandas as pd
import json
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.docstore.document import Document

# Label columns selected from the training frame; their order here is the
# order of the entries in each stored label vector.
emotions = [
    'anger',
    'fear',
    'joy',
    'sadness',
    'surprise',
]

def extract_features_labels(df, output_path="data/train_features_labels.json", label_columns=None):
    """Dump text, label vector and reasoning of every row to a JSON file.

    Each row of ``df`` becomes one record of the form
    ``{'text': ..., 'label': [...], 'reasoning': ...}``; the records are
    written as a JSON list (``indent=4``) to ``output_path``.

    Args:
        df: DataFrame with a 'text' column, a 'reasoning' column and one
            column per entry of ``label_columns``.
        output_path: Destination JSON file. Missing parent directories are
            created. Defaults to the path the rest of the notebook reads.
        label_columns: Column names that make up the label vector, in order.
            Defaults to the module-level ``emotions`` list.

    Returns:
        None. The result is the file written to ``output_path``.
    """
    import os

    if label_columns is None:
        label_columns = emotions
    label_columns = list(label_columns)

    texts = df['text'].tolist()
    labels = df[label_columns].values.tolist()
    reasonings = df['reasoning'].tolist()

    records = [
        {'text': text, 'label': label, 'reasoning': reasoning}
        for text, label, reasoning in zip(texts, labels, reasonings)
    ]

    # The original failed with FileNotFoundError when the target directory
    # was missing; create it up front.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(records, file, indent=4)

def load_and_store():
    """Build a FAISS index from the saved training records and persist it.

    Reads ``data/train_features_labels.json``, wraps every record in a
    ``Document`` (text as page content, label and reasoning as metadata),
    passes the documents through a ``CharacterTextSplitter``, embeds the
    resulting chunks with ``text-embedding-ada-002`` and saves the index to
    ``data/faiss_index``.
    """
    with open("data/train_features_labels.json", "r") as handle:
        records = json.load(handle)

    documents = []
    for record in records:
        content = record["text"]
        meta = {"label": record["label"], "reasoning": record["reasoning"]}
        documents.append(Document(page_content=content, metadata=meta))

    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    embedder = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=api_key)
    index = FAISS.from_documents(chunks, embedder)
    index.save_local("data/faiss_index")

In [62]:
# Step 1: write the training rows out as JSON records.
extract_features_labels(df=train_df)
# Step 2: embed those records and save the FAISS index.
load_and_store()

In [63]:
def retrieve_similar(query_text, k=3):
    """Return the ``k`` indexed documents most similar to ``query_text``.

    Loads the FAISS index saved under ``data/faiss_index``, runs a similarity
    search and prints each hit's text, label and reasoning before returning
    the list of hits.

    Args:
        query_text: Text to search for.
        k: Number of results requested from the index.

    Returns:
        The documents returned by ``similarity_search``.
    """
    embedder = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=api_key)
    # NOTE(review): allow_dangerous_deserialization=True opts in to loading a
    # pickled docstore; only point this at index files you created yourself.
    index = FAISS.load_local("data/faiss_index", embedder, allow_dangerous_deserialization=True)
    matches = index.similarity_search(query_text, k=k)

    for match in matches:
        meta = match.metadata
        print(match.page_content, meta['label'], meta['reasoning'])
    return matches

In [64]:
# Smoke-test the retriever with a clearly joyful sentence; the hits are
# printed by retrieve_similar and also kept in `q`.
query = "I am feeling very happy today!"
q = retrieve_similar(query_text=query)

A smile spread across my face. [0, 0, 1, 0, 0] Tweet: "A smile spread across my face."
Explanation: The tweet "A smile spread across my face" clearly conveys joy through the explicit mention of a smile, which is commonly associated with happiness and positive emotions. The phrase suggests a spontaneous and genuine reaction, highlighting a moment of delight. The tone is upbeat and positive, with no hint of negative or distressing language that could indicate anger, fear, sadness, or surprise. Overall, the straightforward expression of a joyful physical response reinforces its categorization in joy, while excluding other emotions.
Emotions: ['joy']


My heart is aching today. [0, 1, 0, 1, 0] Tweet: "My heart is aching today."
Explanation: The phrase "My heart is aching today" conveys a deep sense of emotional pain, directly indicating sadness through the metaphor of a heartache, which is commonly associated with grief or sorrow. The use of the word "aching" suggests a heavy burden, which