In [1]:
import streamlit as st
from llama_index.core import VectorStoreIndex, Document, Settings, StorageContext, load_index_from_storage
from llama_index.readers.file import PandasCSVReader
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import Settings
from llama_index.vector_stores.faiss import FaissVectorStore
import faiss
import openai
import os

In [2]:
openai.api_key = st.secrets["secrets"]["openai_key"]
st.header("Chat with the Streamlit docs 💬 📚")

if "messages" not in st.session_state.keys(): # Initialize the chat message history
    st.session_state.messages = [
        {"role": "assistant", "content": "Ask me a question about Spotify reviews!"}
    ]




2025-03-22 19:23:58.127 
  command:

    streamlit run C:\Users\seanw\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]


FileNotFoundError: No secrets found. Valid paths for a secrets.toml file or secret directories are: C:\Users\seanw\.streamlit\secrets.toml, c:\Users\seanw\Documents\Data D\Mekari\playground\.streamlit\secrets.toml

In [None]:
def get_embedding_dim(embed_model):
    return len(embed_model.get_text_embedding("test"))

embed_model = OpenAIEmbedding()

In [None]:

system_prompt = """You are a summary agent to answer users questions on Spotify reviews.

Follow these rules in order to answer the user's question:
1) Your answer should be short (maximum 3 short coherent sentences).
2) Use the contextual information on Google Store reviews for Spotify to extract actionable insights.
3) Your answer should be a coherent question answering the question with the given context.
4) Keep your answers technical and based on facts do not hallucinate features.
5) If the question is not clear, ask for clarification.
6) If the question is out of scope, politely decline.

"""

FAISS_STORAGE_PATH = "./storage"
DOCS_PATH = os.path.join("./storage", "default__vector_store.json")

@st.cache_resource(show_spinner=False)
def load_data():
    # Create FAISS index

    if os.path.exists(FAISS_STORAGE_PATH) and os.path.exists(DOCS_PATH):
        print("Loading Index from cache")
        vector_store = FaissVectorStore.from_persist_dir("./storage")
        storage_context = StorageContext.from_defaults(vector_store=vector_store, persist_dir=FAISS_STORAGE_PATH)
        index = load_index_from_storage(storage_context=storage_context)
        return index

    with st.spinner(text="Loading and indexing the docs! This should take 3-4 minutes."):
        print("Loading Data")
        parser = PandasCSVReader(concat_rows=False, pandas_config={"usecols": ["review_text"], "nrows": 10000})
        docs = parser.load_data("data/spotify_reviews_dedup.csv")
        print("Reading LLM")
        llm = OpenAI(model="gpt-4o-mini", temperature=0.5, system_prompt=system_prompt)
        Settings.llm = llm
        
        print("Generating index")
        faiss_index = faiss.IndexFlatL2(get_embedding_dim(embed_model))  # Adjust based on embedding dimensions
        vector_store = FaissVectorStore(faiss_index)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(docs, storage_context=storage_context, show_progress=True)
        
        print("Saving index")
        # save index to disk
        index.storage_context.persist()

        return index


index = load_data()


Loading Index from cache


ValueError: 
******
Could not load OpenAI embedding model. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys

Consider using embed_model='local'.
Visit our documentation for more embedding options: https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings.html#modules
******

In [None]:
index = load_data()
chat_engine = index.as_query_engine(chat_mode="condense_question", verbose=True)
response = chat_engine.query("What do users like about the app?")
response

Loading Index from cache


Response(response='Users appreciate the app for its overall functionality and user-friendly interface. Many find it enjoyable and easy to use. Would you like to know more about specific features?', source_nodes=[NodeWithScore(node=TextNode(id_='5cac734d-6713-4ccc-806a-ec78ebb38321', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0ee562ad-6fb4-4c17-a564-02f0a6ac91fe', node_type='4', metadata={}, hash='111705f619fc6a858ea0b1cc89d0c8dabde425b3911fcde43c6c930c0f70eea8')}, metadata_template='{key}: {value}', metadata_separator='\n', text='really like app', mimetype='text/plain', start_char_idx=0, end_char_idx=15, metadata_seperator='\n', text_template='{metadata_str}\n\n{content}'), score=0.2373386025428772)], metadata={'5cac734d-6713-4ccc-806a-ec78ebb38321': {}})

In [None]:
import pickle
#save the embeddings in cache
cache_file = "open_ai_embeddings_cache.pkl"
with open(cache_file, 'wb') as f:
    pickle.dump(index, f)



In [None]:
chat_engine = index.as_chat_engine(chat_mode="condense_question", verbose=True)

In [None]:
if prompt := st.chat_input("Your question"): # Prompt for user input and save to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

for message in st.session_state.messages: # Display the prior chat messages
    with st.chat_message(message["role"]):
        st.write(message["content"])



In [None]:
# If last message is not from assistant, generate a new response
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            response = chat_engine.chat(prompt)
            st.write(response.response)
            message = {"role": "assistant", "content": response.response}
            st.session_state.messages.append(message) # Add response to message history



In [None]:
# Test out the chat engine by writing to it

prompt = "What do users like about this app"

response = chat_engine.chat(prompt)
response

Querying with: What do users like about this app


AttributeError: 'SimpleVectorStore' object has no attribute 'data'

In [None]:
index.storage_context.persist(persist_dir="./faiss_index")