In [1]:
import os
os.chdir('..')
os.getcwd()

'd:\\Programming\\SpeakSnap-core'

In [2]:
os.getcwd()

'd:\\Programming\\SpeakSnap-core'

In [3]:
from pathlib import Path
from src.speaksnap.utils.common import load_json

path_to_json = Path.cwd() / "Artifacts" / "data" / "test.json"
text = load_json(path_to_json)
print(text)

[2025-05-10 19:34:08,353: INFO: common: json file loaded successfully from: d:\Programming\SpeakSnap-core\Artifacts\data\test.json]
{'text': "Hey team, just a quick update from the marketing call earlier today. We're planning to launch the new ad campaign by next Wednesday. Design team needs to finalize creatives by Monday. Also, there's a budget review scheduled for Friday. Please ensure your departmental spending reports are ready by then.", 'userid': 'abc@123'}


In [4]:
type(text.text)

str

In [5]:
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
load_dotenv()
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [12]:
chat_history = [
    "Last week, we discussed pushing the campaign to mid-May, but it seems we're going with next Wednesday now.",
    "Design team previously had issues with turnaround time. Need to make sure they can meet Monday deadline.",
    "There was a budget overrun in Q1 — finance might ask tough questions during Friday’s review."
  ]

In [7]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

chat_history = []
template = ChatPromptTemplate([
    ('system', 'You are a helpful assistant summarizing spoken meetings or conversations.'),
    MessagesPlaceholder(variable_name="chat_history"),
    ('human', '{query}')
])
message = template.invoke({"chat_history":chat_history, "query": text.text})
print(message)

messages=[SystemMessage(content='You are a helpful assistant summarizing spoken meetings or conversations.', additional_kwargs={}, response_metadata={}), HumanMessage(content="Hey team, just a quick update from the marketing call earlier today. We're planning to launch the new ad campaign by next Wednesday. Design team needs to finalize creatives by Monday. Also, there's a budget review scheduled for Friday. Please ensure your departmental spending reports are ready by then.", additional_kwargs={}, response_metadata={})]


In [8]:
path_to_schema = Path.cwd() / "data_schema" / "response_schema.json"
model_schema = load_json(path_to_schema)
model_schema

[2025-05-10 19:34:27,983: INFO: common: json file loaded successfully from: d:\Programming\SpeakSnap-core\data_schema\response_schema.json]


ConfigBox({'title': 'Summary', 'type': 'object', 'properties': {'summary': {'type': 'string', 'description': 'A brief summary of the text'}, 'sentiment': {'type': 'string', 'enum': ['pos', 'neu', 'neg'], 'description': 'Return the sentiment of the review as positive, neutral or negative'}, 'name': {'type': ['string', 'null'], 'description': "The speaker's name, if available. Use null if the speaker is unidentified or not mentioned in the text."}, 'contextual_explanations': {'type': 'array', 'items': {'type': 'object', 'properties': {'term': {'type': 'string', 'description': 'The special or domain-specific word found in the text use null if unidentified'}, 'explanation': {'type': 'string', 'description': 'A short explanation of what the word or term means in this context'}}, 'required': ['term', 'explanation']}, 'description': 'List of special terms used in the conversation with their brief explanations'}}, 'required': ['summary', 'sentiment']})

In [9]:
Model = model.with_structured_output(model_schema)

In [10]:
result = Model.invoke(message)
print(result)



{'summary': 'Marketing call update: Ad campaign launches next Wednesday. Design team, finalize creatives by Monday. Budget review Friday; departmental spending reports due.', 'sentiment': 'neu', 'contextual_explanations': []}


In [13]:
chat_history.append(result['summary'])
chat_history

["Last week, we discussed pushing the campaign to mid-May, but it seems we're going with next Wednesday now.",
 'Design team previously had issues with turnaround time. Need to make sure they can meet Monday deadline.',
 'There was a budget overrun in Q1 — finance might ask tough questions during Friday’s review.',
 'Marketing call update: Ad campaign launches next Wednesday. Design team, finalize creatives by Monday. Budget review Friday; departmental spending reports due.']

In [14]:
chathistory = ""
for line in chat_history:
    chathistory += line
chathistory

"Last week, we discussed pushing the campaign to mid-May, but it seems we're going with next Wednesday now.Design team previously had issues with turnaround time. Need to make sure they can meet Monday deadline.There was a budget overrun in Q1 — finance might ask tough questions during Friday’s review.Marketing call update: Ad campaign launches next Wednesday. Design team, finalize creatives by Monday. Budget review Friday; departmental spending reports due."

In [15]:
from langchain.schema import Document

doc = Document(
    page_content=chathistory,
    metadata={"userid":text.userid}
)
doc

Document(metadata={'userid': 'abc@123'}, page_content="Last week, we discussed pushing the campaign to mid-May, but it seems we're going with next Wednesday now.Design team previously had issues with turnaround time. Need to make sure they can meet Monday deadline.There was a budget overrun in Q1 — finance might ask tough questions during Friday’s review.Marketing call update: Ad campaign launches next Wednesday. Design team, finalize creatives by Monday. Budget review Friday; departmental spending reports due.")

In [21]:
from langchain.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

[2025-05-10 19:47:31,038: INFO: SentenceTransformer: Use pytorch device_name: cpu]
[2025-05-10 19:47:31,039: INFO: SentenceTransformer: Load pretrained SentenceTransformer: all-MiniLM-L6-v2]


  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


In [23]:
from langchain.vectorstores import Chroma

vector_store = Chroma(
    embedding_function = embedding_model,
    persist_directory="chroma_db",
    collection_name='sample'
)

In [29]:
doc

Document(metadata={'userid': 'abc@123'}, page_content="Last week, we discussed pushing the campaign to mid-May, but it seems we're going with next Wednesday now.Design team previously had issues with turnaround time. Need to make sure they can meet Monday deadline.There was a budget overrun in Q1 — finance might ask tough questions during Friday’s review.Marketing call update: Ad campaign launches next Wednesday. Design team, finalize creatives by Monday. Budget review Friday; departmental spending reports due.")

In [32]:
vector_store.add_documents([doc])

['bec351e3-e1da-4cc4-ace9-0b398a51cce2']

In [None]:
res = vector_store.similarity_search(
    query="last week",
    filter={"userid":text.userid}
    k=1
)
res

[Document(metadata={'userid': 'abc@123'}, page_content="Last week, we discussed pushing the campaign to mid-May, but it seems we're going with next Wednesday now.Design team previously had issues with turnaround time. Need to make sure they can meet Monday deadline.There was a budget overrun in Q1 — finance might ask tough questions during Friday’s review.Marketing call update: Ad campaign launches next Wednesday. Design team, finalize creatives by Monday. Budget review Friday; departmental spending reports due.")]

In [None]:
import redis
import json

r = redis.StrictRedis(host='localhost', port=6379, db=0)

def buffer_message(user_id, message):
    key = f"user:{user_id}:buffer"
    # Get current buffer
    current = json.loads(r.get(key) or "[]")
    current.append(message)
    r.setex(key, 90, json.dumps(current))  # 900 seconds = 15 mins TTL
