In [1]:
import logging
import os
from datetime import datetime, timedelta

import numpy as np
from dotenv import load_dotenv
from pyarrow import json_

load_dotenv()

COLLECTION_NAME = 'timescale_WA_v1'

# Set up the logger
logging.basicConfig(level=logging.INFO)

###  Load Data

In [2]:
import pandas as pd

DATA_FILE = "../data/WhatsAppCleaned/WhatsAppCombined.tsv"

def add_context(chat_df, col_to_cat='MESSAGE', new_col_name='CONTEXTUALIZED_MESSAGE', context_len=3, author_col='SENDER'):
  temp_col = col_to_cat+'_TMP'
  chat_df[temp_col] = chat_df[author_col] + ' ~ ' + chat_df[col_to_cat]

  neg_cols_added = [f'{temp_col}_neg_{i}' for i in range(1, 1 + context_len)]
  plus_cols_added = [f'{temp_col}_plus_{i}' for i in range(1, 1 + context_len)]

  for i in range(1, context_len + 1):
    chat_df[f'{temp_col}_plus_{i}'] = chat_df[temp_col].shift(-i)
    chat_df[f'{temp_col}_neg_{i}'] = chat_df[temp_col].shift(i)

  chat_df[new_col_name] = chat_df[[*neg_cols_added, temp_col, *plus_cols_added]].fillna('').agg('\n'.join, axis=1).str.strip()
  chat_df.drop(columns=[temp_col, *neg_cols_added, *plus_cols_added], inplace=True)
  return chat_df


data = pd.read_csv(DATA_FILE, sep='\t', parse_dates=['DATETIME'])
print(data.shape)
data.dropna(inplace=True)
print(data.shape)
data = add_context(data, col_to_cat='MESSAGE', new_col_name='CONTEXTUALIZED_MESSAGE', context_len=3)

(6731, 6)
(6709, 6)


  data = pd.read_csv(DATA_FILE, sep='\t', parse_dates=['DATETIME'])


In [3]:
#preview the csv file
data.head()

Unnamed: 0,SENDER,MESSAGE,DATETIME,PLATFORM,CHAT,MSG_ID,CONTEXTUALIZED_MESSAGE
0,Arya GJ,Bruv group chat of group chat 😂😂😂,2024-02-24 17:30:00,WhatsApp,CHAT_Hamza homies,WA_Hamza homies_0,Arya GJ ~ Bruv group chat of group chat 😂😂😂\nZ...
1,Zareef Amyeen,since the big chat has lowk been saturated,2024-02-24 17:30:00,WhatsApp,CHAT_Hamza homies,WA_Hamza homies_1,Arya GJ ~ Bruv group chat of group chat 😂😂😂\n\...
2,Soham Raut,Yoo we can talk shit about everyone else letsgooo,2024-02-24 17:32:00,WhatsApp,CHAT_Hamza homies,WA_Hamza homies_2,Zareef Amyeen ~ since the big chat has lowk be...
3,Zareef Amyeen,@14255353544 can you let me in when i get there,2024-02-24 17:32:00,WhatsApp,CHAT_Hamza homies,WA_Hamza homies_3,Soham Raut ~ Yoo we can talk shit about everyo...
4,Soham Raut,Just kidding lmfao,2024-02-24 17:32:00,WhatsApp,CHAT_Hamza homies,WA_Hamza homies_4,Zareef Amyeen ~ @14255353544 can you let me in...


In [4]:
from timescale_vector import client

def create_uuid(date_string: str):
  if date_string is None:
    return None
  # 2024-02-24T17:30:00.000
  time_format = "%Y-%m-%dT%H:%M:%S.%f"
  datetime_obj = datetime.strptime(date_string, time_format)
  uuid = client.uuid_from_time(datetime_obj)
  return str(uuid)

def extract_metadata(record, metadata) -> dict:
  # metadata = dict()
  metadata["ID"] = create_uuid(record["DATETIME"])
  metadata["MSG_ID"] = record["MSG_ID"]
  metadata["DATETIME"] = record["DATETIME"] #datetime.strftime(record["DATETIME"], '%Y-%m-%d %H:%M')
  metadata["MESSAGE"] = record["MESSAGE"]
  metadata["SENDER"] = record["SENDER"]
  metadata["PLATFORM"] = record["PLATFORM"]
  metadata["CHAT"] = record["CHAT"]

  del metadata['source']
  del metadata['seq_num']
  return metadata

def create_uuid2(datetime_obj):
  if datetime_obj is None:
    return None
  uuid = client.uuid_from_time(datetime_obj.tz_localize('US/Pacific'))
  return str(uuid)

def create_date(dt):
    if dt is None:
        return None

    # Extract relevant information
    tz_info = dt.tz_localize('US/Pacific').utcoffset()
    tz_str = f'{"+" if tz_info.days >= 0 else "-"}{np.abs(24*tz_info.days+tz_info.seconds//3600):02}{((tz_info.seconds%3600)//60):02}'
    # Create a formatted string for the timestamptz in PostgreSQL format
    timestamp_tz_str = (
        f"{dt.year}-{dt.month:02}-{dt.day:02} {dt.hour:02}:{dt.minute:02}:{dt.second:02}{tz_str}"
    )
    return timestamp_tz_str

def extract_metadata2(row) -> dict:
  metadata = dict()
  metadata["ID"] = create_uuid2(row["DATETIME"])
  metadata["MSG_ID"] = row["MSG_ID"]
  metadata["DATETIME"] = create_date(row["DATETIME"])
  metadata["MESSAGE"] = row["MESSAGE"]
  metadata["SENDER"] = row["SENDER"]
  metadata["PLATFORM"] = row["PLATFORM"]
  metadata["CHAT"] = row["CHAT"]

  return metadata

In [5]:
from langchain_core.documents import Document

inp_docs = [
  Document(
    page_content=row['CONTEXTUALIZED_MESSAGE'],
    metadata=extract_metadata2(row)
  ) for _, row in data.iterrows()
]

In [6]:
inp_docs[0]

Document(metadata={'ID': '642b5c00-d37d-11ee-a26d-d6476db3fa42', 'MSG_ID': 'WA_Hamza homies_0', 'DATETIME': '2024-02-24 17:30:00-0800', 'MESSAGE': 'Bruv group chat of group chat 😂😂😂', 'SENDER': 'Arya GJ', 'PLATFORM': 'WhatsApp', 'CHAT': 'CHAT_Hamza homies'}, page_content='Arya GJ ~ Bruv group chat of group chat 😂😂😂\nZareef Amyeen ~ since the big chat has lowk been saturated\nSoham Raut ~ Yoo we can talk shit about everyone else letsgooo\nZareef Amyeen ~ @14255353544 can you let me in when i get there')

In [7]:
# from langchain_community.document_loaders.json_loader import JSONLoader
#
# # save to JSON so it can be read by timestore
# data.to_json('../data/WhatsAppCleaned/WhatsAppCombined.json', 'table', index=False)
#
# # Load data from JSON file and extract metadata
# loader = JSONLoader(
#   file_path='../data/WhatsAppCleaned/WhatsAppCombined.json',
#   jq_schema=".data[]",
#   content_key='CONTEXTUALIZED_MESSAGE',
#   text_content=True,
#   metadata_func=extract_metadata,
# )
#
# documents = loader.load()

# print(len(documents))
# print(documents[0])

# inp_docs = documents

In [8]:
# from langchain_text_splitters import CharacterTextSplitter
#
# # Split the documents into chunks for embedding
# CHUNK_SIZE = 10000
# text_splitter = CharacterTextSplitter(
#   chunk_size=CHUNK_SIZE,
#   chunk_overlap=CHUNK_SIZE//5,
# )
# docs = text_splitter.split_documents(documents)

###  Import Libraries and enviornment variables

In [26]:
### LLMs
os.environ["GOOGLE_API_KEY"] = os.environ["API_KEY"]
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_google_genai import ChatGoogleGenerativeAI

LLAMA_3B_NAME = 'llama3.2'
# DEEPSEEK_1_5B_NAME = 'deepseek-r1:1.5b'

big_llm = ChatOllama(model=LLAMA_3B_NAME, temperature=0.)
ret_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    timeout=None,
    max_retries=2,
)
# small_llm = ChatOllama(model=DEEPSEEK_1_5B_NAME)

embed_model = OllamaEmbeddings(model=LLAMA_3B_NAME)

### Create Vectorstore

In [10]:
from langchain_community.vectorstores.timescalevector import TimescaleVector

# Create a Timescale Vector instance from the collection of documents
db = TimescaleVector.from_documents(
  embedding=embed_model,
  ids=[doc.metadata["ID"] for doc in inp_docs],
  documents=inp_docs,
  collection_name=COLLECTION_NAME,
  service_url=os.environ['TIMESCALE_SERVICE_URL'],
  time_partition_interval=timedelta(days=2),
)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"


### Read Vectorstore

In [11]:
from langchain_community.vectorstores.timescalevector import TimescaleVector
import os

db = TimescaleVector(
    collection_name=COLLECTION_NAME,
    service_url=os.environ['TIMESCALE_SERVICE_URL'],
    embedding=embed_model,
)

# db.create_index(index_type="tsv")

### Question

In [12]:
question = "What were the last 5 ToDos papa gave?"

### Retrieve docs from DB + Add Additional Context

In [13]:
# from datetime import datetime
# start_dt = datetime(2025, 1, 1)  # Start date = Jan 1, 2025
# end_dt = datetime.now() # End date = 30 August 2023, 22:10:35
# td = timedelta(days=7)  # Time delta = 7 days
#
# Set timescale vector as a retriever and specify start and end dates via kwargs
retriever = db.as_retriever(
  search_type="similarity",
  search_kwargs={'k': 10}
  # search_kwargs={"start_date": start_dt, "end_date": end_dt, 'k': 10}
)

In [23]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_community.query_constructors.timescalevector import TimescaleVectorTranslator

# Give LLM info about the metadata fields
metadata_field_info = [
    AttributeInfo(
        name="DATETIME",
        description="The time the message was sent. **A high priority filter**",
        type="timestamp",
    ),
    AttributeInfo(
        name="SENDER",
        description="The *case sensitive* name or ID of the message's author. **A high priority filter**",
        type="string",
    ),
    AttributeInfo(
        name="ID",
        description="A UUID v1 generated from the timestamp of the message",
        type="uuid",
    ),
    AttributeInfo(
        name="PLATFORM",
        description="The app where the message was sent. Valid values are ['Discord', 'WhatsApp']",
        type="string",
    ),
    AttributeInfo(
        name="CHAT",
        description=f"The name of the chat room where the message was sent, will be invoked as 'the chat' or 'the chats'. Valid values are [{[f'\'{name}\'' for name in sorted(data.CHAT.unique())]}]",
        type="string",
    ),
]
document_content_description = "A conversation with a sequence of authors and their messages"

vectorstore = TimescaleVector(
    service_url=os.environ['TIMESCALE_SERVICE_URL'],
    embedding=embed_model,
    collection_name=COLLECTION_NAME
)

# Instantiate the self-query retriever from an LLM

retriever = SelfQueryRetriever.from_llm(
    ret_llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    structured_query_translator=TimescaleVectorTranslator(),
    enable_limit=True,
    use_original_query=True,
    verbose=True
)

In [24]:
from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from typing import List
from types import MethodType
from logging import getLogger
logger = getLogger(__name__)

def my_get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> List[Document]:
        """Get documents relevant for a query.

        Args:
            query: string to find relevant documents for

        Returns:
            List of relevant documents
        """
        structured_query = self.query_constructor.invoke(
            {"query": query}, config={"callbacks": run_manager.get_child()}
        )
        if self.verbose:
            logger.info(f"Generated Query: {structured_query}")
        new_query, search_kwargs = self._prepare_query(query, structured_query)
        # ################# BEGIN: MY INTRODUCTION #################
        # Double the requested message count, and return at least 10
        search_kwargs['k'] = search_kwargs.get('k', 10)
        search_kwargs['k'] = search_kwargs['k']*2
        if search_kwargs['k'] < 10:
          search_kwargs['k'] = 10
        if self.verbose:
            logger.info(f"Final Query: {new_query} with args {search_kwargs}")
        # #################  END: MY INTRODUCTION  #################
        docs = self._get_docs_with_query(new_query, search_kwargs)
        return docs

retriever._get_relevant_documents = MethodType(my_get_relevant_documents, retriever)

In [25]:
docs = retriever.invoke(question)

INFO:__main__:Generated Query: query='Todos' filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='SENDER', value='Papa') limit=4
INFO:__main__:Final Query: What were the last 5 ToDos papa gave? with args {'predicates': AND(('SENDER', '==', 'Papa')), 'k': 10}
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"


In [17]:
def retrieve_more_context(msg_id, platform, chat, n_addl_msgs=10):
  """
  Given a message with ID `msg_id`, get the `addl_msgs` preceding and following messages for context

  :param msg_id: the ID of a retrieved message
  :param platform: the platform of the retrieved message
  :param chat: the chat of the retrieved message
  :param n_addl_msgs: number of additional messages before and after msg `msg_id` to retrieve
  :return: a string
  """
  msg_info = data[data['MSG_ID'] == msg_id]

  chat_hist = data[
      (data['PLATFORM'] == platform) &
      (data['CHAT'] == chat)
  ]

  context_lo = max(chat_hist.index[0], msg_info.index[0] - n_addl_msgs)
  context_hi = min(chat_hist.index[-1], msg_info.index[0] + n_addl_msgs)

  within_context_df = data.iloc[context_lo:context_hi, :].copy()
  within_context_df['VERBOSE'] = within_context_df['PLATFORM'] + ' : ' + within_context_df['CHAT'] + '\t' + within_context_df['DATETIME'].dt.strftime('%A %B %d, %Y %H:%M') + '\t' + within_context_df['SENDER'] + ' ~ ' + within_context_df['MESSAGE']

  return within_context_df['VERBOSE'].str.cat(sep='\n')

In [18]:
fuller_context = [(doc.metadata['MSG_ID'], retrieve_more_context(doc.metadata['MSG_ID'], doc.metadata['PLATFORM'], doc.metadata['CHAT'])) for doc in docs]

### Filter Docs w/ LLM

In [19]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# LLM with function call
structured_llm_grader = big_llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader

In [20]:
docs_to_use = []

for (msg_id, msg_context) in fuller_context:
    print(msg_context, '\n', '-'*50)
    res = retrieval_grader.invoke({"question": question, "document": msg_context})
    print(res,'\n\n\n')
    if res and res.binary_score == 'yes':
        docs_to_use.append({'MSG_ID' : msg_id, 'FULL_CONTEXT' : msg_context})

WhatsApp : CHAT_Papa	Friday February 25, 2022 16:57	Papa ~ Sure
Give me 5 minutes
WhatsApp : CHAT_Papa	Friday February 25, 2022 16:57	Krishna Saxena ~ ok
WhatsApp : CHAT_Papa	Friday February 25, 2022 17:00	Papa ~ On zoom ur link
WhatsApp : CHAT_Papa	Friday February 25, 2022 17:03	Krishna Saxena ~ sure
i am back
WhatsApp : CHAT_Papa	Friday February 25, 2022 19:44	Papa ~ Me too
Lost wifi
WhatsApp : CHAT_Papa	Friday February 25, 2022 20:03	Krishna Saxena ~ oh... i will wait
If it's not too late, I can call now
WhatsApp : CHAT_Papa	Sunday February 27, 2022 17:50	Papa ~ Ok
WhatsApp : CHAT_Papa	Sunday February 27, 2022 17:51	Krishna Saxena ~ your zoom?
WhatsApp : CHAT_Papa	Sunday February 27, 2022 18:07	Papa ~ Yes
WhatsApp : CHAT_Papa	Sunday February 27, 2022 19:39	Krishna Saxena ~ I am finished with dinner
I can call when you are free
WhatsApp : CHAT_Papa	Sunday April 03, 2022 15:20	Papa ~ Done with taxes... Taking a break, let's regroup at 3:45
WhatsApp : CHAT_Papa	Sunday April 03, 2022 15

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='yes' 



WhatsApp : CHAT_Saxena Family	Friday November 08, 2019 12:04	Papa ~ Cool, be safe, have fun...
WhatsApp : CHAT_Saxena Family	Friday November 08, 2019 21:44	Krishna Saxena ~ Yes  Written by  Sophia Loren  _" When I got enough confidence, the stage was gone….. When I was sure of Losing, I won……. When I needed People the most, they Left me……. When I learnt to dry my Tears, I found a shoulder to Cry on…… When I mastered the Skill of Hating, Someone started Loving me from the core of the Heart…… And, while waiting for Light for Hours when I fell asleep, the Sun came out….. That’s LIFE!! No matter what you Plan, you never know what Life has Planned for you…… Success introduces you to the World……. But Failure introduces the World to you……. ……Always be Happy!! Often when we lose Hope and think this is the end… God smiles from above and says, “Relax Sweetheart; It’s just a Bend, not the End..!"_  WORTH READING IT AGAIN & AGAIN
It was fun.
No, I will send photos tomorrow
W

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Papa	Tuesday October 11, 2022 21:14	Papa ~ K betu, can u send me the copy of the selenium notebook.
Thank u
Would u have some time now?
Just a few minutes
WhatsApp : CHAT_Papa	Saturday October 15, 2022 21:26	Krishna Saxena ~ Sure
WhatsApp : CHAT_Papa	Saturday October 15, 2022 21:26	Papa ~ Give me 2 min
On zoom
My id
Good morning Krishna let me know whenever you have some time...
Hackaday: 3-DOF Robot Arm Wrist Without The Motor Weight. https://hackaday.com/2022/11/03/3-dof-robot-arm-wrist-without-the-motor-weight/
Hi Krishna, just saw your missed calls... Sorry
How are you doing? Busy with homework
WhatsApp : CHAT_Papa	Sunday November 06, 2022 18:02	Krishna Saxena ~ Did most of my work for this week today
WhatsApp : CHAT_Papa	Sunday November 06, 2022 18:03	Papa ~ Sure, take some rest and dinner.. we can get together after dinner...
WhatsApp : CHAT_Papa	Sunday November 06, 2022 18:03	Krishna Saxena ~ Ok
Approximately what time?
WhatsApp : CHAT_Papa	

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Papa	Saturday January 08, 2022 22:11	Papa ~ I finished one more difficult homework, I will explain what I struggled with... Tomorrow... Let me know when you r done with your homework
WhatsApp : CHAT_Papa	Saturday January 08, 2022 22:11	Krishna Saxena ~ Ok
I am finished with my homework
WhatsApp : CHAT_Papa	Sunday January 09, 2022 14:17	Papa ~ Let's meet at 2:45. I will finish one more lecture.
WhatsApp : CHAT_Papa	Sunday January 09, 2022 14:17	Krishna Saxena ~ Alright
WhatsApp : CHAT_Papa	Sunday January 09, 2022 15:01	Papa ~ Online
WhatsApp : CHAT_Papa	Sunday January 09, 2022 19:08	Krishna Saxena ~ Ready when you are
WhatsApp : CHAT_Papa	Sunday January 09, 2022 19:40	Papa ~ Logging in
How's it going
I spoke to the author about the xgboost based predictions.
WhatsApp : CHAT_Papa	Tuesday January 11, 2022 17:20	Krishna Saxena ~ just finished reviewing a classmate's English paper
If you can share, I am free now
WhatsApp : CHAT_Papa	Tuesday January 11, 

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Papa	Friday January 28, 2022 20:48	Papa ~ Online now...
WhatsApp : CHAT_Papa	Saturday January 29, 2022 15:53	Krishna Saxena ~ I can call now
WhatsApp : CHAT_Papa	Saturday January 29, 2022 22:33	Papa ~ Did u find something interesting... Sorry, still working on the ui part. Just drop a short email when u r about to sleep
WhatsApp : CHAT_Papa	Saturday January 29, 2022 22:35	Krishna Saxena ~ i sent an email with the important parts of 2 of the articles highlighted
WhatsApp : CHAT_Papa	Saturday January 29, 2022 22:35	Papa ~ Oh thank you
Had a really long weekend but now a bit more about AWS. Let me know when you are free
Know a bit more
How are you doing
WhatsApp : CHAT_Papa	Friday February 04, 2022 15:30	Krishna Saxena ~ Busy. Lots of midterm assignments were due this week
WhatsApp : CHAT_Papa	Friday February 04, 2022 15:31	Papa ~ Sure, take ur time.
WhatsApp : CHAT_Papa	Friday February 04, 2022 15:31	Krishna Saxena ~ I will let you know when I finish

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='yes' 



WhatsApp : CHAT_Papa	Sunday January 16, 2022 17:53	Papa ~ Oh sorry missed your message
WhatsApp : CHAT_Papa	Sunday January 16, 2022 17:53	Krishna Saxena ~ no worries
WhatsApp : CHAT_Papa	Sunday January 16, 2022 17:53	Papa ~ Calling
Two minutes
WhatsApp : CHAT_Papa	Sunday January 16, 2022 17:58	Krishna Saxena ~ ok
I am back
WhatsApp : CHAT_Papa	Sunday January 16, 2022 19:52	Papa ~ Me too
Let's first call Baba
WhatsApp : CHAT_Papa	Sunday January 16, 2022 19:53	Krishna Saxena ~ sure
WhatsApp : CHAT_Papa	Sunday January 16, 2022 19:56	Papa ~ Are you calling?
WhatsApp : CHAT_Papa	Sunday January 16, 2022 19:56	Krishna Saxena ~ Yes
I can call now
WhatsApp : CHAT_Papa	Monday January 17, 2022 15:37	Papa ~ Give me 5 minutes
Getting coffee
WhatsApp : CHAT_Papa	Monday January 17, 2022 15:37	Krishna Saxena ~ ok
WhatsApp : CHAT_Papa	Monday January 17, 2022 15:49	Papa ~ Ready
WhatsApp : CHAT_Papa	Monday January 17, 2022 15:49	Krishna Saxena ~ same
i am back
WhatsApp : CHAT_Papa	

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Saturday December 14, 2024 15:23	Ma ~ Safe travels
Quote of the year “A person is not great because they have never failed; a peron is great because failure doesn’t stop them or changed them for worse. “ <This message was edited>
WhatsApp : CHAT_Saxena Family	Sunday December 15, 2024 00:48	Papa ~ Reached Frankfurt next flight in 3 hours
WhatsApp : CHAT_Saxena Family	Sunday December 15, 2024 01:34	Ma ~ Safe travels 💕
WhatsApp : CHAT_Saxena Family	Sunday December 15, 2024 03:53	Papa ~ <Media omitted>
Landed in Delhi
WhatsApp : CHAT_Saxena Family	Sunday December 15, 2024 12:42	Ma ~ Message when you reach home
Reached??
WhatsApp : CHAT_Saxena Family	Sunday December 15, 2024 13:42	Papa ~ On the way
Reached
Will talk in the evening
<Media omitted>
WhatsApp : CHAT_Saxena Family	Sunday December 15, 2024 21:34	Ma ~ Nice
Allahabadi?
@15038405146 can you call Indigo locally and inform them for family reasons we need to end our trip @delhi and lo

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Papa	Tuesday November 16, 2021 22:58	Papa ~ All the best!!
WhatsApp : CHAT_Papa	Tuesday November 16, 2021 23:00	Krishna Saxena ~ Thank you
I received an reply from dvc yesterday saying to upgrade to 2.8
Is your solution simpler?
WhatsApp : CHAT_Papa	Wednesday November 17, 2021 19:53	Papa ~ Yes it is... We can discuss after ur exams
WhatsApp : CHAT_Papa	Thursday November 18, 2021 17:01	Krishna Saxena ~ I am ready to discuss whenever you are
WhatsApp : CHAT_Papa	Thursday November 18, 2021 17:02	Papa ~ Cool, give me a few minutes...
Back finally
WhatsApp : CHAT_Papa	Thursday November 18, 2021 19:49	Krishna Saxena ~ Alright
WhatsApp : CHAT_Papa	Thursday November 18, 2021 19:49	Papa ~ Shall we zoom?
WhatsApp : CHAT_Papa	Thursday November 18, 2021 19:50	Krishna Saxena ~ Sure
I can call whenever you are free
WhatsApp : CHAT_Papa	Friday November 19, 2021 17:00	Papa ~ Me freeee
WhatsApp : CHAT_Papa	Friday November 19, 2021 17:01	Krishna Saxena ~ Ok
I'm back

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Thursday September 14, 2023 12:28	Papa ~ I didn't get any voicemail
WhatsApp : CHAT_Saxena Family	Thursday September 14, 2023 12:28	Aditi ~ oh weird hold on
WhatsApp : CHAT_Saxena Family	Thursday September 14, 2023 12:45	Papa ~ Can you come out
WhatsApp : CHAT_Saxena Family	Thursday September 14, 2023 12:46	Aditi ~ are you in the parking lot?
WhatsApp : CHAT_Saxena Family	Thursday September 14, 2023 12:46	Papa ~ Yes
WhatsApp : CHAT_Saxena Family	Thursday September 14, 2023 12:46	Aditi ~ On my way!
WhatsApp : CHAT_Saxena Family	Thursday September 14, 2023 12:48	Papa ~ Cool, since it is lunch hour, I got stopped by the attendant in the parking lot
WhatsApp : CHAT_Saxena Family	Friday September 15, 2023 15:21	Ma ~ Geetha and her parents wanted to come meet us  on Sunday for lunch
<Media omitted>
If you see sad looking, angry, miserable or expressionless people you know how their thoughts are. Read good thinks, enjoy, leave space for thin

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Friday September 20, 2024 14:16	Ma ~ Will come in 20 minutes
WhatsApp : CHAT_Saxena Family	Friday September 20, 2024 14:17	Aditi ~ ok 😮
WhatsApp : CHAT_Saxena Family	Friday September 20, 2024 14:17	Papa ~ Coming now
WhatsApp : CHAT_Saxena Family	Friday September 20, 2024 22:59	Ma ~ https://www.instagram.com/reel/C83oXH4Ibzt/?igsh=NjFhOGMzYTE3ZQ==
@19717270030 https://jobs.apple.com/en-gb/search?sort=relevance&key=internship&location=united-states-USA
Look for internship positions in area of your interest and give me a few jobids
WhatsApp : CHAT_Saxena Family	Monday September 23, 2024 08:18	Papa ~ Hi K bhai, please put in a cover letter saying that you are looking for internships between completing your bachelors and applying for Masters…. This is done by many students and companies do like to engage with students
WhatsApp : CHAT_Saxena Family	Monday September 23, 2024 08:19	Ma ~ <Media omitted>
Add more keywords
WhatsApp : CHAT_Saxena

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Tuesday August 27, 2019 17:12	Papa ~ Okie dokie... just checking. See you in a few.
WhatsApp : CHAT_Saxena Family	Tuesday August 27, 2019 17:31	Krishna Saxena ~ Can someone please open the gate
WhatsApp : CHAT_Saxena Family	Tuesday September 03, 2019 17:10	Papa ~ Okie dokie
Boarded?
Cool
Aditi where r u?
<Media omitted>
Adding Aditi
Yes  Thanks,  Coach Jensen and Coach Sanfor
Aditi is already added. You don't see her?
WhatsApp : CHAT_Saxena Family	Tuesday September 24, 2019 17:02	Aditi ~ Hi
Are you guys coming home?
WhatsApp : CHAT_Saxena Family	Thursday September 26, 2019 16:03	Papa ~ No power!
It's back.
Power failure again! !
WhatsApp : CHAT_Saxena Family	Friday September 27, 2019 15:03	Aditi ~ I have a after school activity I'll come home at 5:15
WhatsApp : CHAT_Saxena Family	Friday September 27, 2019 20:30	Papa ~ Cool... see you in about 30 min
Pest control guy is here.
WhatsApp : CHAT_Saxena Family	Thursday October 03, 2019 15:0

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Thursday April 14, 2022 12:38	Papa ~ In a meeting
WhatsApp : CHAT_Saxena Family	Thursday April 14, 2022 12:40	Ma ~ I will pick aditi
<Media omitted>
<Media omitted>
<Media omitted>
https://www.mercurynews.com/2022/04/17/girls-high-school-sweeps-synopsys-science-competition/?utm_source=facebook.com&utm_campaign=socialflow&utm_content=fb-willowglenresident&utm_medium=social&fbclid=IwAR3OrKdZNSo-qJPdXtHrQsFtbCdMIvlsxCd3amaRqEWbNb2Icmg43g1F5S8
Krishna and Aditi the insurance information will need to be updated
WhatsApp : CHAT_Saxena Family	Tuesday April 19, 2022 14:59	Papa ~ Dentist appointment tomorrow for Aditi @ 2 pm
WhatsApp : CHAT_Saxena Family	Tuesday April 19, 2022 15:01	Ma ~ Oh
Let me see if I can move things on my calendar. Is this dr. Sakrani?
WhatsApp : CHAT_Saxena Family	Tuesday April 19, 2022 15:31	Papa ~ Yes, I can take her
WhatsApp : CHAT_Saxena Family	Tuesday April 26, 2022 00:22	Ma ~ Happy birthday Krishna, lots of love a

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Papa	Thursday November 25, 2021 00:08	Papa ~ Ok cool
You awake
WhatsApp : CHAT_Papa	Thursday November 25, 2021 09:26	Krishna Saxena ~ Just brushed
WhatsApp : CHAT_Papa	Thursday November 25, 2021 21:39	Papa ~ Let's plan to start at around 8 am.
WhatsApp : CHAT_Papa	Thursday November 25, 2021 21:40	Krishna Saxena ~ Ok
WhatsApp : CHAT_Papa	Friday November 26, 2021 07:58	Papa ~ Ready....
Hellooo
WhatsApp : CHAT_Papa	Sunday November 28, 2021 21:32	Krishna Saxena ~ Hello
WhatsApp : CHAT_Papa	Sunday November 28, 2021 21:33	Papa ~ Back in San Jose... How was ur day?
WhatsApp : CHAT_Papa	Sunday November 28, 2021 21:33	Krishna Saxena ~ Good. Finished up all my projects for the next few days
WhatsApp : CHAT_Papa	Sunday November 28, 2021 21:34	Papa ~ Let's meet tomorrow after your homework etc.
Also first please prepare to apply for internships. Go to the department that handles this for undergrads.
Tell them about your specific CS related coursework and FTC/F

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='yes' 



WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 12:24	Aditi ~ im on the team 🙌
WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 13:20	Ma ~ Yay, congrats
https://www.facebook.com/groups/carmelcalifornia/permalink/7408603919169784/
<Media omitted>
WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 13:34	Aditi ~ 🙂
WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 14:19	Ma ~ <Media omitted>
Will be shipped to store
@15036908999 5:45 pick up from school?
WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 17:16	Aditi ~ dont know we are at a game
yay
WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 17:16	Ma ~ Message when the bus heads to school
WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 17:17	Papa ~ In the field near school?
Or somewhere else
WhatsApp : CHAT_Saxena Family	Monday February 26, 2024 17:18	Aditi ~ somewhere else
17 minutes til we reach school
is anyone here
WhatsApp : CHAT_Saxena Family	Tuesday February 27, 2024 

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Papa	Sunday May 15, 2022 14:25	Papa ~ Back now
WhatsApp : CHAT_Papa	Sunday May 15, 2022 14:26	Krishna Saxena ~ Ok
I am back
WhatsApp : CHAT_Papa	Sunday May 15, 2022 19:40	Papa ~ 2 minutes
WhatsApp : CHAT_Papa	Tuesday May 17, 2022 15:03	Krishna Saxena ~ Can you call today?
WhatsApp : CHAT_Papa	Tuesday May 17, 2022 15:22	Papa ~ Ok, give me 5 min
WhatsApp : CHAT_Papa	Tuesday May 17, 2022 19:40	Krishna Saxena ~ I'm back
WhatsApp : CHAT_Papa	Tuesday May 17, 2022 19:58	Papa ~ Sorry got delayed
WhatsApp : CHAT_Papa	Tuesday May 17, 2022 19:58	Krishna Saxena ~ No worries
Can you call today?
WhatsApp : CHAT_Papa	Thursday May 19, 2022 15:34	Papa ~ Back home
A few minutes
WhatsApp : CHAT_Papa	Thursday May 19, 2022 15:35	Krishna Saxena ~ Ok
WhatsApp : CHAT_Papa	Thursday May 19, 2022 15:59	Papa ~ Back online
WhatsApp : CHAT_Papa	Thursday May 19, 2022 19:43	Krishna Saxena ~ Long delay but I am back
WhatsApp : CHAT_Papa	Thursday May 19, 2022 20:10	Papa ~ Me too
Wh

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Wednesday September 27, 2023 08:11	Papa ~ That's a good idea...
WhatsApp : CHAT_Saxena Family	Wednesday September 27, 2023 08:31	Ma ~ That way we can get one flight from sea to sjc and back and another one between dallas and sjc
WhatsApp : CHAT_Saxena Family	Wednesday September 27, 2023 08:34	Aditi ~ could anyone pick me up during lunch i cant swallow anything
WhatsApp : CHAT_Saxena Family	Wednesday September 27, 2023 08:34	Papa ~ Ok I will, what time?
WhatsApp : CHAT_Saxena Family	Wednesday September 27, 2023 08:40	Ma ~ <Media omitted>
Get a mask aditi
@15038405146 trying ti call you
<Media omitted>
<Media omitted>
@19717270030 in the evening book sea <-> sjc
WhatsApp : CHAT_Saxena Family	Wednesday September 27, 2023 09:14	Aditi ~ i was in class
ill have the school call you
WhatsApp : CHAT_Saxena Family	Wednesday September 27, 2023 12:17	Ma ~ @15038405146 Aditi needs to be picked up
School just called
WhatsApp : CHAT_Saxena Family	Th

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Thursday March 28, 2024 14:59	Papa ~ Call Mom and check
WhatsApp : CHAT_Saxena Family	Thursday March 28, 2024 14:59	Ma ~ Sorry later, tryimg to find a break to get out
WhatsApp : CHAT_Saxena Family	Thursday March 28, 2024 15:00	Aditi ~ around how long
WhatsApp : CHAT_Saxena Family	Thursday March 28, 2024 15:00	Ma ~ My first break is 4:45 to leave from here
Does that work
Please reply asap
WhatsApp : CHAT_Saxena Family	Thursday March 28, 2024 15:12	Aditi ~ yeah
WhatsApp : CHAT_Saxena Family	Thursday March 28, 2024 22:24	Ma ~ https://www.instagram.com/reel/C5BJRcroAzA/?igsh=NjFhOGMzYTE3ZQ==
Interesting https://www.instagram.com/reel/C5BLLhfJ9YO/?igsh=NjFhOGMzYTE3ZQ==
<Media omitted>
WhatsApp : CHAT_Saxena Family	Saturday March 30, 2024 13:22	Papa ~ https://www.newyorker.com/magazine/2024/04/01/what-have-fourteen-years-of-conservative-rule-done-to-britain  A bit long, but the problem of bad government has been disastrous in Britain. Ther

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='yes' 



WhatsApp : CHAT_Papa	Saturday October 08, 2022 23:04	Papa ~ Back on zoom
Hope you rested...
Your passport is here.
<Media omitted>
WhatsApp : CHAT_Papa	Sunday October 09, 2022 19:02	Krishna Saxena ~ Cool
WhatsApp : CHAT_Papa	Tuesday October 11, 2022 21:14	Papa ~ K betu, can u send me the copy of the selenium notebook.
Thank u
Would u have some time now?
Just a few minutes
WhatsApp : CHAT_Papa	Saturday October 15, 2022 21:26	Krishna Saxena ~ Sure
WhatsApp : CHAT_Papa	Saturday October 15, 2022 21:26	Papa ~ Give me 2 min
On zoom
My id
Good morning Krishna let me know whenever you have some time...
Hackaday: 3-DOF Robot Arm Wrist Without The Motor Weight. https://hackaday.com/2022/11/03/3-dof-robot-arm-wrist-without-the-motor-weight/
Hi Krishna, just saw your missed calls... Sorry
How are you doing? Busy with homework
WhatsApp : CHAT_Papa	Sunday November 06, 2022 18:02	Krishna Saxena ~ Did most of my work for this week today
WhatsApp : CHAT_Papa	Sunday November 06, 2

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='yes' 



WhatsApp : CHAT_Papa	Saturday May 14, 2022 13:59	Papa ~ Just saw ur msg
WhatsApp : CHAT_Papa	Saturday May 14, 2022 13:59	Krishna Saxena ~ no worries
WhatsApp : CHAT_Papa	Saturday May 14, 2022 14:00	Papa ~ On zoom
WhatsApp : CHAT_Papa	Saturday May 14, 2022 18:00	Krishna Saxena ~ Back online
WhatsApp : CHAT_Papa	Saturday May 14, 2022 18:07	Papa ~ 2 minutes
WhatsApp : CHAT_Papa	Saturday May 14, 2022 18:07	Krishna Saxena ~ ok
i'm back
WhatsApp : CHAT_Papa	Saturday May 14, 2022 19:53	Papa ~ Me now
WhatsApp : CHAT_Papa	Saturday May 14, 2022 23:39	Krishna Saxena ~ Lost internet connection
WhatsApp : CHAT_Papa	Saturday May 14, 2022 23:39	Papa ~ Good night let's pick it up tomorrow
Take care
WhatsApp : CHAT_Papa	Saturday May 14, 2022 23:40	Krishna Saxena ~ Ok. Good night
Can you call now?
WhatsApp : CHAT_Papa	Sunday May 15, 2022 14:25	Papa ~ Back now
WhatsApp : CHAT_Papa	Sunday May 15, 2022 14:26	Krishna Saxena ~ Ok
I am back
WhatsApp : CHAT_Papa	Sunday May 15, 2022 19:40

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 



WhatsApp : CHAT_Saxena Family	Monday July 31, 2023 15:04	Papa ~ Outside
WhatsApp : CHAT_Saxena Family	Monday July 31, 2023 15:04	Aditi ~ cool
Did we ever set an appointment
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:34	Ma ~ Just got one for aug 10th at 10:30am with dr. Geetha
Cupertino
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:35	Papa ~ Aug 10 is dentist appointment?
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:35	Ma ~ What time?
Its in Cupertino too
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:35	Krishna Saxena ~ Dentist appts at 10am, 11am
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:36	Ma ~ Aditi could go the 11am one ?
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:53	Aditi ~ I thought 9th
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:54	Ma ~ Oh good
WhatsApp : CHAT_Saxena Family	Wednesday August 02, 2023 13:54	Krishna Saxena ~ 9th is correct
WhatsApp : CHAT_Sax

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


binary_score='no' 





### Generate Result

In [21]:
from langchain_core.output_parsers import StrOutputParser

# Prompt
system = """You are an assistant for question-answering tasks. Answer the question based upon your knowledge.
Use three-to-five sentences maximum and keep the answer concise."""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved documents: \n\n <docs>{documents}</docs> \n\n User question: <question>{question}</question>"),
    ]
)

# Post-processing
def format_docs(docs):
    return "\n".join(f"<doc{i+1}>:\nSource:{doc['MSG_ID']}\nContent:{doc['FULL_CONTEXT']}\n</doc{i+1}>\n" for i, doc in enumerate(docs))

# Chain
rag_chain = prompt | big_llm | StrOutputParser()

# Run
generation = rag_chain.invoke({"documents":format_docs(docs_to_use), "question": question})
print(generation)

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


Based on the provided WhatsApp conversation logs, I was unable to find any explicit "ToDos" mentioned by Papa. However, I can try to infer some tasks or reminders that Papa might have given to Krishna based on their conversations.

If you'd like, I can attempt to extract some potential ToDos or reminders from the conversation logs. Please let me know!


### Check for Hallucinations

In [None]:
# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in 'generation' answer."""

    binary_score: str = Field(
        ...,
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

# LLM with function call
structured_llm_grader = big_llm.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
    Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n <facts>{documents}</facts> \n\n LLM generation: <generation>{generation}</generation>"),
    ]
)

hallucination_grader = hallucination_prompt | structured_llm_grader

response = hallucination_grader.invoke({"documents": format_docs(docs_to_use), "generation": generation})
print(response)

### Highlight Used Docs

In [None]:
from typing import List
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate

# Data model
class HighlightDocuments(BaseModel):
    """Return the specific part of a document used for answering the question."""

    Source: List[str] = Field(
        ...,
        description="List of alphanumeric ID of docs used to answers the question"
    )
    Content: List[str] = Field(
        ...,
        description="List of complete conversation contexts that answers the question"
    )
    Segment: List[str] = Field(
        ...,
        description="List of pointed, direct segments from used documents that answer the question"
    )

# parser
parser = PydanticOutputParser(pydantic_object=HighlightDocuments)

# Prompt
system = """You are an advanced assistant for document search and retrieval. You are provided with the following:
1. A question.
2. A generated answer based on the question.
3. A set of documents that were referenced in generating the answer.

Your task is to identify and extract the exact inline segments from the provided documents that directly correspond to the content used to
generate the given answer. The extracted segments must be verbatim snippets from the documents, ensuring a word-for-word match with the text
in the provided documents.

Ensure that:
- (Important) Each segment is an exact match to a part of the document and is fully contained within the document text.
- The relevance of each segment to the generated answer is clear and directly supports the answer provided.
- (Important) If you didn't used the specific document don't mention it.

Used documents: <docs>{documents}</docs> \n\n User question: <question>{question}</question> \n\n Generated answer: <answer>{generation}</answer>

<format_instruction>
{format_instructions}
</format_instruction>
"""


prompt = PromptTemplate(
    template= system,
    input_variables=["documents", "question", "generation"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Chain
doc_lookup = prompt | big_llm | parser

# Run
lookup_response = doc_lookup.invoke({"documents":format_docs(docs_to_use), "question": question, "generation": generation})

In [None]:
for id, source, segment in zip(lookup_response.Source, lookup_response.Content, lookup_response.Segment):
    print(f"ID: {id}\nSource: {source}\nText Segment: {segment}\n")