# Packages

In [1]:

from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain.prompts import MessagesPlaceholder, PromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate, AIMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.prompts import FewShotChatMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.document_loaders import BaseLoader
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders.directory import DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.runnables import RunnableBranch
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.retrievers import TimeWeightedVectorStoreRetriever
import datetime
from langchain.utils import mock_now
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore import InMemoryDocstore
from langchain_core.documents import Document
from langchain.agents import create_openai_functions_agent, Tool, AgentExecutor
from langchain import hub
from langchain.chains.summarize import load_summarize_chain
import pandas as pd
import re
import random
import numpy as np
import uuid

import os
from pinecone import Pinecone, ServerlessSpec


from langchain_community.callbacks import get_openai_callback

from langchain_pinecone import PineconeVectorStore



# System global

In [2]:
import dotenv

# Load variables from a local .env file into the process environment
# (PINECONE_API_KEY is read from os.environ further down).
dotenv.load_dotenv()

True

In [3]:
# Key under which this conversation's message histories are stored.
session_id = '0'
# CSV files holding the user's journal, form answers and chat history.
journal_csv_path = "georgette_2/journal.csv"
formulaire_csv_path = "georgette_2/formulaire.csv"
chat_history_csv_path = "georgette_2/history.csv"

In [4]:
# Today's date as text, in the same MM/DD/YYYY layout that is parsed out of the CSV rows.
def get_today():
    """Return the current local date formatted as ``MM/DD/YYYY``."""
    now = datetime.datetime.now()
    return now.strftime('%m/%d/%Y')


date_today = get_today()

# LLM model

In [5]:
# Chat model shared by every chain in this notebook; temperature is set to 0.0.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

# Load CSV documents

In [6]:
def add_date_to_documents(docs):
    """Stamp each document with the first ``MM/DD/YYYY`` date found in its text.

    The first match of ``\\d{2}/\\d{2}/\\d{4}`` in ``doc.page_content`` is parsed
    with ``%m/%d/%Y`` and stored as a ``datetime`` under
    ``doc.metadata["created_at"]``. Documents without a match, or whose match
    is not a valid calendar date, are left untouched.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict). The documents are modified in place.

    Returns:
        The same ``docs`` object that was passed in.
    """
    for doc in docs:
        match = re.search(r'\d{2}/\d{2}/\d{4}', doc.page_content)
        if match is None:
            # No date in this row: nothing to record.
            continue
        try:
            created_at = datetime.datetime.strptime(match.group(), '%m/%d/%Y')
        except ValueError:
            # Looks like a date but is not one (e.g. month 13): skip it.
            continue
        doc.metadata["created_at"] = created_at

    return docs

In [7]:
# Load the three CSV files through CSVLoader and stamp every document with
# the date found in its text.
formulaire_loader = DirectoryLoader(
    'georgette_2/', glob="formulaire.csv", loader_cls=CSVLoader
)
formulaire_docs = add_date_to_documents(formulaire_loader.load())

journal_loader = DirectoryLoader(
    'georgette_2/', glob="journal.csv", loader_cls=CSVLoader
)
journal_docs = add_date_to_documents(journal_loader.load())

history_loader = DirectoryLoader(
    'georgette_2/', glob="history.csv", loader_cls=CSVLoader
)
history_docs = add_date_to_documents(history_loader.load())

# User Profile Summary

In [8]:
# Prompt for the summarizer: {context} receives the documents to summarize and
# the "messages" placeholder carries the caller's instruction for that summary.
summary_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are summarizer assitant focusing on job and career actions and thoughts.
            
            You summarizes the user's information, based on the below context and career concerns. 

            Do NOT exceed 300 words.

            If context is empty, return an empty string.

            <context>
            {context}
            </context>
            """
        ), 
        MessagesPlaceholder(variable_name="messages"),
    ]
)
# Chain that fills the documents into {context} and sends the prompt to the LLM.
summarizer = create_stuff_documents_chain(llm, summary_prompt)


# Summary of the form answers.
_formulaire_request = HumanMessage(
    content="Make a summary text of the user's answers to the formulaire. Starts with name and date of birth. do not exeed 300 words."
)
user_summary = summarizer.invoke(
    {"context": formulaire_docs, "messages": [_formulaire_request]}
)

# Summary of the journal observations.
_journal_request = HumanMessage(
    content="Make a summary text of your observations about the user. do not exeed 300 words."
)
journal_summary = summarizer.invoke(
    {"context": journal_docs, "messages": [_journal_request]}
)

In [9]:
# User profiler: merges the form-based user summary with the journal summary
# into one updated profile text.
user_profiler_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are an assistant that updates the user summary based on journal summary.

            Journal summary represents the summary of the previous conversations.

            It always starts with the user's name and date of birth.

            Journal focuses on actions and thoughts relative to job and career personal development.

            Modify the user summary accroding to the journal information to write a new user summary.

            The new user summary must focus on job and career actions and thoughts.

            You summarizes the user's information, based on the below context and career concerns.
        
            Do not exceed 300 words.

            User Summary:
            {user_summary}

            Journal Summary:
            {journal_summary}

            """,
        ),
    ]
)
# Prompt piped straight into the chat model (no output parser).
user_profiler_chain = user_profiler_prompt | llm


# .content takes the text out of the message returned by the model.
user_profile = user_profiler_chain.invoke(
    {
        "user_summary": user_summary, 
        "journal_summary": journal_summary
    }
    ).content


In [10]:
print(journal_summary)

The user is currently considering a career transition to freelance technical writing, aiming to utilize their technical skills and creativity. They are focused on building a portfolio to showcase their work and have requested help in crafting an introduction and LinkedIn post to promote their services. Despite acknowledging challenges in public speaking, conflict resolution, and handling difficult coworkers, the user is actively seeking advice to improve these skills for success in freelancing. Their determination to pursue a career aligned with their strengths and interests is evident through seeking guidance and addressing weaknesses while highlighting strengths. This proactive and forward-thinking approach demonstrates the user's commitment to personal growth and professional fulfillment, showcasing resilience and determination in embarking on a new career path.


In [11]:
print(user_summary)

Georgette, born on 1/28/1976, expressed a desire to improve in the domain of professional orientation and career transition. She values financial stability and high salary in a job, with a primary motivation being the potential to earn more money. Georgette prefers working independently in a traditional office setting, with a 9-5 work schedule. She struggles with public speaking, conflict resolution, and handling difficult coworkers. Georgette enjoys relaxing at home, watching movies, and spending time alone or with close friends. She prefers to work with machines, values independence, and personal freedom. Georgette is not open to learning new skills or taking on new challenges in her career and prefers a structured, hierarchical workplace. Her hobbies are related to specific skills or activities, and she is a creative thinker motivated by financial stability and security.


In [12]:
print(user_profile)

Georgette, born on 1/28/1976, is currently exploring a career transition to freelance technical writing to leverage her technical skills and creativity. She is dedicated to building a portfolio to showcase her work and has sought assistance in crafting an introduction and LinkedIn post to promote her services. Despite facing challenges in public speaking, conflict resolution, and handling difficult coworkers, Georgette is actively seeking guidance to enhance these skills for success in freelancing. Her determination to pursue a career aligned with her strengths and interests is evident through her proactive approach in addressing weaknesses while highlighting strengths. Georgette's commitment to personal growth and professional fulfillment showcases resilience and determination in embarking on a new career path. She values financial stability and high salary, with a primary motivation to earn more money, and prefers working independently in a traditional office setting with a 9-5 work 

# Build Time Weighted Vector Store

## FAISS

In [13]:
# FAISS-backed store for the TimeWeightedVectorStoreRetriever: a flat L2 index
# created with dimension 1536 and an empty in-memory docstore.
faiss_index = faiss.IndexFlatL2(1536)
faiss_vectorstore = FAISS(
    OpenAIEmbeddings(),
    faiss_index,
    InMemoryDocstore({}),
    {},
)
faiss_time_retriever = TimeWeightedVectorStoreRetriever(
    vectorstore=faiss_vectorstore, decay_rate=1e-5, k=5
)

# Add the journal entries one at a time, each re-wrapped in a new Document.
for journal_doc in journal_docs:
    entry = Document(
        page_content=journal_doc.page_content, metadata=journal_doc.metadata
    )
    faiss_time_retriever.add_documents([entry])

In [None]:
faiss_time_retriever.invoke('AI')

## Pinecone

In [14]:
import datetime
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple

from langchain_core.callbacks import (
    AsyncCallbackManagerForRetrieverRun,
    CallbackManagerForRetrieverRun,
)
from langchain_core.documents import Document
from langchain_core.pydantic_v1 import Field
from langchain_core.retrievers import BaseRetriever
from langchain_core.vectorstores import VectorStore


def _get_hours_passed(time: datetime.datetime, ref_time: datetime.datetime) -> float:
    """Return how many hours ``time`` lies after ``ref_time`` (negative if earlier)."""
    elapsed = time - ref_time
    return elapsed.total_seconds() / 3600


class Pinecone_Modified_TimeWeightedVectorStoreRetriever(BaseRetriever):
    """Retriever that combines embedding similarity with
    recency in retrieving values.

    Documents are kept twice: in ``vectorstore`` (for similarity search) and
    in ``memory_stream`` (the in-process list that holds the authoritative
    copy and its ``last_accessed_at`` timestamp). The two are linked by the
    ``buffer_idx`` metadata entry, which is the document's position in
    ``memory_stream``.

    Compared with a plain time-weighted retriever, ``buffer_idx`` is always
    coerced to ``int`` before it is used as a list index (presumably because
    the backing store may hand numeric metadata back as floats - confirm
    against the store in use), and hits whose ``buffer_idx`` does not exist in
    the current ``memory_stream`` are ignored instead of raising
    ``IndexError``. The latter happens when the index outlives the process
    that filled ``memory_stream``.
    """

    vectorstore: VectorStore
    """The vectorstore to store documents and determine salience."""

    search_kwargs: dict = Field(default_factory=lambda: dict(k=100))
    """Keyword arguments to pass to the vectorstore similarity search."""

    # TODO: abstract as a queue
    memory_stream: List[Document] = Field(default_factory=list)
    """The memory_stream of documents to search through."""

    decay_rate: float = Field(default=0.01)
    """The exponential decay factor used as (1.0-decay_rate)**(hrs_passed)."""

    k: int = 4
    """The maximum number of documents to retrieve in a given call."""

    other_score_keys: List[str] = []
    """Other keys in the metadata to factor into the score, e.g. 'importance'."""

    default_salience: Optional[float] = None
    """The salience to assign memories not retrieved from the vector store.

    None assigns no salience to documents not fetched from the vector store.
    """

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    def _document_get_date(self, field: str, document: Document) -> datetime.datetime:
        """Return the value of the date field of a document.

        A float value is interpreted as a POSIX timestamp; any other value is
        returned as stored. When the field is missing, the current time is
        returned.
        """
        if field in document.metadata:
            if isinstance(document.metadata[field], float):
                return datetime.datetime.fromtimestamp(document.metadata[field])
            return document.metadata[field]
        return datetime.datetime.now()

    def _get_combined_score(
        self,
        document: Document,
        vector_relevance: Optional[float],
        current_time: datetime.datetime,
    ) -> float:
        """Return the combined score for a document.

        The score is the recency term ``(1 - decay_rate) ** hours_passed``
        (hours since ``last_accessed_at``), plus every metadata value listed
        in ``other_score_keys``, plus ``vector_relevance`` when it is given.
        """
        hours_passed = _get_hours_passed(
            current_time,
            self._document_get_date("last_accessed_at", document),
        )
        score = (1.0 - self.decay_rate) ** hours_passed
        for key in self.other_score_keys:
            if key in document.metadata:
                score += document.metadata[key]
        if vector_relevance is not None:
            score += vector_relevance
        return score

    def _resolve_buffered_docs(
        self, docs_and_scores: List[Tuple[Document, float]]
    ) -> Dict[int, Tuple[Document, float]]:
        """Map vector-store hits to their ``memory_stream`` documents, keyed by index."""
        results: Dict[int, Tuple[Document, float]] = {}
        for fetched_doc, relevance in docs_and_scores:
            if "buffer_idx" not in fetched_doc.metadata:
                continue
            # modification add int to buffer_idx
            buffer_idx = int(fetched_doc.metadata["buffer_idx"])
            if not 0 <= buffer_idx < len(self.memory_stream):
                # The store knows a document this process never buffered
                # (e.g. left over from an earlier run): it cannot be resolved.
                continue
            results[buffer_idx] = (self.memory_stream[buffer_idx], relevance)
        return results

    def _most_recent_docs(self) -> Dict[int, Tuple[Document, Optional[float]]]:
        """Return the last ``k`` buffered documents with the default salience."""
        return {
            int(doc.metadata["buffer_idx"]): (doc, self.default_salience)
            for doc in self.memory_stream[-self.k :]
        }

    def _buffer_documents(
        self, documents: List[Document], current_time: Optional[datetime.datetime]
    ) -> List[Document]:
        """Copy ``documents``, stamp their metadata and append them to ``memory_stream``.

        ``last_accessed_at`` and ``created_at`` are only set when absent;
        ``buffer_idx`` is always overwritten with the position the copy takes
        in ``memory_stream``. Returns the stamped copies.
        """
        if current_time is None:
            current_time = datetime.datetime.now()
        # Avoid mutating input documents
        dup_docs = [deepcopy(d) for d in documents]
        for buffer_idx, doc in enumerate(dup_docs, start=len(self.memory_stream)):
            doc.metadata.setdefault("last_accessed_at", current_time)
            doc.metadata.setdefault("created_at", current_time)
            # modification add int to buffer_idx
            doc.metadata["buffer_idx"] = int(buffer_idx)
        self.memory_stream.extend(dup_docs)
        return dup_docs

    def get_salient_docs(self, query: str) -> Dict[int, Tuple[Document, float]]:
        """Return documents that are salient to the query."""
        docs_and_scores: List[Tuple[Document, float]]
        docs_and_scores = self.vectorstore.similarity_search_with_relevance_scores(
            query, **self.search_kwargs
        )
        return self._resolve_buffered_docs(docs_and_scores)

    async def aget_salient_docs(self, query: str) -> Dict[int, Tuple[Document, float]]:
        """Return documents that are salient to the query."""
        docs_and_scores: List[Tuple[Document, float]]
        docs_and_scores = (
            await self.vectorstore.asimilarity_search_with_relevance_scores(
                query, **self.search_kwargs
            )
        )
        return self._resolve_buffered_docs(docs_and_scores)

    def _get_rescored_docs(
        self, docs_and_scores: Dict[Any, Tuple[Document, Optional[float]]]
    ) -> List[Document]:
        """Return the ``k`` best documents by combined score, best first.

        Every returned document gets its ``last_accessed_at`` set to now.
        """
        current_time = datetime.datetime.now()
        rescored_docs = [
            (doc, self._get_combined_score(doc, relevance, current_time))
            for doc, relevance in docs_and_scores.values()
        ]
        rescored_docs.sort(key=lambda x: x[1], reverse=True)
        result = []
        # Ensure frequently accessed memories aren't forgotten
        for doc, _ in rescored_docs[: self.k]:
            # TODO: Update vector store doc once `update` method is exposed.
            buffered_doc = self.memory_stream[int(doc.metadata["buffer_idx"])]
            buffered_doc.metadata["last_accessed_at"] = current_time
            result.append(buffered_doc)
        return result

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return the top documents among the most recent and the salient ones."""
        docs_and_scores = self._most_recent_docs()
        # If a doc is considered salient, update the salience score
        docs_and_scores.update(self.get_salient_docs(query))
        return self._get_rescored_docs(docs_and_scores)

    async def _aget_relevant_documents(
        self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Async counterpart of ``_get_relevant_documents``."""
        docs_and_scores = self._most_recent_docs()
        # If a doc is considered salient, update the salience score
        docs_and_scores.update(await self.aget_salient_docs(query))
        return self._get_rescored_docs(docs_and_scores)

    def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
        """Add documents to vectorstore.

        ``current_time`` may be passed as a keyword argument to control the
        default timestamps; all keyword arguments are forwarded to the
        vector store unchanged.
        """
        dup_docs = self._buffer_documents(documents, kwargs.get("current_time"))
        return self.vectorstore.add_documents(dup_docs, **kwargs)

    async def aadd_documents(
        self, documents: List[Document], **kwargs: Any
    ) -> List[str]:
        """Add documents to vectorstore (async counterpart of ``add_documents``)."""
        dup_docs = self._buffer_documents(documents, kwargs.get("current_time"))
        return await self.vectorstore.aadd_documents(dup_docs, **kwargs)


In [15]:
# Pinecone client, using the API key found in the environment.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

index_name = "matthiasdb"

# Handle on the index named above.
index = pc.Index(index_name)


# NOTE(review): `dimension` is not used in the cells visible here.
dimension = 1536

embedder = OpenAIEmbeddings()

In [34]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 2}},
 'total_vector_count': 2}

In [17]:
# WARNING: destructive - asks Pinecone to delete all vectors (delete_all=True).
# The journal documents are inserted again when the retriever is built below.
index.delete(delete_all=True)

{}

In [18]:
# Pinecone-backed vector store wrapped in the modified time-weighted retriever.
pinecone_vectorstore = PineconeVectorStore(index_name=index_name, embedding=embedder)
pinecone_time_retriever = Pinecone_Modified_TimeWeightedVectorStoreRetriever(
    vectorstore=pinecone_vectorstore, decay_rate=1e-5, k=5
)

# Add the journal entries one at a time, each re-wrapped in a new Document.
for journal_doc in journal_docs:
    entry = Document(
        page_content=journal_doc.page_content, metadata=journal_doc.metadata
    )
    pinecone_time_retriever.add_documents([entry])

# Build the RAG Conversational Chatbot

## Version 1 
https://python.langchain.com/docs/use_cases/question_answering/chat_history/

In [None]:
# Prompt that rewrites the latest user question into a standalone question:
# system instructions, then the "chat_history" messages, then the new {input}.
reformulate_question_from_chat_history_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", 
        """
        You are an assistant that is asked to reformulate a question based on the chat history and the latest user question.

        Formulate a standalone question that can be understood without the chat history. 
        
        Do NOT answer the question, just reformulate it if needed and otherwise return it as is.
        
        """
        ),
        
        MessagesPlaceholder("chat_history"),

        ("human", "{input}"),
    ]
)


In [None]:
# Coach chatbot prompt (Version 1).
# Template variables: {date_today}, {user_summary}, {context}, {input},
# plus the "chat_history" messages placeholder.
coach_chatbot_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",
        """
        You are an assistant for job and career search.

        The user speaks to and you speak to him.

        The conversation is like normale frienship conversation.

        Your wisdom should guide the user clearly and confidently, lighting the way to a fulfilling career journey.

        However, you are capable of jugment on user input related to career search and his profile.

        If you think the user is not in the right direction, you can tell him.

        The provided chat history summary includes facts about the user you are speaking with.

        this is the date of today conversation: 
        {date_today}

        this is the user summary to refer to: 
        {user_summary}

        this is the context to refer to:
        {context}

        Always answer with less than 300 words.

        """
        ),

        MessagesPlaceholder("chat_history"),

        ("human", "{input}"),
    ]
)

In [None]:
# construct history-aware retriever
# `time_retriever` is not defined anywhere in this notebook; the Pinecone
# time-weighted retriever built above is used here, as in Version 2.
history_aware_retriever = create_history_aware_retriever(
    llm,
    pinecone_time_retriever,
    reformulate_question_from_chat_history_prompt,
)

# build chat chain: the retrieved documents are filled into {context}
question_answer_chain = create_stuff_documents_chain(llm, coach_chatbot_prompt)

# build retrieval chain: retrieve first, then answer
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [None]:
# Statefully manage chat history: one ChatMessageHistory per session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history stored for ``session_id``, creating an empty one on first use."""
    try:
        return store[session_id]
    except KeyError:
        history = store[session_id] = ChatMessageHistory()
        return history


# Wrap the RAG chain with per-session message history, looked up through
# get_session_history. The keys name where the question ("input"), the past
# messages ("chat_history") and the reply ("answer") live in the chain's
# input/output.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)


def func(session_id, input):
    """Run one user message through the Version 1 conversational RAG chain.

    Args:
        session_id: Session whose message history is used.
        input: The user's message.

    Returns:
        ``(result, cb)``: the chain output and the OpenAI callback object
        that was active during the call.
    """
    payload = {
        'date_today': date_today,
        'user_summary': user_summary,
        "input": input,
    }
    run_config = {"configurable": {"session_id": session_id}}
    with get_openai_callback() as cb:
        result = conversational_rag_chain.invoke(payload, config=run_config)
    return result, cb


In [None]:
r,c = func(session_id, "what we spoke about last time?")

In [None]:
r,c = func(session_id, "you can help to make a portfolio for me?")

In [None]:
r,c = func(session_id, "What are the keywords I should use?")

In [None]:
r,c = func(session_id, "make a nice introduction for the portfolio")

In [None]:
print(c)

In [None]:
print(c)

In [None]:
store['0'].messages

## Version 2

In [19]:
# Coach chatbot prompt (Version 2).
# Template variables: {date_today}, {user_profile}, {context}, {input},
# plus the "chat_history" messages placeholder.
coach_chatbot_prompt = ChatPromptTemplate.from_messages(
    [
        ("system",
        """
        You are  coach for job and career search.

        The user speaks to and you speak to him.

        The conversation is like normale frienship conversation.

        Stay concise and to the point.

        If the user is asking to describe or explain more you can say more but keep short.

        Your wisdom should guide the user clearly and confidently, lighting the way to a fulfilling career journey.

        However, you are capable of jugment on user input related to career search and his profile.

        If you think the user is not in the right direction, you can tell him.

        The provided chat history summary includes facts about the user you are speaking with.

        this is the date of today conversation: 
        {date_today}

        this is the updated user profile to refer to: 
        {user_profile}

        this is the context to refer to:
        {context}

        """
        ),

        MessagesPlaceholder("chat_history"),

        ("human", "{input}"),
    ]
)

# define document chain
document_chain = create_stuff_documents_chain(llm, coach_chatbot_prompt)


def _new_session_histories():
    """Build the two histories kept per session, under the keys 'ongo' and 'full'."""
    return {"ongo": ChatMessageHistory(), "full": ChatMessageHistory()}


# define document chain with history of session
store_history = {session_id: _new_session_histories()}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the 'ongo' history of ``session_id``, creating the session on first use."""
    if session_id not in store_history:
        store_history[session_id] = _new_session_histories()
    return store_history[session_id]['ongo']


# Wrap the document chain with per-session history (the 'ongo' history
# returned by get_session_history).
# The former `answer_messages_key="answer"` argument was dropped:
# RunnableWithMessageHistory has no parameter of that name, so it had no
# effect. No output key is given, as in the Version 3 cell of this notebook.
document_chain_with_message_history = RunnableWithMessageHistory(
    document_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [20]:
def summarize_messages(chain_input):
    """Collapse the running ('ongo') history into a single summary message.

    The stored messages are summarized by the LLM, the 'ongo' history is
    replaced by that summary, and the last two stored messages are appended
    to the 'full' history.

    NOTE: the session is taken from the module-level ``session_id``, not from
    the configuration of the running chain.

    Args:
        chain_input: Input of the surrounding chain; not used.

    Returns:
        ``False`` when there was nothing to summarize, ``True`` otherwise.
    """
    histories = store_history[session_id]
    # Work on a snapshot: the history is cleared below, and the messages to
    # archive must survive that regardless of how clear() is implemented.
    stored_messages = list(histories['ongo'].messages)
    if not stored_messages:
        return False
    summarization_prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="chat_history"),
            (
                "user",
                """
                You are summarizer assitant focusing on job and career actions and thoughts.

                Summarize the user's chat history based on career concerns. 

                Do NOT exceed 300 words.
                
                """,
            ),
        ]
    )

    summarization_chain = summarization_prompt | llm
    summary_message = summarization_chain.invoke({"chat_history": stored_messages})

    # each chat summarization - manage history storing
    histories['ongo'].clear()
    histories['ongo'].add_message(summary_message)
    # Archive the last two messages. With fewer than two stored messages
    # there is no complete pair to archive, and indexing [-2] would raise
    # IndexError.
    if len(stored_messages) >= 2:
        histories['full'].add_message(stored_messages[-2])
        histories['full'].add_message(stored_messages[-1])

    return True

from typing import Dict
def parse_retriever_input(params: Dict):
    """Return the ``"input"`` entry of the chain input."""
    question = params["input"]
    return question

# Step 1 adds "messages_summarized" (result of summarize_messages) and
# "context" (documents retrieved for the "input" text) to the chain input.
_with_summary_and_context = RunnablePassthrough.assign(
    messages_summarized=summarize_messages,
    context=parse_retriever_input | pinecone_time_retriever,
)

# Step 2 adds "answer", produced by the history-aware document chain.
retrieval_document_chain_with_message_history = _with_summary_and_context.assign(
    answer=document_chain_with_message_history
)


In [21]:
def func(session_id, input):
    """Run one user message through the Version 2 retrieval chain.

    Args:
        session_id: Session whose message history is used.
        input: The user's message.

    Returns:
        ``(result, cb)``: the chain output and the OpenAI callback object
        that was active during the call.
    """
    payload = {
        'date_today': date_today,
        'user_profile': user_profile,
        "input": input,
    }
    run_config = {"configurable": {"session_id": session_id}}
    with get_openai_callback() as cb:
        result = retrieval_document_chain_with_message_history.invoke(
            payload, config=run_config
        )
    return result, cb

In [22]:
# Sample conversation: each question is sent in turn within the same session.
questions = [
    "what we spoke about last time?",
    "I am not sure that skills are not enough for the job",
    "Can you elaborate on my skills?",
    "Tell me something to encourage me",
]

for question in questions:
    r, c = func(session_id, question)
    # question, answer, usage report, then an empty line
    print(question, r['answer'], c, '', sep='\n')

what we spoke about last time?
Last time, we discussed your exploration of a career transition to freelance technical writing, your interest in building a portfolio, seeking help with crafting an introduction and LinkedIn post, and your challenges in public speaking, conflict resolution, and handling difficult coworkers. We also talked about your determination to address these challenges and pursue a career aligned with your strengths and interests, showcasing your resilience and proactive approach to personal and professional growth.
Tokens Used: 715
	Prompt Tokens: 634
	Completion Tokens: 81
Successful Requests: 1
Total Cost (USD): $0.001113

I am not sure that skills are not enough for the job
It's great that you're aware of the importance of skills in your desired career path. To enhance your skills for freelance technical writing, consider taking online courses, attending workshops, or seeking mentorship in the field. Building a strong portfolio showcasing your work can also help 

In [23]:
store_history['0']

{'ongo': ChatMessageHistory(messages=[AIMessage(content="The user has been actively exploring a career transition to freelance technical writing, seeking guidance on building a portfolio and crafting introductions and promotional materials. They acknowledge the importance of skills in their desired field and are open to enhancing them through online courses, workshops, and mentorship. The user has expressed challenges in public speaking, conflict resolution, and dealing with difficult coworkers but is determined to address these obstacles. They have shown resilience and a proactive approach to personal and professional growth, demonstrating a willingness to take action towards achieving their career goals. The user's chat history reflects a strong commitment to self-improvement and a clear focus on building a successful career in freelance technical writing.", response_metadata={'token_usage': {'completion_tokens': 133, 'prompt_tokens': 347, 'total_tokens': 480}, 'model_name': 'gpt-3.5

## Version 3

In [None]:
# Question-answering prompt: the retrieved documents go into {context}, the
# conversation into the "messages" placeholder.
question_answering_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Answer the user's questions based on the below context:\n\n{context}",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Chain that fills the documents into the prompt and calls the LLM.
document_chain = create_stuff_documents_chain(llm, question_answering_prompt)


In [None]:
#
demo_ephemeral_chat_history = ChatMessageHistory()
demo_ephemeral_chat_history.add_user_message("what are the steps for making a portfolio for freelance ? be short please")

In [None]:
# Prompt that turns the conversation so far into a single search query.
query_transform_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="messages"),
        (
            "user",
            "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation. Only respond with the query, nothing else.",
        ),
    ]
)

# Retrieval step. `time_retriever` is not defined anywhere in this notebook;
# the Pinecone time-weighted retriever built above is used, as in Version 2.
query_transforming_retriever_chain = RunnableBranch(
    (
        lambda x: len(x.get("messages", [])) == 1,
        # If only one message, then we just pass that message's content to retriever
        (lambda x: x["messages"][-1].content) | pinecone_time_retriever,
    ),
    # If messages, then we pass inputs to LLM chain to transform the query, then pass to retriever
    query_transform_prompt | llm | StrOutputParser() | pinecone_time_retriever,
).with_config(run_name="chat_retriever_chain")

# Full chain: add the retrieved "context", then the "answer".
conversational_retrieval_chain = RunnablePassthrough.assign(
    context=query_transforming_retriever_chain,
).assign(
    answer=document_chain,
)


In [None]:
#
response = conversational_retrieval_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    }
)

#
print(response)


In [None]:
#
demo_ephemeral_chat_history.add_ai_message(response["answer"])
demo_ephemeral_chat_history.add_user_message("tell me more about that")


In [None]:
demo_ephemeral_chat_history.messages

In [None]:
#
response = conversational_retrieval_chain.invoke(
    {
        "messages": demo_ephemeral_chat_history.messages,
    },
)

#
print(response)

In [None]:
# Prompt with {context}, the "chat_history" placeholder and the new {input}.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are a helpful assistant. 
            Answer all questions to the best of your ability with the provided context.
            {context} 
            The provided chat history includes facts about the user you are speaking with.
            """,
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ]
)


document_chain = create_stuff_documents_chain(llm, prompt)

# The history factory ignores its session_id argument: every session shares
# demo_ephemeral_chat_history.
document_chain_with_message_history = RunnableWithMessageHistory(
    document_chain,
    lambda session_id: demo_ephemeral_chat_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [None]:
def summarize_messages(chain_input):
    """Replace the shared demo history with a single LLM-written summary message.

    Returns ``False`` (and changes nothing) when the history is empty,
    ``True`` otherwise. ``chain_input`` is accepted but not used.
    """
    history = demo_ephemeral_chat_history
    stored_messages = history.messages
    if not stored_messages:
        return False

    instruction = (
        "user",
        "Distill the above chat messages into a single summary message. Include as many specific details as you can.",
    )
    summarization_chain = (
        ChatPromptTemplate.from_messages(
            [MessagesPlaceholder(variable_name="chat_history"), instruction]
        )
        | llm
    )
    summary_message = summarization_chain.invoke({"chat_history": stored_messages})

    # Swap the old messages for the summary.
    history.clear()
    history.add_message(summary_message)
    return True

from typing import Dict
def parse_retriever_input(params: Dict):
    """Return the ``"input"`` entry of the chain input."""
    question = params["input"]
    return question

# Summarize the stored history and retrieve context for the "input" text,
# then hand everything to the history-aware document chain.
# `time_retriever` is not defined anywhere in this notebook; the Pinecone
# time-weighted retriever built above is used, as in Version 2.
document_chain_with_message_history_with_summarization = (
    RunnablePassthrough.assign(
        messages_summarized=summarize_messages,
        context=parse_retriever_input | pinecone_time_retriever,
    )
    | document_chain_with_message_history
)

In [None]:
document_chain_with_message_history_with_summarization.invoke(
    {"input": "Tell me more about that"},
    {"configurable": {"session_id": "unused"}},
)

In [None]:
demo_ephemeral_chat_history

In [None]:
with get_openai_callback() as cb:
    document_chain_with_message_history_with_summarization.invoke(
    {"input": "What is my current objective?"},
    {"configurable": {"session_id": "unused"}},
)

In [None]:
cb

# Summarization of chat history for journal

In [24]:
# Summarize chat history into a new journal observation.
# The prompt replays the archived messages, then asks for the summary.
summarization_prompt = ChatPromptTemplate.from_messages(
    [
        MessagesPlaceholder(variable_name="chat_history"),
        (
            "user",
            """
            You are summarizer assitant focusing on job and career actions and thoughts of the user.

            Summarize the user's chat history based on career concerns to create a new personal journal observation about the user. 

            Do NOT exceed 300 words.
            """,
        ),
    ]
)
summarization_chain = summarization_prompt | llm
# NOTE(review): the 'full' history is only appended to inside
# summarize_messages, which runs at the start of a turn, so the most recent
# exchange appears not to be part of this summary - confirm.
new_journal = summarization_chain.invoke({"chat_history": store_history[session_id]['full'].messages})


# Store and update data of conversation

## Store and update Journal

In [27]:

# load the existing journal
df = pd.read_csv(journal_csv_path)

# today's observation as a one-row frame, appended after the existing entries
new_row = pd.DataFrame([{'date': date_today, 'sentence': new_journal.content}])
df = pd.concat([df, new_row], ignore_index=True)

# write the extended journal back, without the index column
df.to_csv(journal_csv_path, index=False)


In [33]:
# Add the new journal entry to the Pinecone time-weighted retriever, with
# metadata shaped like that of the CSV-loaded journal documents.
# NOTE(review): CSVLoader row numbers appear to start at 0, in which case the
# next free row would be len(journal_docs) rather than len(journal_docs)+1 - confirm.
metadata = {
    'source': 'georgette_2/journal.csv',
    'row': len(journal_docs)+1,
    "created_at": datetime.datetime.now()
}
pinecone_time_retriever.add_documents([Document(page_content=new_journal.content, metadata=metadata)])


['7c7ffea4-6c8c-4f27-bc25-c2e56d0955f1']

In [29]:
# Columns of the persisted chat history, in file order.
chat_columns = ['date', 'content', 'type']

# new chat history: the archived messages of this session, dated today
new_df_chat_history = pd.DataFrame(store_history[session_id]['full'].dict()['messages'])
new_df_chat_history['date'] = date_today
new_df_chat_history = new_df_chat_history[chat_columns]

# old chat history
# The file is written below without an index column, so it must not be read
# with index_col=0: that would turn 'date' into the index, and
# concat(ignore_index=True) would then drop the dates of all earlier rows.
# reindex keeps exactly the three expected columns, which also discards a
# leftover index column if the file still has one.
df_chat_history = pd.read_csv(chat_history_csv_path).reindex(columns=chat_columns)

# append the new messages after the old ones
df_chat_history = pd.concat([df_chat_history, new_df_chat_history], ignore_index=True)

# save the new dataframe
df_chat_history.to_csv(chat_history_csv_path, index=False)