### Load Data

In [1]:
import pandas as pd

# Workbook with one row per finance model; loaded once and reused below.
MODEL_INFO_XLSX = 'finance-model-info.xlsx'
df = pd.read_excel(MODEL_INFO_XLSX)



In [2]:
# Show the table without its 'No' column; `df` itself is left untouched.
data = df.drop(columns=['No'])
data

Unnamed: 0,MODEL,DESCRIPTION,LINK
0,Current Asset Value Calculator,Current Asset Value Calculator aims to develop...,Link-to-model1
1,Potential Asset Value Calculator,Potential Asset Value Calculator is designed t...,Link-to-model2
2,Fair Credit Calculator,Fair Credit Calculator is one initiative among...,Link-to-model3
3,Treatment Recommendation System,The goal of Treatment Recommendation System is...,Link-to-model4
4,AHP Model for Dental Tourism,AHP Model for Dental Tourism is a model design...,Link-to-model5
5,AHP Location Selection,AHP Location Selection is the model to perform...,Link-to-model6
6,Group Buying,Group Buying is a model to simulate expected r...,Link-to-model7
7,Appointment Schedule and Roster Optimization,Appointment Schedule and Roster Optimization i...,Link-to-model8
8,Dentist Performance Analysis,Dentist Performance Analysis is a model to per...,Link-to-model9


In [3]:
# DataFrameLoader lives in langchain_community; the old
# `langchain.document_loaders` path is deprecated.
from langchain_community.document_loaders import DataFrameLoader

# Turn every row into a Document: the DESCRIPTION text becomes the page
# content and the remaining columns are stored as metadata.
# Note: the full `df` is used here (not `data`), so 'No' ends up in the metadata.
loader = DataFrameLoader(df, page_content_column="DESCRIPTION")
documents = loader.load()

### Create embeddings and the vector store

In [None]:
import torch
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Initialize HuggingFace embedding model
model_name = "BAAI/bge-base-en"
# Use the Apple-silicon GPU ("mps") only when this machine actually has it;
# a hard-coded "mps" makes the cell fail everywhere else.
device = "mps" if torch.backends.mps.is_available() else "cpu"
model_kwargs = {"device": device}
# Normalized vectors, so similarity scores behave like cosine similarity.
encode_kwargs = {"normalize_embeddings": True}
hf = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

In [None]:
# Same Chroma integration the Retrieval QA section imports; the
# `langchain.vectorstores` path is deprecated.
from langchain_chroma import Chroma

# Define a persistence directory for Chroma
persist_dir = "./data_db"

# Embed every document with `hf` and store the vectors on disk in `persist_dir`
vectordb = Chroma.from_documents(
    documents=documents, embedding=hf, persist_directory=persist_dir
)

### Retrieval QA

In [None]:
from langchain.chains import RetrievalQA
# from langchain_community.llms import HuggingFacePipeline
from langchain_huggingface import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
# from langchain.vectorstores import Chroma
from langchain_chroma import Chroma
# from langchain.prompts import PromptTemplate 

In [9]:
# Choose one of LLMs, in this experiment I use flan-t5
LLM_ID = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(LLM_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(LLM_ID, device_map='auto')

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=837,
    # Greedy (deterministic) decoding. `temperature` only applies when sampling,
    # so `temperature=0` is replaced by explicitly disabling sampling.
    do_sample=False,
)

# Wrap the transformers pipeline so LangChain chains can call it as an LLM
local_llm = HuggingFacePipeline(pipeline=pipe)

In [10]:
def load_embedding_model(path, model_name="BAAI/bge-base-en", device="cpu"):
    """Open a persisted Chroma vector store with a BGE embedding function.

    Despite its name, this returns the vector store, not the embedding model.

    Args:
        path: Directory the Chroma store is persisted in.
        model_name: HuggingFace model id used to embed queries. It should be
            the same model the store was built with.
        device: Device the embedding model runs on (e.g. 'cpu', 'mps', 'cuda').

    Returns:
        The Chroma vector store backed by ``path``.
    """
    # Normalized embeddings, so similarity scores behave like cosine similarity
    encode_kwargs = {'normalize_embeddings': True}

    embedding = HuggingFaceBgeEmbeddings(
        model_name=model_name,
        model_kwargs={'device': device},
        encode_kwargs=encode_kwargs
    )

    # Load from disk
    vectordb = Chroma(persist_directory=path, embedding_function=embedding)
    return vectordb

# Load the vector store from the directory the Chroma creation cell persists
# to (persist_dir = "./data_db"). The previous value 'db' only works if a
# store from an earlier run happens to exist there.
embedding_path = './data_db'
vectordb = load_embedding_model(embedding_path)

In [11]:
# Build a retriever that returns the top 3 documents for each query
top_k_search = {"k": 3}
retriever = vectordb.as_retriever(search_kwargs=top_k_search)

# Combine retriever and local LLM into a "stuff" RetrievalQA chain that
# also returns the documents it retrieved
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [32]:
def process_llm_response(llm_response):
    """Print the chain's answer followed by the source models it retrieved.

    Args:
        llm_response: Mapping with a 'result' entry (the generated answer)
            and a 'source_documents' entry (an iterable of documents whose
            ``metadata`` mapping holds 'MODEL' and 'LINK').

    Raises:
        KeyError: If one of the entries named above is missing.
    """
    print(llm_response['result'])
    print('\n\nRESULT:')

    for source in llm_response["source_documents"]:
        meta = source.metadata
        # f-string formatting instead of `+`, so a non-string value (e.g. an
        # empty spreadsheet cell loaded as NaN) is printed rather than
        # raising TypeError.
        print(f"{meta['MODEL']}: {meta['LINK']}")


In [48]:
query = "what is a tool to get predictive analysis?"
# Off-topic control query to try instead: "do you like chocolate?"

# Run the RetrievalQA chain in this cell so `llm_response` is defined on a
# fresh kernel. The call used to be commented out, leaving the cell dependent
# on a leftover variable.
llm_response = qa_chain.invoke({"query": query})
process_llm_response(llm_response)

Potential Asset Value Calculator


RESULT:
Potential Asset Value Calculator: Link-to-model2
Current Asset Value Calculator: Link-to-model1
Treatment Recommendation System: Link-to-model4


### Updated LangChain API (LCEL chains)

In [41]:
# `invoke` is the current retriever entry point; `get_relevant_documents`
# is deprecated.
docs = retriever.invoke(query)
docs

[Document(metadata={'LINK': 'Link-to-model1', 'MODEL': 'Current Asset Value Calculator', 'No': 1}, page_content='Current Asset Value Calculator aims to develop a predictive analytics tool that calculates a clinic’s fair market value by incorporating critical variables. It serves as an essential asset for reducing the risk of\nunderselling for sellers and overpaying for buyers. Key variables in the model include business performance data from financial statements (such as profit and loss statements and cash flow),\nas well as customer metrics, including the number of unique patients over time, average revenue per patient, and its variation. Additionally, the model considers the clinic’s physical assets, such as equipment and its usage age, to provide a more comprehensive valuation. By integrating these diverse factors, this model minimizes valuation bias, providing a fairer, data-driven valuation process that benefits both parties in the negotiation. Model definition: Use relevant busin

In [42]:
from langchain.prompts import ChatPromptTemplate

# Prompt: the model must answer from the supplied context only.
# Placeholders: {context} = retrieved text, {question} = user query.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [43]:
# Chain: pipe the formatted prompt produced by `prompt` into `local_llm`,
# giving one runnable that takes the prompt's input variables.
chain = prompt | local_llm

In [49]:
# Run
# Pass only the text of the retrieved documents as context. Handing over the
# raw list would paste the Documents' Python repr (metadata included) into
# the prompt.
context_text = "\n\n".join(doc.page_content for doc in docs)
chain.invoke({"context": context_text, "question": query})



'Current Asset Value Calculator'

In [26]:
from langchain import hub
# Pull the prompt published as "rlm/rag-prompt" on the LangChain Hub
# (presumably needs network access — confirm before running offline).
prompt_hub_rag = hub.pull("rlm/rag-prompt")
# Display the fetched template; it takes `context` and `question` variables.
prompt_hub_rag



ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])