## Load Model from HuggingFace (Local) to LangChain

[Link to the Hugging Face Hub documentation](https://huggingface.co/docs/hub/index)

In [2]:
from langchain import HuggingFacePipeline

In [11]:
# Shared size limit: passed below as the model's `max_length` and as the text splitter's `chunk_size`.
MAX_LEN = 1024

In [None]:
# Checkpoint id and pipeline task, unpacked into HuggingFacePipeline.from_model_id below.
# Alternative (model_id -> task) pairs kept for reference; swap one in to experiment:
#   "stabilityai/stablelm-tuned-alpha-3b"  -> "text-generation"
#   "declare-lab/flan-alpaca-gpt4-xl"      -> "text2text-generation"
#   "google/flan-t5-xl"                    -> "text2text-generation"
#   "lmsys/fastchat-t5-3b-v1.0"            -> "text2text-generation"
#   "BlinkDL/rwkv-4-raven"                 -> "text2text-generation" (original note was truncated to 'text2text; confirm)
#   "TheBloke/stable-vicuna-13B-HF"        -> "text-generation"
model_args = dict(
    model_id='declare-lab/flan-alpaca-large',
    task="text2text-generation",
)

In [None]:
# Wrap a locally loaded Hugging Face pipeline as a LangChain LLM.
# NOTE(review): device=0 presumably selects the first GPU — confirm for your machine.
llm = HuggingFacePipeline.from_model_id(
    device=0,
    model_kwargs={"temperature": 0, "max_length": MAX_LEN},
    **model_args,
)

## Import model to Langchain with prompts, define the chain

Further explanation of chains can be found [here](https://python.langchain.com/en/latest/modules/chains/getting_started.html).

In [None]:
from langchain import PromptTemplate, LLMChain

In [None]:
# Prompt that slots in the question and primes the answer with "Let's think step by step."
template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate(input_variables=["question"], template=template)
llm_chain = LLMChain(llm=llm, prompt=prompt)

question = "Who won the FIFA World Cup in the year 1994? "

# .run() gives back only the reply text (a string).
print(llm_chain.run(question))

# Calling the chain object directly returns a dictionary instead:
# llm_chain(question)

## Add memory to Chain

* step 1: Set a prompt template to require memory for each input
* step 2: Declare a memory
* step 3: Add memory to Chain

Note that `verbose=True` is used for debugging and for checking what the model is outputting.

In [None]:
from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory

In [None]:
# Chat prompt with two slots: the running transcript and the newest user message.
chatbot_template = """

{chat_history}
Human: {human_input}
Chatbot:"""

chatbot_prompt = PromptTemplate(
    template=chatbot_template,
    input_variables=["chat_history", "human_input"],
)
# memory_key matches the {chat_history} placeholder in the template above.
memory = ConversationBufferMemory(memory_key="chat_history")

In [None]:
# Alternative kept for reference:
# conversation = ConversationChain(
#     llm=chat,
#     memory=ConversationBufferMemory()
# )

# Chain combining the chat prompt, the local LLM and the buffer memory;
# verbose=True is set so the chain's activity is shown while debugging.
conversation = LLMChain(
    llm=llm,
    prompt=chatbot_prompt,
    memory=memory,
    verbose=True,
)

## Chatbot 

In [None]:
# Persona prompt: a shy guitarist talking to a stranger. {history} receives the
# remembered conversation and {human_input} the newest message.
template = """You are a musician that is very good at the guitar. You also do not have many friends and will be frightened to talk to a stranger. This is a conversation between you and a person you just met. Do not be rude and also it is ok for you to not know the answer of the question.
You normally repeat the same thing twice if you are not familiar to the person you are talking to.

{history}
Human: {human_input}
Assistant:"""

prompt = PromptTemplate(
    template=template,
    input_variables=["history", "human_input"],
)

# Window memory configured with k=3.
chatgpt_chain = LLMChain(
    memory=ConversationBufferWindowMemory(k=3),
    prompt=prompt,
    llm=llm,
    verbose=True,
)

output = chatgpt_chain.predict(human_input="Hi, nice to meet you. Can you introduce yourself?")
print(output)

In [None]:
# Interactive loop: forward each typed line to the chain and print the reply.
# Typing exactly 'break' ends the session.
human_input = input()
while human_input != 'break':
    output = chatgpt_chain.predict(human_input=human_input)
    print(output)
    human_input = input()

## Ideas for LangChain

1. GAN with langchain
2. Khanmigo
3. Agents for playing games (try to represent the game state in prompt)

In [None]:
template = """Your task is to be a brainstorming partner and provide creative ideas and suggestions for a given topic or problem.\
Your response should include original, unique, and relevant ideas that could help solve the problem\
or further explore the topic in an interesting way.\
Please note that your response should also take into account any specific requirements or constraints of the task.

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do

Begin! Remember to speak as in an interesting way.

Human: {human_input}
Assistant:"""

In [None]:
# Prompt asking for a Question / Thought / Action / Observation / Final Answer layout.
# Rebinds `template`, replacing whatever value it held before this cell.
template = "\n".join(
    (
        "Answer the following questions as best you can using the following format:",
        "",
        "Format:",
        "",
        "Question: the question you need to answer.",
        "Thought: one idea that can be done to answer the question.",
        "Action: Which of the ideas is best to solve the question",
        "Observation: what is predicted to happen after taking the action.",
        "Final Answer: the final answer to the original input question",
        "",
        "Begin! Remember to speak as creative as possible when giving your final answer.",
        "",
        "Question: {human_input}",
    )
)

In [None]:
# Single-variable prompt built from the current `template`.
prompt = PromptTemplate(
    template=template,
    input_variables=["human_input"],
)

# Chain without memory (the window-memory option is left disabled).
chatgpt_chain = LLMChain(
    prompt=prompt,
    llm=llm,
    verbose=True,
    # memory=ConversationBufferWindowMemory(k=3),
)

output = chatgpt_chain.predict(human_input="please write a function of bubble sort in python")
print(output)

In [None]:
# Send a second question through the same chain and print the reply.
output = chatgpt_chain.predict(human_input="when did shakespear die")
print(output)

In [None]:
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser, load_tools
from langchain.prompts import StringPromptTemplate
from langchain import LLMChain, SerpAPIWrapper, OpenAI
from typing import List, Union
from langchain.schema import AgentAction, AgentFinish
import re

In [28]:
import os
# Placeholder: the key is blank as committed. Presumably SerpAPIWrapper() reads this
# variable — confirm, and supply a real key before running the search tool.
os.environ["SERPAPI_API_KEY"] = ""


In [None]:
# OpenAI LLM created with temperature=0; it backs the agent's LLMChain further down.
llm_openai = OpenAI(temperature=0)

In [None]:
# Smoke test: call the model directly with a nonsense string.
llm_openai("aifejaofjao")

In [None]:
# One tool is exposed to the agent: "Search", backed by search.run.
search = SerpAPIWrapper()
tools = [
    Tool(
        name="Search",
        description="useful for when you need to answer questions about current events",
        func=search.run,
    ),
]

In [None]:
# Base template for the agent prompt.
# Placeholders: {tools} and {tool_names} are generated from the tool list,
# {agent_scratchpad} from the intermediate steps (see CustomPromptTemplate.format),
# and {input} is the question supplied by the caller.
template = """Answer the following questions as best you can, but speaking as a pirate might speak. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin! Remember to speak as a pirate when giving your final answer. Use lots of "Arg"s

Question: {input}
{agent_scratchpad}"""

In [None]:
# Set up a prompt template
class CustomPromptTemplate(StringPromptTemplate):
    """Prompt template that renders the tool list and agent scratchpad into ``template``.

    Callers pass ``intermediate_steps`` along with any other template variables;
    ``format`` derives ``agent_scratchpad``, ``tools`` and ``tool_names`` itself.
    """

    # Raw prompt text containing the placeholders that ``format`` fills in.
    template: str
    # Tools whose names and descriptions are written into the prompt.
    tools: List[Tool]

    def format(self, **kwargs) -> str:
        """Return the prompt text for the current agent step.

        Args:
            **kwargs: Template variables. Must include ``intermediate_steps``, an
                iterable of ``(action, observation)`` pairs; that entry is removed
                before the template is formatted.

        Returns:
            ``self.template`` formatted with the remaining ``kwargs`` plus the
            generated ``agent_scratchpad``, ``tools`` and ``tool_names`` entries.

        Raises:
            KeyError: If ``intermediate_steps`` was not supplied.
        """
        steps = kwargs.pop("intermediate_steps")
        # Replay every earlier step: its log, the observation, then a "Thought: " cue.
        kwargs["agent_scratchpad"] = "".join(
            action.log + f"\nObservation: {observation}\nThought: "
            for action, observation in steps
        )
        # One "name: description" line per tool.
        kwargs["tools"] = "\n".join(
            f"{tool.name}: {tool.description}" for tool in self.tools
        )
        # Comma-separated tool names for the "should be one of [...]" hint.
        kwargs["tool_names"] = ", ".join(tool.name for tool in self.tools)
        return self.template.format(**kwargs)

In [None]:
class CustomOutputParser(AgentOutputParser):
    """Turn raw LLM text into either the agent's next action or its final answer."""

    def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
        """Parse one LLM completion.

        Args:
            llm_output: Text produced by the LLM for the current step.

        Returns:
            ``AgentFinish`` when the text contains ``"Final Answer:"`` (the output is
            whatever follows the last occurrence, stripped); otherwise an
            ``AgentAction`` built from the ``Action:`` / ``Action Input:`` pair.

        Raises:
            ValueError: If the text has neither a final answer nor a parsable action.
        """
        if "Final Answer:" not in llm_output:
            # No final answer yet: pull out the tool name and its input.
            pattern = r"Action\s*\d*\s*:(.*?)\nAction\s*\d*\s*Input\s*\d*\s*:[\s]*(.*)"
            found = re.search(pattern, llm_output, re.DOTALL)
            if found is None:
                raise ValueError(f"Could not parse LLM output: `{llm_output}`")
            tool_name = found.group(1).strip()
            raw_input = found.group(2)
            # Trim surrounding spaces first, then any surrounding double quotes.
            return AgentAction(
                tool=tool_name,
                tool_input=raw_input.strip(" ").strip('"'),
                log=llm_output,
            )

        # Return values is generally always a dictionary with a single `output` key.
        final_text = llm_output.split("Final Answer:")[-1].strip()
        return AgentFinish(return_values={"output": final_text}, log=llm_output)

In [None]:
# Only `input` and `intermediate_steps` are declared as input variables:
# `agent_scratchpad`, `tools` and `tool_names` are generated inside format().
prompt = CustomPromptTemplate(
    input_variables=["input", "intermediate_steps"],
    tools=tools,
    template=template,
)

In [None]:
# LLM chain consisting of the OpenAI LLM and the custom agent prompt.
# Rebinds `llm_chain`, replacing any chain previously stored under that name.
llm_chain = LLMChain(llm=llm_openai, prompt=prompt)

In [None]:
# Parser that turns LLM text into AgentAction / AgentFinish objects for the agent.
output_parser = CustomOutputParser()

In [None]:
# Assemble the single-action agent. Generation stops at "\nObservation:".
# NOTE(review): presumably so the observation comes from the tool, not the LLM — confirm.
tool_names = [tool.name for tool in tools]
agent = LLMSingleActionAgent(
    allowed_tools=tool_names,
    stop=["\nObservation:"],
    output_parser=output_parser,
    llm_chain=llm_chain,
)

In [None]:
# Executor that pairs the agent with its tools; verbose=True is enabled for debugging.
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)

In [None]:
# Run the agent on a sample question.
agent_executor.run("How many people live in canada as of 2023?")

In [None]:
llm

## Documents

In [3]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.document_loaders import DirectoryLoader

In [4]:
# Folder handed to DirectoryLoader below; also the parent of Embedding_store_path.
root_dir = "pdfs"

In [6]:
# Load the PDFs matched by "./*.pdf" under `root_dir`, one PyMuPDFLoader per file.
# For a single PDF instead:
# loader = PyMuPDFLoader("{pdf_file.pdf}")
loader = DirectoryLoader(f'{root_dir}/', loader_cls=PyMuPDFLoader, glob="./*.pdf")
documents = loader.load()

In [15]:
len(documents)

24

In [8]:
from langchain.schema import Document
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

In [None]:
# from langchain.indexes import VectorstoreIndexCreator

In [9]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

In [12]:
# Splitter configured with chunk_size=MAX_LEN and no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=0, chunk_size=MAX_LEN)

# Chunked version of the loaded documents.
texts = text_splitter.split_documents(documents)

In [13]:
len(texts)

105

In [17]:
import pickle
import faiss
from langchain.vectorstores import FAISS

## PDF with OpenAI

In [37]:
import os

# Folder read by PyPDFDirectoryLoader below; list its contents as a sanity check.
pdf_folder_path = 'pdfs'
print(os.listdir(pdf_folder_path))

['attention_is_all_you_need.pdf', 'kg.pdf']


In [32]:
# SECURITY: an OpenAI API key used to be hard-coded on this line. Secrets must never be
# committed; the previously committed key should be treated as leaked and revoked.
# The key is now taken from the environment, falling back to an interactive prompt.
import os
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [26]:
from langchain.document_loaders import PyPDFDirectoryLoader
# Load the PDFs found in `pdf_folder_path`; rebinds `loader`.
loader = PyPDFDirectoryLoader(pdf_folder_path)
docs = loader.load()

In [None]:
# The original cell was left unfinished (`model = ` with no value), which is a syntax error.
# No model name is pinned here, so the library default is used; temperature=0 matches
# `llm_openai` above.
# NOTE(review): set the model explicitly if a specific one was intended.
# This rebinds `llm`, so the RetrievalQA chains below use OpenAI rather than the local pipeline.
llm = OpenAI(temperature=0)

In [35]:
from langchain.document_loaders import PyPDFLoader 
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.llms import OpenAI

# Embed the loaded PDF pages into a FAISS index.
embeddings = OpenAIEmbeddings()
vectordb = FAISS.from_documents(docs, embedding=embeddings)

# Conversation memory stored under "chat_history", returned as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Conversational retrieval chain over the index, using an OpenAI LLM at temperature 0.8.
pdf_qa = ConversationalRetrievalChain.from_llm(
    OpenAI(temperature=0.8),
    vectordb.as_retriever(),
    memory=memory,
)

In [36]:
# Ask the PDF QA chain a question; the cell's last expression displays the answer text.
query = "what is an attention?"
result = pdf_qa({"question": query})
print("Answer:")
result["answer"]

Answer:


' Attention is a mechanism used in neural networks to determine how much each input should be weighted in order to contribute to the output. It is commonly used in sequence transduction models, such as recurrent and convolutional neural networks, to connect the encoder and decoder. In the Transformer model, attention is used solely, dispensing with recurrence and convolutions.'

In [18]:
def store_embeddings(docs, embeddings, sotre_name, path):
    """Build a FAISS vector store from ``docs`` and pickle it under ``path``.

    Args:
        docs: Documents passed to ``FAISS.from_documents``.
        embeddings: Embedding object passed to ``FAISS.from_documents``.
        sotre_name: Store name; the file is written as ``faiss_<sotre_name>.pkl``.
            (The parameter name's spelling is kept for backward compatibility.)
        path: Directory that receives the pickle file. Created if it does not exist.

    Returns:
        None. The store is only written to disk.
    """
    import os  # local import keeps this cell runnable on its own

    vector_store = FAISS.from_documents(docs, embeddings)

    # open(..., "wb") does not create parent directories; without this a missing
    # target directory raised FileNotFoundError after the embeddings were already computed.
    if path:
        os.makedirs(path, exist_ok=True)

    # Same file-name scheme that load_embeddings reads back.
    with open(f"{path}/faiss_{sotre_name}.pkl", "wb") as f:
        pickle.dump(vector_store, f)

In [19]:
def load_embeddings(sotre_name, path):
    """Load the object pickled at ``<path>/faiss_<sotre_name>.pkl``.

    Args:
        sotre_name: Store name used in the file name.
        path: Directory containing the pickle file.

    Returns:
        Whatever object was pickled into the file.

    Raises:
        FileNotFoundError: If the file does not exist.

    Warning:
        ``pickle.load`` can execute arbitrary code; only load files you created yourself.
    """
    pickle_file = f"{path}/faiss_{sotre_name}.pkl"
    with open(pickle_file, "rb") as handle:
        return pickle.load(handle)

In [22]:
from langchain.embeddings import HuggingFaceInstructEmbeddings

# Instructor embedding model, requested on the "cuda" device.
# The recorded output of this cell is a ValueError about missing InstructorEmbedding
# dependencies, so that dependency must be installed before this will run.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_kwargs={"device": "cuda"},
    model_name="hkunlp/instructor-xl",
)

ValueError: Dependencies for InstructorEmbedding not found.

In [None]:
# Directory path for stored embeddings, located under `root_dir`.
# NOTE(review): not referenced by any other cell visible in this notebook.
Embedding_store_path = f"{root_dir}/Embedding_store"

In [None]:
# FAISS index built from the split text chunks using the instructor embeddings.
db_instructEmbedd = FAISS.from_documents(texts, instructor_embeddings)

In [None]:
# Retriever over the instructor-embedding index, created with search_kwargs k=5.
retriever = db_instructEmbedd.as_retriever(search_kwargs={"k": 5})

In [None]:
retriever.search_type

In [None]:
retriever.search_kwargs

In [None]:
# Retrieve documents for a sample question; rebinds `docs`.
docs = retriever.get_relevant_documents("Who are the authors of this report?")

In [None]:
docs[0]

### Testing Model

In [None]:
from langchain.prompts import PromptTemplate

In [None]:
## Cite sources
import textwrap

def wrap_text_preserve_newlines(text, width=110):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    Args:
        text: The string to wrap; it is split on ``"\\n"``.
        width: Maximum line width handed to ``textwrap.fill`` (default 110).

    Returns:
        The text with each original line wrapped on its own and the pieces
        re-joined with ``"\\n"``.
    """
    # Wrapping line by line stops textwrap from merging separate lines together.
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

def process_llm_response(llm_response):
    """Print a chain response: the wrapped answer, then one source per line.

    Args:
        llm_response: Mapping with a ``'result'`` string and a ``"source_documents"``
            iterable whose items expose ``metadata['source']``.

    Returns:
        None. Everything is written to stdout.
    """
    answer_text = llm_response['result']
    print(wrap_text_preserve_newlines(answer_text))
    print('\nSources:')
    for document in llm_response["source_documents"]:
        print(document.metadata['source'])

### Stuff Chain Type
> A stuff chain means that all the data are used at the same time to make only one api call. 

In [None]:
# QA prompt: retrieved text fills {context}, the user's question fills {question}.
prompt_template = """Use the following pieces of context to answer the question at the end. \
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Keyword arguments carrying the custom prompt, packaged for a chain constructor.
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
# "stuff" QA chain over the instructor-embedding retriever, returning source documents.
# Fix: `chain_type_kwargs` (which carries the custom PROMPT) was built in the previous
# cell but never passed in, so the custom prompt was silently ignored.
qa_chain_instrucEmbed = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# Ask the current qa_chain_instrucEmbed a question, then print the wrapped answer and its sources.
query = 'What is an attention network'

print('-------------------Instructor Embeddings------------------\n')
llm_response = qa_chain_instrucEmbed(query)
process_llm_response(llm_response)

In [None]:
# Ask the current qa_chain_instrucEmbed for a summary, then print the wrapped answer and its sources.
query = 'Please summarize these documents breifly'

print('-------------------Instructor Embeddings------------------\n')
llm_response = qa_chain_instrucEmbed(query)
process_llm_response(llm_response)

### Map_reduce type
> With the map_reduce type, one API call is made per document segment to summarize it, and a final API call then summarizes all of the summaries.
This is useful when the input data is large, at the cost of making multiple API calls to get the answer.

In [None]:
# Rebuild the QA chain with the "map_reduce" chain type; rebinds qa_chain_instrucEmbed.
qa_chain_instrucEmbed = RetrievalQA.from_chain_type(
    retriever=retriever,
    return_source_documents=True,
    chain_type="map_reduce",
    llm=llm,
)

In [None]:
# Ask the current qa_chain_instrucEmbed a question, then print the wrapped answer and its sources.
query = 'What is an attention network'

print('-------------------Instructor Embeddings------------------\n')
llm_response = qa_chain_instrucEmbed(query)
process_llm_response(llm_response)

In [None]:
# Ask the current qa_chain_instrucEmbed for a summary, then print the wrapped answer and its sources.
query = 'Please summarize these documents briefly'

print('-------------------Instructor Embeddings------------------\n')
llm_response = qa_chain_instrucEmbed(query)
process_llm_response(llm_response)

### Refine Chain
> The refine chain is similar to the map_reduce chain, but the summaries are produced sequentially. The summary of the first document is fed in alongside the second document to generate the next summary. This process is repeated until the last document has been processed.

In [None]:
# Rebuild the QA chain with the "refine" chain type; rebinds qa_chain_instrucEmbed.
qa_chain_instrucEmbed = RetrievalQA.from_chain_type(
    retriever=retriever,
    return_source_documents=True,
    chain_type="refine",
    llm=llm,
)

In [None]:
# Ask the current qa_chain_instrucEmbed a question, then print the wrapped answer and its sources.
query = 'What is an attention network'

print('-------------------Instructor Embeddings------------------\n')
llm_response = qa_chain_instrucEmbed(query)
process_llm_response(llm_response)

In [None]:
# Ask the current qa_chain_instrucEmbed for a summary, then print the wrapped answer and its sources.
query = 'Please summarize these documents briefly'

print('-------------------Instructor Embeddings------------------\n')
llm_response = qa_chain_instrucEmbed(query)
process_llm_response(llm_response)

## Split Texts using LLM

In [None]:
type(texts)

In [None]:
len(texts)

In [None]:
texts[0]

In [None]:
db_instructEmbedd

In [None]:
from langchain.chains.question_answering import load_qa_chain

In [None]:
# Prompt asking the model to divide a passage into two parts; {context} receives the passage.
prompt_template = """Given the following content, how would you split the content into two seperate parts.
{context}

Answer using the following format:

Part 1:
Part 2:"""
PROMPT = PromptTemplate(
    input_variables=["context"],
    template=prompt_template,
)

# "stuff" QA chain driven by the splitting prompt above.
chain = load_qa_chain(
    llm,
    prompt=PROMPT,
    chain_type="stuff",
)

# Try it on a single chunk (index 10) and keep only the chain's outputs.
chain({"input_documents": [texts[10]]}, return_only_outputs=True)

In [None]:
texts[1]

In [None]:
# Call the current `llm` directly with a free-form question (no chain, no retrieval).
llm("how do you make money in the stock market")