# LLMs and Chat Models

In [None]:
# Completion-style LLM wrapper (text-davinci-003)
from langchain.llms.openai import OpenAI
# Chat model wrapper (gpt-3.5-turbo)
from langchain.chat_models import ChatOpenAI
llm = OpenAI()
chat = ChatOpenAI()

# predict() takes just one string (a single message)
a = llm.predict("How many planets are there")
b = chat.predict("How many planets are there")
# Bare tuple as the last expression so the notebook displays both answers
a,b

# Predict Messages

In [None]:
from langchain.chat_models import ChatOpenAI
chat = ChatOpenAI(temperature=0.1)

In [None]:
# predict_messages() takes a list of messages instead of a single string.
# The message constructors let you set context and conditions (SystemMessage)
# and write earlier turns of the conversation (AIMessage / HumanMessage).
from langchain.schema import HumanMessage, AIMessage, SystemMessage

messages = [
    SystemMessage(
        # Spelling matters here: this text is sent to the model verbatim.
        content = "You are a geography expert. And you only reply in Italian.",
    ),
    AIMessage(content= "Ciao, mi chiamo Paolo"),
    HumanMessage(
        content = "What is the distance between Mexico and Thailand. Also, what is your name?",
    ),
]
# Relies on `chat` created in an earlier cell.
chat.predict_messages(messages)

# Prompt Template

In [None]:
# A template works as a kind of validation step, so that useless
# (incomplete) sentences are not spent on the model.
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate, ChatPromptTemplate
chat = ChatOpenAI(temperature=0.1)

# Placeholders in braces become the template's input variables.
template = PromptTemplate.from_template(
    "What is the distance between {country_a} and {country_b}."
)
# format() fills in the placeholders and yields the final prompt string.
prompt = template.format(country_a = "Mexico",country_b = "Thailand")
chat.predict(prompt)


In [None]:
# Chat version of the template: each entry is a (role, template) pair and
# every {placeholder} is filled in by format_messages() below.
template = ChatPromptTemplate.from_messages(
    [
        # Spelling fixed: this text is sent to the model verbatim.
        ("system", "You are a geography expert. And you only reply in {language}."),
        ("ai", "Ciao, mi chiamo {name}"),
        ("human", "What is the distance between {country_a} and {country_b}. Also, what is your name?"),
    ]
)
prompt = template.format_messages(
    language = 'Greek',
    name = "Socrates",
    country_a = "Mexico",
    country_b = "Thailand",
)
# Relies on `chat` created in an earlier cell.
chat.predict_messages(prompt)

# Output Parser and LangChain Expression Language (LCEL)

In [None]:
# LangChain Expression Language
from langchain.schema import BaseOutputParser

# Used when the raw LLM response has to be transformed before use
class CommaOutputParser(BaseOutputParser):
    """Turn a comma-separated reply into a list of trimmed strings."""

    def parse(self,text):
        """Split ``text`` on commas and strip the whitespace around each piece."""
        return [piece.strip() for piece in text.strip().split(",")]
# Quick manual check of the parser on a hand-written string
p = CommaOutputParser()
p.parse("Hello,how, are, you")
    

In [None]:
from langchain.prompts import ChatPromptTemplate
# The system message constrains the model to a comma-separated list so the
# reply can be handed to CommaOutputParser.
template = ChatPromptTemplate.from_messages([
    ('system', "You are a list generating machine. Everything you are asked will be answered with a comma seperated list of max {max_items}. Do NOT reply with anything else"),
    ("human","{question}")
]
)
# Manual pipeline: format -> predict -> parse (relies on `chat` and
# CommaOutputParser from earlier cells).
prompt = template.format_messages(max_items = 10,question = "What are the planets?")
result = chat.predict_messages(prompt)
p = CommaOutputParser()
p.parse(result.content)

In [None]:
prompt = template.format_messages(max_items = 10, question = "What are the colors")
result = chat.predict_messages(prompt)
p = CommaOutputParser()
p.parse(result.content)

- 체인을 이용하면 각각의 predict 결과를 연결해서 사용할 수 있음
- R의 chain 같은 느낌
```
chain1 = template1 | chat | outputparser1
chain2 = template2 | chat | outputparser2
chain_sum = chain1 | chain2 | outputparser_sum
```

In [None]:
# The manual version is too long.
# Steps written by hand so far: create the chat model, create the parser,
# create the template, format the messages, predict, call the parser, parse.
template = ChatPromptTemplate.from_messages([
    ('system', "You are a list generating machine. Everything you are asked will be answered with a comma seperated list of max {max_items}. Do NOT reply with anything else"),
    ("human","{question}")
]
)
# Using a chain: template -> chat model -> output parser, composed with `|`
chain = template | chat | CommaOutputParser()
# invoke() takes a dict holding the template's input variables.
chain.invoke({
    "max_items": 5,
    "question": "What are the poketmons?"
})

# Chaining Chains
- [invoke 설명](https://python.langchain.com/docs/expression_language/interface)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
# Streaming lets you watch the model's response as it is being generated
chat = ChatOpenAI(
    temperature=0.1,streaming=True,callbacks=[StreamingStdOutCallbackHandler()]
    )


# First stage: produce a recipe for the requested {cuisine}.
chef_template = ChatPromptTemplate.from_messages([
    ("system","You are a world-class international chef. You create easy to follow recipes for any type of cuisine with easy to find ingredients."),
    ("human","I want to cook {cuisine} food")
])

chef_chain = chef_template | chat 

In [None]:
# Second stage: rewrites whatever recipe it receives into a vegetarian one.
veg_chef_prompt = ChatPromptTemplate.from_messages([
    ("system","You are a vegetarian chef specialized on making traditional recipes vegetarian. You find alternative ingredients and explain their preparation. You don't radically modify the recipe. If there is no alternative for a food just say you don't know how to replace it."),
    # Must be one (role, template) tuple. Written as two bare strings,
    # "human" and "{recipe}" become two separate list entries instead of a
    # single human message carrying the recipe.
    ("human","{recipe}"),
])
veg_chain = veg_chef_prompt | chat


# The dict step runs chef_chain and passes its result on as {recipe}.
final_chain = {"recipe": chef_chain} | veg_chain


final_chain.invoke({
    "cuisine":"indian"
})


# [Module format](https://python.langchain.com/docs/modules/)
- Model I/O
    - Model Input & Output
    - prompts, language model, output parser


- Retrieval
    - 외부데이터를 모델에 어떻게 적용하느냐

- Chains

- Memory

- Agents


# Fewshot Prompt Template
- prompt template을 디스크에 저장하고 로드할 수 있기 때문에 필요함

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler

# Streaming chat model; generated tokens are echoed to stdout by the handler.
chat = ChatOpenAI(
    temperature = 0.1,
    streaming = True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
        ],
)

# Way 1: explicit constructor, input variables listed by hand.
t = PromptTemplate(
    template = "What is the capital of {country}",
    input_variables=["country"],
)
t.format(country = "France")

# Way 2: from_template() shortcut for the same template string
# (rebinds `t`; only this last format() result is displayed by the notebook).
t= PromptTemplate.from_template("What is the capital of {country}")
t.format(country = "France")


In [None]:
examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

chat = ChatOpenAI(
    temperature = 0.1,
    streaming = True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
        ],
)

# How each example dict is rendered; the placeholders must match the keys of
# the dicts in `examples` ("question" / "answer").
example_prompt =  PromptTemplate.from_template("Human:{question}\nAI:{answer}")

# The suffix is the user's actual question, placed after the examples
prompt = FewShotPromptTemplate(
    example_prompt= example_prompt,
    examples = examples,
    suffix = "Human: What do you know about {country}?",
    input_variables = ["country"],
    )


# Alternative without a chain, kept for reference:
# chat.predict(prompt.format(country = 'Germany'))
chain = prompt | chat
chain.invoke({
    "country": "Turkey"
})

# Few Shot Chat Message Prompt Template

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.prompts.few_shot import FewShotChatMessagePromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import ChatPromptTemplate

chat = ChatOpenAI(
    temperature = 0.1,
    streaming = True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
        ],
)

examples = [
    {
        "country": "France",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "country": "Italy",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "country": "Greece",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

# example prompt는 example과 key값이 일치해야함
example_prompt =  ChatPromptTemplate.from_messages(
    [
    ("human", "What do you know about {country}?"),
    ("ai","{answer}"),
    ]
)

example_prompt = FewShotChatMessagePromptTemplate(
    example_prompt= example_prompt,
    examples = examples,
    )

final_prompt = ChatPromptTemplate.from_messages(
    [
    ("system"," You are a geography expert, you gives short answers"),
    example_prompt,
    ("human","What do you know about {country}?"),
    ],
)

chain = final_prompt | chat
chain.invoke({
    "country": "THailand"
})

In [None]:
print(final_prompt.format(country = "Germany"))

# Length Based Example Selector


In [None]:
from typing import Any, Dict, List
from langchain.chat_models import ChatOpenAI
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts.prompt import PromptTemplate
from langchain.prompts.example_selector import LengthBasedExampleSelector
from langchain.prompts.example_selector.base import BaseExampleSelector


        


chat = ChatOpenAI(
    temperature = 0.1,
    streaming = True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
        ],
)

examples = [
    {
        "question": "What do you know about France?",
        "answer": """
        Here is what I know:
        Capital: Paris
        Language: French
        Food: Wine and Cheese
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Italy?",
        "answer": """
        I know this:
        Capital: Rome
        Language: Italian
        Food: Pizza and Pasta
        Currency: Euro
        """,
    },
    {
        "question": "What do you know about Greece?",
        "answer": """
        I know this:
        Capital: Athens
        Language: Greek
        Food: Souvlaki and Feta Cheese
        Currency: Euro
        """,
    },
]

class RandomExampleSelector(BaseExampleSelector):
    """Example selector that hands back one randomly chosen stored example."""

    def __init__(self,examples):
        """Store the pool of examples to choose from.

        The pool is copied so that later ``add_example`` calls do not mutate
        the list object the caller passed in.
        """
        self.examples = list(examples)

    def add_example(self, example):
        """Append ``example`` to the pool."""
        self.examples.append(example)

    def select_examples(self, input_variables):
        """Return a one-element list holding a random example.

        ``input_variables`` is accepted for interface compatibility but is not
        used. An empty pool yields an empty list instead of raising
        ``IndexError`` from ``random.choice``.
        """
        from random import choice

        if not self.examples:
            return []
        return [choice(self.examples)]

    
# How each selected example is rendered inside the few-shot prompt.
example_prompt =  PromptTemplate.from_template("Human:{question}\nAI:{answer}")

# Built-in length-based alternative, kept for reference:
# example_selector = LengthBasedExampleSelector(
#     examples = examples,
#     example_prompt = example_prompt,
#     max_length = 80)

# Custom selector defined in this cell.
example_selector = RandomExampleSelector(
    examples = examples,
)

# example_selector is passed instead of a fixed `examples` list.
prompt = FewShotPromptTemplate(
    example_prompt= example_prompt,
    example_selector= example_selector,
    suffix = "Human: What do you know about {country}?",
    input_variables = ["country"],
    )

prompt.format(country='Brazil')

# Serialize
- 디스크에서 프롬프트를 가져오는 법
- 이를 통해 prompt를 별도로 저장 가능함


In [None]:
from langchain.prompts import load_prompt

# Both formats are shown; the second assignment rebinds `prompt`, so only
# the YAML-loaded prompt is used below.
prompt = load_prompt("./prompt.json")
prompt = load_prompt("./prompt.yaml")

# NOTE(review): presumably both files define a {country} input variable.
prompt.format(country = "Germany")


In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.prompts.pipeline import PipelinePromptTemplate
chat = ChatOpenAI(
    temperature = 0.1,
    streaming= True,
    callbacks=[
        StreamingStdOutCallbackHandler(),
    ]
)

intro = PromptTemplate.from_template(
    """
    You are a role playing assistant.
    And you are impersonating a {character}
"""
)

example = PromptTemplate.from_template(
    """
    This is an example of how you talk:

    Human: {example_question}
    You: {example_answer}
"""
)

start = PromptTemplate.from_template(
    """
    Start now!

    Human: {question}
    You:
"""
)

final = PromptTemplate.from_template(
    """
    {intro}
                                     
    {example}
                              
    {start}
"""
)


prompts = [
    ("intro", intro),
    ("example", example),
    ("start", start)
]


full_prompt = PipelinePromptTemplate(
    final_prompt = final,
    pipeline_prompts= prompts
)

full_prompt.format(
    character = "Pirate",
    example_question = "What is your location",
    example_answer = "Arggg! That is a secret! Arg arg!!",
    question = "What is your fav food?"
)

chain  =  full_prompt | chat
chain.invoke({
    "character" : "Pirate",
    "example_question" : "What is your location",
    "example_answer" : "Arggg! That is a secret! Arg arg!!",
    "question" : "What is your fav food?"
})

# Caching
- LLM의 응답을 저장(캐싱)할 수 있음

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import StreamingStdOutCallbackHandler
from langchain.globals import set_llm_cache, set_debug
from langchain.cache import InMemoryCache, SQLiteCache

# NOTE(review): set_llm_cache is called twice in a row; presumably the later
# SQLite cache replaces the in-memory one, so only one is effectively in use.
set_llm_cache(InMemoryCache())
# Creates the db file automatically and stores responses in it; other
# back ends are available through third-party integrations
set_llm_cache(SQLiteCache("cache.db"))
set_debug(True) # shows all the detailed information

chat = ChatOpenAI(
    temperature = 0.1,
    # streaming= True,
    # callbacks=[
    #     StreamingStdOutCallbackHandler(),
    # ]
)
chat.predict("How do you make Italian pasta?")

In [None]:
# 속도가 달라지는 것을 볼 수 있음
chat.predict("How do you make Italian pasta?")


# Serialize and Calculate Cost

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.callbacks import get_openai_callback
chat = ChatOpenAI(
    temperature= 0.1,
)

# Calls made inside the `with` block are tracked by `usage`; printing it
# shows the collected usage information for both predictions.
with get_openai_callback() as usage:
    a = chat.predict("whar is the recipe for soju")
    b = chat.predict("whar is the recipe for bread")
    print(a,b,"\n")
    print(usage)





In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.llms.openai import OpenAI
from langchain.llms.loading import load_llm
# NOTE(review): despite its name, `chat` is built from the completion-style
# OpenAI wrapper while being given a chat model name - confirm this is intended.
chat = OpenAI(
    temperature=0.1,
    max_tokens=450,
    model = "gpt-3.5-turbo"
)
# A specially configured model can be saved and loaded back
chat.save("model.json")

call_chat = load_llm("model.json")
# Bare expression so the notebook displays the reloaded model
call_chat

# Memory
- 챗봇의 기본
- openai api는 메모리 기능을 포함하지 않음

### Conversation Buffer Memory
- 이전 모든 대화를 저장 -> 대화 내용이 길어질수록 메모리도 계속 커짐
- text completion에 사용함
- 5 종류의 메모리가 있는데 이는 같은 API를 공유함 (save_context,load_memory_variables)
- return_messages 는 chat을 사용할 때 사용

In [None]:
from langchain.memory import ConversationBufferMemory
# return_messages=True is the setting to use when working with a chat model
memory = ConversationBufferMemory(return_messages=True)

# The key was misspelled "inpuy"; use "input" like the other memory examples.
memory.save_context({"input":"Hi"},{"output":"How are you?"})
memory.load_memory_variables({})

In [None]:
# Saving the same exchange again shows the buffer growing with every call.
# The key was misspelled "inpuy"; use "input" like the other memory examples.
memory.save_context({"input":"Hi"},{"output":"How are you?"})
memory.load_memory_variables({})

### Conversation Buffer Window Memory
- 대화의 특정 부분만 저장하는 방법 (특정 범위)
- 최근 대화만 집중한다는 단점

In [None]:
from langchain.memory import ConversationBufferWindowMemory
memory = ConversationBufferWindowMemory(
    return_messages = True,
    k=4
)

def add_message(input,output):
    """Save one input/output exchange into the module-level ``memory``."""
    human_turn = {"input": input}
    ai_turn = {"output": output}
    memory.save_context(human_turn, ai_turn)
add_message(1,1)
add_message(2,2)
add_message(3,3)
add_message(4,4)
memory.load_memory_variables({})


In [None]:
add_message(5,5)
memory.load_memory_variables({})

### Conversation Summary Memory
- llm을 사용하는 memory
- conversation을 요약해서 저장
 

In [None]:
from langchain.memory import ConversationSummaryMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)
memory = ConversationSummaryMemory(llm=llm)

def add_message(input,output):
    """Record a single exchange in the summary ``memory`` defined above."""
    exchange = ({"input": input}, {"output": output})
    memory.save_context(*exchange)

def get_history():
    """Return the variables currently held by the module-level ``memory``."""
    history = memory.load_memory_variables({})
    return history


In [None]:
add_message("Hi, my name is Donghyun Ann, I live in South Korea.","Wow that is so cool")
add_message("South Korea is so pretty.","I wish I could go!")


In [None]:
get_history()

### Conversation Summary Buffer Memory
- 최근에 주고받은 메시지는 원문 그대로 메모리에 저장
- limit에 도달하면 오래된 메시지는 요약해서 저장


In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit = 100,
    return_messages= True,
    )

def add_message(input,output):
    """Store one human input and the matching AI output in ``memory``."""
    inputs = {"input": input}
    outputs = {"output": output}
    memory.save_context(inputs, outputs)

def get_history():
    """Load and return the memory variables; no inputs are supplied."""
    no_inputs = {}
    return memory.load_memory_variables(no_inputs)

add_message("Hi, my name is Donghyun Ann, I live in South Korea.","Wow that is so cool")
add_message("South Korea is so pretty.","I wish I could go!")

In [None]:
get_history()


In [None]:
add_message("How far is Korea form Argentina.","I don't know! Super far!")


In [None]:
get_history()


### Conversation Knowledge Graph Memory
- Knowledge Graph를 만들어냄 = 가장 중요한 내용만 뽑아낸 요약본

In [None]:
from langchain.memory import ConversationKGMemory
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)
memory = ConversationKGMemory(
    llm=llm,
    return_messages= True,
    )

def add_message(input,output):
    """Feed one exchange to the knowledge-graph ``memory`` created above."""
    said_by_human = {"input": input}
    said_by_ai = {"output": output}
    memory.save_context(said_by_human, said_by_ai)

# def get_history():
#     return memory.load_memory_variables({})

add_message("Hi, my name is Donghyun. I live in South Korea.","Wow that is so cool")
# add_message("South Korea is so pretty.","I wish I could go!")

In [None]:
memory.load_memory_variables({"input":"Who is Donghyun?"})

In [None]:
add_message("Donghyun likes Kimchi","Wow that is so cool")
memory.load_memory_variables({"input":"Who is Donghyun?"})


## Memory on LLMChain
- template 안에 memory를 집어넣는 법

In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
llm = ChatOpenAI(temperature=0.1)
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=50,
    memory_key="chat_history"
)

template = """

    You are a helpful AI talking to a human.

    {chat_history}
    Human:{question}

"""

chain = LLMChain(
    llm = llm,
    memory = memory,
    prompt = PromptTemplate.from_template(template),
    verbose = True
)

chain.predict(question = "My name is Danny")

In [None]:
chain.predict(question = "I live in Seongnam")

In [None]:
chain.predict(question = "What is my name?")


### Chat Based Memory

In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate, ChatPromptTemplate,MessagesPlaceholder
llm = ChatOpenAI(temperature=0.1)
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=50,
    memory_key="chat_history",
    return_messages= True
)


prompt = ChatPromptTemplate.from_messages(
    [
    ("system","You are a helpful AI talking to a human."),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human","{question}"),
    ]
)

chain = LLMChain(
    llm = llm,
    memory = memory,
    prompt = prompt,
    verbose = True
)

chain.predict(question = "My name is Danny")

In [None]:
chain.predict(question = "I live in Seongnam")


In [None]:
chain.predict(question = "What is my name?")

### LCEL Based Memory

In [None]:
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.prompts import PromptTemplate, ChatPromptTemplate,MessagesPlaceholder

llm = ChatOpenAI(temperature=0.1)
memory = ConversationSummaryBufferMemory(
    llm=llm,
    max_token_limit=50,
    memory_key="history", #default
    return_messages= True
)

prompt = ChatPromptTemplate.from_messages(
    [
    ("system","You are a helpful AI talking to a human."),
    MessagesPlaceholder(variable_name="history"),
    ("human","{question}"),
    ]
)

def load_memory(input):
    """Return the stored conversation under the "history" memory key.

    ``input`` is whatever the chain passes to this step; it is only printed
    and does not affect what is loaded.
    """
    print(input)
    memory_variables = memory.load_memory_variables({})
    return memory_variables["history"]

chain = RunnablePassthrough.assign(history = load_memory) | prompt | llm


def invoke_chain(question):
    """Run ``chain`` on ``question``, save the exchange to ``memory``, print the result."""
    payload = {"question": question}
    result = chain.invoke(payload)
    # LCEL chains do not update memory on their own, so it is saved by hand.
    human_turn = {"input": question}
    ai_turn = {"output": result.content}
    memory.save_context(human_turn, ai_turn)
    print(result)

In [None]:
invoke_chain("My name is Donghyun")

In [None]:
invoke_chain("What is my name?")


# RAG

## [Retrieval](https://python.langchain.com/docs/modules/data_connection/)
### Loader: 다양한 로더가 있고 3rd party 도 있음 -> slack ,telegram, twitter 등



In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader

loader = TextLoader("./files/chapter_one.txt")

loader.load()

In [None]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("./files/chapter_one.pdf")

loader.load()

In [None]:
# 파워포인트 html pdf 이미지 등등 다 가능함
from langchain.document_loaders import UnstructuredFileLoader

# 에러가 날 경우  설치가 필요함
# import nltk
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')

loader = UnstructuredFileLoader("./files/chapter_one.docx")
loader.load()

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter()
loader = UnstructuredFileLoader("./files/chapter_one.docx")

docs = loader.load()
splitter.split_documents(docs)

In [None]:
loader.load_and_split(text_splitter=splitter)

### Transform: 문서 분할
- 임베딩, 저장을 위해 필요한 파일의 부분들을 전달해줘야 함
- Text Splitter
- [openai tokenizer](https://platform.openai.com/tokenizer)와 같이 토크나이저를 splitter로 사용할 수 있음

In [None]:
# 더 작은 단위로 자르는 방법
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(  
    chunk_size = 600,
    chunk_overlap =100,   # 문장 분할할때 앞부분 조금 가져옴
)
loader = UnstructuredFileLoader("./files/chapter_one.docx")
len(loader.load_and_split(text_splitter=splitter))


In [None]:

# 다른 splitter
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
splitter = CharacterTextSplitter(  
    separator =  "\n",   # 특정 문자열을 기준으로 자르는것
    chunk_size = 600,
    chunk_overlap =100,   # 문장 분할할때 앞부분 조금 가져옴
)
loader = UnstructuredFileLoader("./files/chapter_one.docx")
len(loader.load_and_split(text_splitter=splitter))


In [None]:
# 다른 splitter
from langchain.text_splitter import CharacterTextSplitter
splitter = CharacterTextSplitter(  
    separator =  "\n",   # 특정 문자열을 기준으로 자르는것
    chunk_size = 600,
    chunk_overlap =100,   # 문장 분할할때 앞부분 조금 가져옴
    length_function = len,  # 길이를 측정하는 방식을 함수로 정의할 수 있음
)
loader = UnstructuredFileLoader("./files/chapter_one.docx")
len(loader.load_and_split(text_splitter=splitter))


In [None]:
# tiktoken 사용
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
splitter = CharacterTextSplitter.from_tiktoken_encoder(  
    separator =  "\n",   # 특정 문자열을 기준으로 자르는것
    chunk_size = 600,
    chunk_overlap = 100,   # 문장 분할할때 앞부분 조금 가져옴
)
loader = UnstructuredFileLoader("./files/chapter_one.docx")
loader.load_and_split(text_splitter=splitter)

## Embedding
- Text -> numbers (vector)
- [임베딩 확인](https://turbomaze.github.io/word2vecjson/)
- [LLM 작동원리](https://youtu.be/2eWuYf-aZE4?si=Kl1QF8_j-f683V3c)
- embedding을 저장하고 캐싱해서 사용하는게 효율적임, 매번 embedding을 하는 것은 효율적이지 않음
- 클라우드가 아닌 로컬 환경에서 사용할 것이므로 Chroma를 벡터 Store로 사용할 예정

In [None]:
from langchain.embeddings import OpenAIEmbeddings

embedder = OpenAIEmbeddings()

vector = embedder.embed_query("Hi")
len(vector)

In [None]:
vector = embedder.embed_documents([
    "Hi",
    "how",
    "are",
    "you longer sentences"])
len(vector),len(vector[0])

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,cache_dir
)
vectorstore = Chroma.from_documents(docs,cached_embeddings)

In [None]:
results = vectorstore.similarity_search("where does winston live")

In [None]:
results

# [Document Chain](https://python.langchain.com/docs/modules/chains/document/)
- stuff: 관련된 doc을 한번에 입력해서 질문
- refine: 각각의 doc을 차례로 보면서 답변을 점진적으로 개선
- map reduce: doc에 대해 각각 요약 후 llm에 전달
- map rerank: 각 doc에 대해 질문 후 대답에 대한 점수 -> 가장 높은 점수를 채택

In [167]:
# stuff
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Imported here because this cell re-imports everything else it uses;
# without it the cell depends on an earlier cell having imported ChatOpenAI.
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI()

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,cache_dir
)
vectorstore = Chroma.from_documents(docs,cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type="stuff",
    retriever = vectorstore.as_retriever(),
)

chain.run("WHere does Winston live?")



'Winston Smith lives in Victory Mansions.'

In [170]:
# refine
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Imported here because this cell re-imports everything else it uses;
# without it the cell depends on an earlier cell having imported ChatOpenAI.
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI()

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,cache_dir
)

vectorstore = FAISS.from_documents(docs,cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type="refine",
    retriever = vectorstore.as_retriever(),
)

chain.run("WHere does Winston live?")



'Based on the additional context provided, Winston Smith lives in Victory Mansions, an apartment complex, which is located within the dystopian society depicted in George Orwell\'s novel, "1984."'

In [176]:
# LCEL을 사용한 stuff chain (RetrievalQA 사용X)
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# Imported here because this cell re-imports everything else it uses;
# without it the cell depends on an earlier cell having imported ChatOpenAI.
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(
    temperature=0.1,
)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,cache_dir
)

vectorstore = FAISS.from_documents(docs,cached_embeddings)

# Spelling fixed from `retriver` to match the name used in the next cell.
retriever = vectorstore.as_retriever()
prompt = ChatPromptTemplate.from_messages([
    ("system","You are a helpful assistant. Answer questions using only the following context. If you don't know the answer just say you don't know, don't make it up:\n\n{context}"),
    ("human","{question}")
])

# The retriever fills {context}; RunnablePassthrough forwards the invoked
# string unchanged as {question}.
chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
# Query typo ("Vioctory") fixed: this text is used for retrieval and for the question.
chain.invoke("Describe Victory Mansions")



AIMessage(content='Victory Mansions is a building where Winston Smith resides. It is described as having glass doors at the entrance, which allow gritty dust to enter. The hallway of Victory Mansions has a smell of boiled cabbage and old rag mats. There is a large colored poster on one end of the hallway, depicting the face of a man in his forties with a black mustache. The building has seven floors, and the flat where Winston lives is on the seventh floor. The flat is accessed by stairs since the lift is rarely working. The building is not well-maintained, with rotting houses and patched windows. From the roof of Victory Mansions, one can see the other three buildings that house the Ministries of Truth, Peace, and Love.')

In [188]:
# Map Reduce Chain

from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda

# Imported here because this cell re-imports everything else it uses;
# without it the cell depends on an earlier cell having imported ChatOpenAI.
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(temperature=0.1)

cache_dir = LocalFileStore("./.cache/")

splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=600, chunk_overlap=100
)

loader = UnstructuredFileLoader("./files/chapter_one.docx")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)

vectorstore = FAISS.from_documents(docs, cached_embeddings)

retriever = vectorstore.as_retriever()


map_doc_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
        Use the following portion of a long document to see if any of the text is relevant to answer the question. Return any relevant text verbatim.
        -------
        {context}
        """,
        ),
        ("human", "{question}"),
    ]
)

map_doc_chain = map_doc_prompt | llm

# def map_docs(inputs):
#     print(inputs)
#     documents = inputs['documents']
#     question = input["question"]
#     results = []
#     for document in documents:
#         result = map_doc_chain.invoke(
#             {"context": document.page_content,"question": question}
#         )
#         results.append(result)
#     results = "\n\n".join(results)
#     return results


def map_docs(inputs):
    """Map step: run ``map_doc_chain`` over every retrieved document.

    ``inputs`` is a dict with "documents" and "question". Each document's
    ``page_content`` is sent to the chain together with the question, and the
    ``content`` of the replies is joined with blank lines into one string.
    """
    documents = inputs["documents"]
    question = inputs["question"]
    extracts = []
    for document in documents:
        payload = {"context": document.page_content, "question": question}
        reply = map_doc_chain.invoke(payload)
        extracts.append(reply.content)
    return "\n\n".join(extracts)


map_chain = {
    "documents": retriever,
    "question": RunnablePassthrough(),
} | RunnableLambda(map_docs)


final_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
        Given the following extracted parts of a long document and a question, create a final answer. 
        If you don't know the answer, just say that you don't know. Don't try to make up an answer.
        ------
        {context}
        """,
        ),
        ("human", "{question}"),
    ]
)

chain = {"context": map_chain, "question": RunnablePassthrough()} | final_prompt | llm

chain.invoke("How many ministries are mentioned")

AIMessage(content='In the given portion of the text, there is no mention of any ministries.')

# [Streamlit 사용](https://docs.streamlit.io/get-started/tutorials/create-an-app)
- streamlit이 설치되어있는 환경을 활성화
- 명령어 입력 ```streamlit run pythonfile```
- local host 접속 후 실행 코드 내 수정하면서 변경사항 실시간으로 확인
- ```streamlit.write``` 라는 강력한 함수도 있지만 그냥 객체를 쓰는 것만으로도 바로 같은 효과를 볼 수 있음
```python
# 같은 결과
streamlit.write([1,2,3])
[1,2,3]
```
- 페이지 내 옵션이 재설정 될 때마다 전체가 새롭게 활성화됨 (flutter, Reactjs와는 다름)


In [None]:
import streamlit as st
from langchain.prompts import PromptTemplate
from datetime import datetime

# 제목
st.title("Hello world!")
# 부제목
st.subheader("welcome to streamlit!")

# 사이드바
with st.sidebar:
    st.title("Sidebar Title")
    st.text_input("XXX")
# 탭
tab_one, tab_two, tab_three = st.tabs(["A","B","C"])
with tab_one:
    st.write("a")
with tab_two:
    st.write("b")
with tab_three:
    st.write("c")


# 마크다운 입력
st.markdown(
    """
    #### I love it!!!
"""
)

# streamlit.write practice: each object is shown once through st.write() and
# once as a bare expression, which (per the notes for this section) renders
# the same way.
l = [1, 2, 3, 4]
st.write(l)
l
d = {"x": 1}
st.write(d)
d
p = PromptTemplate.from_template("xxxx")
st.write(p)
p
# Current time formatted as HH:MM:SS
today = datetime.today().strftime("%H:%M:%S")
st.write(today)


# Select box
st.selectbox("Choose your model", ("GPT-3", "GPT-4"))

# Text input; the entered value is echoed below
name = st.text_input("What is your name?")
name

# Slider; the chosen value is echoed below
value = st.slider("temperature",min_value = 0.1, max_value = 1.0)
value

In [None]:
# Chat UI example (display only, nothing is stored)
import time  # needed for the time.sleep() calls below; not imported elsewhere

with st.chat_message("human"):
    st.write("Hello")

with st.chat_message("ai"):
    st.write("How are you")

# Status box with simulated steps; the sleeps stand in for real work.
with st.status("Embedding file...",expanded=True) as status:
    time.sleep(3)
    st.write("Getting the file")
    time.sleep(3)
    st.write("Embedding the file")
    time.sleep(3)
    st.write("Caching the file")
    # Demonstrates switching the status box to its error state.
    status.update(label="Error",state = "error")

st.chat_input("Send a message to the AI")

In [None]:
# Initialize the memory: create the message list in session state only if it
# is not there yet, so an existing history is never overwritten.
if "messages" not in st.session_state:
    st.session_state["messages"] = []


# Send Message Function
def send_message(message, role, save=True):
    """Render ``message`` in a chat bubble for ``role``.

    When ``save`` is true the message is also appended to
    ``st.session_state["messages"]`` as a {"message", "role"} dict.
    """
    bubble = st.chat_message(role)
    with bubble:
        st.write(message)
    if not save:
        return
    entry = {"message": message, "role": role}
    st.session_state["messages"].append(entry)

import time  # needed for time.sleep() below; not imported elsewhere

# Display messages (history): replay stored messages without saving them again
for past in st.session_state["messages"]:
    send_message(past["message"], past["role"], save=False)

# Message input box
message = st.chat_input("Send a message to the AI")

# When a message was entered: show it, then answer with an echo
if message:
    send_message(message, "human")
    time.sleep(2)  # artificial delay before the reply
    send_message(f"You said: {message}", "ai")

# Sidebar log: dump the session state for inspection
with st.sidebar:
    st.write(st.session_state)