### Asking a simple question without providing any history or context

In [2]:
import langchain
from langchain_together import Together
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

In [3]:
import os

# Read the Together API key from the environment instead of embedding it in the
# notebook. A missing variable raises KeyError here, before any client is built.
# NOTE(review): the key that was previously hardcoded in this cell should be
# revoked, since it has been stored in the notebook in plain text.
TOGETHER_API_KEY = os.environ["TOGETHER_API_KEY"]

# One place to change the model used by both clients below.
# Alternative that was tried: "mistralai/Mixtral-8x7B-Instruct-v0.1"
MODEL_NAME = "META-LLAMA/LLAMA-2-7B-CHAT-HF"

# Client from langchain_together (not used by the chain in this cell).
llm = Together(
    model=MODEL_NAME,
    together_api_key=TOGETHER_API_KEY,
)

# Chat client: ChatOpenAI pointed at Together's endpoint via base_url.
model = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=TOGETHER_API_KEY,
    model=MODEL_NAME,
)

# Template with a single {topic} placeholder.
prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")

output_parser = StrOutputParser()

# prompt -> chat model -> string parser; the final expression is the cell output.
chain = prompt | model | output_parser
chain.invoke({"topic": "ice cream"})

'  Why did the ice cream cone go to the party?\n\nBecause it was a scream!'

In [12]:
# Call the chat model directly with a plain string (no prompt template, no parser).
x = model.invoke("how to do prompt engineering in llms")
# Print only the generated text held in the returned message's `content` attribute.
print(x.content)

  Prompt engineering is a technique used in language learning models (LLMs) to generate high-quality responses to given prompts or inputs. Here are some general steps to perform prompt engineering in LLMs:

1. Define the task: Determine the task or problem you want the LLM to perform. This could be generating text, answers to questions, or even completing a task.
2. Design the prompt: Create a prompt that accurately reflects the task you want the LLM to perform. This prompt should be clear, concise, and unambiguous.
3. Train the LLM: Train the LLM on a large dataset of examples that include the prompt and the desired output. The more diverse and extensive the training data, the better the LLM will be at generating high-quality responses.
4. Evaluate the LLM: Once the LLM has been trained, evaluate its performance on a separate test set to determine how well it can generate high-quality responses to the prompts. You can use metrics such as perplexity, accuracy, or ROUGE score to evaluat

In [23]:
# Run only the prompt step (no model call) to inspect what the template produces
# for a given topic.
prompt_value = prompt.invoke({"topic": "ice cream"})
# Other ways to view the same value, left disabled:
# prompt_value.to_messages()
# prompt_value.to_string()
prompt_value

ChatPromptValue(messages=[HumanMessage(content='tell me a short joke about ice cream')])

In [24]:
# Feed the rendered prompt value to the chat model directly, without the output parser.
message = model.invoke(prompt_value)
# print() shows the whole message: content, response_metadata and id.
print(message)

content='  Why did the ice cream cone go to the party?\n\nBecause he was feeling cold and wanted to get scooped up!' response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 19, 'total_tokens': 50}, 'model_name': 'META-LLAMA/LLAMA-2-7B-CHAT-HF', 'system_fingerprint': None, 'finish_reason': 'eos', 'logprobs': None} id='run-60409bfe-5639-4847-9492-0ef9fade80b5-0'


In [25]:
# Last step of the chain run by hand: reduce the model's message to its text string.
output_parser.invoke(message)

'  Why did the ice cream cone go to the party?\n\nBecause he was feeling cold and wanted to get scooped up!'

### Chatting with a model where some context is also provided

In [26]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_community.embeddings import OllamaEmbeddings

# Index two toy sentences in an in-memory vector store, embedded with Ollama.
vectorstore = DocArrayInMemorySearch.from_texts(
    texts=[
        "harrison worked at kensho",
        "bears like to eat honey",
    ],
    embedding=OllamaEmbeddings(),
)
retriever = vectorstore.as_retriever()

# Prompt that restricts the answer to the retrieved context.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(template)
output_parser = StrOutputParser()

# Fan the incoming question out into the two prompt variables:
# "context" comes from the retriever, "question" is passed through unchanged.
setup_and_retrieval = RunnableParallel(
    context=retriever,
    question=RunnablePassthrough(),
)

# retrieval -> prompt -> chat model -> plain string
chain = setup_and_retrieval | prompt | model | output_parser

chain.invoke("where did harrison work?")

'  Based on the provided context, the answer to the question "where did Harrison work?" is:\n\nKensho'

In [31]:
# Query the retriever on its own to see which documents it supplies as context.
retriever.invoke("where did harrison work?")

[Document(page_content='bears like to eat honey'),
 Document(page_content='harrison worked at kensho')]

In [2]:
from langchain_openai import ChatOpenAI
from langchain_together import Together
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


import os

# Read the Together API key from the environment instead of embedding it in the
# notebook. A missing variable raises KeyError here, before the client is built.
# NOTE(review): the key that was previously hardcoded in this cell should be
# revoked, since it has been stored in the notebook in plain text.
TOGETHER_API_KEY = os.environ["TOGETHER_API_KEY"]

prompt = ChatPromptTemplate.from_template(
    "Tell me a short joke about {topic}"
)
output_parser = StrOutputParser()

# Chat client: ChatOpenAI pointed at Together's endpoint via base_url.
# Alternative model that was tried: "mistralai/Mixtral-8x7B-Instruct-v0.1"
model = ChatOpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=TOGETHER_API_KEY,
    model="META-LLAMA/LLAMA-2-7B-CHAT-HF",
)

# The leading dict maps the raw input string to the {topic} variable, so the
# chain can be invoked with a bare string instead of {"topic": ...}.
chain = (
    {"topic": RunnablePassthrough()}
    | prompt
    | model
    | output_parser
)

chain.invoke("ice cream")

'  Why did the ice cream cone go to the party?\n\nBecause it was a scream!'

In [3]:
# Stream the chain's output piece by piece instead of waiting for the full string.
for chunk in chain.stream("ice cream"):
    # end="" keeps consecutive chunks on one line; flush=True emits each one immediately.
    print(chunk, end="", flush=True)

  Why did the ice cream cone go to the party?

Because it was feeling a little "scooped" up!

In [35]:
# Run the chain over several topics in one call; the result is a list with one string per input.
chain.batch(["ice cream", "spaghetti", "dumplings"])

['  Why did the ice cream cone go to the party?\n\nBecause it was a scream!',
 '  Why did the spaghetti refuse to get dressed?\n\nBecause it already had a saucy attitude!',
 '  Why did the dumpling go to the party?\n\nBecause it was a filling good time!']

In [36]:
# Async counterpart of chain.invoke, awaited directly at the top level of the cell.
await chain.ainvoke("ice cream")

'  Why did the ice cream cone go to the party? Because it was feeling sprinkled!'