In [2]:
import os

from pprint import pprint

# import torch
import torch
from torch import cuda, bfloat16

# import transformers
import transformers
from transformers import StoppingCriteria, StoppingCriteriaList

# import langchain
from langchain import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
    )
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import (
    WikipediaLoader
    )
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import ChatPromptTemplate

# LangSmith credentials

In [3]:
# LangSmith tracing configuration.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
# Keep a key that is already exported in the environment: assigning the
# placeholder unconditionally would overwrite a real credential, and pasting a
# real key here would leak it with the notebook. The placeholder is only a
# fallback so the cell still runs when nothing is configured.
os.environ.setdefault('LANGCHAIN_API_KEY', 'lc_yourkey')

In [4]:
# Hugging Face Hub ids of the generator model and of its tokenizer
LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
LLM_TOKENIZER = "mistralai/Mistral-7B-Instruct-v0.2"

# no embedding function yet; this name is only a placeholder at this point
embedding_function = None

# run on the current CUDA device when one is available, otherwise on the CPU
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# Load data (i.e. Wikipedia articles about Aristophanes)

In [5]:
# Load Document
# Fetch the Wikipedia pages matching the query as LangChain documents.
loader = WikipediaLoader(query="Aristophanes")

# NOTE: the earlier `loader.requests_kwargs = {"verify": False}` assignment was
# removed. `requests_kwargs` is a WebBaseLoader option; WikipediaLoader does not
# read it, so the line had no effect and only suggested that TLS certificate
# verification had been switched off.

document = loader.load()
# preview the first 200 characters of the first returned page
pprint(document[0].page_content[:200])



  lis = BeautifulSoup(html).find_all('li')


('Aristophanes (; Ancient Greek: Ἀριστοφάνης, pronounced [aristopʰánɛːs]; '
 'c.\u2009446 – c.\u2009386 BC), son of Philippus and Zenodora, of the deme '
 'Kydathenaion (Latin: Cydathenaeum), was a comic playwright or co')


In [6]:
# Split
# Break the loaded pages into overlapping chunks (size 500, overlap 100).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
splits = text_splitter.split_documents(document)
# show the first two chunks as the cell output
splits[:2]

[Document(page_content='Aristophanes (; Ancient Greek: Ἀριστοφάνης, pronounced [aristopʰánɛːs]; c.\u2009446 – c.\u2009386 BC), son of Philippus and Zenodora, of the deme Kydathenaion (Latin: Cydathenaeum), was a comic playwright or comedy-writer of ancient Athens and a poet of Old Attic Comedy. Eleven of his forty plays survive virtually complete. These provide the most valuable examples of a genre of comic drama known as Old Comedy and are used to define it, along with fragments from dozens of lost plays by Aristophanes and his', metadata={'title': 'Aristophanes', 'summary': 'Aristophanes (; Ancient Greek: Ἀριστοφάνης, pronounced [aristopʰánɛːs]; c.\u2009446 – c.\u2009386 BC), son of Philippus and Zenodora, of the deme Kydathenaion (Latin: Cydathenaeum), was a comic playwright or comedy-writer of ancient Athens and a poet of Old Attic Comedy. Eleven of his forty plays survive virtually complete. These provide the most valuable examples of a genre of comic drama known as Old Comedy and

# Create a Vector DB using the Chroma integration in LangChain

## Use a custom model as an embedding generator

In [7]:
# name of the sentence-transformers model used to embed the chunks
EMBED_MODEL = "all-MiniLM-L6-v2"

In [8]:
# wrap the embedding model so it can be handed to the vector store
embedding_function = SentenceTransformerEmbeddings(
    model_name=EMBED_MODEL,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  return self.fget.__get__(instance, owner)()


## Create the Vector DB

In [9]:
# embed every chunk and index the result in a Chroma vector store
db = Chroma.from_documents(
    documents=splits,
    embedding=embedding_function,
)

In [10]:
# sanity check: retrieve the chunks most similar to a sample query
query = "Euripides"
res = db.similarity_search(query=query)
pprint(res)

[Document(page_content='Euripides (c.\u2009480 – c.\u2009406 BC) was a tragedian of classical Athens. Along with Aeschylus and Sophocles, he is one of the three ancient Greek tragedians for whom any plays have survived in full. Some ancient scholars attributed ninety-five plays to him, but the Suda says it was ninety-two at most. Of these, eighteen or nineteen have survived more or less complete (Rhesus is suspect). There are many fragments (some substantial) of most of his other plays. More of his plays have survived intact', metadata={'source': 'https://en.wikipedia.org/wiki/Euripides', 'summary': 'Euripides (c.\u2009480 – c.\u2009406 BC) was a tragedian of classical Athens. Along with Aeschylus and Sophocles, he is one of the three ancient Greek tragedians for whom any plays have survived in full. Some ancient scholars attributed ninety-five plays to him, but the Suda says it was ninety-two at most. Of these, eighteen or nineteen have survived more or less complete (Rhesus is suspec

# LLM COMPONENT FOR GENERATION

In [11]:
# load the causal LM weights in bfloat16
model = transformers.AutoModelForCausalLM.from_pretrained(
    LLM_MODEL,
    trust_remote_code=True,
    torch_dtype=bfloat16,
)
# set it to evaluation mode
model.eval()
# assign it to the available device
model.to(device)

# load the tokenizer from its own constant: LLM_TOKENIZER was configured
# alongside LLM_MODEL but was previously ignored here, so changing it had
# no effect
tokenizer = transformers.AutoTokenizer.from_pretrained(LLM_TOKENIZER)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

## Create a custom stopping criteria object

In [12]:
# Mistral closes its generations with "</s>"; look up the matching token id(s)
stop_token_ids = tokenizer.convert_tokens_to_ids(["</s>"])


class StopOnTokens(StoppingCriteria):
  """Stopping criterion that fires once the newest token is a stop token."""

  def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
    # Only the last token of the first sequence (input_ids[0]) is compared
    # against the module-level stop_token_ids.
    return any(input_ids[0][-1] == stop_id for stop_id in stop_token_ids)


# the generation pipeline takes the criteria wrapped in a StoppingCriteriaList
stopping_criteria = StoppingCriteriaList([StopOnTokens()])

## FINALIZE THE LLM

In [13]:
generate_text = transformers.pipeline(
    task='text-generation',
    # already-loaded model and tokenizer, plus the device to run on
    model=model,
    tokenizer=tokenizer,
    device=device,
    # langchain expects the prompt together with the completion
    return_full_text=True,
    # sampling setup: low temperature and a narrow nucleus (top 15% of the
    # probability mass); top_k=0 leaves the token cut-off to top_p alone
    do_sample=True,
    temperature=0.1,
    top_p=0.15,
    top_k=0,
    # guards against repeated and rambling output
    repetition_penalty=1.1,
    stopping_criteria=stopping_criteria,
    # upper bound on the number of generated tokens
    max_new_tokens=512,
)

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
# The pass-through `prompt = PromptTemplate(...)` that used to be created here
# was dead code: nothing read it before `prompt` was rebound to the RAG chat
# prompt, and reusing the name for two different objects invites stale-state
# bugs when cells are run out of order.
llm = HuggingFacePipeline(pipeline=generate_text)

# PUT EVERYTHING TOGETHER

## PROMPT

In [14]:
# RAG prompt text: fixed instructions followed by the {question} and {context}
# slots; the trailing empty item keeps the final newline after "Answer:"
template = "\n".join((
    "You are an assistant for question-answering tasks.",
    "Use the following pieces of retrieved context to answer the question.",
    "If the answer is not presented, just say that you don't know.",
    "Use three sentences maximum and keep the answer concise.",
    "Question: {question}",
    "Context: {context}",
    "Answer:",
    "",
))
# turn the template text into a chat prompt object
prompt = ChatPromptTemplate.from_template(template=template)

## CHAIN

In [15]:
# Post-processing
def format_docs(docs):
    """Join the `page_content` of every document, separated by a blank line."""
    contents = [entry.page_content for entry in docs]
    return "\n\n".join(contents)

# chain: retrieve context -> fill the prompt -> generate -> plain string
rag_chain = (
    {
        # The number of retrieved chunks has to go through `search_kwargs`;
        # a bare `k=5` keyword on `as_retriever` is not forwarded to the
        # similarity search, so the requested 5 results were not applied.
        "context": db.as_retriever(search_kwargs={"k": 5}) | format_docs,
        # the raw input string is passed through unchanged as the question
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# TEST IT

In [16]:
# question put to the RAG chain
query = (
    "Name the surviving Aristophanes' works "
    "and give a short description"
)

In [17]:
# run the whole chain on the question and pretty-print the generated text
pprint(rag_chain.invoke(input=query))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


('Aristophanes is best known for his comedies. Some surviving works include '
 '"The Clouds," "Lysistrata," and "Frogs." In "The Clouds," Socrates is '
 'satirized as a teacher who corrupts young men. "Lysistrata" revolves around '
 'women withholding sex from their husbands to end the Peloponnesian War. '
 '"Frogs" features Dionysus traveling to the underworld to retrieve Euripides. '
 'Aristophanes also contributed to language studies, compiling lists of '
 'foreign words and unusual expressions.')
