In [None]:
# %pip -q install langchain huggingface_hub tiktoken
# %pip -q install chromadb
# %pip -q install PyPDF2 pypdf sentence_transformers
# %pip -q install --upgrade together
# %pip -q install -U FlagEmbedding

In [15]:
def create_embeddings(pdf):
    pdf_reader = PdfReader(pdf)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
        )        
    chunks = text_splitter.split_text(text)
    print("Number of chunks:", len(chunks))
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    knowledge_base = FAISS.from_texts(chunks, embeddings)
    print("BD creada")
    return knowledge_base

from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

def get_prompt(instruction, new_system_prompt ):
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template =  B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template

B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"

sys_prompt = """You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. 

If the question is not directly related to the provided context, politely inform the user that the question is outside the context scope and cannot be answered accurately.

Ensure that your answers are clear and concise, avoiding ambiguity or vague responses."""

instruction = """CONTEXT:/n/n {context}/n

Question: {question}"""

prompt_template = get_prompt(instruction, sys_prompt)

print(prompt_template)
llama_prompt = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)
chain_type_kwargs = {"prompt": llama_prompt}

import textwrap
def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')
    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text

def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response['result']))

def modelo_llm(modelo):
    return TogetherLLM(
        model= modelo,
    )

[INST]<<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible using the context text provided. Your answers should only answer the question once and not have any text after the answer is done.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. 

If the question is not directly related to the provided context, politely inform the user that the question is outside the context scope and cannot be answered accurately.

Ensure that your answers are clear and concise, avoiding ambiguity or vague responses.
<</SYS>>

CONTEXT:/n/n {context}/n

Question: {question}[/INST]


In [16]:
import together
from typing import Any
from langchain.llms.base import LLM
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

os.environ["TOGETHER_API_KEY"] = "f8935229473a0d8a3f4709a9ef32533fe365c0cb215ba8c41413b5ca53a5c767"

class TogetherLLM(LLM):
    model: str = "togethercomputer/llama-2-7b-chat"
    together_api_key: str = os.environ["TOGETHER_API_KEY"]
    temperature: float = 0.1
    max_tokens: int = 1024
    class Config:
        extra = 'forbid'
    @property
    def _llm_type(self) -> str:
        return "together"
    def _call(self, prompt: str, **kwargs: Any) -> str:
        if not self.together_api_key:
            raise ValueError("API key is not set.")
        together.api_key = self.together_api_key
        output = together.Complete.create(prompt,
                                          model=self.model,
                                          max_tokens=self.max_tokens,
                                          temperature=self.temperature)
        print(f"modelo: {self.model}")
        if 'choices' in output:
            return output['choices'][0]['text']
        else:
            raise KeyError("The key 'choices' is not in the response.")

In [17]:
pdf_obj = "./prueba.pdf"
knowledge_base = create_embeddings(pdf_obj)
retriever = knowledge_base.as_retriever(search_kwargs={"k": 5})

Number of chunks: 98
BD creada


In [22]:
modelo = "togethercomputer/llama-2-70b-chat"
llm = modelo_llm(modelo)
qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                    chain_type="stuff",
                                    retriever=retriever,
                                    chain_type_kwargs=chain_type_kwargs,)
    
query = "What was found with the related research?"
llm_response = qa_chain.invoke(query)
process_llm_response(llm_response)

  output = together.Complete.create(prompt,


modelo: togethercomputer/llama-2-70b-chat
  The related research found that incorporating local feature view clustering for 3D object recognition can
easily incorporate other useful properties, such as color, motion, figure-ground discrimination, region shape
descriptors, and stereo depth cues. Additionally, the use of multidimensional histograms summarizing the
distribution of measurements within image regions may be particularly useful for recognition of textured
objects with deformable shapes. The local feature approach can also easily incorporate better approaches, such
as the model of biological vision proposed by Edelman, Intrator, and Poggio (1997), which allows for matching
and recognition of 3D objects from a range of viewpoints.
