In [None]:
# https://www.youtube.com/watch?v=rIV1EseKwU4&ab_channel=AIAnytime

In [None]:
!pip install chromadb
!pip install -q torch transformers langchain sentence_transformers
!pip install -q accelerate peft bitsandbytes transformers trl

In [None]:
import os

import torch
import transformers
from transformers import (
  AutoTokenizer, 
  AutoModelForCausalLM, 
  BitsAndBytesConfig,
  pipeline
)
from transformers import BitsAndBytesConfig

In [None]:
#################################################################
# Tokenizer
#################################################################

model_name='mistralai/Mistral-7B-Instruct-v0.2'

# Model configuration fetched for `model_name`; kept as a module-level name
# even though no visible cell reads it afterwards.
model_config = transformers.AutoConfig.from_pretrained(
    model_name,
)

# trust_remote_code is left at its default (False): the flag allows code hosted
# in the Hub repository to run locally, and this checkpoint is loaded through
# classes that ship with transformers, so it is not needed.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Pad with the EOS token, on the right-hand side of the sequence.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
#################################################################
# bitsandbytes parameters
#################################################################

# Load the base model in 4-bit precision.
use_4bit = True

# 4-bit quantization scheme: "fp4" or "nf4".
bnb_4bit_quant_type = "nf4"

# Nested (double) quantization of the 4-bit base model is switched off.
use_nested_quant = False

# Name of the torch dtype used for compute with the 4-bit base model.
bnb_4bit_compute_dtype = "float16"

In [None]:
#################################################################
# Set up quantization config
#################################################################
# Resolve the dtype name (e.g. "float16") to the corresponding torch dtype.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16.
# The is_available() guard keeps this purely informational hint from raising
# on a machine without CUDA, where get_device_capability() cannot be queried.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    # Index the tuple instead of unpacking into `_`, so IPython's `_`
    # (last output) is not shadowed at notebook top level.
    major = torch.cuda.get_device_capability()[0]
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

In [None]:
#################################################################
# Load the pre-trained model with the quantization config applied
#################################################################
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)

In [None]:
#################################################################
# Base inference
#################################################################
# Tokenize once and move the whole encoding to wherever the model lives,
# instead of hard-coding 'cuda'.
encoded = tokenizer("[INST] Tell me about fantasy football? [/INST]", return_tensors="pt").to(model.device)
prompt = encoded['input_ids']

# The attention mask is passed explicitly: the pad token is set to EOS,
# so generate() cannot reliably infer the mask from the ids alone.
generated_ids = model.generate(prompt,
                               attention_mask=encoded['attention_mask'],
                               max_new_tokens=1000,
                               do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)

# Show the generated text as the cell output (it was previously discarded).
decoded[0]

## LangChain

In [None]:
from langchain.document_loaders import DirectoryLoader, PyPDFLoader, PDFMinerLoader

# Directory scanned for the source documents.
DATA_PATH = "data/books"
# Directory where the Chroma vector store is persisted.
CHROMA_PATH = "chroma"

# Loads all the PDF documents available under the data path.
def load_documents(data_path=None, loader_cls=None):
    """Load every PDF found (recursively) under ``data_path``.

    Args:
        data_path: Directory to walk. Defaults to the module-level DATA_PATH.
        loader_cls: Callable taking a file path and returning an object with a
            ``load()`` method that returns a list of documents. Defaults to
            PDFMinerLoader.

    Returns:
        A list with the documents of all matching files, in a deterministic
        (sorted) traversal order. Empty when no PDF is found.
    """
    # Defaults are resolved at call time so explicit arguments never touch
    # the module-level names.
    if data_path is None:
        data_path = DATA_PATH
    if loader_cls is None:
        loader_cls = PDFMinerLoader

    documents = []
    for root, dirs, files in os.walk(data_path):
        dirs.sort()  # in-place sort makes os.walk descend in a stable order
        for file in sorted(files):
            # str.endswith does not understand glob patterns: "*.pdf" would
            # never match, so compare against the plain extension instead.
            if file.lower().endswith(".pdf"):
                print(file)
                # Accumulate per file; keeping only the last loader would
                # drop every document except those of the final PDF.
                documents.extend(loader_cls(os.path.join(root, file)).load())

    return documents

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings

def split_text(documents, chunk_size=500, chunk_overlap=100):
    """Split documents into overlapping character chunks and preview one.

    Args:
        documents: Documents to split (passed to ``split_documents``).
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.

    Returns:
        The list of chunks produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True
    )

    chunks = text_splitter.split_documents(documents)
    print(f"Split {len(documents)} documents into {len(chunks)} chunks.")

    # Preview a sample chunk as a sanity check. The index is clamped so that
    # inputs yielding fewer than 11 chunks (or none) do not raise IndexError.
    if chunks:
        document = chunks[min(10, len(chunks) - 1)]
        print(document.page_content)  # the chunk's text
        print(document.metadata)  # its metadata (add_start_index adds 'start_index')

    return chunks

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import SentenceTransformerEmbeddings
import shutil

def save_to_chroma(chunks):
    """Rebuild the Chroma store at CHROMA_PATH from ``chunks``.

    Any directory already present at CHROMA_PATH is removed first. The chunks
    are then embedded with the "all-mpnet-base-v2" sentence-transformers model
    and written to a store persisted in that directory.
    """
    # Start from a clean slate: drop a previously persisted store.
    if os.path.exists(CHROMA_PATH):
        shutil.rmtree(CHROMA_PATH)

    # Build the new store straight from the chunks and flush it to disk.
    store = Chroma.from_documents(
        chunks,
        SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2"),
        persist_directory=CHROMA_PATH,
    )
    store.persist()

    print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

def load_model():
    """Wrap the notebook's ``model`` and ``tokenizer`` as a LangChain LLM.

    Builds a transformers text-generation pipeline around the module-level
    ``model`` / ``tokenizer`` and returns it wrapped in ``HuggingFacePipeline``.

    Returns:
        A ``HuggingFacePipeline`` instance usable as ``llm`` in LangChain chains.
    """
    # Use the `pipeline` factory imported at the top of the notebook; the
    # `transformers` module name itself is not guaranteed to be in scope.
    text_generation_pipeline = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        # temperature only takes effect in sampling mode; without
        # do_sample=True generation is greedy and the value is ignored.
        do_sample=True,
        temperature=0.2,
        repetition_penalty=1.1,
        # The returned text includes the prompt as well as the completion.
        return_full_text=True,
        max_new_tokens=300
    )

    hf = HuggingFacePipeline(pipeline=text_generation_pipeline)
    return hf

In [None]:
from langchain.chains import RetrievalQA

def query_data():
    """Build a RetrievalQA chain over the Chroma store persisted at CHROMA_PATH.

    The chain uses the LLM from ``load_model()``, the "stuff" chain type, the
    store's default retriever, and returns the source documents with each answer.
    """
    generator = load_model()

    # Open the persisted store with the "all-mpnet-base-v2" embedding function.
    embedder = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
    store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedder)

    return RetrievalQA.from_chain_type(
        llm=generator,
        chain_type="stuff",
        retriever=store.as_retriever(),
        return_source_documents=True,
    )

In [None]:
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.chains import LLMChain

def query_data2():
    """Build a retrieval chain that pipes retrieved context into an LLMChain.

    The returned runnable takes the query as input: the retriever fills
    ``context``, the query itself is forwarded as ``question``, and both are
    rendered into the [INST] prompt before being sent to the LLM from
    ``load_model()``.
    """
    generator = load_model()

    # Open the persisted store with the "all-mpnet-base-v2" embedding function.
    embedder = SentenceTransformerEmbeddings(model_name="all-mpnet-base-v2")
    store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedder)
    retriever = store.as_retriever()

    # Template text is kept verbatim, including its indentation and spacing.
    template_text = """
    ### [INST] 
    Instruction: Answer the question based on your 
    fantasy football knowledge. Here is context to help:

    {context}

    ### QUESTION:
    {question} 

    [/INST]
     """

    answer_chain = LLMChain(
        llm=generator,
        prompt=PromptTemplate(
            input_variables=["context", "question"],
            template=template_text,
        ),
    )

    # RunnablePassthrough hands the incoming query on to the next step
    # of the chain unchanged, under the "question" key.
    retrieval_inputs = {"context": retriever, "question": RunnablePassthrough()}
    return retrieval_inputs | answer_chain

In [None]:
# Spanish-language prompt template with {context} and {question} placeholders,
# wrapped in [INST] ... [/INST] tags.
# NOTE(review): no visible cell reads this module-level name; query_data2()
# defines its own local `prompt_template` — confirm whether this one is still needed.
prompt_template = """
### [INST] Instrucción: Eres un experto en inteligecia artificial responda en español la pregunta según sus conocimientos de la siguiente página web. Aquí hay contexto para ayudar:

{context}

### PREGUNTA:
{question} (responde en castellano) [/INST]
 """

In [None]:
def generate_answer(query, qa=None):
    """Answer ``query`` with the RetrievalQA chain.

    Args:
        query: The question to ask.
        qa: Optional prebuilt chain (as returned by ``query_data()``). Passing
            one avoids rebuilding the pipeline, embeddings and chain on every
            call. When omitted, a new chain is built with ``query_data()``.

    Returns:
        A tuple ``(answer, generated_text)`` where ``generated_text`` is the
        full chain output and ``answer`` is its ``'result'`` entry.
    """
    if qa is None:
        qa = query_data()

    # .invoke() matches generate_answer2 and replaces the deprecated
    # direct call on the chain object.
    generated_text = qa.invoke(query)
    answer = generated_text['result']

    return answer, generated_text

In [None]:
def generate_answer2(query, rag_chain=None):
    """Answer ``query`` with the prompt-based retrieval chain.

    Args:
        query: The question to ask.
        rag_chain: Optional prebuilt chain (as returned by ``query_data2()``).
            Passing one avoids rebuilding the pipeline, embeddings and chain
            on every call. When omitted, a new chain is built with
            ``query_data2()``.

    Returns:
        Whatever the chain's ``invoke`` returns for ``query``.
    """
    if rag_chain is None:
        rag_chain = query_data2()

    answer = rag_chain.invoke(query)

    return answer

In [None]:
# `answer` is not assigned by any earlier cell, so produce it here before
# inspecting it. The question mirrors the one used in the base-inference cell.
# Assumes the Chroma store at CHROMA_PATH has already been populated — TODO confirm.
query = "Tell me about fantasy football?"
answer = generate_answer2(query)

# Print both parts: only the last bare expression of a cell is displayed,
# so the context would otherwise be silently dropped.
print(answer['context'])
print(answer['text'])