In [None]:
!pip install -q langchain
!pip install -q torch
!pip install -q transformers
!pip install -q -U bitsandbytes
!pip install -q accelerate
!pip install -q sentence-transformers
!pip install -q datasets
!pip install -q faiss-cpu

In [None]:
import os

import torch
from langchain import HuggingFacePipeline
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

In [None]:
# Load every PDF in the tender folder and collect the documents each loader returns.
pdf_folder_path = "./mom_rag/bandi/"
documents = []
# os.listdir() yields entries in arbitrary order; sorting keeps the document
# order (and therefore documents[0] and the index contents) stable across runs.
for file in sorted(os.listdir(pdf_folder_path)):
    # Compare the extension case-insensitively so "X.PDF" is not silently skipped.
    if file.lower().endswith('.pdf'):
        pdf_path = os.path.join(pdf_folder_path, file)
        loader = PyPDFLoader(pdf_path)
        documents.extend(loader.load())

# Fail here with a clear message instead of an IndexError in a later cell.
if not documents:
    raise FileNotFoundError(f"No PDF documents could be loaded from {pdf_folder_path!r}")

In [None]:
# Sanity check: show the text content of the first loaded document.
first_page_text = documents[0].page_content
print(first_page_text)

In [None]:
# Split the loaded documents into smaller chunks using the splitter's default settings.
# NOTE(review): no chunk size is configured here; confirm the default chunk length
# fits the input limit of the embedding model used later in the notebook.
splitter = RecursiveCharacterTextSplitter()
docs = splitter.split_documents(documents=documents)

In [None]:
# Display the third chunk as a quick visual check of the splitting result.
docs[2]

In [None]:
# Hugging Face model id of the sentence-embedding model used to vectorize the chunks.
modelPath = "nickprock/sentence-bert-base-italian-uncased"

# Run the embedding model on the GPU when CUDA is available, otherwise on the CPU.
# The device is passed as a plain string rather than a torch.device object.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_kwargs = {'device': device}

# Encoding options: keep the raw embedding vectors (no normalization).
encode_kwargs = {'normalize_embeddings': False}

# Embedding wrapper used below to build the vector index.
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,        # pre-trained model to load
    model_kwargs=model_kwargs,   # model options (target device)
    encode_kwargs=encode_kwargs  # options applied when encoding text
)

In [None]:
# Build the FAISS vector index from the chunks, embedding them with `embeddings`.
db = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [None]:
MODEL_NAME = "galatolo/cerbero-7b"

# 4-bit NF4 quantization with double quantization; computation runs in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Without quantization about 15 GB of VRAM are needed.
# NOTE(review): trust_remote_code=True allows code shipped in the model repository
# to run locally; confirm it is actually required for this model.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quantization_config
)

# Start from the generation settings published with the model, then override.
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
# Deterministic greedy decoding. The previous setup sampled with temperature=0.0001,
# which selects the same tokens in practice but divides float16 logits by a tiny
# number and can overflow on some transformers versions.
generation_config.do_sample = False
# Sampling-only parameters are reset to their neutral values so they do not
# conflict with do_sample=False.
generation_config.temperature = 1.0
generation_config.top_p = 1.0
generation_config.repetition_penalty = 1.15

# Use a distinct name: rebinding `pipeline` would hide the imported factory
# function and make this cell fail when it is run a second time.
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)

# LangChain LLM wrapper around the transformers pipeline.
llm = HuggingFacePipeline(
    pipeline=text_generation_pipeline,
)


In [None]:
# Prompt layout: system-style instructions, the retrieved context, then the user
# question and an open assistant turn using the [|Umano|] / [|Assistente|] markers.
prompt_template = """
Sei un esperto di bandi e concorsi pubblici. Hai memorizzato tutti i bandi mai creati.
Utilizza le regole nella tua memoria insieme a qualsiasi altro contesto che possiedi per rispondere alle domande sul bando.


{context}


[|Umano|] {question}
[|Assistente|]
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Retrieve the 2 most similar chunks for each question.
retriever = db.as_retriever(search_kwargs={"k": 2})

# "stuff" chain: the retrieved chunks are inserted into {context} of the prompt;
# the source documents are returned alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt},
    return_source_documents=True,
)

In [None]:
# Ask the retrieval chain a question and print the question followed by the answer.
# The question is stored in `question` rather than `input`, so the builtin input()
# stays usable for the rest of the kernel session.
question = "Sto indicendo un nuovo bando per software di CRM SALESFORCE. Crea un paragrafo oggetto della fornitura di questo nuovo bando"
response = chain(question)
# The chain output is indexed by key; "result" holds the generated answer text.
result = response["result"].strip()

print(question)
print(result)