In [1]:
import os
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from huggingface_hub import notebook_login
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter
import textwrap
import sys
import torch

In [3]:
# Load the source PDF into LangChain documents.
# The location can be overridden with the NU_QA_PDF environment variable so the
# notebook is not tied to one machine; the default is the original path.
pdf_path = os.environ.get('NU_QA_PDF', r'D:\nu_QA_data\Nuvoton-MCU-PSG.pdf')
loader = UnstructuredFileLoader(pdf_path)
documents = loader.load()

In [None]:
# Optional debugging aid: uncomment to inspect the first loaded document.
# Note that sys.getsizeof reports only the shallow size of the object itself,
# not the size of the text it references.
#print(sys.getsizeof(documents[0]))
#print(documents[0])

In [4]:
# Break the loaded documents into newline-separated chunks (chunk_size=1000,
# chunk_overlap=50) so each piece is small enough to embed and retrieve.
text_splitter = CharacterTextSplitter(
    separator='\n',
    chunk_size=1000,
    chunk_overlap=50,
)
text_chunks = text_splitter.split_documents(documents)

In [5]:
# Sentence-embedding model used to vectorise the chunks. It is placed on the
# CPU here; the commented variant below selects the GPU instead.
#embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cuda'})
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
    model_kwargs={'device': 'cpu'},
)

In [6]:
# Embed every chunk and build a FAISS index over the resulting vectors.
vectorstore = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

In [2]:
# First-time download from the Hugging Face Hub (needs a Hugging Face login).
# SECURITY: never paste an access token into the notebook. Read it from the
# environment instead; any token that was previously committed here should be
# revoked and regenerated.
#from huggingface_hub import login
#login(token=os.environ['HF_TOKEN'])
#tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

# Load the tokenizer from a local copy of the model. Set the LLAMA2_MODEL_DIR
# environment variable to use a different directory; the default is the
# original location.
model_dir = os.environ.get(
    'LLAMA2_MODEL_DIR',
    r'C:\Users\USER\Desktop\llma\text-generation-webui\models\Llama-2-7b-chat-hf',
)
tokenizer = AutoTokenizer.from_pretrained(model_dir)

In [3]:
# Load the causal-LM weights in float16 and let `device_map='auto'` decide
# where the layers are placed. Set the LLAMA2_MODEL_DIR environment variable to
# use a different directory; the default is the original location.
model_dir = os.environ.get(
    'LLAMA2_MODEL_DIR',
    r'C:\Users\USER\Desktop\llma\text-generation-webui\models\Llama-2-7b-chat-hf',
)
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map='auto',
    torch_dtype=torch.float16,
    # `use_auth_token=True` was removed: the weights are read from a local
    # directory, so no Hub authentication is involved, and the argument is
    # deprecated in recent transformers releases.
    # Optional quantised loading:
    #load_in_8bit=True,
    #load_in_4bit=True
)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# Generation samples from the 10 most likely tokens (do_sample=True, top_k=10),
# so answers can differ between runs.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # Keep the dtype consistent with how the model was loaded (float16); the
    # previous value, bfloat16, contradicted the load-time dtype.
    torch_dtype=torch.float16,
    device_map="auto",
    max_new_tokens=1024,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

In [10]:
# Expose the transformers pipeline as a LangChain LLM.
# The former `model_kwargs={'temperature':0}` was dropped: when an existing
# pipeline object is wrapped, HuggingFacePipeline does not apply `model_kwargs`
# to generation, so it had no effect (and a temperature of 0 would contradict
# the pipeline's do_sample=True). Decoding is governed by the arguments given
# to `pipeline(...)` when `pipe` was built.
llm = HuggingFacePipeline(pipeline=pipe)

# "stuff" chain: the retrieved chunks are inserted into a single prompt; the
# source documents are returned alongside the answer.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)

In [13]:
# Alternative questions to try:
#query = "What are the key features of M460 Series"
#query = "What is the operating voltage of M031G"
query = "What is the architecture of NuMicro M23"

# Run retrieval + generation, keeping only the chain outputs.
result = chain(
    {"query": query},
    return_only_outputs=True,
)

# Re-flow the answer text to a line width of 500 and display it.
wrapped_text = textwrap.fill(text=result['result'], width=500)
wrapped_text

' The NuMicro M23 microcontroller series is based on the Arm Cortex-M23 core with TrustZone for Armv8-M architecture, which divides memory and peripherals into secure and non-secure worlds to achieve data integrity, firmware update, and operation security. Additionally, the M2351 series also provides high-performance connectivity peripheral interfaces such as UART, SPI, I²C, GPIOs, USB, and ISO 7816-3 for smart card readers.'

## Custom Prompts
- Can't use Llama-2-7b-chat

In [14]:
# Custom prompt for the "stuff" chain: the retrieved context and the user's
# question are substituted into the template, and the model is asked to answer
# in Italian.
# PromptTemplate is already imported in the notebook's first cell, so the
# redundant mid-notebook import was removed.
prompt_template = """Use the following pieces of context answering the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer in Italian:"""

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# Passed to RetrievalQA.from_chain_type to replace the default prompt.
chain_type_kwargs = {"prompt": PROMPT}

In [15]:
# Rebuild the retrieval QA chain, this time with the custom prompt supplied
# through chain_type_kwargs. This replaces the earlier `chain` object.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    return_source_documents=True,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
)

In [17]:
# Ask the same question again, now through the chain that uses the custom prompt.
query = "What is the architecture of NuMicro M23"
result = chain(
    {"query": query},
    return_only_outputs=True,
)

# Re-flow the answer text to a line width of 500 and display it.
wrapped_text = textwrap.fill(text=result['result'], width=500)
wrapped_text

" La NuMicro M23 è basata sull'architettura Arm Cortex-M23 e utilizza TrustZone per Armv8-M. Questa tecnologia consente di dividere i memoria e i periferiche in due mondi separati: un mondo sicuro e un mondo non sicuro. Ciò consente di garantire la integrità dei dati, l'aggiornamento di firmware e la sicurezza dell'operazione. Inoltre, TrustZone for Armv8-M fornisce il beneficio di switchare tra i due mondi in modo veloce e più efficiente. La NuMicro M2351 è basata sull'architettura Arm\nCortex-M23 e utilizza TrustZone per Armv8-M. Questa tecnologia consente di dividere i memoria e i periferiche in due mondi separati: un mondo sicuro e un mondo non sicuro. Ciò consente di garantire la integrità dei dati, l'aggiornamento di firmware e la sicurezza dell'operazione. Inoltre, TrustZone for Armv8-M fornisce il beneficio di switchare tra i due mondi in modo veloce e più efficiente. La NuMicro MA35 è basata sull'architettura Arm Cortex-A35 e utilizza TrustZone per Armv8-A. Questa\ntecnologia 