In [1]:
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
import os

from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from huggingface_hub import notebook_login
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter
import textwrap
import sys
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory, ConversationSummaryMemory

from langchain.embeddings import OpenAIEmbeddings

import APIKEY

## RetrievalQA & Chat

In [2]:

os.environ["OPENAI_API_KEY"] = APIKEY.API_KEY_SERVICE_OPENAI
llm = ChatOpenAI(temperature=0, model='gpt-4-1106-preview') # model="gpt-3.5-turbo"
embeddings = OpenAIEmbeddings()

# FAISS

In [8]:
#input_doc_pth = r'D:\nu_QA_data\m460bsp_Library_StdDriver_headers_1000'
#input_doc_pth = r'D:\nu_QA_data\m460bsp_StdDriver'
#input_doc_pth = r'D:\nu_QA_data\m2351bsp_1000'
#input_doc_pth = r'D:\nu_QA_data\m2351bsp_StdDriver_regs_1000'
#input_doc_pth = r'D:\nu_QA_data\m251bsp_StdDriver_1000'
input_doc_pth = r'D:\nu_QA_data\TRM_M463_M467_openai_pypdf'
# load embedding model
print("===== Load the embedding model =====")
#embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2',model_kwargs={'device': 'cpu'})
#embeddings = HuggingFaceInstructEmbeddings(model_name='BAAI/bge-small-en')

# Create vectors store
print("===== Build FAISS =====")
#vectorstore=FAISS.from_documents(texts, embeddings)
vectorstore=FAISS.load_local(input_doc_pth, embeddings)

===== Load the embedding model =====
===== Build FAISS =====


# LanceDB

In [3]:
import lancedb
from langchain.vectorstores import LanceDB

save_path = r'D:\nu_QA_data\lanceDB'
save_name = r'TRM_m460'

db = lancedb.connect(save_path)
table = db.open_table(save_name)
vectorstore = LanceDB(table, embeddings)

In [4]:
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever())
query = "M460 has how many UART? each functions have any difference? if different, list it."
qa.run(query)

'The M460 microcontroller has a total of 6 UARTs. Each UART has slightly different features and functions. Here is a list of the differences:\n\nUART0/UART1:\n- Supports 16-byte FIFOs with programmable level trigger\n- Supports auto flow control (nCTS and nRTS)\n- Supports IrDA (SIR) function\n- Supports LIN function on UART0 and UART1\n- Supports RS-485 9-bit mode and direction control\n- Supports wake-up function\n- Supports 8-bit receiver FIFO time-out detection function\n- Supports break error, frame error, parity error, and receive/transmit FIFO overflow detection function\n- Supports PDMA operation\n\nUART2/UART3/UART4/UART5:\n- Supports 4-byte FIFOs with programmable level trigger\n- Supports programmable guard time selection (11 ETU ~ 266 ETU)\n- Supports three sets of ISO-7816-3 compliant smart card interfaces (T=0, T=1)\n- Supports full-duplex UART function\n- Supports auto inverse convention function\n- Supports stop clock level and clock stop (clock keep) function\n- Suppor

In [4]:
from langchain.chains.question_answering import load_qa_chain
from langchain import PromptTemplate 
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate

# Prompt
#template="""Use the following pieces of context to answer the question at the end.
#Please answer with C Code function as complete as possible.
#If you don't know the answer or the question has nothing to do with code or programing, don't try to make up an answer.
#{context}
#Question: {question}
#Answer:"""

#template="""Use the following pieces of context to answer the question at the end. The context is standard driver C header files of M251 MCU. 
#Please answer with C Code function as complete as possible.
#If you don't know the answer or the question has nothing to do with code or programing, don't try to make up an answer.
#{context}
#Question: {question}
#Answer:"""

#template="""Use the following pieces of context to answer the question at the end. The context is standard driver C header files of M251 MCU. 
#Please use context C functions as much as possible to answer with C code.
#If you don't know the answer or the question has nothing to do with code or programing, don't try to make up an answer.
#{context}
#Question: {question}
#Answer:"""



#template="""Use the following pieces of context and chat history to answer the question at the end. The context is standard driver C header files of M251 MCU. 
#Please answer with C Code function as complete as possible.
#If you don't know the answer or the question has nothing to do with code or programing, don't try to make up an answer.
#{context}
#{chat_history}
#Question: {question}
#Answer:"""
#
#QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question", "chat_history"], template=template)





# Define the system message template
#system_template = """Use the following pieces of context and chat history to answer the question at the end. The context is standard driver C header files of M251 MCU. 
#Please answer with C Code function as complete as possible.
#If you don't know the answer or the question has nothing to do with code or programing, don't try to make up an answer.
#----------------
#{context}
#{chat_history}"""

system_template = """Use the following pieces of context and chat history to answer the question at the end. The context is Nuvoton M467 Series Technical Reference Manual.
        If you don't know the answer or the question has nothing to do with code or programing, don't try to make up an answer.
        ----------------
        {context}
        {chat_history}"""

# Create the chat prompt templates
messages = [
SystemMessagePromptTemplate.from_template(system_template),
HumanMessagePromptTemplate.from_template("{question}")
]

qa_prompt = ChatPromptTemplate.from_messages(messages)


## RetrievalQA

In [5]:
retriever_vec=vectorstore.as_retriever(
    search_type="similarity", # Also test "mmr"
    search_kwargs={"k": 8})

# Normal memory
memory = ConversationBufferMemory(memory_key="chat_history", input_key='question', output_key='answer', return_messages=True)
#memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
# Should save the tokens
#memory = ConversationSummaryMemory(llm=llm, memory_key="chat_history", return_messages=True)

chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever_vec, memory=memory,
                                             return_source_documents=True, 
                                             combine_docs_chain_kwargs={"prompt": qa_prompt}
                                             )

chat_history = []

In [6]:
#query = "Generate a C source code which initializes I2C0 to access slave ROM with byte write and byte read operation, and check if the read data is equal to the write data."
#query = "Generate a C source code which TIMER0 MODE is TIMER_ONESHOT_MODE, TIMER0_FREQ is 1000000, TIMER0_PRESCALE_VALUE is 5, TIMER0_CMP_VALUE is 0x5A5A5A"
#query = "Generate a C source code which open EBI with bank2, 16BIT width, and fast timing"
#query = "Generate a C source code to get data from UART, PDMA to memory address 0x20000000, and do CRC32 at 0x20000000"
#query = "Write a C code of ACMP comparing DAC output with ACMP1_P1"
#query = "Write a C code to set the BPWM0 channel 0 for capture function"
#query = "Write a C code to set the BPWM0 channel 0 to output waveform with frequency 240000Hz and duty 50%"

#query = "Hi my name is Gary"
query = "M460 has how many UART? each functions have any difference? if different, list it."
#query = "If I want to use multibytes write and multibytes read, how should I update this code?"

#result = chain({"question": query, "chat_history": chat_history})
result = chain({"question": query})
chat_history.extend([(query, result["answer"])])
#result = chain("Write a C code of ACMP comparing DAC output with ACMP1_P1")


In [7]:
print(result)

{'question': 'M460 has how many UART? each functions have any difference? if different, list it.', 'chat_history': [HumanMessage(content='M460 has how many UART? each functions have any difference? if different, list it.'), AIMessage(content='The Nuvoton M2354 Series, as described in the provided context, has six UART interfaces: UART0, UART1, UART2, UART3, UART4, and UART5. Each UART has different functionalities and support for various features. Here is a summary of the differences:\n\n1. **UART0/UART1:**\n   - FIFO: 16 Bytes\n   - Auto Flow Control (CTS/RTS): Supported\n   - IrDA: Supported\n   - LIN: Supported (Only UART0/UART1 with LIN function)\n   - RS-485 Function Mode: Supported\n   - nCTS Wake-up: Supported\n   - Incoming Data Wake-up: Supported\n   - Received Data threshold Wake-up: FIFO reached\n   - RS-485 Address Match (AAD mode) Wake-up: Supported\n   - Auto-Baud Rate Measurement: Supported\n   - STOP Bit Length: 1, 1.5, 2 bit\n   - Word Length: 5, 6, 7, 8 bits\n   - Eve

In [11]:
for i in range(len(result['source_documents'])):
    print(result['source_documents'][i].metadata)
    print(result['source_documents'][i].metadata['page']+1)

{'vector': array([-0.0215742 , -0.00730873, -0.01307151, ...,  0.01203628,
       -0.00247593, -0.02222295], dtype=float32), 'id': '2f0ccf0e-2418-4723-a34f-46b9011f4735', '_distance': 0.3796325922012329}


KeyError: 'page'

In [8]:
print(result['answer'])

The Nuvoton M2354 Series, as described in the provided context, has six UART interfaces: UART0, UART1, UART2, UART3, UART4, and UART5. Each UART has different functionalities and support for various features. Here is a summary of the differences:

1. **UART0/UART1:**
   - FIFO: 16 Bytes
   - Auto Flow Control (CTS/RTS): Supported
   - IrDA: Supported
   - LIN: Supported (Only UART0/UART1 with LIN function)
   - RS-485 Function Mode: Supported
   - nCTS Wake-up: Supported
   - Incoming Data Wake-up: Supported
   - Received Data threshold Wake-up: FIFO reached
   - RS-485 Address Match (AAD mode) Wake-up: Supported
   - Auto-Baud Rate Measurement: Supported
   - STOP Bit Length: 1, 1.5, 2 bit
   - Word Length: 5, 6, 7, 8 bits
   - Even/Odd Parity: Supported
   - Stick Bit: Supported

2. **UART2/UART3/UART4/UART5:**
   - FIFO: 16 Bytes
   - Auto Flow Control (CTS/RTS): Supported
   - IrDA: Supported (except UART5)
   - LIN: Not Supported
   - RS-485 Function Mode: Supported
   - nCTS Wake

In [90]:
print(result['answer'])

To update the code to use multibyte write and multibyte read operations, you can modify the `I2C_WriteByte` and `I2C_ReadByte` functions to use the `I2C_WriteMultiBytes` and `I2C_ReadMultiBytes` functions respectively. Here's the updated code:

```c
#include "M251.h"

#define I2C_PORT I2C0
#define SLAVE_ROM_ADDR 0x50

void I2C_Init(void)
{
    // Enable I2C0 clock
    CLK_EnableModuleClock(I2C0_MODULE);

    // Set I2C0 multi-function pins
    SYS->GPA_MFPL &= ~(SYS_GPA_MFPL_PA9MFP_Msk | SYS_GPA_MFPL_PA8MFP_Msk);
    SYS->GPA_MFPL |= (SYS_GPA_MFPL_PA9MFP_I2C0_SCL | SYS_GPA_MFPL_PA8MFP_I2C0_SDA);

    // Configure I2C0 as master, 100kHz
    I2C_Open(I2C_PORT, 100000);

    // Enable I2C0 interrupt
    I2C_EnableInt(I2C_PORT);
    NVIC_EnableIRQ(I2C0_IRQn);
}

void I2C_WriteMultiBytes(uint8_t data[], uint32_t length)
{
    // Send start condition, write address, and data
    I2C_START(I2C_PORT);
    I2C_WAIT_READY(I2C_PORT);
    I2C_SET_DATA(I2C_PORT, SLAVE_ROM_ADDR << 1);
    I2C_SET_CO

In [58]:
chain.memory

ConversationBufferMemory(chat_memory=ChatMessageHistory(messages=[HumanMessage(content='Hi my name is Gary'), AIMessage(content="Sorry, but I can't provide the answer you're looking for.")]), output_key='answer', input_key='question', return_messages=True, memory_key='chat_history')

In [49]:
chat_history

[('Hi my name is Gary', 'Hello Gary! How can I assist you today?'),
 ('What is my name', "I am an AI assistant and I don't have a name."),
 ('Whats my name',
  "I'm sorry, but I am an AI language model and I do not have the capability to know your name.")]

## Another choose load_qa_chain()

In [5]:

# Docs Retriever
#retriever=vectorstore.as_retriever(search_type="mmr", # Also test "similarity"
#                                   search_kwargs={"k": 8})
retriever=vectorstore.as_retriever(search_type="similarity", # Also test "similarity"
                                   search_kwargs={"k": 8})

# question & vector search
question = "Write a C code of ACMP comparing DAC output with ACMP1_P1"
docs = retriever.get_relevant_documents(question)

# Chain
chain = load_qa_chain(llm, chain_type="stuff", prompt=QA_CHAIN_PROMPT) 

In [None]:
result = chain({"input_documents": docs, "question": question}, return_only_outputs=True)

In [None]:
print(len(docs))
print(result['output_text'])

## ConversationSummaryMemory

In [None]:
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

memory = ConversationSummaryMemory(llm=llm,memory_key="chat_history",return_messages=True)
qa = ConversationalRetrievalChain.from_llm(llm, retriever=vectorstore.as_retriever(), memory=memory)

In [None]:
#query = "Write a C code of ACMP comparing DAC output with ACMP1_P1"
#query = "Write a C code of BMC data transfer with PDMA"
#query = "Write a C code function that reverse a string, input a string in parameter and return the reversed string"
query = "Give me a C code of configure BMC example"
result = qa(query)

In [None]:
print(result['answer'])