In [1]:
!pip install -qU langchain langchain-core langchain-community langchain-experimental langchain-openai langchain-text-splitters langchain-huggingface sentence_transformers pypdf langchain_chroma

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m19.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m408.7/408.7 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.0/209.0 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00

In [2]:
from langchain_openai import AzureChatOpenAI
import os
from google.colab import userdata

In [None]:
# Non-secret Azure OpenAI settings, exported as environment variables.
# NOTE(review): "azure_ad" usually denotes token-based auth while an API key is configured
# below -- confirm which authentication mode is intended.
os.environ["OPENAI_API_TYPE"] = "azure_ad"
os.environ["AZURE_OPENAI_API_VERSION"] = "2024-05-01-preview"
os.environ["AZURE_OPENAI_GPT4O_MODEL_NAME"] = "gpt-4o"

# Credentials and the tenant-specific endpoint are never written into the notebook:
# they are read from the Colab secret store, so each value must exist there as a secret
# with the same name as the environment variable it fills.
os.environ["AZURE_OPENAI_ENDPOINT"] = userdata.get("AZURE_OPENAI_ENDPOINT")
os.environ["AZURE_OPENAI_API_KEY"] = userdata.get("AZURE_OPENAI_API_KEY")
os.environ['HUGGINGFACEHUB_API_TOKEN'] = userdata.get('HUGGINGFACEHUB_API_TOKEN')

# Set Up LLM.

In [4]:
# Chat model used both to rewrite follow-up questions and to generate the final answer.
# API version and deployment name are taken from the environment variables set earlier.
llm = AzureChatOpenAI(
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_OPENAI_GPT4O_MODEL_NAME"],
    # Deterministic decoding: the QA prompt demands answers strictly from the retrieved
    # context, and the same model reformulates questions for retrieval, so sampling
    # randomness (the previous temperature=1) only adds variance and invented wording.
    temperature=0,
)

# Load PDF

In [5]:
from langchain_community.document_loaders import PyPDFLoader

In [6]:
# Location of the source PDF (a Google Drive path; presumably requires Drive to be mounted).
file_path = '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/RAG/docs/Wirebonding.pdf'

# Loader bound to that PDF; the documents themselves are produced by a later loader.load() call.
loader = PyPDFLoader(file_path=file_path)

In [7]:
# Read the PDF through the loader; the resulting documents are what the text splitter chunks later.
docs = loader.load()

# Chunk Docs

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [9]:
# Recursive character splitter: chunk length is measured with len(), with a target size of
# 4000 and an overlap of 800 between neighbouring chunks; separators are plain strings, not regex.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=4000,
    chunk_overlap=800,
    length_function=len,
    is_separator_regex=False,
)

In [10]:
# Split the loaded PDF documents into chunks; these chunks are what gets embedded and indexed.
texts = text_splitter.split_documents(docs)

# Embedding model

In [11]:
from langchain_openai import AzureOpenAIEmbeddings

In [12]:
# Embedding client handed to the Chroma vector store below.
# Connection settings are presumably picked up from the AZURE_OPENAI_* environment variables.
embeddings = AzureOpenAIEmbeddings(model="text-embedding-3-large")

In [13]:
# Smoke test of the embedding client: embed a short query and show the vector length.
# NOTE: despite its name, `embeddings_size` holds the embedding vector itself;
# the dimensionality is the len() displayed as the cell output.
embeddings_size = embeddings.embed_query("Hello World")

len(embeddings_size)

3072

# Vector Store

In [14]:
from langchain_chroma import Chroma

In [15]:
# Remove any previously persisted vector store so the next cell rebuilds it from `texts`.
# NOTE(review): this targets the absolute path while the next cell uses the relative
# 'wirebonding_db' (same directory only if the working directory is /content). On a
# top-to-bottom run it also means the load-from-disk branch there is never taken -- confirm intended.
%rm -rf "/content/wirebonding_db"

In [16]:
persist_directory = 'wirebonding_db'

if not os.path.exists(persist_directory):
    # Nothing persisted yet: embed the chunks and write a fresh store to disk.
    db = Chroma.from_documents(
        documents=texts,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
else:
    # A persisted store is already present: open it with the same embedding function.
    db = Chroma(
        persist_directory=persist_directory,
        embedding_function=embeddings,
    )

In [17]:
# Expose the vector store as a retriever; k=3 requests three chunks per query.
vectorstores_retriever = db.as_retriever(search_kwargs={"k": 3})

In [18]:
# Smoke test of the retriever with a sample query; the count shown should equal the configured k.
retrieved_docs = vectorstores_retriever.invoke("Wire bond forms?")

len(retrieved_docs)

3

In [19]:
# Display the retrieved Document objects (metadata plus page_content) for inspection.
retrieved_docs

[Document(metadata={'page': 0, 'source': '/content/drive/MyDrive/Colab_Notebooks/GenerativeAI/RAG/docs/Wirebonding.pdf'}, page_content='Wire Bond / Ball Shear Application Note \n \nWhat is wirebonding  \nWirebonding  \nWirebonding is an electrical interconnection technique using thin wire and a combination \nof heat, pressure and/or ultrasonic energy. Wire bonding is a solid phase welding process, \nwhere the two metallic materials (wire and pad surface) are brought into intimate contact. \nOnce the surfaces are in intimate contact, electron sharing or interdiffusion of atoms takes \nplace, resulting in the formation of wirebond. In wirebonding process, bonding force can \nlead to material deformation, breaking up contamination layer and smoothing out surface \nasperity, which can be enhanced by the application of ultrasonic energy. Heat can \naccelerate Interatomic diffusion, thus the bond formation.  \nWirebonding processes  \nWirebonding process begins by firmly attaching the backsi

In [20]:
# Print the text of the top-ranked chunk; print() renders the newlines instead of showing '\n' escapes.
print(retrieved_docs[0].page_content)

Wire Bond / Ball Shear Application Note 
 
What is wirebonding  
Wirebonding  
Wirebonding is an electrical interconnection technique using thin wire and a combination 
of heat, pressure and/or ultrasonic energy. Wire bonding is a solid phase welding process, 
where the two metallic materials (wire and pad surface) are brought into intimate contact. 
Once the surfaces are in intimate contact, electron sharing or interdiffusion of atoms takes 
place, resulting in the formation of wirebond. In wirebonding process, bonding force can 
lead to material deformation, breaking up contamination layer and smoothing out surface 
asperity, which can be enhanced by the application of ultrasonic energy. Heat can 
accelerate Interatomic diffusion, thus the bond formation.  
Wirebonding processes  
Wirebonding process begins by firmly attaching the backside of a chip to a chip carrier 
using either an organic conductive adhesive or a solder (Die Attach). The wires then are 
welded using a special bond

# Prompting

In [21]:
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

In [22]:
# System instruction for the question-rewriting step: turn a follow-up question that may
# depend on the chat history into a standalone question, without answering it.
contextualize_q_system_prompt = " ".join(
    [
        "Given a chat history and the latest user question",
        "which might reference context in the chat history,",
        "formulate a standalone question which can be understood",
        "without the chat history. Do NOT answer the question,",
        "just reformulate it if needed and otherwise return it as is.",
    ]
)

In [23]:
# Prompt for the question-rewriting step: system instruction, then the running
# conversation (filled from "chat_history"), then the latest user input.
_contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(_contextualize_messages)

In [24]:
# Retriever wrapper that receives the LLM and the contextualize prompt alongside the
# vector-store retriever, so retrieval can take the chat history into account.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=vectorstores_retriever,
    prompt=contextualize_q_prompt,
)

In [25]:
# System prompt for the answer-generation step.
# It is used as a prompt template, so `{context}` and `{input}` are placeholders and must
# remain the only curly braces in the text.
# NOTE(review): the example under "Output Format" is a complete answer to a real question
# about this document; consider a neutral example so it cannot be echoed back verbatim.
system_prompt = '''
  You are a friendly local guide who is an expert on Wire Bond and Ball Shear Application and is fluent in Hindi, Tamil, Gujarati and English, with a conversational, human approach.

  Audience:
  Your intended audience consists of factory workers. So you always converse in a way they can understand

  Your Task:
  Identify the language of the user’s question. Then, respond in the identified language only, keeping your answer natural and conversational, just as people speak.
  Use English terms as needed to enhance clarity and keep the response engaging, rather than formal or strictly academic.


  Knowledge Scope:
  Use the following pieces of context to answer the user question.
  {context}

  If you don't know the answer, just say that you don't know, don't try to make up an answer.
  Use three sentences maximum and keep the answer as concise as possible.


  Question: {input}

  Guidelines for Response:
  * If the identified language is in English, then you must respond back in English.
  * You can use English terms as needed to enhance clarity and keep the response engaging, rather than formal or strictly academic.
  * Answer only from the provided context. Do not make up an answer.
  * You are not a translator, instead you are an expert multilingual agent.

Output Format:
  When you receive an input, first output the detected language, and then provide the response in the following format:
    Detected language: Response
  Example:
    English: In the ultrasonic wirebonding process, low pressure and ultrasonic energy are used, with a typical temperature of 25°C, and it uses Gold or Aluminium wires. In the thermosonic wirebonding process, it requires low pressure as well, but involves a higher temperature range of 100-150°C along with ultrasonic energy and primarily uses Gold wires.
'''

In [26]:
# Prompt for the answer-generation step: the system prompt (carrying the retrieved
# context), the prior conversation, and finally the current user question.
_qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(_qa_messages)

# Chaining

In [27]:
# Answering chain: hands the retrieved documents to the LLM through `qa_prompt`.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)



In [28]:
# Full RAG pipeline: history-aware retrieval followed by the answering chain.
# The generated text is read from the 'answer' key of the result in the cells below.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

# Run Chain

In [29]:
from langchain_core.messages import HumanMessage, AIMessage

In [30]:
# Conversation memory: HumanMessage/AIMessage pairs are appended after a turn and the list
# is passed to the chain under the "chat_history" key on every call.
chat_history = []

In [31]:
# Turn 1 (English), asked with an empty chat history.
user_input = "What is the difference between ultrasonic and thermosonic wirebonding process??"

response = rag_chain.invoke({"input": user_input, "chat_history": chat_history})

# Show only the generated answer text.
response['answer']

'English: In the ultrasonic wirebonding process, low pressure and ultrasonic energy are used, with a typical temperature of 25°C, and it uses Gold or Aluminium wires. In the thermosonic wirebonding process, it requires low pressure as well, but involves a higher temperature range of 100-150°C along with ultrasonic energy and primarily uses Gold wires.'

In [32]:
# Store the completed turn (question, then answer) so later questions can refer back to it.
chat_history += [
    HumanMessage(content=user_input),
    AIMessage(content=response['answer']),
]

In [33]:
# Turn 2 (Tamil), asked with the first turn already in the chat history.
user_input = "Capillaries என்றால் என்ன?"

response = rag_chain.invoke({"input": user_input, "chat_history": chat_history})

# Show only the generated answer text.
response['answer']

'Tamil: Capillaries என்பது wire bonding செயல்முறையில் பயன்படுத்தப்படும் கரெய்மிக் கருவிகள் ஆகும், அரசுச்சக உறுப்புகளுடன் செங்குத்து ஊட்டுவரிசைகள் கொண்டு உள்ளன.'

In [34]:
# Store the completed turn (question, then answer) in the conversation memory.
chat_history += [
    HumanMessage(content=user_input),
    AIMessage(content=response['answer']),
]

In [35]:
# Turn 3 (Hindi).
user_input = "मुझे thermosonic bonding के बारे में बताएं??"

response = rag_chain.invoke({
    "input" : user_input,
    "chat_history" : chat_history,
})

# Record this turn like the earlier ones; previously it was never appended, so the
# following question was answered without this exchange in its chat history.
# Note that re-running this cell appends the turn again.
chat_history.extend([
    HumanMessage(content=user_input),
    AIMessage(content=response['answer'])
])

# Show only the generated answer text.
response['answer']

'Hindi: Thermosonic bonding में low pressure और ultrasonic energy का उपयोग होता है, और तापमान 100-150°C के बीच रखा जाता है। इस प्रक्रिया में मुख्य रूप से गोल्ड (Gold) वायर का उपयोग होता है। इसे thermosonic कहा जाता है क्योंकि इसमें heat और ultrasonic energy दोनों का प्रयोग होता है।'

In [36]:
# Turn 4 (Gujarati).
user_input = "મને Thermosonic bonding વિશે કહો?"

response = rag_chain.invoke({"input": user_input, "chat_history": chat_history})

# Show only the generated answer text.
response['answer']

'Gujarati: Thermosonic bonding માં નીચા દબાણ અને અલ્ટ્રાસોનીક એનર્જી સાથે 100-150°C ના તાપમાને ગોલ્ડ વાયર નો ઉપયોગ થાય છે. DLI Thermosonic માટે તાપમાન 100-240°C પણ હોઇ શકે છે.'

In [37]:
import re

# Split the latest model answer, expected as "<language>: <response>", into its two parts.
input_text = response['answer']

# The language label must sit on the first line (no colon or newline inside it), and
# re.DOTALL lets the response part span several lines. Without that flag "." stops at
# the first newline, so a multi-line answer would be silently truncated.
match = re.match(r"\s*([^:\n]+?)\s*:\s*(.*)", input_text, flags=re.DOTALL)

if match:
    language = match.group(1)          # Label before the first colon, whitespace-trimmed
    result = match.group(2).strip()    # Everything after the colon, all lines included

    print(f"Language = {language}")
    print(f"result = {result}")
else:
    # No "<label>:" prefix on the first line.
    print("No match found")


Language = Gujarati
result = Thermosonic bonding માં નીચા દબાણ અને અલ્ટ્રાસોનીક એનર્જી સાથે 100-150°C ના તાપમાને ગોલ્ડ વાયર નો ઉપયોગ થાય છે. DLI Thermosonic માટે તાપમાન 100-240°C પણ હોઇ શકે છે.
