In [1]:
%pip install gradio --quiet
%pip install xformer --quiet
%pip install chromadb --quiet
%pip install langchain --quiet
%pip install accelerate --quiet
%pip install transformers --quiet
%pip install bitsandbytes --quiet
%pip install unstructured --quiet
%pip install sentence-transformers --quiet
%pip install unstructured[pdf]

Collecting antlr4-python3-runtime==4.9.3 (from unstructured[pdf])
  Downloading antlr4-python3-runtime-4.9.3.tar.gz (117 kB)
     ---------------------------------------- 0.0/117.0 kB ? eta -:--:--
     --- ------------------------------------ 10.2/117.0 kB ? eta -:--:--
     --------- --------------------------- 30.7/117.0 kB 325.1 kB/s eta 0:00:01
     --------- --------------------------- 30.7/117.0 kB 325.1 kB/s eta 0:00:01
     ------------ ------------------------ 41.0/117.0 kB 245.8 kB/s eta 0:00:01
     ------------------- ----------------- 61.4/117.0 kB 297.7 kB/s eta 0:00:01
     ---------------------- -------------- 71.7/117.0 kB 302.7 kB/s eta 0:00:01
     ------------------------------------ 117.0/117.0 kB 401.5 kB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencie

In [2]:
import torch
import gradio as gr

from textwrap import fill
from IPython.display import Markdown, display

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    )

from langchain import PromptTemplate
from langchain import HuggingFacePipeline
from langchain.vectorstores import Chroma
from langchain.schema import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredURLLoader
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain

from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

import warnings
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
    MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
    tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, torch_dtype=torch.float16,
        trust_remote_code=True,
        device_map="auto",
        quantization_config=quantization_config
    )

    generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
    generation_config.max_new_tokens = 1024
    generation_config.temperature = 0.0001
    generation_config.top_p = 0.95
    generation_config.do_sample = True
    generation_config.repetition_penalty = 1.15

    pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        return_full_text=True,
        generation_config=generation_config,
    )

RuntimeError: No GPU found. A GPU is needed for quantization.

In [None]:
llm = HuggingFacePipeline(
    pipeline=pipeline,
    )

In [None]:
query = "Explain the difference between ChatGPT and open source LLMs in a couple of lines."
result = llm(
    query
)

display(Markdown(f"<b>{query}</b>"))
display(Markdown(f"<p>{result}</p>"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<b>Explain the difference between ChatGPT and open source LLMs in a couple of lines.</b>

<p>
ChatGPT is a proprietary model developed by OpenAI, while open source LLMs are models that are made available for anyone to use, modify, and distribute under an open-source license.</p>

In [None]:
embeddings = HuggingFaceEmbeddings(
    model_name="thenlper/gte-large",
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},
)

modules.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/67.9k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/670M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

In [None]:
template = """
[INST] <>
Act as a Machine Learning engineer who is teaching high school students.
<>

{text} [/INST]
"""

prompt = PromptTemplate(
    input_variables=["text"],
    template=template,
)

In [None]:
query = "Explain what are Deep Neural Networks in 2-3 sentences"
result = llm(prompt.format(text=query))

display(Markdown(f"<b>{query}</b>"))
display(Markdown(f"<p>{result}</p>"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<b>Explain what are Deep Neural Networks in 2-3 sentences</b>

<p>Deep neural networks (DNN) are a type of artificial intelligence model that simulates the structure and function of the human brain. They consist of multiple layers of interconnected nodes, or neurons, that process information by passing it through successive layers until an output is generated. DNNs can be trained on large amounts of data to recognize patterns and make predictions, making them useful for tasks such as image recognition, speech recognition, and natural language processing.</p>

In [None]:
urls = [
    "https://smartnet.niua.org/sites/default/files/resources/Smart_Cities_Council_India_Readiness_Guide_v2016-02.pdf"
]

loader = UnstructuredURLLoader(urls=urls)
documents = loader.load()

len(documents)
# Output

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


1

In [None]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts_chunks = text_splitter.split_documents(documents)

len(texts_chunks)

3656

In [None]:
db = Chroma.from_documents(texts_chunks, embeddings, persist_directory="db")

In [None]:
template = """
[INST] <>
Act as an AI Assistant. Use the following information to answer the question at the end.
<>

{context}

{question} [/INST]
"""

prompt = PromptTemplate(template=template, input_variables=["context", "question"])

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt},
)

In [None]:
query = "What is Indus Valley Civilization"
result_ = qa_chain(
    query
)
result = result_["result"].strip()


display(Markdown(f"<b>{query}</b>"))
display(Markdown(f"<p>{result}</p>"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<b>What is Indus Valley Civilization</b>

<p>The Indus Valley Civilization was a Bronze Age civilization that flourished in the Northwestern part of the Indian subcontinent from around 2600 BCE to 1900 BCE. It was one of the world's earliest urban civilizations and was characterized by advanced urban planning, architecture, sanitation systems, and trade networks. The civilization was centered on several major cities, including Harappa and Mohenjo-Daro, which were connected by well-maintained roads and canals. The people of the Indus Valley Civilization were known for their use of standardized weights and measures, as well as their knowledge of astronomy and mathematics. Despite its many achievements, the Indus Valley Civilization eventually declined and was replaced by other regional powers.</p>

In [None]:
result_["source_documents"]

[Document(page_content='India is a country of unity in vast diversity, the multiplicity of heritage, culture, economic (cid:70)(cid:79)(cid:68)(cid:86)(cid:86)(cid:15)(cid:3)(cid:85)(cid:68)(cid:70)(cid:72)(cid:15)(cid:3)(cid:72)(cid:87)(cid:70)(cid:17)(cid:15)(cid:3)(cid:76)(cid:86)(cid:3)(cid:85)(cid:72)(cid:192)(cid:72)(cid:70)(cid:87)(cid:72)(cid:71)(cid:3)(cid:68)(cid:81)(cid:71)(cid:3)(cid:70)(cid:82)(cid:72)(cid:91)(cid:76)(cid:86)(cid:87)(cid:86)(cid:3)(cid:76)(cid:81)(cid:3) every city. National unity and integrity have been maintained even through acute economic and social inequalities. Though each city has a unique combination of socioeconomic strata, estimated acceleration of mass migration to cities in the future will likely change the equation. While developing the concept', metadata={'source': 'https://smartnet.niua.org/sites/default/files/resources/Smart_Cities_Council_India_Readiness_Guide_v2016-02.pdf'}),
 Document(page_content='(cid:88)(cid:85)(cid:68)(cid:79)(cid:3)

In [None]:
custom_template = """You are an AI Assistant. Given the
following conversation and a follow up question, rephrase the follow up question
to be a standalone question. At the end of standalone question add this
'Answer the question in English language.' If you do not know the answer reply with 'I am sorry, I dont have enough information'.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""

CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    memory=memory,
    condense_question_prompt=CUSTOM_QUESTION_PROMPT,
)

In [None]:
query = "Who you are?"
result_ = qa_chain({"question": query})
result = result_["answer"].strip()

display(Markdown(f"<b>{query}</b>"))
display(Markdown(f"<p>{result}</p>"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<b>Who you are?</b>

<p>I am a company that values its employees, clients, and the communities it serves. I strive to operate with integrity, an adventurous spirit, and dedication to the well-being of people in my life and work.</p>

In [None]:
query = "Atomic Bombings of Hiroshima and Nagasaki"

result_ = qa_chain({"question": query})
result = result_["answer"].strip()

display(Markdown(f"<b>{query}</b>"))
display(Markdown(f"<p>{result}</p>"))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<b>Atomic Bombings of Hiroshima and Nagasaki</b>

<p>The atomic bombings of Hiroshima and Nagasaki were two separate events during World War II in which the United States dropped nuclear bombs on the Japanese cities of Hiroshima and Nagasaki, respectively. These attacks were the first time nuclear weapons were used in warfare and resulted in the deaths of thousands of people. The bombings are widely considered to be a major factor in Japan's surrender and the end of World War II.</p>

In [None]:
def querying(query, history):
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

  qa_chain = ConversationalRetrievalChain.from_llm(
      llm=llm,
      retriever=db.as_retriever(search_kwargs={"k": 2}),
      memory=memory,
      condense_question_prompt=CUSTOM_QUESTION_PROMPT,
  )

  result = qa_chain({"question": query})
  return result["answer"].strip()

In [None]:
iface = gr.ChatInterface(
    fn = querying,
    chatbot=gr.Chatbot(height=600),
    textbox=gr.Textbox(placeholder="Ask your question", container=False, scale=7),
    title="Chat",
    theme="soft",
    # examples=["Why Hiberus has created GenAI Ecosystem?",
    #           "What is GenAI Ecosystem?"],
    # cache_examples=True,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Submit"

    )

iface.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://2d1207847c32deffa4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
