In [2]:
# Install necessary packages
!pip install gradio requests transformers sentence-transformers faiss-cpu langchain
!pip install -q langchain
!pip install -q torch
!pip install -q transformers
!pip install -q sentence-transformers
!pip install -q datasets
!pip install -q faiss-cpu
!pip install requests

!pip install -U langchain-community



In [3]:
import html
import re
import warnings

import gradio as gr
import requests
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

In [4]:
# Define a class to fetch posts from a WordPress site
class WordPressAPILoader:
    """Minimal client for a WordPress REST API collection under ``/wp-json/wp/v2``.

    Args:
        base_url: Site root, e.g. ``"https://example.com"``. A trailing slash is
            tolerated when the request URL is built.
        post_type: Collection name appended to ``/wp-json/wp/v2/`` (default ``'posts'``).
        timeout: Seconds passed to ``requests.get`` as its timeout, so a stalled
            server cannot hang the notebook kernel indefinitely.
    """

    def __init__(self, base_url, post_type='posts', timeout=30):
        self.base_url = base_url
        self.post_type = post_type
        self.timeout = timeout

    def _endpoint(self):
        """Return the collection URL, ignoring any trailing slash on ``base_url``."""
        return f"{self.base_url.rstrip('/')}/wp-json/wp/v2/{self.post_type}"

    def fetch_posts(self, count=15):
        """Fetch up to ``count`` items from the collection.

        Args:
            count: Sent as the ``per_page`` query parameter.
                NOTE(review): the WordPress REST API documents an upper bound on
                ``per_page``; larger values are expected to be rejected by the server.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no response arrives within ``self.timeout`` seconds.
        """
        response = requests.get(
            self._endpoint(),
            params={'per_page': count},
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def load(self, count=15):
        """Alias for :meth:`fetch_posts`, mirroring the ``load()`` call used on loaders."""
        return self.fetch_posts(count)

In [5]:
# Simple document class to hold page content and metadata
class Document:
    """Lightweight record pairing a text body with a metadata mapping.

    Attributes set by the constructor:
        page_content: The ``content`` argument, stored unchanged.
        metadata: The ``metadata`` argument, or a fresh empty dict when the
            argument is omitted or falsy.
    """

    def __init__(self, content, metadata=None):
        # Substitute a new dict for any falsy value so instances never share one.
        if not metadata:
            metadata = {}
        self.metadata = metadata
        self.page_content = content

In [6]:
# Load dataset and split it into manageable chunks
# Hugging Face dataset to index, and the column whose value becomes each document's page content.
dataset_name = "databricks/databricks-dolly-15k"
page_content_column = "context"

# Load the dataset as a list of documents.
loader = HuggingFaceDatasetLoader(dataset_name, page_content_column)
data = loader.load()

# Split documents with chunk_size=1000 and chunk_overlap=150 (presumably measured in
# characters — confirm against the splitter's length function).
# `text_splitter` is reused later for the WordPress posts; `docs` feeds the FAISS store.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(data)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/8.20k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/15011 [00:00<?, ? examples/s]

In [7]:
# Set up embeddings and vector store
# Sentence-transformers model used to embed the document chunks stored in FAISS.
model_path = "sentence-transformers/all-MiniLM-l6-v2"
# Run the embedding model on CPU and leave the vectors un-normalized (normalize_embeddings=False).
embeddings = HuggingFaceEmbeddings(model_name=model_path, model_kwargs={'device': 'cpu'}, encode_kwargs={'normalize_embeddings': False})

  warn_deprecated(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
# Build the FAISS vector store from the dataset chunks using `embeddings`.
# Later cells add the WordPress posts to `db` and query it via similarity_search().
db = FAISS.from_documents(docs, embeddings)

In [9]:
# Set up question-answering model
# Question-answering checkpoint and its tokenizer, both loaded from the same model id.
model_name = "Intel/dynamic_tinybert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# `question_answerer` is the pipeline that answer_question() calls directly with a
# {"question", "context"} dict.
# NOTE(review): `return_tensors` does not look like a question-answering pipeline option — confirm it has any effect.
question_answerer = pipeline("question-answering", model=model, tokenizer=tokenizer, return_tensors='pt')
# NOTE(review): HuggingFacePipeline appears to be designed for text-generation style
# pipelines; wrapping a question-answering pipeline may fail once `llm` is actually
# invoked — confirm. `llm` is only consumed by the RetrievalQA chain built in the next cell.
llm = HuggingFacePipeline(pipeline=question_answerer, model_kwargs={"temperature": 0.7, "max_length": 512})

tokenizer_config.json:   0%|          | 0.00/351 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]

  warn_deprecated(


In [10]:
# Set up retriever and QA pipeline
# Retriever over `db` configured with k=4 search results.
retriever = db.as_retriever(search_kwargs={"k": 4})
# RetrievalQA chain using the "refine" chain type; source documents are not returned.
# NOTE(review): neither `retriever` nor `qa` is referenced by the later cells visible in
# this notebook — answer_question() queries `db` and `question_answerer` directly.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="refine", retriever=retriever, return_source_documents=False)


In [11]:
# Load WordPress posts and add to vector store
base_url = "https://techcrunch.com"
wp_loader = WordPressAPILoader(base_url)
wp_data = wp_loader.load()


def _html_to_text(markup):
    """Reduce rendered post HTML to plain text.

    Drops <script>/<style> elements with their contents, removes the remaining
    tags, decodes HTML entities and collapses runs of whitespace. This is a
    regex-based clean-up, not a full HTML parser.
    """
    without_code = re.sub(r"(?is)<(script|style)\b.*?</\1\s*>", " ", markup)
    without_tags = re.sub(r"(?s)<[^>]+>", " ", without_code)
    # Decode entities only after the tags are gone, so an escaped "&lt;b&gt;" in the
    # article text is kept as literal text instead of being stripped as a tag.
    return re.sub(r"\s+", " ", html.unescape(without_tags)).strip()


# Index the readable text of each post rather than its raw HTML, and keep the post
# link (when the API response provides one) as provenance metadata.
wp_docs = [
    Document(_html_to_text(post['content']['rendered']), {"source": post.get("link", "")})
    for post in wp_data
]
wp_docs_split = text_splitter.split_documents(wp_docs)
# NOTE(review): each run of this cell adds the chunks to `db` again; rebuild `db`
# before re-running to avoid duplicate entries.
db.add_documents(wp_docs_split)

['dbcdac88-3c7d-4be5-b8f9-4887cbf2bd28',
 '5d3055aa-ec0a-4253-a8a1-714abc837ed4',
 '1973cc19-52ca-45a4-928b-e3e25a2c3315',
 'ffc2e2a1-bd32-413e-90f7-e03950635417',
 '30fdaf33-99d3-4821-84ae-51ae01415a10',
 '935e06d1-357e-4799-9d86-bba7cdffd3a1',
 'ffbd99ca-cb71-4b1f-8cb9-56a180765d43',
 '14a16c91-3fc9-4f9d-884c-170f6fedc02b',
 'f9c8b771-9564-41db-ae20-f6b629b225ec',
 'ead0206b-2792-4ede-bb59-634a6d36ee1e',
 '2473c99b-78dc-4c09-93cc-ca418eca8197',
 '1e560505-1a61-4d41-90ee-418f15895c9a',
 '76b60ae8-a2cb-4c61-8412-5a3dcd51686d',
 'db82f4de-98d1-4e91-b4b7-28824f2f215d',
 '3e99d1cb-f76c-4b83-be12-79733427e293',
 '4e1a86c7-6119-4227-a723-ed4dea24d6ec',
 '30b11a88-20e2-47bb-a48b-590055cfd0d7',
 '5ac0d9b8-7f4f-47dd-a965-261325c6db48',
 'aa2db35a-8d29-4d16-9d30-ac42fb8fc071',
 '319ec35f-0618-4751-9bd2-b30be368ab07',
 'bf0871ff-795a-40f9-aaab-a9c67a1d3e97',
 '0a0fbaa8-198d-4102-ac77-b1ca48af059b',
 '4a6d9d4b-9526-4ff3-8b6f-914906c35c6f',
 '74bbb68a-0cec-47db-908a-0edfd00445ee',
 '1972e1ca-1937-

In [29]:
# Define function to process user queries
def answer_question(final_question):
    """Answer a user question from the documents stored in ``db``.

    The question is used to retrieve similar chunks from ``db``; their text is
    joined into a single context string and passed, together with the question,
    to ``question_answerer``.

    Args:
        final_question: The user's question. ``None``, empty and whitespace-only
            values are handled without touching the vector store or the model.

    Returns:
        The ``'answer'`` field of the pipeline result, or a short explanatory
        message when there is no question or no retrieved text to answer from.
    """
    question = (final_question or "").strip()
    if not question:
        # Nothing to search for; the QA pipeline is expected to reject an empty
        # question, so answer with a prompt instead of raising in the UI.
        return "Please enter a question."

    matches = db.similarity_search(question)
    context = " ".join(doc.page_content for doc in matches).strip()
    if not context:
        # Same reasoning for an empty context: there is nothing to extract an answer from.
        return "Sorry, I could not find any relevant content to answer that."

    result = question_answerer({"question": question, "context": context})
    return result['answer']

In [35]:
# Create and launch Gradio interface with chat-like UI
with gr.Blocks() as iface:
    gr.Markdown("<h1 style='text-align: center;'>RAG-based Query Suggestion Chatbot</h1>")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(show_label=False, placeholder="Type your question here...")
        with gr.Column():
            submit_button = gr.Button("Send")

    def user(user_message, history):
        """Clear the textbox and append the new message with an empty reply slot."""
        # `history or []` tolerates a chatbot value of None before the first turn.
        return "", (history or []) + [(user_message, "")]

    def bot(history):
        """Fill the reply slot of the latest turn with the answer from answer_question()."""
        if not history:
            # No pending turn to answer.
            return history
        user_message = history[-1][0]
        answer = answer_question(user_message)
        history[-1] = (user_message, answer)
        return history

    # Wire the same two-step chain (echo the message, then answer it) to both the
    # Send button and the Enter key in the textbox.
    for trigger in (submit_button.click, user_input.submit):
        trigger(user, [user_input, chatbot], [user_input, chatbot], queue=False).then(bot, chatbot, chatbot)

iface.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://1f85a950dbc90d3ff1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


