<a href="https://colab.research.google.com/github/Arya0212/Arya0212/blob/main/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio requests transformers sentence-transformers faiss-cpu langchain
!pip install -q langchain torch transformers sentence-transformers datasets faiss-cpu requests
!pip install -U langchain-community




In [None]:
import html
import re
import warnings

import gradio as gr
import requests
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, pipeline

In [None]:
# Define a class to fetch posts from a WordPress site
class WordPressAPILoader:
    """Fetch entries from a WordPress site through its REST API.

    Args:
        base_url: Root URL of the site, e.g. ``"https://example.com"``.
            A trailing slash is tolerated.
        post_type: Collection under ``/wp-json/wp/v2/`` to query
            (defaults to ``'posts'``).
        timeout: Seconds to wait for the server before the request is
            abandoned.
    """

    def __init__(self, base_url, post_type='posts', timeout=30):
        self.base_url = base_url
        self.post_type = post_type
        self.timeout = timeout

    def fetch_posts(self, count=15):
        """Request up to ``count`` entries and return the decoded JSON body.

        Args:
            count: Value sent as the ``per_page`` query parameter.

        Returns:
            Whatever ``response.json()`` yields for the request.

        Raises:
            requests.HTTPError: If the server answers with an error status
                (raised by ``raise_for_status``).
        """
        # Strip trailing slashes so "https://site/" does not produce
        # "https://site//wp-json/...".
        url = f"{self.base_url.rstrip('/')}/wp-json/wp/v2/{self.post_type}"
        params = {'per_page': count}
        # Without an explicit timeout the call can block forever on an
        # unresponsive host, which would freeze the notebook cell.
        response = requests.get(url, params=params, timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def load(self, count=15):
        """Alias for :meth:`fetch_posts`, mirroring the loader-style ``load()`` name."""
        return self.fetch_posts(count)

In [None]:
# Sentence-embedding model used to vectorise both documents and queries.
model_path = "sentence-transformers/all-MiniLM-l6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=model_path,
    model_kwargs=dict(device='cpu'),
    encode_kwargs=dict(normalize_embeddings=False),
)




In [None]:
# transformers, datasets and faiss-cpu are already installed by the setup cell
# at the top of the notebook, so no second install is needed here.
from datasets import load_dataset

# Load the Dolly 15k dataset; its 'train' split supplies the base documents.
dataset = load_dataset("databricks/databricks-dolly-15k")
class Document:
    """Minimal document record: a text payload plus optional metadata.

    Args:
        page_content: The text of the document.
        metadata: Optional dict of extra fields; a fresh empty dict is used
            when omitted (or when a falsy value is passed).
        id: Optional identifier for the document; ``None`` when not supplied.
    """

    def __init__(self, page_content, metadata=None, id=None):
        self.page_content = page_content
        # `or {}` gives every instance its own dict instead of sharing one.
        self.metadata = metadata or {}
        # Recent langchain-core vector stores read `doc.id` when documents are
        # added; without this attribute they raise AttributeError.
        self.id = id
# One Document per training record, keeping only the 'response' text.
docs = [
    Document(page_content=record['response'], metadata={})
    for record in dataset['train']
]




In [None]:
# Build the FAISS vector store from the prepared documents and embedding model.
db = FAISS.from_documents(documents=docs, embedding=embeddings)

In [None]:
# Question-answering model: load the checkpoint's weights and its tokenizer.
model_name = "Intel/dynamic_tinybert"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# transformers pipeline that answers a question against a supplied context.
question_answerer = pipeline(
    "question-answering",
    model=model,
    tokenizer=tokenizer,
    return_tensors='pt',
)

# NOTE(review): HuggingFacePipeline is normally paired with text-generation
# style pipelines — confirm a question-answering pipeline is usable as an LLM.
llm = HuggingFacePipeline(
    pipeline=question_answerer,
    model_kwargs={"temperature": 0.7, "max_length": 512},
)


  warn_deprecated(


In [None]:
# Retriever over the vector store (k=4 results per query), wrapped in a
# RetrievalQA chain that uses the "refine" chain type.
retriever = db.as_retriever(search_kwargs=dict(k=4))
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",
    retriever=retriever,
    return_source_documents=False,
)


In [None]:
# Load WordPress posts and add to vector store
base_url = "https://techcrunch.com"
wp_loader = WordPressAPILoader(base_url)
wp_data = wp_loader.load()

# `content.rendered` is HTML. Replace tags with spaces and decode entities so
# that only readable text is embedded, not markup.
wp_docs = [
    Document(html.unescape(re.sub(r"<[^>]+>", " ", post['content']['rendered'])))
    for post in wp_data
]

text_splitter = RecursiveCharacterTextSplitter()
wp_docs_split = text_splitter.split_documents(wp_docs)

# Bind the returned ids to a name so the cell does not print the whole id list.
# NOTE: this mutates `db` in place; re-running the cell adds the posts again.
wp_doc_ids = db.add_documents(wp_docs_split)

['6222ed0f-4c3d-4495-826c-34c94e7eceda',
 '3eba1437-013f-4514-902b-f3aeac65d91a',
 '73038612-17bb-4fbe-9a38-51660eff39f1',
 '139e4248-da61-461a-ab48-1934e466c794',
 '3389b21b-11d1-4f51-9a50-f9f32be4a2f0',
 '5c78ef04-a830-4f12-99a6-fc3665f81976',
 '03b4fe97-32aa-4d36-9107-5069cdd97560',
 '3a326749-6021-4c96-97f2-bf44b2b29a74',
 '11c98d1a-660c-4d11-82b9-2c427c254c38',
 '02fa312b-926e-4ea6-8ae8-986bebdab201',
 '7590705a-767e-44b4-8cf3-1fd5b92436f2',
 '891abb6c-32b4-4f55-b80b-94b5fa75f5a3',
 '236ef492-bfb5-426c-ab23-a342365b9000',
 '9276f9be-6b5f-4b99-9620-167e6e689a1b',
 '95f9b51f-afbd-4743-a7c0-a9042a1717ad',
 '8604f3b2-341e-4f51-9b5a-d2bd97368c46',
 '0185f621-fd40-4bec-9612-0f5ccb679c39',
 '5d0e6265-9b99-459c-831c-74e8e136235f',
 'ac7b4f7e-81f3-4027-91d7-0013208de905',
 '9fcaee07-052f-4d09-8320-022b73efbccc',
 '966b4fc6-aa05-43cb-979c-f76167b80c97',
 'd28ca119-b6a5-4ebb-95c6-674fbdca5f42',
 '7c1a7de6-7365-4126-9116-7ce5c7cbff06',
 'de1091a7-85bb-4004-a4b7-c44e9123dcd7',
 '5c3ef974-00b4-

In [None]:
# Define function to process user queries
def answer_question(final_question):
    """Answer a question using the closest matches from the vector store.

    The question is used to look up similar documents in ``db``; their text
    is joined into one context string and handed, together with the
    question, to ``question_answerer``.

    Args:
        final_question: The user's question as text.

    Returns:
        The ``'answer'`` field of the QA pipeline result, or a short notice
        string when the question is blank or no context text was retrieved.
    """
    question = (final_question or "").strip()
    if not question:
        # A blank message (e.g. pressing Send on an empty box) has nothing to
        # search for, so skip retrieval and the model call entirely.
        return "Please enter a question."

    wp_searchDocs = db.similarity_search(question)
    context = " ".join(doc.page_content for doc in wp_searchDocs)
    if not context.strip():
        # No text to extract an answer from; avoid calling the pipeline with
        # an empty context.
        return "Sorry, I could not find anything relevant to that question."

    final_answer = question_answerer({"question": question, "context": context})
    return final_answer['answer']

In [None]:
import gradio as gr

# The chat UI calls the retrieval-backed `answer_question` defined earlier in
# the notebook. It must not be redefined here: a placeholder with the same
# name would shadow it and the chatbot would only return canned text.
with gr.Blocks(css=".gradio-container {background-color: #f0f8ff;} \
                       .gr-button {background-color: #4caf50; color: white;} \
                       .gr-textbox {background-color: #e6f7ff;}") as iface:
    gr.Markdown("<h1 style='text-align: center; color: #333;'>RAG-based Chatbot</h1>")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(show_label=False, placeholder="Type your question here...",
                                    lines=1, max_lines=1)
        with gr.Column():
            submit_button = gr.Button("Send")

    def user(user_message, history):
        """Append the new user turn with an empty reply slot and clear the textbox."""
        # `history` may be None before the first message; treat it as empty.
        return "", (history or []) + [(user_message, "")]

    def bot(history):
        """Fill the reply slot of the most recent turn with the generated answer."""
        user_message = history[-1][0]
        answer = answer_question(user_message)
        history[-1] = (user_message, answer)
        return history

    # Same two-step flow (record the user turn, then answer it) for the Send
    # button and for pressing Enter in the textbox.
    submit_button.click(user, [user_input, chatbot], [user_input, chatbot], queue=False).then(bot, chatbot, chatbot)
    user_input.submit(user, [user_input, chatbot], [user_input, chatbot], queue=False).then(bot, chatbot, chatbot)

iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://aa7fda606414ecce3c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


