In [25]:
!pip install -qU \
    langchain-pinecone==0.1.3 \
    langchain-text-splitters==0.2.0 \
    langchain==0.2.1 \
    pinecone-notebooks==0.1.1 \
    gradio \
    groq \
    langchain-groq \
    PyPDF2 \
    phonenumbers dateparser


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.6/2.6 MB[0m [31m84.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m56.2 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/315.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m315.5/315.5 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [35]:
import os
import time
import asyncio
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain_pinecone import PineconeEmbeddings, PineconeVectorStore
from langchain.chains import RetrievalQA
from pinecone import Pinecone, ServerlessSpec
from google.colab import userdata
import gradio as gr
from groq import Groq
from langchain_groq import ChatGroq

os.environ["PINECONE_API_KEY"] = userdata.get('PINECONE_API_KEY')
os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')

if not os.environ.get("PINECONE_API_KEY"):
    print("Please set PINECONE_API_KEY environment variable")
if not os.environ.get("GROQ_API_KEY"):
    print("Please set GROQ_API_KEY environment variable")

def test_groq_connection():
    try:
        llm = ChatGroq(
            groq_api_key=os.environ.get('GROQ_API_KEY'),
            model_name="llama3-70b-8192",
            temperature=0.0,
            max_retries=2,
            timeout=10
        )
        test_response = llm.invoke("Hello")
        print("Groq connection test successful!")
        return True
    except Exception as e:
        print(f"Groq connection test failed: {e}")
        return False
test_groq_connection()

Groq connection test successful!


True

In [36]:
async def initialize_system(markdown_text):
    """Initialize the RAG system with the provided markdown text"""
    try:
        headers_to_split_on = [
            ("#", "Header 1"),
            ("##", "Header 2")
        ]
        markdown_splitter = MarkdownHeaderTextSplitter(
            headers_to_split_on=headers_to_split_on, strip_headers=False
        )
        md_header_splits = markdown_splitter.split_text(markdown_text)

        model_name = 'multilingual-e5-large'
        embeddings = PineconeEmbeddings(
            model=model_name,
            pinecone_api_key=os.environ.get('PINECONE_API_KEY')
        )

        pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
        cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
        region = os.environ.get('PINECONE_REGION') or 'us-east-1'
        spec = ServerlessSpec(cloud=cloud, region=region)

        index_name = "rag-chatbot"

        if index_name not in pc.list_indexes().names():
            pc.create_index(
                name=index_name,
                dimension=embeddings.dimension,
                metric="cosine",
                spec=spec
            )
            while not pc.describe_index(index_name).status['ready']:
                time.sleep(1)

        namespace = "markdown-content"
        docsearch = PineconeVectorStore.from_documents(
            documents=md_header_splits,
            index_name=index_name,
            embedding=embeddings,
            namespace=namespace
        )

        llm = ChatGroq(
            groq_api_key=os.environ.get('GROQ_API_KEY'),
            model_name="llama-3.3-70b-versatile",
            temperature=0.0
        )

        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=docsearch.as_retriever(),
            return_source_documents=True
        )

        return qa, llm
    except Exception as e:
        print(f"Error in initialize_system: {str(e)}")
        raise

In [37]:
class ChatbotState:
    def __init__(self):
        self.qa = None
        self.llm = None

state = ChatbotState()

def extract_text_from_file(file):
    try:
        if file.name.endswith('.pdf'):
            import PyPDF2
            with open(file.name, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                text = "\n".join([page.extract_text() for page in reader.pages])
                return text
        elif file.name.endswith(('.txt', '.md')):
            with open(file.name, 'r', encoding='utf-8') as f:
                return f.read()
        else:
            return f"Unsupported file type: {file.name}"
    except Exception as e:
        return f"Error reading file: {str(e)}"

def submit_content(text_input, file_input):
    content = ""

    if text_input.strip():
        content = text_input
    elif file_input is not None:
        content = extract_text_from_file(file_input)
    else:
        return "Please enter text or upload a file.", gr.update(visible=False)

    if not content.strip():
        return "The content is empty.", gr.update(visible=False)

    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        state.qa, state.llm = loop.run_until_complete(initialize_system(content))
        loop.close()

        return ("Content successfully loaded! You can now ask questions.", gr.update(visible=True))
    except Exception as e:
        print(f"Error in submit_content: {str(e)}")
        return f"Error: {str(e)}", gr.update(visible=False)



ERROR:asyncio:Unclosed client session
client_session: <aiohttp.client.ClientSession object at 0x794ab28ae8d0>


In [38]:
import phonenumbers
import dateparser
from datetime import datetime, timedelta
import re

class UserInfo:
    def __init__(self):
        self.name = None
        self.phone = None
        self.email = None
        self.appointment_date = None
        self.conversation_state = "normal"

user_info = UserInfo()

In [39]:
def validate_email(email):
    """Validate email format"""
    pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    return re.match(pattern, email) is not None

def validate_phone(phone):
    """Validate phone number format"""
    try:
        pattern = r'^(\+977-)?(98[0-9]{8}|97[0-9]{8}|96[0-9]{8})$'
        phone = re.sub(r'[\s\-\(\)]', '', phone.strip())
        if re.match(pattern, phone):
            return True
        parsed_phone = phonenumbers.parse(phone, None)
        return phonenumbers.is_valid_number(parsed_phone)
    except:
        return False

In [40]:
def extract_date(text):
    """Extract date from natural language"""
    try:
        parsed_date = dateparser.parse(text, languages=['en'])
        if parsed_date:
            return parsed_date.strftime("%Y-%m-%d")
        return None
    except:
        return None

def handle_appointment_request(message):
    global user_info

    if user_info.conversation_state == "normal":
        user_info.conversation_state = "collect_name"
        return "I'd be happy to help you book an appointment! First, what's your full name?"

    elif user_info.conversation_state == "collect_name":
        user_info.name = message.strip()
        user_info.conversation_state = "collect_phone"
        return f"Nice to meet you, {user_info.name}! What's your phone number?"

    elif user_info.conversation_state == "collect_phone":
        if validate_phone(message):
            user_info.phone = message.strip()
            user_info.conversation_state = "collect_email"
            return "Great! What's your email address?"
        else:
            return "Please enter a valid phone number (e.g., +977-9808535455 or 9808535455):"

    elif user_info.conversation_state == "collect_email":
        if validate_email(message):
            user_info.email = message.strip()
            user_info.conversation_state = "collect_date"
            return "Perfect! When would you like to schedule the appointment? (e.g., 'next Monday', 'December 15th', 'tomorrow')"
        else:
            return "Please enter a valid email address (e.g., irfanAlam@email.com):"

    elif user_info.conversation_state == "collect_date":
        date = extract_date(message)
        if date:
            user_info.appointment_date = date
            confirmation = f"""
Thank You!Appointment Booked Successfully!

Here are your details:
• Name: {user_info.name}
• Phone: {user_info.phone}
• Email: {user_info.email}
• Appointment Date: {user_info.appointment_date}

We'll contact you at {user_info.phone} to confirm. Thank you!
            """
            user_info.conversation_state = "normal"
            return confirmation
        else:
            return "I couldn't understand that date. Please try again (e.g., 'next Monday', 'December 15th'):"

In [41]:
def process_query(message, history):
    global user_info
    appointment_keywords = ['call me', 'book appointment', 'schedule', 'meeting', 'call back', 'contact me']
    if any(keyword in message.lower() for keyword in appointment_keywords):
        return handle_appointment_request(message)

    if user_info.conversation_state != "normal":
        return handle_appointment_request(message)

    if state.qa is None:
        return "Please submit content first before asking questions."

    try:
        response = state.qa.invoke({"query": message})
        return response['result']
    except Exception as e:
        print(f"Error in process_query: {str(e)}")
        return f"Error processing query: {str(e)}"


In [42]:
with gr.Blocks() as interface:
    gr.Markdown("ChatBot with appointment Booking")

    with gr.Row():
        with gr.Column():
            text_input = gr.TextArea(
                label="Or paste content here",
                placeholder="Paste your document content here...",
                lines=5
            )

            file_input = gr.File(
                label="Upload document (PDF, TXT)",
                file_types=[".pdf", ".txt", ".md"]
            )

            submit_btn = gr.Button("Submit Content")

    status_msg = gr.Textbox(label="Status", interactive=False)

    with gr.Column(visible=False) as chat_interface:
        chatbot = gr.ChatInterface(
            fn=process_query,
            title="Document Q&A with Appointment Booking",
            examples=[
                "What is this document about?",
                "Book an appointment with me",
                "Call me to discuss this",
                "Schedule a meeting for next Monday"
            ]
        )

    submit_btn.click(
        fn=submit_content,
        inputs=[text_input, file_input],
        outputs=[status_msg, chat_interface]
    )

  self.chatbot = Chatbot(


In [43]:
# Run the interface
if __name__ == "__main__":
    interface.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://9bec0f1b9d88b4645d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Error in process_query: Error code: 413 - {'error': {'message': 'Request too large for model `llama-3.3-70b-versatile` in organization `org_01k31zg48cfrct5jjmppk8gvmr` service tier `on_demand` on tokens per minute (TPM): Limit 12000, Requested 16764, please reduce your message size and try again. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'tokens', 'code': 'rate_limit_exceeded'}}
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://9bec0f1b9d88b4645d.gradio.live
