In [None]:
# To access file from .env
from dotenv import load_dotenv
import os
# To login Huggingface
from huggingface_hub import login
# JSON data loader
from langchain_community.document_loaders import JSONLoader
# Importing Embedding
from langchain_huggingface import HuggingFaceEmbeddings
# To access LLM model
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace, HuggingFaceEmbeddings
# connect with Pinecone

from langchain_pinecone import PineconeVectorStore
# Formatting data to store in PC
import json
from langchain.schema import Document
from uuid import uuid4
# prompt template
from langchain.prompts import PromptTemplate
# To format output
from IPython.display import display, Markdown
import pinecone

from pinecone import Pinecone, ServerlessSpec
from transformers import AutoTokenizer
# for gradio
import gradio as gr
from PIL import Image
import PyPDF2

In [None]:
from huggingface_hub import login

# Path handed to python-dotenv; its variables are loaded into the process environment.
# NOTE(review): dotenv files are conventionally named '.env' - confirm './env' is the intended path.
env = './env'
load_dotenv(env)

# Credentials read back from the environment (each is None when the variable is unset).
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_ENVIRONMENT = os.environ.get("PINECONE_ENVIRONMENT")
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_KEY")

# Authenticate this session with the Hugging Face Hub using the loaded token.
login(token=HUGGINGFACE_API_KEY)

In [None]:
# Settings passed to the Hugging Face endpoint that serves the Phi-3 model.
endpoint_settings = {
    "repo_id": "microsoft/Phi-3-mini-4k-instruct",
    "task": "text-generation",
    "max_new_tokens": 512,
    "do_sample": False,
    "repetition_penalty": 1.03,
}
llm = HuggingFaceEndpoint(**endpoint_settings)

# Chat wrapper around the endpoint; the query handlers call `chat.invoke(...)`.
chat = ChatHuggingFace(verbose=True, llm=llm)

In [None]:
# Source avatar images for the two chat participants.
bot_pic_path = './image/bot.png'
person_pic_path = './image/user.jpg'

# Destination file names for the resized copies (these are what the chat widget is given).
bot_pic_file = 'bot_pic_resized.jpg'
person_pic_file = 'person_pic_resized.png'

# Both avatars are scaled to the same square size.
avatar_size = (100, 100)

# Load and normalise: the bot avatar becomes RGB, the user avatar RGBA.
bot_source = Image.open(bot_pic_path)
bot_pic = bot_source.convert('RGB').resize(avatar_size)
person_source = Image.open(person_pic_path)
person_pic = person_source.convert('RGBA').resize(avatar_size)

# Write the resized avatars to disk.
bot_pic.save(bot_pic_file)
person_pic.save(person_pic_file)

In [None]:
def clear_fields():
    """Return the reset values used by the Clear button.

    Returns:
        tuple: ``("", None)`` - an empty string followed by ``None``.
    """
    emptied_text, emptied_upload = "", None
    return emptied_text, emptied_upload

# Pinecone client, authenticated with the key loaded from the environment
# (previously a hard-coded placeholder string was passed here).
pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "finantial-data"

# Sentence-embedding model used to embed queries for the similarity search.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# LangChain vector store on top of the Pinecone index. The `Pinecone` class is the
# raw client and has no `similarity_search`; `PineconeVectorStore` is the wrapper
# that the query handlers need.
vector_store = PineconeVectorStore(index=pc.Index(index_name), embedding=embeddings)

def handle_file_query(file, chat_history=None):
    """Answer every non-empty line of an uploaded PDF as a separate query.

    Each line of text extracted from the PDF is looked up in ``vector_store``
    (top-1 similarity search), combined with the retrieved data into a prompt,
    and sent to ``chat``. The caller's ``chat_history`` is never mutated; a new
    list is returned.

    NOTE(review): only PDF content is parsed here, while the upload button also
    offers '.doc' files - such an upload is expected to fail inside PdfReader.

    Args:
        file: The uploaded file, either a filesystem path or an object whose
            ``.name`` attribute is the path. ``None`` means nothing was uploaded.
        chat_history: Existing list of ``(user_message, bot_message)`` pairs,
            or ``None`` for an empty history.

    Returns:
        list: A copy of ``chat_history`` extended with one
        ``(query, "Response to <answer>")`` pair per query line, or with a
        single "No file uploaded" pair when ``file`` is ``None``.
    """
    history = list(chat_history) if chat_history else []

    if file is None:
        return history + [("No file uploaded", "Please upload a file")]

    # Accept both a plain path and a file-like wrapper exposing `.name`.
    pdf_path = getattr(file, "name", file)

    with open(pdf_path, 'rb') as pdf_handle:
        reader = PyPDF2.PdfReader(pdf_handle)
        # Extract while the file is still open; fall back to '' for pages without text.
        page_texts = [page.extract_text() or '' for page in reader.pages]

    # Join pages with a newline so the last line of one page is not fused
    # with the first line of the next one.
    pdf_text = '\n'.join(page_texts)

    # The template is the same for every query, so build it once.
    prompt_template = PromptTemplate(
        input_variables=["results", "query"],
        template='''Given the data: {results}, please respond to the query: "{query}".'''
    )

    for raw_line in pdf_text.split('\n'):
        user_query = raw_line.strip()
        if not user_query:
            # Skip blank / whitespace-only lines.
            continue

        results = vector_store.similarity_search(query=user_query, k=1)
        formatted_prompt = prompt_template.format(results=results, query=user_query)

        # Single human message carrying the full prompt.
        messages = [{"role": "human", "content": formatted_prompt}]
        response = chat.invoke(messages).content

        history.append((user_query, f"Response to {response}"))

    return history

In [None]:
# Prompt-size budget read by handle_query.
# NOTE(review): assumes the "4k" in the model name means a 4096-token window and
# reserves the 512 tokens requested via max_new_tokens; chat-template tokens are
# not accounted for - confirm and lower if the endpoint rejects long prompts.
MAX_INPUT_TOKENS = 4096 - 512

# `index` expects an index object; the index *name* has to be passed as
# `index_name`, together with the API key used to open it.
vector_store = PineconeVectorStore(
    index_name='finantial-data',
    embedding=embeddings,
    text_key="text",
    pinecone_api_key=PINECONE_API_KEY,
)

# Tokenizer used to count and truncate prompt tokens.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-3-mini-4k-instruct")

def truncate_to_token_limit(text, max_tokens):
    """Truncate text so that it encodes to at most ``max_tokens`` tokens.

    Args:
        text: The string to shorten.
        max_tokens: Maximum number of tokens to keep. Zero or a negative
            value keeps no tokens.

    Returns:
        str: ``text`` unchanged when it already fits, otherwise the decoded
        first ``max_tokens`` tokens (special tokens skipped while decoding).
    """
    # A negative limit would make the slice below drop tokens from the END of
    # the text instead of keeping none, so clamp it to zero first.
    budget = max(0, max_tokens)

    token_ids = tokenizer.encode(text)
    if len(token_ids) <= budget:
        return text
    return tokenizer.decode(token_ids[:budget], skip_special_tokens=True)

def get_token_count(text):
    """Return how many tokens the module-level tokenizer produces for ``text``."""
    token_ids = tokenizer.encode(text)
    return len(token_ids)

def handle_query(user_query, chat_history):
    """Answer a single chat query using context retrieved from the vector store.

    The most similar document (k=1) is fetched from ``vector_store``, its text
    is truncated to the token budget left over after the fixed prompt and the
    query, and the resulting prompt is sent to ``chat``. Reads the module-level
    ``MAX_INPUT_TOKENS`` as the prompt-size limit.

    Args:
        user_query: The question typed by the user.
        chat_history: List of ``(user_message, bot_message)`` pairs; the new
            exchange is appended to it in place. ``None`` is treated as empty.

    Returns:
        tuple: ``(response, chat_history)`` - the model's answer text and the
        history including the new ``(user_query, response)`` pair.

    Raises:
        ValueError: If the final prompt still exceeds ``MAX_INPUT_TOKENS``.
    """
    if chat_history is None:
        chat_history = []

    # The placeholders must use PromptTemplate's {name} syntax; with the former
    # [CONTEXT]/[QUERY] markers nothing was substituted and the model never saw
    # the retrieved context or the question.
    prompt_template = PromptTemplate(
        input_variables=["context", "query"],
        template='''Based on this context: {context}
    
    Please answer the following question: {query}
    
    If the context doesn't contain enough information to answer the question fully, please state that.'''
    )

    # Tokens consumed by the fixed prompt text plus the query itself.
    base_tokens = get_token_count(prompt_template.format(context='', query=user_query))

    # Whatever is left is available for context; never let it go negative.
    available_context_tokens = max(0, MAX_INPUT_TOKENS - base_tokens)

    # Retrieve the single closest document for the query.
    results = vector_store.similarity_search(
        query=user_query,
        k=1
    )

    # Concatenate the retrieved text, one blank line after each document.
    context = "".join(str(doc.page_content) + "\n\n" for doc in results)

    truncated_context = truncate_to_token_limit(context, available_context_tokens)
    formatted_prompt = prompt_template.format(context=truncated_context, query=user_query)

    # Final guard: the query alone may already exceed the limit.
    final_token_count = get_token_count(formatted_prompt)
    if final_token_count > MAX_INPUT_TOKENS:
        raise ValueError(f"Token count ({final_token_count}) exceeds maximum input tokens ({MAX_INPUT_TOKENS})")

    # Single human message carrying the full prompt.
    messages = [{"role": "human", "content": formatted_prompt}]

    response = chat.invoke(messages).content
    chat_history.append((user_query, response))
    return response, chat_history

In [None]:
def _submit_query(user_query, chat_history):
    """Run handle_query and clear the prompt box.

    The submit event writes its first output to the prompt textbox, so the
    answer text must not be returned there; it is already part of the history
    shown in the chatbot.
    """
    _, updated_history = handle_query(user_query, chat_history)
    return "", updated_history


with gr.Blocks() as chatbot_app:
    # NOTE(review): the title says "Restaurant" while the vector index is named
    # 'finantial-data' - confirm which domain this app is meant for.
    gr.Markdown("Restaurant Chatbot")
    gr.Markdown("Welcome!")

    # `value` is the first positional parameter of gr.Chatbot, so it is passed
    # exactly once (passing it positionally and by keyword raises TypeError).
    chatbot = gr.Chatbot(
        value=[],
        elem_id="chatbot",
        bubble_full_width=False,
        height=400,
        avatar_images=(person_pic_file, bot_pic_file),
    )

    prompt = gr.Textbox(placeholder="Message Chatbot", interactive=True, label=None, show_label=False, lines=1, max_lines=3)

    with gr.Row(equal_height=False):
        submit_btn = gr.Button("Submit", scale=1)
        upload_btn = gr.UploadButton("📁 Upload PDF or doc files", file_types=['.pdf', '.doc'], file_count="single")
        clear_btn = gr.Button("Clear", scale=1)

    # Both the Submit button and pressing Enter in the textbox send the query.
    gr.on(
        triggers=[submit_btn.click, prompt.submit],
        fn=_submit_query,
        inputs=[prompt, chatbot],
        outputs=[prompt, chatbot],
        queue=False
    )

    # Uploading a file answers each line of it and shows the result in the chat.
    upload_btn.upload(fn=handle_file_query, inputs=[upload_btn, chatbot], outputs=chatbot)

    # Clear resets the prompt textbox and the upload button.
    clear_btn.click(fn=clear_fields, outputs=[prompt, upload_btn])

chatbot_app.launch(debug=True)