In [31]:
import numpy as np
from langchain_community.embeddings import HuggingFaceBgeEmbeddings, OpenAIEmbeddings
from langchain_community.llms import HuggingFacePipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from urllib.request import urlretrieve
import openai

In [32]:
loader = PyPDFDirectoryLoader(r"C:\Users\mahdi\OneDrive\Desktop\chatbotdata")


In [33]:
docs_before_split = loader.load()

In [34]:
docs_before_split[1
                  ]

Document(metadata={'producer': 'Microsoft® PowerPoint® 2016', 'creator': 'Microsoft® PowerPoint® 2016', 'creationdate': '2022-02-27T10:48:35+02:00', 'title': 'Electromagnetic Waves & Acoustics', 'author': 'Hamza ISSA', 'moddate': '2022-02-27T10:48:35+02:00', 'source': 'C:\\Users\\mahdi\\OneDrive\\Desktop\\chatbotdata\\Chapter 3-Lecture1 .pdf', 'total_pages': 25, 'page': 1, 'page_label': '2'}, page_content='I. Plan\nReflection and Transmission Coefficient Measurement\nII. Introduction\nIII. Directional Coupler\nIV. Measurement of Ref. and Tx Coefficients\nA. O/P Power Control of signal Generators\nB. Measurement of Reflection Coefficient using Slotted Line measurement \ntechnique\nC. Scalar Analyzer\nD. Vector Network Analyzer\nD.1. Complex Impedance Bridge\nD.2. Transmission Coefficient Bridge\nD.3. Heterodyne VNA.\nD.4. Six-Port technique\ni. Six-Port Reflectometer. ii. Six-Port Network Analyzer. \nChapter 3\n2')

In [35]:
text_splitter =  RecursiveCharacterTextSplitter(
    chunk_size =700,
    chunk_overlap = 50
)
docs_after_split = text_splitter.split_documents(docs_before_split)

In [36]:
len(docs_after_split[0].page_content)

109

In [37]:
avg_doc_length = lambda docs: sum([len(doc.page_content) for doc in docs])//len(docs)

In [38]:
avg_char_before_split = avg_doc_length(docs_before_split)
avg_char_after_split = avg_doc_length(docs_after_split)

In [39]:
print(f'before split: {avg_char_before_split}')
print(f'after split: {avg_char_after_split}')

before split: 326
after split: 307


In [40]:
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name= "sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs = {'device' : 'cpu'},
    encode_kwargs = {'normalize_embeddings' : True}
)

In [41]:
huggingface_embeddings.embed_query("mename is ")

[0.034233853220939636,
 0.14126251637935638,
 -0.020818151533603668,
 -0.010789905674755573,
 -0.06833900511264801,
 0.07193399220705032,
 0.09262653440237045,
 -0.045325133949518204,
 0.006242727395147085,
 0.008324368856847286,
 -0.010593184269964695,
 -0.01494328398257494,
 -0.0037628139834851027,
 -0.018284481018781662,
 0.010794136673212051,
 -0.03792990744113922,
 -0.08044886589050293,
 -0.012282311916351318,
 0.030246837064623833,
 -0.03435514494776726,
 0.12390776723623276,
 0.09385240823030472,
 -0.05612576752901077,
 -0.07806669175624847,
 -0.07746580988168716,
 0.014936726540327072,
 -0.017674215137958527,
 -0.0028217239305377007,
 0.08591408282518387,
 -0.07451906055212021,
 -0.001217714510858059,
 0.037950821220874786,
 0.07088372111320496,
 -0.0016862088814377785,
 0.03360874950885773,
 0.06980914622545242,
 0.059654634445905685,
 0.006904518697410822,
 0.034787148237228394,
 -0.04653768241405487,
 -0.005491605028510094,
 -0.04319872707128525,
 -0.0020654588006436825,
 0.

In [42]:
vectorstore = FAISS.from_documents(docs_after_split, huggingface_embeddings)

In [43]:
retriever = vectorstore.as_retriever(search_type="similarity" , search_kwargs={"k" : 3})

In [44]:
access_token="hf_...NjxL"	

In [45]:
client = openai.OpenAI(
    base_url="http://localhost:11434/v1",
    api_key="nokeyneeded"
)


In [46]:
messages_0 = [{"role" : "system" , "content" : "You are a helpful assistant."}]
response =client.chat.completions.create(
    model='phi:latest',
    temperature=0.3,
    messages = messages_0
        )

        

In [47]:
prompt_template = """Use the following pieces of context to answer the question at the end. Please follow the following rules:
1. If you don't know the answer, don't try to make up an answer. Just say "I can't find the final answer but you may want to check the following links".
2. If you find the answer, write the answer in a concise way with five sentences maximum.

{context}

Question: {question}

Helpful Answer:
"""

PROMPT = PromptTemplate(
 template=prompt_template, input_variables=["context", "question"]
)

In [48]:
from langchain.llms import OpenAI

llm = OpenAI(
    model_name="phi:latest",
    openai_api_base="http://localhost:11434/v1",
    openai_api_key="nokeyneeded"
)


In [49]:
retrievalQA = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs = {"prompt" : PROMPT}
)


In [50]:

def chat_with_model(history, new_message):
    
    result = retrievalQA.invoke({"query": new_message})
    
    # Extract the answer from the result
    assistant_message = result.get("result") or result.get("answer") or str(result)
    
   
    history.append((new_message, assistant_message))
    
    
    return history, ""


In [51]:
import gradio as gr 
def gradio_chat_app():
    with gr.Blocks() as app:
        gr.Markdown("# Ollam Phi Model Chat Interface")
        gr.Markdown("Chat with the Phi model in a conversational format.")
        
        chatbot = gr.Chatbot(label = "Chat Interface")
        user_input = gr.Textbox(label = "your message" , placeholder = "Type something ..." , lines=1)  
        send_button = gr.Button("send")
        
        def clear_chat():
            return [] , ""
    
        clear_button = gr.Button("Clear chat")
        
        send_button.click(
            fn=chat_with_model, 
            inputs = [chatbot, user_input],
            outputs = [chatbot, user_input]
        )
        clear_button.click(
            fn=clear_chat,
            inputs = [],
            outputs = [chatbot, user_input]
        )
        
    return app


if __name__ == "__main__":
    app = gradio_chat_app()
    app.launch()

  chatbot = gr.Chatbot(label = "Chat Interface")


* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


Traceback (most recent call last):
  File "c:\Users\mahdi\OneDrive\Desktop\lara wehbi\aivenv\Lib\site-packages\gradio\queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\mahdi\OneDrive\Desktop\lara wehbi\aivenv\Lib\site-packages\gradio\route_utils.py", line 350, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\mahdi\OneDrive\Desktop\lara wehbi\aivenv\Lib\site-packages\gradio\blocks.py", line 2235, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\mahdi\OneDrive\Desktop\lara wehbi\aivenv\Lib\site-packages\gradio\blocks.py", line 1746, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\mahdi\OneDrive\Desktop\lara wehbi\aivenv\