In [1]:
!pip install -qU \
  pinecone-client==2.2.4 \
  langchain==0.0.321 \
  datasets==2.14.6 \
  openai==0.28.1 \
  tiktoken==0.5.1

In [2]:
!pip install gradio



In [3]:
import os
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
# Load variables from a local .env file (if one is found) into the process
# environment so the os.environ lookups in later cells can see them.
load_dotenv()

True

In [4]:
# OpenAI API key read from the environment; raises KeyError if OPENAI_API_KEY is unset.
my_key = os.environ["OPENAI_API_KEY"]

In [5]:
# Chat-completion client for gpt-3.5-turbo, authenticated with the OpenAI key.
chat = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=my_key)

In [6]:
import pinecone

# Configure the Pinecone client. Both values come from the environment, so a
# missing PINECONE_API_KEY_MAIN or PINECONE_ENV raises KeyError here.
pinecone.init(
    api_key = os.environ["PINECONE_API_KEY_MAIN"],
    environment= os.environ["PINECONE_ENV"]
)

  from tqdm.autonotebook import tqdm


In [7]:
# Open a handle to the Pinecone index by name; it is queried by the vector store below.
index_name = 'med277-medquad'
index = pinecone.Index(index_name)

In [8]:
# Sanity check: display the index dimension and vector counts.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.16407,
 'namespaces': {'': {'vector_count': 16407}},
 'total_vector_count': 16407}

In [9]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding client used to vectorise queries before searching the index.
embed = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=my_key,
    disallowed_special=(),
)

In [10]:
from langchain.vectorstores import Pinecone
# Name of the metadata field whose value becomes each retrieved Document's page_content.
# NOTE(review): with "question" here, the contexts later placed in the prompt are the
# stored question texts — confirm this is intended rather than an answer field.
text_field = "question"
# LangChain wrapper around the Pinecone index; queries are embedded with embed.embed_query.
vectorstore = Pinecone(index, embed.embed_query, text_field)



In [11]:
from langchain.chat_models import ChatOpenAI
# Chat model with temperature=0; it is handed to MultiQueryRetriever in the next cell.
llm = ChatOpenAI(temperature=0, openai_api_key=my_key)

In [12]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Multi-query retriever: built from the LLM and the vector store's default retriever.
retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)

In [13]:
import logging

# Install a default root handler, then let INFO-level records from the
# multi-query retriever's logger through so its messages are visible.
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [14]:
def augment_prompt(query: str, k: int = 3) -> str:
    """Build a retrieval-augmented prompt for ``query``.

    Runs a similarity search against the notebook-level ``vectorstore``, joins
    the ``page_content`` of the hits with newlines, and places them together
    with the query inside an instruction template.

    Args:
        query: The user's question; used for retrieval and echoed in the prompt.
        k: Number of documents to retrieve. Defaults to 3, the value that was
            previously hard-coded.

    Returns:
        The complete prompt text to send to the language model.
    """
    results = vectorstore.similarity_search(query, k=k)
    source_knowledge = "\n".join(doc.page_content for doc in results)
    # The template, including its leading indentation, is sent to the model verbatim.
    return f"""You are a helpful assistant who answers user queries using the
    contexts provided. If the question cannot be answered using the information
    provided say "I don't know"

    Contexts:
    {source_knowledge}

    Query: {query}"""

In [15]:
from transformers import pipeline
import numpy as np

# Speech-recognition pipeline loaded from the openai/whisper-base.en checkpoint;
# called by transcribe() with a {"sampling_rate", "raw"} dict.
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [16]:
def transcribe(audio):
    """Transcribe a Gradio audio value to text with the speech-recognition pipeline.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded. ``samples`` is a NumPy array, either 1-D (mono) or 2-D
            with one column per channel.

    Returns:
        The recognised text, or ``""`` when there is no audio to transcribe.
    """
    # Nothing recorded: unpacking None would raise, so report "no speech".
    if audio is None:
        return ""

    sr, y = audio
    # np.max raises on a zero-length array; treat it like a missing recording.
    if y.size == 0:
        return ""

    y = y.astype(np.float32)

    # The pipeline expects a single channel; average the channels of stereo input.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise to [-1, 1]. Skip for pure silence to avoid dividing by
    # zero, which would fill the waveform with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]

In [17]:
import gradio as gr
import openai
from langchain.llms import OpenAI

In [24]:
def unified_process(input_data):
    """Answer a spoken question: speech -> text -> augmented prompt -> LLM.

    Args:
        input_data: Audio value from the Gradio ``Audio`` input, or ``None``
            when the form was submitted without a recording.

    Returns:
        The LLM's answer as a string, or a short notice when no question
        could be obtained from the input.
    """
    no_question = "I couldn't hear a question. Please record your question and try again."

    # Submitting without a recording yields None; don't call the models at all.
    if input_data is None:
        return no_question

    query = transcribe(input_data)
    # An empty transcript gives nothing to retrieve on; skip the paid API calls.
    if not query or not query.strip():
        return no_question

    # NOTE(review): temperature=1 is kept from the original; a lower value may
    # suit an assistant that must answer strictly from the retrieved contexts.
    # Named distinctly so it does not shadow the notebook-level `llm`.
    completion_llm = OpenAI(temperature=1, openai_api_key=my_key)
    augmented_prompt = augment_prompt(query)
    return completion_llm(augmented_prompt)

In [25]:
# Minimal UI: a microphone recording goes in, the answer comes back as plain text.
demo = gr.Interface(
    fn=unified_process,
    inputs=gr.Audio(sources=["microphone"]),
    outputs="text",
)

demo.launch()

Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "/Users/samitkapadia/Desktop/Fall23/MED-277_Chatbot/med/lib/python3.11/site-packages/gradio/queueing.py", line 456, in call_prediction
    output = await route_utils.call_process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/samitkapadia/Desktop/Fall23/MED-277_Chatbot/med/lib/python3.11/site-packages/gradio/route_utils.py", line 232, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/samitkapadia/Desktop/Fall23/MED-277_Chatbot/med/lib/python3.11/site-packages/gradio/blocks.py", line 1522, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/samitkapadia/Desktop/Fall23/MED-277_Chatbot/med/lib/python3.11/site-packages/gradio/blocks.py", line 1144, in call_function
    prediction = await anyio.to_thread.run_sync(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/samitkapadia/D