<a href="https://colab.research.google.com/github/Saptarshiii/Multilingual-RAG-ChatBot/blob/main/SpeechRecognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#install all dependency

!pip install -U openai-whisper
!pip install PyPDF2
!pip install langchain
!pip install langchain-google-genai
!pip install langchain-community
!pip install faiss-cpu

Collecting langchain-community
  Using cached langchain_community-0.2.6-py3-none-any.whl (2.2 MB)
Installing collected packages: langchain-community
Successfully installed langchain-community-0.2.6


In [None]:
#import the required frameworks
import os
from getpass import getpass

import whisper
from PyPDF2 import PdfReader
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [None]:
# Function to read text from a PDF file
def get_pdf_text(pdf_path):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        str: The text of all pages joined in page order with no separator.
        A page whose extraction yields nothing contributes an empty string.
    """
    pdf_reader = PdfReader(pdf_path)
    # `or ""` guards against extract_text() yielding None (e.g. image-only
    # pages), which would otherwise raise TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Function to split text into chunks
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split text into overlapping chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        text: The full text to split.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 10000, the value previously hard-coded here.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 1000, the value previously hard-coded here.

    Returns:
        Whatever ``split_text`` returns for ``text`` (the chunks).
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

# Function to create and save a vector store using embeddings
def get_vector_store(text_chunks, api_key, index_path="faiss_index", model="models/embedding-001"):
    """Embed text chunks and save them as a FAISS index on disk.

    Args:
        text_chunks: Chunks of text to embed and index.
        api_key: Google API key passed to ``GoogleGenerativeAIEmbeddings``.
        index_path: Folder handed to ``save_local``. Defaults to
            ``"faiss_index"``, the location previously hard-coded here.
        model: Embedding model name. Defaults to ``"models/embedding-001"``,
            the model previously hard-coded here.

    Returns:
        None. The index is written to ``index_path`` as a side effect.

    Raises:
        ValueError: If ``text_chunks`` is empty.
    """
    # Fail early with a clear message: building a FAISS store from an empty
    # list of texts fails inside the library with an unhelpful error.
    if not text_chunks:
        raise ValueError("text_chunks is empty: there is nothing to embed and index.")

    embeddings = GoogleGenerativeAIEmbeddings(model=model, google_api_key=api_key)
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local(index_path)

# Path to the PDF file
pdf_path = "dummy.pdf"

# Google API key: read from the GOOGLE_API_KEY environment variable, falling
# back to a hidden prompt, so the secret is never stored in the notebook.
# NOTE(security): a key was previously committed here in plain text; that key
# must be treated as leaked and revoked.
api_key = os.environ.get("GOOGLE_API_KEY") or getpass("Enter your Google API key: ")

# Load the conversational model with the specified temperature
cmodel = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3, google_api_key=api_key)

# Load the Whisper model
model = whisper.load_model('medium')

# Process the PDF: extract its text, split it into chunks, then embed the
# chunks and save them as a FAISS index on disk
raw_text = get_pdf_text(pdf_path)  # Extract text from the PDF
text_chunks = get_text_chunks(raw_text)  # Split the text into chunks
get_vector_store(text_chunks, api_key)  # Create and save the vector store



In [None]:
# Function to create a conversational chain for question answering
def get_conversational_chain(api_key):
    """Build the "stuff" question-answering chain around the module-level chat model.

    Args:
        api_key: Accepted so existing callers keep working, but not used here:
            the chain is built on the already-configured module-level
            ``cmodel``.

    Returns:
        The chain returned by ``load_qa_chain``, using a prompt with the
        ``context`` and ``question`` input variables.
    """
    # The stray "?" that used to follow {context} has been removed so the
    # retrieved context is passed to the model unaltered.
    prompt_template = """
    Answer the question as detailed as possible from the provided context. Make sure to provide all the details. If the answer is not in
    the provided context, just say, "answer is not available in the context." Do not provide the wrong answer.\n\n
    Context:\n {context}\n
    Question: \n{question}\n

    Answer:
    """
    # Create a prompt template with the provided context and question
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    # Load the question-answering chain
    chain = load_qa_chain(cmodel, chain_type="stuff", prompt=prompt)
    return chain

# Function to handle user input and get the response
def user_input(user_question, api_key):
    """Answer a question from the saved FAISS index.

    Loads the index from the ``faiss_index`` folder, retrieves the documents
    most similar to the question and runs them through the
    question-answering chain.

    Args:
        user_question: The question text to answer.
        api_key: Google API key used for the embeddings and passed on to
            ``get_conversational_chain``.

    Returns:
        The ``"output_text"`` entry of the chain's response.
    """
    # Load embeddings and FAISS vector store
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=api_key)
    # NOTE(security): allow_dangerous_deserialization=True lets load_local
    # unpickle the index, which can execute arbitrary code. Only load an
    # index this notebook wrote itself, never one from an untrusted source.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

    # Search for documents similar to the user question
    docs = new_db.similarity_search(user_question)

    # Get the conversational chain
    chain = get_conversational_chain(api_key)

    # Generate the response; invoke() replaces the deprecated chain(...) call
    response = chain.invoke(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    return response["output_text"]

# Function to convert audio to text using Whisper model
def audio2text(audio1):
    """Convert a recorded question to text with the module-level Whisper model.

    Args:
        audio1: Path of the audio file, passed to ``whisper.load_audio``.

    Returns:
        The ``'text'`` entry of the result of ``model.transcribe`` run with
        ``task="translate"``.
    """
    # Load the full recording. It is deliberately NOT passed through
    # whisper.pad_or_trim: that cuts the audio to a single fixed-length window,
    # so longer questions were silently truncated. transcribe() handles
    # windowing of arbitrary-length audio itself.
    audio = whisper.load_audio(audio1)

    # The separate detect_language() pass was dropped: its result was never
    # used, so it only added an extra model pass.
    result = model.transcribe(audio, task="translate", beam_size=5, best_of=5)

    # Return the recognized text
    return result['text']

# Main function to process audio input and generate answers from PDF context
def main(audio):
    """Turn a recorded question into its transcription and an answer.

    Args:
        audio: Path of the recorded audio file, or ``None`` when there is no
            recording.

    Returns:
        tuple: ``(transcribed question, answer)``. Both are empty strings when
        ``audio`` is ``None``.
    """
    # With no recording (e.g. the audio input was cleared) there is nothing to
    # transcribe; return empty outputs instead of failing inside Whisper.
    if audio is None:
        return "", ""

    # Convert audio to text (user's question)
    user_question = audio2text(audio)

    # Answer the question using the processed data
    return user_question, user_input(user_question, api_key)



In [None]:
!pip install gradio -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.2/318.2 kB[0m [31m23.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m63.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.4/62.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.9/129.9 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m9.0 MB/

In [None]:
import gradio as gr
import time

In [None]:
# Gradio front end: a recorded question goes in, the Whisper transcription
# and the generated answer come out.
audio_input = gr.Audio(type="filepath", label='Ask anything about Rabindranath Tagore')
text_outputs = [
    gr.Textbox(label="Transcription by Whisper"),
    gr.Textbox(label="Response"),
]

demo = gr.Interface(
    fn=main,
    inputs=audio_input,
    outputs=text_outputs,
    title='Multi-Lingual RAG ',
    live=True,
)
demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://75739a143b482b9c7f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Need [32m'write'[0m access token to create a Spaces repo.

    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
[31mAborted.[0m
^C
