In [2]:
!pip install -U langchain langchain-openai langchain-community openai faiss-cpu gtts pydub speechrecognition tenacity fastapi uvicorn --quiet


In [21]:
from fastapi import FastAPI, File, UploadFile
import openai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from pydub import AudioSegment
import speech_recognition as sr
import time
from gtts import gTTS
from fastapi.responses import FileResponse
from io import BytesIO
import os
import base64

# Initialize FastAPI app
app = FastAPI()

# OpenAI API key.
# The key is read from the OPENAI_API_KEY environment variable (falling back to
# an interactive prompt) so the secret is never stored in the notebook itself.
# Any key that was previously pasted into this cell must be treated as leaked
# and revoked in the OpenAI dashboard.
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")


In [22]:
from google.colab import files
import pandas as pd

# Upload dataset: open the Colab upload dialog and read the first item
# yielded by iterating the upload result as a CSV.
uploaded = files.upload()
df = pd.read_csv(next(iter(uploaded)))

# Chunk into documents: every row becomes one space-joined string, rows are
# joined with newlines, and the resulting text is split with
# chunk_size=500 / chunk_overlap=50.
text_data = '\n'.join(
    df.astype(str).apply(' '.join, axis=1).tolist()
)
splitter = RecursiveCharacterTextSplitter(chunk_overlap=50, chunk_size=500)
docs = splitter.create_documents([text_data])

# Embedding setup (reuses the key already configured on the openai module)
embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key)

# Indexing into FAISS; `vectorstore` is the global that get_rag_context queries
vectorstore = FAISS.from_documents(docs, embeddings)


Saving HackathonInternalKnowledgeBase.csv to HackathonInternalKnowledgeBase (1).csv


In [24]:
from google.colab import output
from IPython.display import Javascript

def record_audio(filename='recorded.wav', duration=5):
    """Record microphone audio in the browser and hand it to the kernel.

    Injects a JavaScript snippet into the notebook output that captures audio
    with ``MediaRecorder`` for ``duration`` seconds, encodes the result as a
    base64 data URL and invokes the ``notebook.save_audio`` kernel callback
    with it.

    Args:
        filename: Currently unused by this function; the snippet sends only
            the audio payload to the callback. Kept for interface
            compatibility.
        duration: Recording length in seconds (fractions are allowed; the
            value is converted to whole milliseconds).

    Returns:
        None. The recording is delivered asynchronously through the
        ``notebook.save_audio`` callback.
    """
    # Previously the wait was hard-coded to 5 seconds, ignoring `duration`.
    duration_ms = int(duration * 1000)
    # NOTE(review): the Blob is only *labelled* 'audio/wav'; presumably the
    # bytes stay in whatever container MediaRecorder produced - confirm before
    # feeding the file to a WAV-only reader.
    js_code = """
    const sleep = time => new Promise(resolve => setTimeout(resolve, time));
    const b2text = blob => new Promise(resolve => {
      const reader = new FileReader();
      reader.onloadend = () => resolve(reader.result);
      reader.readAsDataURL(blob);
    });
    async function record() {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const recorder = new MediaRecorder(stream);
      const data = [];
      recorder.ondataavailable = event => data.push(event.data);
      recorder.start();
      await sleep(""" + str(duration_ms) + """);
      recorder.stop();
      await new Promise(resolve => recorder.onstop = resolve);
      const blob = new Blob(data, { type: 'audio/wav' });
      const base64 = await b2text(blob);
      google.colab.kernel.invokeFunction('notebook.save_audio', [base64], {});
    }
    record();
    """
    display(Javascript(js_code))

def save_audio(base64_wav, filename="recorded.wav"):
    """Decode a base64 audio payload and write it to disk.

    Args:
        base64_wav: Either a data URL of the form ``<header>,<base64 data>``
            (as produced by ``FileReader.readAsDataURL``) or a bare base64
            string without any header.
        filename: Destination path. Defaults to ``"recorded.wav"``, which is
            what the one-argument kernel callback invocation uses.

    Raises:
        binascii.Error: If the payload is not valid base64.
    """
    # Everything before the first comma is the data-URL header. `partition`
    # (unlike `split(',')[1]`) does not raise when there is no header at all.
    header, separator, payload = base64_wav.partition(',')
    if not separator:
        payload = header
    wav_data = base64.b64decode(payload)
    with open(filename, "wb") as f:
        f.write(wav_data)
    print(f"Audio saved as '{filename}'")

# Register save_audio under 'notebook.save_audio', the name the recorder
# JavaScript passes to google.colab.kernel.invokeFunction.
output.register_callback('notebook.save_audio', save_audio)


In [25]:
def transcribe_audio_file(filename='converted.wav'):
    """Transcribe an audio file with the Google recognizer of speech_recognition.

    Args:
        filename: Path of the audio file to read.

    Returns:
        ``{"transcription": <text>, "duration_seconds": None}`` on success, or
        ``{"error": <message>}`` when recognition (or printing the result)
        raises. Errors while opening/reading the file are not caught here.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(filename) as audio_source:
        captured = engine.record(audio_source)

    try:
        transcript = engine.recognize_google(captured)
        print("📝 Transcription:", transcript)
        outcome = {"transcription": transcript, "duration_seconds": None}
    except Exception as exc:
        # Best-effort: report the failure to the caller instead of raising.
        failure = str(exc)
        print("Error in transcription:", failure)
        outcome = {"error": failure}
    return outcome


In [26]:
# Module-level chat history: a list of {"role", "content"} message dicts that
# chat_with_llm splices into each request and appends to after each reply.
conversation_memory = []

def get_rag_context(query):
    """Fetch retrieval context for ``query`` from the global ``vectorstore``.

    Runs a similarity search for the 3 closest documents and returns their
    ``page_content`` values joined by newlines. Any exception is printed and
    swallowed, in which case an empty string is returned.
    """
    try:
        matches = vectorstore.similarity_search(query, k=3)
        passages = []
        for match in matches:
            passages.append(match.page_content)
        return '\n'.join(passages)
    except Exception as err:
        # Deliberate best-effort: no context is better than a failed chat turn.
        print("Error in RAG context:", err)
        return ''

def chat_with_llm(user_input):
    """Send ``user_input`` to the chat model together with retrieved context.

    The request consists of a fixed system prompt, the accumulated
    ``conversation_memory`` and the new user message with the output of
    ``get_rag_context`` appended under "Relevant info:".

    Args:
        user_input: The user's message text.

    Returns:
        The assistant's reply text.

    Side effects:
        Appends the user message and the assistant reply to the module-level
        ``conversation_memory`` list.
    """
    rag_context = get_rag_context(user_input)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        *conversation_memory,
        {"role": "user", "content": f"{user_input}\n\nRelevant info:\n{rag_context}"}
    ]
    # openai>=1.0 (required by langchain-openai and installed by `pip -U`)
    # removed `openai.ChatCompletion`; the module-level client is reached via
    # `openai.chat.completions` and responses are objects, not dicts.
    response = openai.chat.completions.create(model="gpt-3.5-turbo", messages=messages)
    reply = response.choices[0].message.content
    # Only the bare question is stored; the retrieved context is not kept.
    conversation_memory.append({"role": "user", "content": user_input})
    conversation_memory.append({"role": "assistant", "content": reply})
    return reply


In [27]:
def speak_text(text):
    """Synthesize ``text`` to speech with gTTS and save it as response.mp3.

    Returns:
        A dict with ``"audio"`` (the path of the written MP3) and
        ``"tts_duration"`` (wall-clock seconds spent synthesizing and saving,
        rounded to two decimals).
    """
    output_path = "response.mp3"
    started_at = time.time()
    gTTS(text).save(output_path)
    elapsed = time.time() - started_at
    return {"audio": output_path, "tts_duration": round(elapsed, 2)}


In [32]:
from fastapi.responses import StreamingResponse
from io import BytesIO

@app.post("/upload_rag_docs")
async def upload_rag_docs(file: UploadFile = File(...)):
    """Replace the retrieval index with one built from an uploaded CSV.

    The CSV is flattened to text (one space-joined line per row), split with
    chunk_size=500 / chunk_overlap=50, embedded with the module-level
    ``embeddings`` and indexed into a new FAISS store.

    Args:
        file: The uploaded CSV file.

    Returns:
        A JSON-serializable confirmation message.
    """
    # Without this declaration the assignment below only binds a local name,
    # so the freshly built index was thrown away and get_rag_context kept
    # searching the previous store.
    global vectorstore

    contents = await file.read()
    df = pd.read_csv(BytesIO(contents))
    # Chunking and embedding process
    text_data = '\n'.join(df.astype(str).apply(lambda row: ' '.join(row), axis=1).tolist())
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.create_documents([text_data])
    vectorstore = FAISS.from_documents(docs, embeddings)
    return {"message": "Documents uploaded and indexed successfully"}

@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    """Save the uploaded audio to uploaded_audio.wav and return its transcription.

    The response is whatever ``transcribe_audio_file`` returns for that file.
    """
    target_path = "uploaded_audio.wav"
    payload = await file.read()
    with open(target_path, "wb") as sink:
        sink.write(payload)
    return transcribe_audio_file(target_path)

@app.post("/chat")
async def chat(user_input: str):
    """Forward ``user_input`` to ``chat_with_llm`` and wrap the reply as JSON."""
    return {"response": chat_with_llm(user_input)}

@app.post("/speak")
async def speak(text: str):
    """Synthesize ``text`` via ``speak_text`` and return the audio file."""
    tts_result = speak_text(text)
    mp3_path = tts_result["audio"]
    return FileResponse(mp3_path)

@app.post("/converse")
async def converse(user_input: str, file: UploadFile = File(...)):
    """Run transcription, chat and text-to-speech in one request.

    Args:
        user_input: Text that is sent to the chat model.
        file: Uploaded audio, saved as uploaded_audio.wav and transcribed.

    Returns:
        A dict with the transcription result, the chat reply, the path of the
        synthesized audio file and the TTS duration.
    """
    audio_path = "uploaded_audio.wav"

    # Transcribe: persist the upload, then run speech recognition on it.
    raw_audio = await file.read()
    with open(audio_path, "wb") as sink:
        sink.write(raw_audio)
    transcription = transcribe_audio_file(audio_path)

    # Chat
    # NOTE(review): the reply is generated from `user_input`; the transcription
    # above is only echoed back in the response. Confirm this is intended.
    reply = chat_with_llm(user_input)

    # Speak
    tts = speak_text(reply)

    return {
        "transcription": transcription,
        "response": reply,
        "audio_file": tts["audio"],
        "tts_duration": tts["tts_duration"],
    }



In [45]:
from fastapi import FastAPI

# The application object must be named `app`. Reuse the instance created
# earlier in the notebook when there is one: building a new FastAPI() here
# would rebind `app` to an empty application and drop every route that was
# registered on the previous object with @app.post(...).
if "app" not in globals():
    app = FastAPI()


In [None]:
# Reference copy of the intended `app.py` module, kept as a bare string literal.
# Nothing in this cell writes the text to disk; presumably it has to be saved as
# app.py separately before `uvicorn app:app` can import it - TODO confirm.
# The helper functions at the end of the string are placeholders that return
# canned values instead of performing transcription, chat or TTS.
''' app.py code

from fastapi import FastAPI, File, UploadFile
from fastapi.responses import FileResponse
import openai
import pandas as pd
from io import BytesIO
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Initialize FastAPI instance
app = FastAPI()

# Example initialization for OpenAI API and vector store
openai.api_key = "Replace with your actual API key"
embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key)

# Default route - should return a different message when you visit the base URL
@app.get("/")
async def root():
    return {"message": "FastAPI is running!"}

# POST route to upload RAG documents
@app.post("/upload_rag_docs")
async def upload_rag_docs(file: UploadFile = File(...)):
    contents = await file.read()
    df = pd.read_csv(BytesIO(contents))
    # Chunking and embedding process
    text_data = '\n'.join(df.astype(str).apply(lambda row: ' '.join(row), axis=1).tolist())
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = splitter.create_documents([text_data])
    vectorstore = FAISS.from_documents(docs, embeddings)
    return {"message": "Documents uploaded and indexed successfully"}

# POST route to transcribe audio
@app.post("/transcribe")
async def transcribe_audio(file: UploadFile = File(...)):
    contents = await file.read()
    with open("uploaded_audio.wav", "wb") as f:
        f.write(contents)
    transcription = transcribe_audio_file("uploaded_audio.wav")
    return transcription

# POST route for chat
@app.post("/chat")
async def chat(user_input: str):
    response = chat_with_llm(user_input)
    return {"response": response}

# POST route for text-to-speech
@app.post("/speak")
async def speak(text: str):
    audio = speak_text(text)
    return FileResponse(audio["audio"])

# POST route for the entire conversation flow (transcription + chat + speech)
@app.post("/converse")
async def converse(user_input: str, file: UploadFile = File(...)):
    # Transcribe
    contents = await file.read()
    with open("uploaded_audio.wav", "wb") as f:
        f.write(contents)
    transcription = transcribe_audio_file("uploaded_audio.wav")

    # Chat
    response = chat_with_llm(user_input)

    # Speak
    audio = speak_text(response)

    return {
        "transcription": transcription,
        "response": response,
        "audio_file": audio["audio"],
        "tts_duration": audio["tts_duration"]
    }

# Example helper functions (implement the logic of transcribing, chatting, speaking, etc.)
def transcribe_audio_file(filename: str):
    # Example transcription logic
    return {"transcription": "This is a transcribed text from the audio file."}

def chat_with_llm(user_input: str):
    # Example LLM response logic
    return f"Response to: {user_input}"

def speak_text(text: str):
    # Example TTS logic (you can replace it with actual TTS service)
    audio_file = "response.mp3"  # Simulate TTS output
    duration = 1.23  # Simulate TTS duration
    return {"audio": audio_file, "tts_duration": duration}'''


In [15]:
!pip install pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.12-py3-none-any.whl.metadata (9.4 kB)
Downloading pyngrok-7.2.12-py3-none-any.whl (26 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.12


In [39]:
!uvicorn app:app --reload --host 0.0.0.0 --port 8000


[32mINFO[0m:     Will watch for changes in these directories: ['/content']
[32mINFO[0m:     Uvicorn running on [1mhttp://0.0.0.0:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     Started reloader process [[36m[1m9109[0m] using [36m[1mStatReload[0m
[32mINFO[0m:     Started server process [[36m9115[0m]
[32mINFO[0m:     Waiting for application startup.
[32mINFO[0m:     Application startup complete.
[32mINFO[0m:     Shutting down
[32mINFO[0m:     Waiting for application shutdown.
[32mINFO[0m:     Application shutdown complete.
[32mINFO[0m:     Finished server process [[36m9115[0m]
[32mINFO[0m:     Stopping reloader process [[36m[1m9109[0m]


In [46]:
import os
from getpass import getpass

from pyngrok import ngrok

# ngrok authtoken.
# Read from the NGROK_AUTHTOKEN environment variable (falling back to an
# interactive prompt) so the credential is never stored in the notebook.
# Any token that was previously pasted into this cell must be treated as
# leaked and rotated in the ngrok dashboard.
ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

# Set up a tunnel to the FastAPI app (which is running on port 8000)
public_url = ngrok.connect(8000)

print(f"FastAPI is live at: {public_url}")


FastAPI is live at: NgrokTunnel: "https://d69d44118400.ngrok-free.app" -> "http://localhost:8000"


In [49]:
import subprocess
from pyngrok import ngrok

# Run FastAPI server in the background using subprocess.
# Keep the Popen handle: dropping it leaves no way to stop the server from the
# notebook (server_process.terminate()) and makes Python emit a ResourceWarning
# when the unreferenced handle of a still-running child is garbage collected.
server_process = subprocess.Popen(
    ["uvicorn", "app:app", "--reload", "--host", "0.0.0.0", "--port", "8000"]
)

# Set up ngrok to expose the FastAPI server to the public
public_url = ngrok.connect(8000)
print(f"FastAPI is live at: {public_url}")


FastAPI is live at: NgrokTunnel: "https://2e58ad635957.ngrok-free.app" -> "http://localhost:8000"
