In [None]:
!pip install gradio_client paddleocr paddlepaddle deepface transformers TTS openai-whisper pyngrok fastapi[all] nest_asyncio -q
!curl -s https://ngrok-agent.s3.amazonaws.com/ngrok.asc | sudo tee /etc/apt/trusted.gpg.d/ngrok.asc >/dev/null && echo "deb https://ngrok-agent.s3.amazonaws.com buster main" | sudo tee /etc/apt/sources.list.d/ngrok.list && sudo apt update && sudo apt install ngrok
!ngrok authtoken 2NqtkyBMF0KF99ofEU1fGz1pCJS_3XehxpoU47JGBiqY6tV7M
!sudo apt update && sudo apt upgrade && sudo apt install ffmpeg
!pip install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu

deb https://ngrok-agent.s3.amazonaws.com buster main
Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
Get:4 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [495 kB]
Hit:5 https://ngrok-agent.s3.amazonaws.com buster InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:8 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Hit:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:12 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Fetched 497 kB in 2s (234 kB/s)
Reading 

In [None]:
import whisper
from fastapi import FastAPI, UploadFile, File, HTTPException, Body
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import nest_asyncio
from pyngrok import ngrok
import uuid
import os
import uvicorn
import torch
from TTS.api import TTS
from fastapi.responses import FileResponse
import asyncio
import gc
from transformers import AutoTokenizer, AutoModelForTokenClassification
from deepface import DeepFace
import paddleocr
from gradio_client import Client

# Use the Metal (MPS) backend when PyTorch reports it as available, otherwise fall back to CPU.
# NOTE(review): CUDA is never selected here — confirm that is intentional.
device = 'mps' if torch.backends.mps.is_available() else 'cpu'

# Gradio client for the hosted Falcon chat Space; used by the /chat/ endpoint.
client = Client("https://huggingfaceh4-falcon-chat.hf.space/", serialize=False)
# PaddleOCR reader created with lang="hi"; used by the /ocr/ endpoint.
ocr_reader = paddleocr.PaddleOCR(lang="hi")
# Whisper "large-v2" speech model, moved to `device`; used by the /transcribe/ endpoint.
voice_model = whisper.load_model("large-v2").to(device)
# Coqui TTS "jenny" English voice, moved to `device`; used by the /coqui-tts/ endpoint.
tts = TTS('tts_models/en/jenny/jenny').to(device)
# IndicNER tokenizer and token-classification model; used by the /ner/ endpoint.
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/IndicNER")
model = AutoModelForTokenClassification.from_pretrained("ai4bharat/IndicNER")
# Dynamically quantize the model's torch.nn.Linear layers to qint8. The NER model is not moved to `device`.
quantized_ner_model = torch.ao.quantization.quantize_dynamic(model,{torch.nn.Linear},dtype=torch.qint8)
# Drop the reference to the unquantized model; gc.collect() below asks Python to reclaim it.
del model
# Local port shared by the ngrok tunnel and the uvicorn server started at the bottom of the notebook.
port=8888
gc.collect()

100%|██████████████████████████████████████| 2.87G/2.87G [00:24<00:00, 127MiB/s]


In [None]:
# FastAPI application object; the route decorators in this cell register their endpoints on it.
app = FastAPI(title="SIH 2023 Backend")

# CORS settings: every origin, method and header is accepted and credentials are allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very permissive —
# confirm this is acceptable outside of a demo.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

# Request body shared by the /ner/, /coqui-tts/ and /chat/ endpoints.
class TTSNER(BaseModel):
    # Input text the endpoint operates on.
    text: str
    # Passed as the `emotion` argument by /coqui-tts/; the other endpoints in this file do not read it.
    emotion: str = "Cheerful & Professional"

# Response body of /face-match/.
class FaceResponse(BaseModel):
    # Filled from the 'verified' entry of the DeepFace.verify result.
    prediction: bool

# Response body of /face-detect/.
class FaceDetect(BaseModel):
    # Human-readable outcome message ("Done!!!" or a "face could not be detected" notice).
    prediction: str

def remove(known_image_path,test_image_path):
    """Delete both temporary image files, ignoring any that do not exist.

    Each path is handled independently, so a missing first file neither
    prevents the second from being deleted nor raises. This matters when the
    function is called from an error handler, where one or both files may
    never have been written.

    Args:
        known_image_path: Path of the reference image file.
        test_image_path: Path of the image file being compared.
    """
    for path in (known_image_path, test_image_path):
        try:
            os.remove(path)
        except FileNotFoundError:
            # Already gone (or never created): nothing to clean up for this path.
            pass

@app.get("/")
async def home():
    # Root route: answers with a fixed banner string.
    banner = "SIH - LICHT DEN CODE"
    return banner

@app.post("/face-detect/")
async def face_detect(img: UploadFile = File(...)):
    """Report whether a face can be found in the uploaded picture.

    The upload is written to a uniquely named temporary .jpg, passed to
    DeepFace.extract_faces, and the temporary file is always deleted
    afterwards, even when detection raises.

    Args:
        img: Uploaded image file.

    Returns:
        FaceDetect whose `prediction` is "Done!!!" when the first extracted
        face has a confidence above 5, otherwise a message asking for a face photo.
    """
    img_path = f"{uuid.uuid4()}.jpg"
    try:
        with open(img_path, "wb") as image_file:
            # Async read, consistent with the other upload endpoints in this file.
            image_file.write(await img.read())
        # Only the first extracted face is inspected.
        confidence = DeepFace.extract_faces(img_path,enforce_detection=False)[0].get('confidence')
    finally:
        # Never leave the temporary image behind, whatever happened above.
        if os.path.exists(img_path):
            os.remove(img_path)
    # NOTE(review): the threshold 5 presumably matches the confidence scale of the
    # detector backend in use — confirm if the backend is changed.
    if confidence is not None and confidence>5:
        return FaceDetect(prediction="Done!!!")
    return FaceDetect(prediction="Face could not be detected. Please confirm that the picture is a face photo.")

@app.post("/face-match/")
async def face_match(known_face: UploadFile = File(...), test_face: UploadFile = File(...)):
    """Verify whether two uploaded face pictures show the same person.

    Both uploads are written to uniquely named temporary .jpg files and
    compared with DeepFace.verify (Facenet512, euclidean_l2). The temporary
    files are always deleted, on success and on failure.

    Args:
        known_face: Reference face image.
        test_face: Face image to compare against the reference.

    Returns:
        FaceResponse whose `prediction` is the 'verified' value from DeepFace.

    Raises:
        HTTPException: status 399 when DeepFace reports that no face could be
            detected, status 500 with the error text for any other failure.
    """
    # Chosen before the try block so the cleanup below can always reference them.
    known_image_path = f"{uuid.uuid4()}.jpg"
    test_image_path = f"{uuid.uuid4()}.jpg"
    try:
        with open(known_image_path, "wb") as known_image_file:
            known_image_file.write(await known_face.read())
        with open(test_image_path, "wb") as test_image_file:
            test_image_file.write(await test_face.read())
        result = DeepFace.verify(known_image_path, test_image_path, model_name='Facenet512', distance_metric='euclidean_l2').get('verified')
        return FaceResponse(prediction=result)
    except Exception as e:
        # Match on the stable prefix of the message rather than the full sentence,
        # so small wording differences in the library message are still recognised.
        if "Face could not be detected" in str(e):
            # NOTE(review): 399 is not a registered HTTP status code; it is kept
            # unchanged because existing clients may depend on it.
            raise HTTPException(status_code=399, detail="Face could not be detected. Please confirm that the picture is a face photo.") from e
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Delete whichever temporary files were actually created; a missing file
        # must not raise here and hide the real error.
        for path in (known_image_path, test_image_path):
            if os.path.exists(path):
                os.remove(path)

@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio clip with the Whisper model.

    The upload is stored in a uniquely named temporary .webm file, loaded,
    padded/trimmed and transcribed. The temporary file is always deleted,
    even when loading or transcription fails.

    Args:
        file: Uploaded audio file.

    Returns:
        dict with a single "text" key holding the transcription.
    """
    audio_path = f"{uuid.uuid4()}.webm"
    try:
        with open(audio_path, "wb") as f:
            f.write(await file.read())
        # NOTE(review): pad_or_trim presumably clips the audio to Whisper's fixed
        # window — confirm that cutting off longer recordings is intended.
        audio = whisper.pad_or_trim(whisper.load_audio(audio_path))
        result = voice_model.transcribe(audio)["text"]
    finally:
        # Never leave the temporary recording behind.
        if os.path.exists(audio_path):
            os.remove(audio_path)
    return {"text": result}

@app.post("/ocr/")
async def OCR(file: UploadFile = File(...)):
    """Run OCR on an uploaded picture and return the recognised text.

    The upload is stored in a uniquely named temporary .jpg file, passed to
    the PaddleOCR reader, and the recognised strings are joined with spaces.
    The temporary file is always deleted, even when OCR fails.

    Args:
        file: Uploaded image file.

    Returns:
        dict with a single "text" key; an empty string when nothing is recognised.
    """
    pic_path = f"{uuid.uuid4()}.jpg"
    try:
        with open(pic_path, "wb") as f:
            f.write(await file.read())
        pages = ocr_reader.ocr(pic_path)
    finally:
        # Never leave the temporary picture behind.
        if os.path.exists(pic_path):
            os.remove(pic_path)
    # Each entry is indexed as word[1][0] for its text. Empty or None pages
    # (nothing detected) are skipped instead of raising a TypeError.
    words = [word[1][0] for line in (pages or []) if line for word in line]
    return {"text": ' '.join(words)}

@app.post("/ner/")
async def get_ner_endpoint(request: TTSNER = Body(...)):
    """Extract the words of the input text that the NER model tags as locations.

    The text is split on whitespace and handed to the tokenizer as
    pre-split words, so the tokenizer's word ids refer to exactly the words
    that are returned. Each word takes the label predicted for its first
    sub-token.

    Args:
        request: Body whose `text` field holds the sentence to analyse.

    Returns:
        dict with a "LOC" key listing, in order, every word whose predicted
        label contains "LOC". The list is empty for blank input.
    """
    # split() (no argument) drops empty strings produced by repeated or leading spaces.
    words = request.text.split()
    if not words:
        return {"LOC": []}

    # is_split_into_words keeps word ids aligned with `words` even when a word
    # carries punctuation; truncation keeps long inputs within the model limit.
    encoding = tokenizer(words, is_split_into_words=True, return_tensors='pt', truncation=True)
    with torch.no_grad():
        label_ids = quantized_ner_model(**encoding).logits.argmax(-1)[0]

    id2label = quantized_ner_model.config.id2label
    # Label of the first sub-token of every word; special tokens have word id None.
    label_by_word = {}
    for token_index, word_id in enumerate(encoding.word_ids()):
        if word_id is not None and word_id not in label_by_word:
            label_by_word[word_id] = id2label[label_ids[token_index].item()]

    # Words without any token (or cut off by truncation) simply have no label.
    return {"LOC": [word for index, word in enumerate(words) if "LOC" in label_by_word.get(index, "")]}

@app.post("/coqui-tts/")
async def coqui_tts(request: TTSNER = Body(...)):
    # Synthesises request.text to a uniquely named .ogg file and returns it as an attachment.
    out = f"{uuid.uuid4()}.ogg"

    async def delete_output():
        # Deletes the generated file via the default executor; handed to the response
        # as its background callable (presumably run once the response has been sent).
        await asyncio.get_event_loop().run_in_executor(None, os.remove, out)

    tts.tts_to_file(request.text, file_path=out, emotion=request.emotion)
    attachment_headers = {"Content-Disposition":f"attachment; filename={out}"}
    return FileResponse(out, headers=attachment_headers, background=delete_output)

@app.post("/chat/")
async def falcon(request: TTSNER = Body(...)):
    # Forwards request.text to the hosted Falcon chat Space and returns its reply.
    # str in 'Instructions' Textbox component
    instructions = "The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, and a human user, called User. In the following interactions, User and Falcon will converse in natural language, and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. Falcon was built by the Technology Innovation Institute in Abu Dhabi. Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied with. It knows a lot, and always tells the truth. The conversation begins."
    prediction = client.predict(
        request.text,  # str in 'Type an input and press Enter' Textbox component
        "",  # str (filepath to JSON file) in 'parameter_9' Chatbot component
        instructions,
        0.7,  # int | float (numeric value between 0.1 and 2.0) in 'Temperature' Slider component
        0.8,  # int | float (numeric value between 0.1 and 0.99) in 'p (nucleus sampling)' Slider component
        fn_index=1,
    )
    # The reply text is read from the second element of the first entry of the result.
    answer = prediction[0][1]
    return {"text": answer}

if __name__=="__main__":
    # Expose the local port through an ngrok tunnel and show the public address.
    ngrok_tunnel = ngrok.connect(port)
    print('Public URL:', ngrok_tunnel.public_url)
    try:
        # nest_asyncio lets uvicorn start its loop inside the notebook's already-running event loop.
        nest_asyncio.apply()
        uvicorn.run(app,port=port)
    finally:
        # Close the tunnel when the server stops or the cell is interrupted, so
        # re-running the cell does not pile up stale tunnels.
        ngrok.disconnect(ngrok_tunnel.public_url)