In [None]:
import gradio as gr
import json
import numpy as np
from typing import List, Dict

# Assuming these imports work in your environment
from llms.vectorstore import FAISSEmbeddingsSearch
from ocr.paddle_ocr import PaddleOCRutil
from vlm.clip import CLiP

def numpy_to_python(obj):
    """Convert a NumPy scalar or array into its plain-Python equivalent.

    Args:
        obj: Any object. NumPy booleans, integers, floats and arrays are
            converted; everything else is passed through.

    Returns:
        ``bool`` for ``np.bool_``, ``int`` for ``np.integer``, ``float`` for
        ``np.floating``, a (possibly nested) ``list`` for ``np.ndarray``,
        otherwise ``obj`` itself, unchanged.
    """
    # np.bool_ is neither np.integer nor np.floating, so it needs its own
    # branch; without it a NumPy boolean stays non-JSON-serializable.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj

def process_image(image, faiss_prompt, clip_prompts):
    """Score an image with CLIP and its OCR'd text with a FAISS query.

    Args:
        image: Image supplied by the Gradio image input; ``None`` when the
            user submitted without uploading anything.
        faiss_prompt: Query text searched against the text found by OCR.
        clip_prompts: Comma-separated prompts passed to CLIP. Whitespace
            around each prompt is stripped and empty entries are dropped.

    Returns:
        A 3-tuple ``(clip_output, faiss_output, final_output)``:
        a text listing of CLIP prompt/score pairs, a text listing of the
        FAISS hit(s), and a JSON string with ``text_score`` and
        ``image_score`` (each ``None`` when the matching stage produced
        nothing).

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of failing inside OCR.
        raise gr.Error("Please upload an image before submitting.")

    # "cat, dog" must become ["cat", "dog"], not ["cat", " dog"]; blank
    # entries (trailing commas, empty textbox) are discarded.
    prompts = [part.strip() for part in (clip_prompts or "").split(',')]
    prompts = [part for part in prompts if part]

    final_json_output = {
        'text_score': None,
        'image_score': None,
    }

    # OCR processing
    ocr = PaddleOCRutil(image)
    ocr_results, _ = ocr.parse_ocr_results()

    # CLIP processing (skipped when there is nothing to compare against)
    clip_res = []
    if prompts:
        vlm = CLiP()
        clip_res = vlm.inference(image, prompts)

    clip_lines = ['CLIP Matches:\n']
    for prompt, score in clip_res:
        clip_lines.append(f"{prompt}: {numpy_to_python(score):.4f}\n")
    clip_output = ''.join(clip_lines)

    # NOTE(review): takes the first pair as the headline score, as before;
    # presumably inference() returns results best-first — confirm.
    final_json_output['image_score'] = numpy_to_python(clip_res[0][1]) if clip_res else None

    # FAISS processing
    corpus = list(ocr_results.keys()) if ocr_results else []
    faiss_lines = ['FAISS Results:\n']

    if corpus:
        vems = FAISSEmbeddingsSearch()
        corpus_embeddings = vems.get_embeddings(corpus)
        vems.create_faiss_index(corpus_embeddings)

        dist, ind = vems.get_query_result(faiss_prompt, k=1)

        for i, idx in enumerate(ind[0]):
            faiss_lines.append(
                f"Index: {idx}, Text: {corpus[idx]}, Distance: {numpy_to_python(dist[0][i])}\n"
            )

        if len(dist) > 0 and len(dist[0]) > 0:
            final_json_output['text_score'] = numpy_to_python(dist[0][0])

        # Save FAISS index
        vems.save_index("faiss_index.bin")
    else:
        # No OCR text means nothing to embed or index; report it rather than
        # building, querying and saving an empty index.
        faiss_lines.append("No text detected in the image.\n")

    faiss_output = ''.join(faiss_lines)

    final_output = json.dumps(final_json_output, indent=2, default=numpy_to_python)

    return clip_output, faiss_output, final_output

In [None]:
# Build and launch the Gradio interface around process_image.

# Inputs, in the positional order process_image expects them:
# image, FAISS prompt, CLIP prompts.
input_components = [
    gr.Image(type="filepath", label="Upload Image"),
    gr.Textbox(lines=1, label="FAISS Prompt"),
    gr.Textbox(lines=2, label="CLIP Prompts (comma-separated)"),
]

# Outputs, matching the three values process_image returns.
output_components = [
    gr.Textbox(label="CLIP Results"),
    gr.Textbox(label="FAISS Results"),
    gr.JSON(label="Final Output"),
]

iface = gr.Interface(
    fn=process_image,
    inputs=input_components,
    outputs=output_components,
    title="IIT Roorkee Assignment Demo",
    description="Upload an image",
)

iface.launch()