In [28]:
#installing packages
%pip install groq gradio pillow pytesseract transformers accelerate torch torchvision timm

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



In [29]:
%pip install ipywidgets

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [30]:
%pip install jupyterlab_widgets

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [None]:
#Importing libraries
import os,time,json
from typing import Optional
from PIL import Image
import gradio as gr

In [33]:
# OCR Setup
# pytesseract is an optional dependency: OCR_AVAILABLE records whether the
# import succeeded so OCR can be skipped when it did not.
OCR_AVAILABLE = False
try:
    import pytesseract
except Exception:
    # Any import-time failure just leaves the flag at False.
    pass
else:
    OCR_AVAILABLE = True

In [34]:
# BLIP Setup
# Checkpoint used for both the processor and the captioning model.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

try:
    # The import lives inside the guard so that a missing or broken
    # transformers install only disables captioning (BLIP_AVAILABLE = False)
    # instead of aborting the cell with an ImportError.
    from transformers import BlipProcessor, BlipForConditionalGeneration

    blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
    blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
    BLIP_AVAILABLE = True
except Exception as e:
    # Import, download and load failures all end up here.
    print("BLIP not available:", e)
    BLIP_AVAILABLE = False


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

In [35]:
%pip install python-dotenv

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [36]:
import os
from dotenv import load_dotenv
from groq import Groq
# Load variables from a .env file (if one exists) into the environment.
load_dotenv()

# The key is read from the environment, never hard-coded; None when unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Groq client object used for the chat completion requests.
client = Groq(api_key=GROQ_API_KEY)

In [37]:
# Groq Setup
from groq import Groq

# GROQ_API_KEY and `client` are already created in the preceding cell, so they
# are not read / built a second time here; this cell only holds settings.

# Model identifier passed to the Groq chat completion call.
MODEL_NAME = "moonshotai/kimi-k2-instruct-0905"

# NOTE(review): this constant is not referenced anywhere in the visible
# notebook; should_escalate compares against 0.5. Confirm which value is intended.
CONFIDENCE_THRESHOLD = 0.75

# Terms in the user's message that force a hand-off to a human agent.
ESCALATION_KEYWORDS = ["refund", "cancel", "legal", "sue", "angry", "frustrated"]

In [None]:
# Image processing functions
def extract_ocr_text(img: Image.Image) -> str:
    """Return the text pytesseract reads from ``img``, stripped of outer whitespace.

    An empty string is returned when pytesseract could not be imported
    (``OCR_AVAILABLE`` is False), when ``img`` is None, or when the OCR call
    raises for any reason.
    """
    text = ""
    if OCR_AVAILABLE and img is not None:
        try:
            raw = pytesseract.image_to_string(img, config="--psm 6")
            text = raw.strip()
        except Exception:
            # Best-effort: an OCR failure is reported as "no text".
            text = ""
    return text

# Visual caption   
def genrate_caption(img: Image.Image) -> str:
    """Produce a short caption for ``img`` with the BLIP model.

    Returns "" when BLIP failed to load (``BLIP_AVAILABLE`` is False) or when
    ``img`` is None. If captioning raises, the exception is not propagated;
    the string "(BLIP error:<exception>)" is returned instead.

    NOTE(review): describe_image treats any non-empty return value as a
    caption, so that error text can end up in the image description.
    """
    if not (BLIP_AVAILABLE and img is not None):
        return ""

    try:
        model_inputs = blip_processor(images=img, return_tensors="pt")
        # Generation is capped at 30 new tokens.
        generated = blip_model.generate(**model_inputs, max_new_tokens=30)
        first_sequence = generated[0]
        text = blip_processor.decode(first_sequence, skip_special_tokens=True)
        return text.strip()
    except Exception as e:
        return f"(BLIP error:{e})"

# Image description
def describe_image(img: Image.Image) -> str:
    """Summarise ``img`` as a single "|"-separated string.

    The result contains, in order: the OCR text (if any), the visual caption
    (if any), and always the image size and mode. Returns "" when ``img`` is None.
    """
    if img is None:
        return ""

    # (label, value) pairs; OCR runs before captioning, and empty values are skipped.
    labelled = (
        ("OCR text:", extract_ocr_text(img)),
        ("Visual caption:", genrate_caption(img)),
    )
    parts = [f"{label}{value}" for label, value in labelled if value]

    parts.append(f"Image size:{img.size},mode:{img.mode}")
    return "|".join(parts)

In [None]:
# Heuristic confidence estimate for the model's reply (keyword-based; not a score returned by Groq)
def simple_confidence(reply: str, user_text: str) -> float:
    """Estimate how confident ``reply`` sounds, as a value between 0.1 and 0.99.

    Starts from 0.85, subtracts 0.2 for every hedging phrase found in the
    reply, and subtracts a further 0.1 (once) when the user's text mentions a
    technical problem. Matching is case-insensitive substring search; ``None``
    is treated as an empty string for both arguments.
    """
    reply_lc = reply.lower() if reply else ""
    hedges = ("might", "maybe", "could", "not sure", "uncertain", "lets hope", "perhaps")
    hedge_hits = sum(1 for phrase in hedges if phrase in reply_lc)

    score = 0.85
    # Subtract step by step so the floating-point result is the same as a
    # running "-= 0.2" per matched phrase.
    for _ in range(hedge_hits):
        score -= 0.2

    query_lc = user_text.lower() if user_text else ""
    for symptom in ("error", "crash", "bug", "not working"):
        if symptom in query_lc:
            score -= 0.1
            break  # the penalty applies at most once

    # Clamp into [0.1, 0.99].
    if score < 0.1:
        return 0.1
    if score > 0.99:
        return 0.99
    return score

In [None]:
# Checking need for escalation.
def should_escalate(confidence: float, user_text: str, keywords=None, threshold: float = 0.5) -> bool:
    """Decide whether the conversation should be handed to a human agent.

    Escalates when ``confidence`` is below ``threshold`` or when the user's
    text contains a word that starts with one of the escalation keywords.

    Keywords are matched case-insensitively at the start of a word, so
    "refunded" still matches "refund" but "issue" does not match "sue" (plain
    substring matching escalated almost every message that mentioned an
    "issue"). Words that merely contain a keyword, such as "illegal" for
    "legal", no longer match; add them to the keyword list if they should.

    Args:
        confidence: Confidence score for the reply.
        user_text: The user's message; ``None`` is treated as empty.
        keywords: Iterable of keyword strings. Defaults to the module-level
            ``ESCALATION_KEYWORDS`` when omitted.
        threshold: Confidence below this value always escalates.

    Returns:
        True if the conversation should be escalated, otherwise False.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    if confidence < threshold:
        return True

    if keywords is None:
        keywords = ESCALATION_KEYWORDS

    text = (user_text or "").lower()
    # \b anchors the keyword to the beginning of a word.
    return any(re.search(r"\b" + re.escape(k.lower()), text) for k in keywords)

In [None]:
# Forwarding to Groq Model
def call_groq(user_text: str, ocr_text: str = "") -> str:
    """Send the user's message (and optional image text) to the Groq chat model.

    The two inputs are JSON-encoded into a single user message with the keys
    "user_text" and "ocr_text"; ``None`` values are sent as empty strings.

    Args:
        user_text: The text typed by the user.
        ocr_text: Text/description extracted from an uploaded image, if any.

    Returns:
        The model's reply, "" when the response carries no text content, or a
        string of the form "LLM error:<exception>" if the request fails. No
        exception is raised to the caller.
    """
    # Adjacent string literals are concatenated verbatim, so each sentence
    # ends with an explicit space to avoid "assistant.Analyze ..." run-ons.
    system_prompt = (
        "You are a professional customer support assistant. "
        "Analyze the user text and OCR/visual text(if present),then give a short helpful reply(1-3 sentences). "
        "If you cannot resolve,ask for required information or say you will escalate to a human agent."
    )
    user_payload = {"user_text": user_text or "", "ocr_text": ocr_text or ""}
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(user_payload)},
    ]
    try:
        resp = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=400,
            temperature=0.25,
        )
        # The message content may be None; normalise to "" so the declared
        # str return type holds and callers can safely concatenate to it.
        return resp.choices[0].message.content or ""
    except Exception as e:
        return f"LLM error:{e}"

In [44]:
# Workflow 
# Module-level conversation state, mutated by process_query.
# NOTE(review): being module-level, this state is shared by every caller of
# process_query -- confirm that is acceptable if several users connect at once.
history = []            # list of [user_message, reply] pairs
total_queries = 0       # number of processed queries
total_escalations = 0   # number of queries routed to a human agent


def process_query(user_text: str, image: Optional[Image.Image]):
    """Run one support query end to end and record it in ``history``.

    Steps: describe the image (if given), ask the LLM for a reply, score the
    reply's confidence, decide on escalation, then update the shared history
    and counters.

    Args:
        user_text: The user's message; may be empty/None when an image is given.
        image: Optional uploaded image.

    Returns:
        A ``(history, meta)`` tuple: the shared history list and a one-line
        status string. When neither text nor image is supplied, the history is
        returned unchanged together with a prompt asking for input.
    """
    global history, total_queries, total_escalations

    # Whitespace-only input counts as "no message".
    message = (user_text or "").strip()
    if not message and image is None:
        return history, "Please type a message or upload an image"

    image_desc = describe_image(image) if image is not None else ""

    # The image description goes to the model once, in its own field, rather
    # than being appended to the user text and also passed as ocr_text.
    reply = call_groq(message, image_desc) or ""
    confidence = simple_confidence(reply, message)
    escalate = should_escalate(confidence, message)

    if escalate:
        reply += "\n\n--\nThis conversation will be routed to a human agent for further assistance"
        total_escalations += 1

    history.append([message or "(image-only)", reply])
    total_queries += 1

    meta = (
        f"Confidence:{confidence:.2f} |"
        f"Escalate: {escalate} |"
        f"Queries: {total_queries} |"
        f"Escalations: {total_escalations}"
    )
    return history, meta


In [43]:
#UI Interface

with gr.Blocks(title="MULTIMODAL CUSTOMER AGENT") as demo:
    # Heading rendered at the top of the page (the Blocks `title` above only
    # sets the browser tab title).
    gr.Markdown("# CUSTOMER SUPPORT CHATBOT")

    # NOTE(review): process_query returns the history as [user, reply] pairs;
    # confirm this matches the Chatbot's default message format for the
    # installed Gradio version.
    chat = gr.Chatbot()
    with gr.Row():
        txt = gr.Textbox(label="Question", placeholder="Type your message here...", lines=5)
        img = gr.Image(type="pil", label="Upload Image")
    send = gr.Button("Send")
    meta_box = gr.Textbox(label="Meta", interactive=False)

    def send_msg(message, image, chat_history):
        """Handle one submission: returns (chat history, cleared textbox, meta line).

        ``chat_history`` is received from the UI but not used; the history
        shown is the one maintained by process_query.
        """
        new_history, meta = process_query(message, image)
        return new_history, "", meta

    # The button click and Enter-in-textbox share the same handler and wiring.
    handler_io = dict(inputs=[txt, img, chat], outputs=[chat, txt, meta_box])
    send.click(send_msg, **handler_io)
    txt.submit(send_msg, **handler_io)

demo.launch(share=False)


  chat = gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7872
* To create a public link, set `share=True` in `launch()`.


