In [1]:
import sys
sys.path.append("../src")

import ollama
from torch import load

import gradio as gr
import time
from functions import *

In [2]:
# Make sure the llama3.2-vision model has been pulled through Ollama
# before the chat function below requests it.
ollama.pull(model='llama3.2-vision')

ProgressResponse(status='success', completed=None, total=None, digest=None)

In [3]:
# Pre-processed content (JSON): text first, then images.
text_content_list, image_content_list = (
    load_from_json(json_path)
    for json_path in (
        '../data/processed_files/JSON/text_content.json',
        '../data/processed_files/JSON/image_content.json',
    )
)

# Pre-computed embeddings (.pt), read in the same text-then-image order.
# weights_only=True is passed to torch's load for both files.
text_embeddings, image_embeddings = (
    load(embedding_path, weights_only=True)
    for embedding_path in (
        '../data/processed_files/embeddings/text_embeddings.pt',
        '../data/processed_files/embeddings/image_embeddings.pt',
    )
)

In [4]:
# Main chat handler: retrieves context for the user's message and streams the
# model's answer back to the UI.
def stream_chat(message, history):
    """Answer a chat message with retrieval-augmented context, streaming the reply.

    Args:
        message: Mapping for the current user turn; only ``message["text"]``
            is read here.
        history: List of earlier conversation messages. It is forwarded to
            ``ollama.chat`` ahead of the current turn and is NOT modified.

    Yields:
        str: The assistant's reply accumulated so far, re-yielded after every
        streamed chunk so the UI can render it incrementally.
    """
    user_text = message["text"]

    # Retrieve relevant text and image context based on the user's message
    text_results, image_results = context_retrieval(
        user_text, text_embeddings, image_embeddings, text_content_list, image_content_list
    )

    # Construct a detailed prompt including the retrieved context for the LLM
    prompt = construct_prompt(user_text, text_results, image_results)

    # Only the top-ranked image is attached (per the original author's note,
    # the model accepts a single image per request)
    images = [image_results[0]["image_path"]] if image_results else []

    # The chat UI owns the conversation state and passes it in on each call,
    # so build the request on a new list instead of appending to the caller's
    # `history`; this keeps the augmented prompt and image paths out of state
    # this function does not own.
    request_messages = [
        *history,
        {"role": "user", "content": prompt, "images": images},
    ]

    # Start a streaming chat session with the earlier turns plus the current one
    stream = ollama.chat(
        model='llama3.2-vision',
        messages=request_messages,
        stream=True,
        options={
            'temperature': 0.6,      # Slight randomness helps break loops
            'repeat_penalty': 1.15,  # Per the original author, the key setting against repeated output
            'top_k': 40,
            'top_p': 0.9,
        },
    )

    response_text = ""
    for chunk in stream:
        response_text += chunk['message']['content']
        yield response_text  # Send the response incrementally to the UI


In [7]:
# Build the multimodal Gradio chat UI around stream_chat and start it.
gr.ChatInterface(
    fn=stream_chat,   # Generator that streams the assistant's reply
    multimodal=True,
    type="messages",  # Chat-style message format
    # Clickable example prompts, each wrapped as {"text": ...}
    examples=[
        {"text": question}
        for question in (
            "According to this resume, what is Leo-Paul looking for?",
            "What are the key projects Leo-Paul participated in?",
            "What are Leo-Paul's hobbies?",
            "How would you describe Leo-Paul based on the picture in the CV?",
        )
    ],
).launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


