# In this notebook, we will use and showcase the capabilities of Gemma 3n running locally via Ollama.

**Let's try to use the model as much as possible without using GPU. This shows that any computer with few resources can run our code.**

In [1]:
!pip install gradio
!pip install ollama

Collecting gradio
  Downloading gradio-5.35.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.14-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.10.4 (from gradio)
  Downloading gradio_client-1.10.4-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.12.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.meta

In [2]:
%%time
import os
import sys
import psutil
import subprocess
import logging
import warnings
import gradio as gr
import ollama
from PIL import Image
import io
import base64
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText

2025-06-29 00:30:21.740242: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751157021.945056      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751157022.003219      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


CPU times: user 23 s, sys: 3.55 s, total: 26.6 s
Wall time: 36 s


In [3]:
# Cell: Logger Configuration
# Create a logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Create file handler and set level to debug
file_handler = logging.FileHandler('app.log')
file_handler.setLevel(logging.INFO)

# Create console handler and set level to error
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.ERROR)

# Create formatters
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# Add formatters to handlers
file_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)

# Add handlers to logger
logger.addHandler(file_handler)
logger.addHandler(console_handler)

In [4]:
# ==================================================
# _/$\_\%/_/&\_\@/_/$\_\%/_/&\_\@/_/$\_\%/_/&\_\@/_
# ==================================================
#  **********    System Information   *************
# ==================================================
# _/$\_\%/_/&\_\@/_/$\_\%/_/&\_\@/_/$\_\%/_/&\_\@/_
# ==================================================

import os
import sys
import torch
import psutil  

def print_system_info():
    print("System Information:")
    print(f"• Python version: {sys.version}")
    print(f"• Current working directory: {os.getcwd()}")
    print(f"• PyTorch version: {torch.__version__}")

    # Check GPU availability and details
    if torch.cuda.is_available():
        gpu_info = {
            "CUDA Available": torch.cuda.is_available(),
            "CUDA Device Count": torch.cuda.device_count(),
            "Current CUDA Device": torch.cuda.current_device(),
            "Device Name": torch.cuda.get_device_name(torch.cuda.current_device()),
            "Memory Allocated (MB)": round(torch.cuda.memory_allocated(0) / 1024**2, 2),
            "Memory Reserved (MB)": round(torch.cuda.memory_reserved(0) / 1024**2, 2),
        }
        
        print("\n⚡ GPU Detected:")
        for key, value in gpu_info.items():
            print(f"  • {key}: {value}")
    else:
        print("\n😭 No GPU detected. Running on CPU only.")

    # Memory information
    ram = psutil.virtual_memory()
    print("\n🐘 System Memory:")
    print(f"  • Total RAM: {round(ram.total / 1024**2, 2)} MB")
    print(f"  • Available RAM: {round(ram.available / 1024**2, 2)} MB")
    print(f"  • Used RAM: {round(ram.used / 1024**2, 2)} MB")
    print(f"  • RAM Percentage: {ram.percent}% used")

print_system_info()

System Information:
• Python version: 3.11.11 (main, Dec  4 2024, 08:55:07) [GCC 11.4.0]
• Current working directory: /kaggle/working
• PyTorch version: 2.6.0+cu124

😭 No GPU detected. Running on CPU only.

🐘 System Memory:
  • Total RAM: 32102.9 MB
  • Available RAM: 30344.14 MB
  • Used RAM: 1301.51 MB
  • RAM Percentage: 5.5% used


In [5]:
%%time
!curl -fsSL https://ollama.com/install.sh | sh

>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
CPU times: user 877 ms, sys: 196 ms, total: 1.07 s
Wall time: 41 s


In [6]:
%%time
process = subprocess.Popen("ollama serve", shell=True)

CPU times: user 717 µs, sys: 112 µs, total: 829 µs
Wall time: 1.56 ms


In [7]:
%%time
# !ollama pull gemma3n:e4b 
!ollama pull gemma3n:e2b


Your new public key is: 

ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHUVDOrKH9rykCESAo7u5C9g1ZSVJu//bQbvs94ehBlQ

[GIN] 2025/06/29 - 00:31:20 | 200 |     151.695µs |       127.0.0.1 | HEAD     "/"


time=2025-06-29T00:31:20.463Z level=INFO source=routes.go:1235 msg="server config" env="map[CUDA_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:4096 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_INTEL_GPU:false OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:0 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_SCHED_SPREAD:false ROCR_VISIBLE_DEVICE

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l

time=2025-06-29T00:31:21.331Z level=INFO source=download.go:177 msg="downloading 3839a254cf2d in 16 351 MB part(s)"


[?2026h[?25l[1Gpulling manifest ⠏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 3839a254cf2d:   0% ▕                  ▏ 178 KB/5.6 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d:   0% ▕                  ▏  11 MB/5.6 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d:   1% ▕                  ▏  43 MB/5.6 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d:   1% ▕                  ▏  79 MB/5.6 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d:   2% ▕                  ▏  95 MB/5.6 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d:   2% ▕                  ▏ 131 MB/5.6 GB                  [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d:   3% ▕                  ▏ 162 MB/5.6 GB           

time=2025-06-29T00:31:48.626Z level=INFO source=download.go:177 msg="downloading e0a42594d802 in 1 358 B part(s)"


[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K[?25h[?2026l[?2026h[?25l[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B  

time=2025-06-29T00:31:49.919Z level=INFO source=download.go:177 msg="downloading 1adbfec9dcf0 in 1 8.4 KB part(s)"


[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K[?25h[?2026l[?2026h[?25l[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K
pulling 1adbfec9dcf0: 100% ▕██████████████████▏ 8.4 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K
pulling 1adbfec9dcf0: 100% ▕██████████████████▏ 8.4 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d8

time=2025-06-29T00:31:51.232Z level=INFO source=download.go:177 msg="downloading a3e66f51d60b in 1 417 B part(s)"


[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K
pulling 1adbfec9dcf0: 100% ▕██████████████████▏ 8.4 KB                         [K[?25h[?2026l[?2026h[?25l[A[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K
pulling 1adbfec9dcf0: 100% ▕██████████████████▏ 8.4 KB                         [K
pulling a3e66f51d60b: 100% ▕██████████████████▏  417 B                         [K[?25h[?2026l[?2026h[?25l[A[A[A[A[1Gpulling manifest [K
pulling 3839a254cf2d: 100% ▕██████████████████▏ 5.6 GB                         [K
pulling e0a42594d802: 100% ▕██████████████████▏  358 B                         [K
pulling 1adbfec9dcf0: 100% ▕██████████████████▏ 8.4 KB                

In [8]:
# Função para interagir com o modelo Gemma 3n
def ollama_chat(history, question):
    try:
        # Crafting a prompt to instruct the model
        my_prompt = f"Question: {question}\n"
        
        # Use Ollama chat with the question only
        response = ollama.chat(model='gemma3n:e2b', messages=[
            {
                'role': 'user',
                'content': my_prompt
            },
        ])
        
        answer = response['message']['content']
        # Atualiza o histórico do chat com a nova interação
        history.append(f"<div style='color: blue;'>You: {question}</div>")
        history.append(f"<div style='color: green;'>Gemma 3n: {answer}</div>")
        history_text = "<br>".join(history)
        return history_text, answer
    except Exception as e:
        return history, f"Error occurred: {str(e)}"

# Criação da interface Gradio
with gr.Blocks() as demo:
    history = gr.State([])
    with gr.Column():
        gr.Markdown("# Welcome to the Cruzeta Chat Portal")
        question_input = gr.Textbox(lines=2, label="Ask your question")
        chat_output = gr.HTML(label="Chat History")
        submit_button = gr.Button("Submit")
    
    # Conectar inputs e outputs
    submit_button.click(
        ollama_chat,
        inputs=[history, question_input],
        outputs=[chat_output, gr.Textbox(label="Response", placeholder="Model response will appear here...")]
    )

# Lançar a interface Gradio
demo.launch()

* Running on local URL:  http://127.0.0.1:7860
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://270b41f0f8e023dc89.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [9]:
# Function to interact with the Gemma model
def chat_with_gemma(history, image_url, question):
    try:
        # Craft the message content with an image URL
        if image_url:
            message_content = f"You have received an image at the URL: {image_url}. Question: {question or 'Please describe the details of the image.'}"
            
            # Use the Ollama chat with the image and question
            response = ollama.chat(model='gemma3n:e2b', messages=[
                {
                    "role": "user",
                    "content": message_content
                },
            ])
            answer = response['message']['content']
        else:
            response = ollama.chat(model='gemma3n:e2b', messages=[
                {
                    "role": "user",
                    "content": f"Question: {question}"
                }
            ])
            answer = response['message']['content']

        # Update history with the new conversation turn, using HTML for styling
        history.append(f"<div style='color: blue;'>User: {question}</div>")
        history.append(f"<div style='color: green;'>Assistant: {answer}</div>")
        history_text = "<br>".join(history)
        return history_text, answer
    except Exception as e:
        return history, f"Error occurred: {str(e)}"

# Create a Gradio interface using Blocks
with gr.Blocks() as demo:
    history = gr.State([])
    with gr.Column():
        gr.Markdown("# Welcome to Cruzeta Analysis Portal")
        image_url_input = gr.Textbox(lines=1, label="Enter Image URL")  # Changed to accept a URL
        question_input = gr.Textbox(lines=2, label="Ask a question about the image")
        chat_output = gr.HTML(label="Chat History")
        submit_button = gr.Button("Submit")

    # Connect the inputs and outputs
    submit_button.click(
        chat_with_gemma,
        inputs=[history, image_url_input, question_input],
        outputs=[chat_output, gr.Textbox(label="Response", placeholder="Model response will appear here...")]
    )

# Launch the Gradio interface
demo.launch()

* Running on local URL:  http://127.0.0.1:7861
It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://ae93dd63f84f519515.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


