In [None]:
import os
import json
from platform import system

import ollama
import gradio as gr
from Tools.scripts.dutree import display



In [None]:
MODEL='llama3.2:latest'

In [None]:
system_message = "You are a helpful assistant for an Airline called FlightAI. Give short, courteous answers, no more than 1 sentence. Always be accurate. If you don't know the answer, say so."

In [None]:
def chat(message, history):
    messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
    response = ollama.chat(model=MODEL, messages=messages)
    return response.message.content

In [None]:
gr.ChatInterface(fn=chat, type="messages").launch()

In [None]:
ticket_prices = {"london": "$799", "paris": "$899", "tokyo": "$1400", "berlin": "$499"}

def get_ticket_price(destination_city):
    print(f"Tool get_ticket_price called for {destination_city}")
    city = destination_city.lower()
    return ticket_prices.get(city, "Unknown")

In [None]:
get_ticket_price("London")

In [None]:
price_function = {
    "name": "get_ticket_price",
    "description": "Get the price of a return ticket to the destination city. Call this whenever you need to know the ticket price, for example when a customer asks 'How much is a ticket to this city'",
    "parameters": {
        "type": "object",
        "properties": {
            "destination_city": {
                "type": "string",
                "description": "The city that the customer wants to travel to",
            },
        },
        "required": ["destination_city"],
        "additionalProperties": False
    }
}

In [None]:
tools = [{"type": "function", "function": price_function}]

In [None]:
def chat(message, history):
    messages = [
        {"role": "system", "content": system_message}
    ] + history + [
        {"role": "user", "content": message}
    ]
    response = ollama.chat(
        model=MODEL, messages=messages, tools=tools
    )

    if response.get("done_reason") == "stop":
        tool_call_message = response["message"]
        tool_response, city = handle_tool_call(tool_call_message)
        messages.append(tool_call_message)
        messages.append(tool_response)
        response = ollama.chat(
            model=MODEL, messages=messages
        )
    return response.message.content

In [None]:
def handle_tool_call(message):
    tool_call = message.tool_calls[0]
    arguments = tool_call.function.arguments
    city = arguments.get('destination_city')
    price = get_ticket_price(city)
    response = {
        "role": "tool",
        "content": json.dumps({
            "destination_city": city,
            "price": price
        }),
    }

    return response, city

In [None]:
gr.ChatInterface(fn=chat, type="messages").launch()

## let's go multi-modal

In [None]:
from PIL import Image
import torch
from diffusers import StableDiffusionPipeline
import base64
from io import BytesIO
from IPython.display import display
from diffusers import AutoPipelineForText2Image

In [None]:
IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
def artist(city):
    # Initialize the Stable Diffusion pipeline for image generation
    pipe = AutoPipelineForText2Image.from_pretrained(
        IMAGE_MODEL,  # Specify the model to use (e.g., "stabilityai/stable-diffusion-xl-base-1.0")
        torch_dtype=torch.float16,  # Use half-precision (float16) for faster computation and reduced memory usage
        use_safetensors=True,  # Use the safer `.safetensors` format for loading model weights
        variant="fp16",  # Specify the variant of the model optimized for float16 precision
    ).to("cuda")  # Move the pipeline to the GPU for faster computation

    # Enable CPU offloading for faster image generation
    pipe.enable_model_cpu_offload()
    pipe.enable_xformers_memory_efficient_attention() # Enable memory-efficient attention for faster computation and reduced memory usage

    prompt = f"A vibrant pop-art style illustration of {city} vacation, highlighting famous tourists attractions and unique cultural elements, bold colors, comic book aesthetic, high contrast, and dynamic composition."

    # Generate an image using the Stable Diffusion XL pipeline
    image = pipe(
        prompt=prompt,
    ).images[0] # Extract the first image from the output list

    return image

In [None]:
image = artist("Paris")
display(image)

## Audio Version

In [1]:
!ffmpeg -version
!ffprobe -version
!ffplay -version

ffmpeg version 7.1.1-essentials_build-www.gyan.dev Copyright (c) 2000-2025 the FFmpeg developers
built with gcc 14.2.0 (Rev1, Built by MSYS2 project)
configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-dxva2 --enable-d3d11va --enable-d3d12va --enable-ffnvcodec --enable-libvpl --enable-nvdec --enable-nvenc --enable-vaapi --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --ena

In [2]:
!pip install TTS

Collecting TTS
  Downloading TTS-0.22.0.tar.gz (1.7 MB)
     ---------------------------------------- 0.0/1.7 MB ? eta -:--:--
     ------------ --------------------------- 0.5/1.7 MB 2.4 MB/s eta 0:00:01
     ---------------------------------------- 1.7/1.7 MB 3.6 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: still running...
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting cython>=0.29.30 (from TTS)
  Downloading Cython-3.0.12-cp311-cp311-win_amd64.whl.metadata (3.6 kB)
Collecting soundfile>=0.12.0 (from TTS)
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting librosa>=0.10.0 (from TTS)
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting

In [23]:
from pydub import AudioSegment
from pydub.playback import play
from TTS.api import TTS
import numpy as np
from IPython.display import display
import sounddevice as sd

AUDIO_MODEL="tts_models/en/ljspeech/glow-tts"

def talker(message):
    tts = TTS(model_name=AUDIO_MODEL, progress_bar=True, gpu=True)

    # generate speech as a numpy array
    audio_np = np.array(tts.tts(text=message)[0]).astype(np.float32)

    # convert numpy array to bytes and play
    audio = AudioSegment(
        audio_np.tobytes(),
        frame_rate=tts.synthesizer.output_sample_rate,
        sample_width=audio_np.dtype.itemsize,
        channels=1
    )
    play(audio)

In [24]:
talker("Well, hi there. Well, hi there. Well, hi there. Well, hi there. Well, hi there. Well, hi there. Well, hi there. Well, hi there. Well, hi there.")

 > tts_models/en/ljspeech/glow-tts is already downloaded.
 > vocoder_models/en/ljspeech/multiband-melgan is already downloaded.
 > Using model: glow_tts
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:0
 | > fft_size:1024
 | > power:1.1
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:50.0
 | > mel_fmax:7600.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:1.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024
 > Vocoder Model: multiband_melgan
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resam

IndexError: invalid index to scalar variable.

In [14]:
# Generate a test audio segment
audio = AudioSegment.silent(duration=1000)  # 1 second of silence (test)
play(audio)

In [15]:
!ffmpeg

ffmpeg version 7.1.1-essentials_build-www.gyan.dev Copyright (c) 2000-2025 the FFmpeg developers
  built with gcc 14.2.0 (Rev1, Built by MSYS2 project)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-dxva2 --enable-d3d11va --enable-d3d12va --enable-ffnvcodec --enable-libvpl --enable-nvdec --enable-nvenc --enable-vaapi --enable-libgme --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame -

In [20]:
!pip install tempfile

ERROR: Could not find a version that satisfies the requirement tempfile (from versions: none)
ERROR: No matching distribution found for tempfile
