# Week 3: Exploring Multi-modalities
Author: Pat Pascual - AI First Season 2

In [1]:
# Update OpenAI to latest version
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.82.1-py3-none-any.whl.metadata (25 kB)
Downloading openai-1.82.1-py3-none-any.whl (720 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m720.5/720.5 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.81.0
    Uninstalling openai-1.81.0:
      Successfully uninstalled openai-1.81.0
Successfully installed openai-1.82.1


In [2]:
import os
from getpass import getpass

import openai

# Never store a key in the notebook itself: take it from the OPENAI_API_KEY
# environment variable, or prompt for it without echoing it to the output.
openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

## Mode 1 - **Text-to-Text**: Text Chat with System Prompting

In [3]:
# Model used for every turn of the conversation.
model = "gpt-4o"
# Running transcript sent with each request; the system prompt stays first.
struct = [{"role": "system", "content": "You are a helpful AI tutor."}]

print("\U0001F9E0 Text Chat Assistant is ready! Type 'exit' or 'quit' to stop.\n")
while True:
    user_message = input("User: ").strip()
    if user_message.lower() in ["exit", "quit"]:
        print("\U0001F44B Goodbye!")
        break
    if not user_message:
        # Do not spend an API call (or pollute the history) on a blank line.
        continue
    struct.append({"role": "user", "content": user_message})
    try:
        response = openai.chat.completions.create(model=model, messages=struct)
    except Exception as e:
        # Drop the unanswered turn so the history stays consistent, then keep chatting.
        struct.pop()
        print("❌ Error during chat:", e)
        continue
    # `content` may be None, so guard before stripping.
    assistant_reply = (response.choices[0].message.content or "").strip()
    print("Assistant:", assistant_reply)
    struct.append({"role": "assistant", "content": assistant_reply})


🧠 Text Chat Assistant is ready! Type 'exit' or 'quit' to stop.

User: hi musta ka sinoka
Assistant: Hello! Ayos lang ako, salamat sa pagtatanong. Ako ay isang AI tutor na nandito para tumulong sa inyong mga tanong at magbigay ng impormasyon. Ano ang maitutulong ko sa iyo ngayon?
User: exit
👋 Goodbye!


## Mode 2 - **Text-to-Image**: AI Image Generation with DALL·E 3

In [4]:
print("\n🎨 AI Image Generator - Type your prompt or 'exit' to quit")
while True:
    prompt = input("Prompt: ").strip()
    if prompt.lower() in ["exit", "quit"]:
        print("👋 Goodbye!")
        break
    if not prompt:
        # An empty prompt cannot produce an image; ask again instead of calling the API.
        continue
    try:
        response = openai.images.generate(
            model="dall-e-3",
            prompt=prompt,
            n=1,
            size="1024x1024"
        )
    except Exception as e:
        # A failed request should not end the session; report it and let the user retry.
        print("❌ Error during image generation:", e)
        continue
    img_url = response.data[0].url
    print("Generated Image URL:", img_url)



🎨 AI Image Generator - Type your prompt or 'exit' to quit
Prompt: generate me a cat with a helmet
Generated Image URL: https://oaidalleapiprodscus.blob.core.windows.net/private/org-ZT0B8Nrfz4Ad5tARnyWhxXgH/user-VPGkpIt7AcG0HHtVt1e4aeIx/img-7YJck1Qh8EdN6K7mYb8exrgt.png?st=2025-05-30T23%3A39%3A12Z&se=2025-05-31T01%3A39%3A12Z&sp=r&sv=2024-08-04&sr=b&rscd=inline&rsct=image/png&skoid=475fd488-6c59-44a5-9aa9-31c4db451bea&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2025-05-30T03%3A14%3A15Z&ske=2025-05-31T03%3A14%3A15Z&sks=b&skv=2024-08-04&sig=tmhKoQZbG/WzyBL3IQ0JO89NOT5gqg2CZa2Nuh1P9Kk%3D
Prompt: exit
👋 Goodbye!


## Mode 3 - **Image-to-Text**: Image Interpretation with GPT-4o

In [5]:
import base64
from google.colab import files, output, widgets
import openai
import ipywidgets as ipy
from IPython.display import display, clear_output

# Widgets: image picker, the two action buttons, and a panel for results.
upload_btn = ipy.FileUpload(
    accept="image/*",
    multiple=False,
)
analyze_btn = ipy.Button(
    description="Analyze Image",
    button_style="success",
)
clear_btn = ipy.Button(
    description="Clear",
    button_style="danger",
)
output_area = ipy.Output()

# Stack the controls vertically, with both buttons sharing one row.
display(
    ipy.VBox(
        [
            upload_btn,
            ipy.HBox([analyze_btn, clear_btn]),
            output_area,
        ]
    )
)

# Analyze image handler
def analyze_image(btn):
    """Send the uploaded image to GPT-4o and print its description.

    Progress, the result, and any error are all written to ``output_area``.

    Args:
        btn: The clicked button (unused; required by the ``on_click`` signature).
    """
    import mimetypes  # local import keeps this handler self-contained

    with output_area:
        clear_output()
        if not upload_btn.value:
            print("⚠️ Please upload an image first.")
            return

        # Show loading indicator
        print("⏳ Analyzing image, please wait...")

    # Read and encode image
    filename = next(iter(upload_btn.value))
    content = upload_btn.value[filename]['content']
    b64_img = base64.b64encode(content).decode('utf-8')

    # The picker accepts any image/*, so label the data URL with the type implied
    # by the file name instead of always claiming JPEG.
    mime_type = mimetypes.guess_type(filename)[0] or ""
    if not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": "What do you see in this image?"},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64_img}"}}
                ]
            }]
        )
        # `content` may be None, so guard before stripping.
        analysis = (response.choices[0].message.content or "").strip()
        with output_area:
            clear_output()
            print("🧠 GPT-4o Analysis:", analysis)
    except Exception as e:
        with output_area:
            clear_output()
            print("❌ Error during analysis:", e)

# Clear handler
def clear_all(btn):
    """Forget the uploaded image and reset the output panel.

    Args:
        btn: The clicked button (unused).
    """
    uploaded = upload_btn.value
    uploaded.clear()
    upload_btn._counter = 0  # reset internal counter

    with output_area:
        clear_output()
        print("🧹 Cleared. Upload a new image to try again.")

# Bind buttons
# Wire each button to its click handler.
for button, handler in ((analyze_btn, analyze_image), (clear_btn, clear_all)):
    button.on_click(handler)


VBox(children=(FileUpload(value={}, accept='image/*', description='Upload'), HBox(children=(Button(button_styl…

In [14]:
import base64
from google.colab import files, output, widgets
import openai
import ipywidgets as ipy
from IPython.display import display, clear_output

# Widgets: image picker, editable prompt, action buttons, and a result panel.
upload_btn = ipy.FileUpload(accept="image/*", multiple=False)

prompt_layout = ipy.Layout(width="100%", height="80px")
prompt_input = ipy.Textarea(
    value="What do you see in this image?",
    description="Prompt:",
    layout=prompt_layout,
)

analyze_btn = ipy.Button(description="Analyze Image", button_style="success")
clear_btn = ipy.Button(description="Clear", button_style="danger")
output_area = ipy.Output()

# Stack the controls vertically, with both buttons sharing one row.
button_row = ipy.HBox([analyze_btn, clear_btn])
display(ipy.VBox([upload_btn, prompt_input, button_row, output_area]))

# Analyze image handler
def analyze_image(btn):
    """Send the uploaded image and the user's prompt to GPT-4o and print the reply.

    A blank prompt falls back to "What do you see in this image?". Progress,
    the result, and any error are all written to ``output_area``.

    Args:
        btn: The clicked button (unused; required by the ``on_click`` signature).
    """
    import mimetypes  # local import keeps this handler self-contained

    with output_area:
        clear_output()
        if not upload_btn.value:
            print("⚠️ Please upload an image first.")
            return

        print("⏳ Analyzing image, please wait...")

    # Read and encode image
    filename = next(iter(upload_btn.value))
    content = upload_btn.value[filename]['content']
    b64_img = base64.b64encode(content).decode('utf-8')
    user_prompt = prompt_input.value.strip() or "What do you see in this image?"

    # The picker accepts any image/*, so label the data URL with the type implied
    # by the file name instead of always claiming JPEG.
    mime_type = mimetypes.guess_type(filename)[0] or ""
    if not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64_img}"}}
                ]
            }]
        )
        # `content` may be None, so guard before stripping.
        analysis = (response.choices[0].message.content or "").strip()
        with output_area:
            clear_output()
            print("🧠 GPT-4o Analysis:", analysis)
    except Exception as e:
        with output_area:
            clear_output()
            print("❌ Error during analysis:", e)

# Clear handler
def clear_all(btn):
    """Forget the uploaded image, restore the default prompt, and reset the panel.

    Args:
        btn: The clicked button (unused).
    """
    uploaded = upload_btn.value
    uploaded.clear()
    upload_btn._counter = 0

    prompt_input.value = "What do you see in this image?"

    with output_area:
        clear_output()
        print("🧹 Cleared. Upload a new image to try again.")

# Bind buttons
# Wire each button to its click handler.
for button, handler in ((analyze_btn, analyze_image), (clear_btn, clear_all)):
    button.on_click(handler)


VBox(children=(FileUpload(value={}, accept='image/*', description='Upload'), Textarea(value='What do you see i…

In [6]:
import base64
from google.colab import files, output, widgets
import openai
import ipywidgets as ipy
from IPython.display import display, clear_output

# Widgets: image picker, single-line prompt, action buttons, and a result panel.
upload_btn = ipy.FileUpload(accept="image/*", multiple=False)

prompt_box = ipy.Text(
    description="Prompt:",
    value="What do you see in this image?",
    placeholder="Enter your prompt about the image...",
    layout=ipy.Layout(width="100%"),
)

analyze_btn = ipy.Button(description="Analyze Image", button_style="success")
clear_btn = ipy.Button(description="Clear", button_style="danger")
output_area = ipy.Output()

# Stack the controls vertically, with both buttons sharing one row.
display(
    ipy.VBox(
        [
            upload_btn,
            prompt_box,
            ipy.HBox([analyze_btn, clear_btn]),
            output_area,
        ]
    )
)

# Analyze image handler
def analyze_image(btn):
    """Send the uploaded image and the prompt box text to GPT-4o and print the reply.

    A blank prompt falls back to "What do you see in this image?". Progress,
    the result, and any error are all written to ``output_area``.

    Args:
        btn: The clicked button (unused; required by the ``on_click`` signature).
    """
    import mimetypes  # local import keeps this handler self-contained

    with output_area:
        clear_output()
        if not upload_btn.value:
            print("⚠️ Please upload an image first.")
            return

        # Show loading message
        print("⏳ Analyzing image, please wait...")

    # Read image and encode to base64
    filename = next(iter(upload_btn.value))
    content = upload_btn.value[filename]['content']
    b64_img = base64.b64encode(content).decode('utf-8')
    # Never send an empty text part; use the default question instead.
    user_prompt = prompt_box.value.strip() or 'What do you see in this image?'

    # The picker accepts any image/*, so label the data URL with the type implied
    # by the file name instead of always claiming JPEG.
    mime_type = mimetypes.guess_type(filename)[0] or ""
    if not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    try:
        response = openai.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{b64_img}"}}
                ]
            }]
        )
        # `content` may be None, so guard before stripping.
        analysis = (response.choices[0].message.content or "").strip()
        with output_area:
            clear_output()
            print("🧠 GPT-4o Analysis:", analysis)
    except Exception as e:
        with output_area:
            clear_output()
            print("❌ Error during analysis:", e)

# Clear handler
def clear_all(btn):
    """Forget the uploaded image, restore the default prompt, and reset the panel.

    Args:
        btn: The clicked button (unused).
    """
    uploaded = upload_btn.value
    uploaded.clear()
    upload_btn._counter = 0  # reset internal counter

    prompt_box.value = 'What do you see in this image?'

    with output_area:
        clear_output()
        print("🧹 Cleared. Upload a new image to try again.")

# Bind buttons
# Wire each button to its click handler.
for button, handler in ((analyze_btn, analyze_image), (clear_btn, clear_all)):
    button.on_click(handler)


VBox(children=(FileUpload(value={}, accept='image/*', description='Upload'), Text(value='What do you see in th…

## Mode 4 - **Audio-to-Text**: Audio Transcription with Whisper

In [8]:
import os, base64, openai
import ipywidgets as ipy
from IPython.display import display, clear_output, Audio, HTML, Javascript
from google.colab import output as colab_output

# Widgets: audio picker, recorder controls, and a panel for results.
upload_btn = ipy.FileUpload(accept=".mp3,.m4a,.wav,.flac", multiple=False)
record_btn = ipy.Button(description="🎙️ Record", button_style="primary")
# Stop and Transcribe start out disabled.
stop_btn = ipy.Button(
    description="⏹️ Stop",
    button_style="warning",
    disabled=True,
)
transcribe_btn = ipy.Button(
    description="📝 Transcribe",
    button_style="success",
    disabled=True,
)
clear_btn = ipy.Button(description="🧹 Clear", button_style="danger")
output_area = ipy.Output()

# Title, picker, one row of buttons, then the output panel.
controls_row = ipy.HBox([record_btn, stop_btn, transcribe_btn, clear_btn])
title_label = ipy.Label("🔊 Audio Transcription - Upload or Record")
display(ipy.VBox([title_label, upload_btn, controls_row, output_area]))

# HTML/JavaScript snippet for the in-browser recorder, rendered via display(HTML(...)).
# - Draws the live microphone signal as a waveform on a <canvas>.
# - Calls startRecording() as soon as the snippet is rendered (last line of the script).
# - stopRecording() stops the MediaRecorder; its onstop handler base64-encodes the
#   recorded blob and passes it to the kernel callback 'notebook.onAudioCaptured'.
# NOTE(review): the script declares top-level `const`/`let` names; if the output frame
# keeps its JS context between renders, a second recording may fail on redeclaration
# - confirm in Colab.
RECORD_HTML = """
<canvas id="waveform" width="300" height="60" style="background: #f0f0f0; border-radius: 5px;"></canvas>
<script>
const canvas = document.getElementById('waveform');
const ctx = canvas.getContext('2d');
let mediaRecorder, audioChunks = [], audioStream, analyser, dataArray, animationId;

async function startRecording() {
  audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const audioCtx = new AudioContext();
  const source = audioCtx.createMediaStreamSource(audioStream);
  analyser = audioCtx.createAnalyser();
  source.connect(analyser);
  analyser.fftSize = 256;
  const bufferLength = analyser.frequencyBinCount;
  dataArray = new Uint8Array(bufferLength);

  function draw() {
    animationId = requestAnimationFrame(draw);
    analyser.getByteTimeDomainData(dataArray);
    ctx.fillStyle = '#f0f0f0';
    ctx.fillRect(0, 0, canvas.width, canvas.height);
    ctx.lineWidth = 2;
    ctx.strokeStyle = '#007bff';
    ctx.beginPath();
    const sliceWidth = canvas.width / bufferLength;
    let x = 0;
    for (let i = 0; i < bufferLength; i++) {
      const v = dataArray[i] / 128.0;
      const y = v * canvas.height / 2;
      i === 0 ? ctx.moveTo(x, y) : ctx.lineTo(x, y);
      x += sliceWidth;
    }
    ctx.lineTo(canvas.width, canvas.height / 2);
    ctx.stroke();
  }

  draw();
  audioChunks = [];
  mediaRecorder = new MediaRecorder(audioStream);
  mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
  mediaRecorder.onstop = async () => {
    cancelAnimationFrame(animationId);
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    audioStream.getTracks().forEach(track => track.stop());

    const blob = new Blob(audioChunks, { type: 'audio/webm' });
    const reader = new FileReader();
    reader.onloadend = () => {
      const base64Audio = reader.result.split(',')[1];
      google.colab.kernel.invokeFunction('notebook.onAudioCaptured', [base64Audio], {});
    };
    reader.readAsDataURL(blob);
  };
  mediaRecorder.start();
}
function stopRecording() {
  if (mediaRecorder && mediaRecorder.state !== "inactive") {
    mediaRecorder.stop();
  }
}
startRecording();
</script>
"""

# Callback for JS audio capture
def on_audio_captured(b64_audio):
    """Save a browser recording to ``recording.webm`` and unlock transcription.

    Args:
        b64_audio: Base64 text of the recorded audio, as sent by the JS recorder.
    """
    audio_bytes = base64.b64decode(b64_audio)
    with open("recording.webm", "wb") as recording_file:
        recording_file.write(audio_bytes)

    with output_area:
        clear_output()
        print("✅ Recording complete. You can now transcribe.")
        display(Audio("recording.webm"))

    transcribe_btn.disabled = False


# Expose the callback under the name the JavaScript recorder invokes.
colab_output.register_callback('notebook.onAudioCaptured', on_audio_captured)

# Handlers
def start_recording(btn):
    """Render the recorder snippet and switch the buttons into "recording" state.

    Args:
        btn: The clicked button (unused).
    """
    recorder_markup = HTML(RECORD_HTML)
    with output_area:
        clear_output()
        print("🎤 Recording... Speak into the mic.")
        display(recorder_markup)

    # Only Stop stays clickable while a recording is in progress.
    for widget, is_disabled in (
        (record_btn, True),
        (stop_btn, False),
        (transcribe_btn, True),
    ):
        widget.disabled = is_disabled

def stop_recording(btn):
    """Ask the browser recorder to stop and re-enable the Record button.

    Args:
        btn: The clicked button (unused).
    """
    stop_call = Javascript("stopRecording();")
    display(stop_call)

    for widget, is_disabled in ((record_btn, False), (stop_btn, True)):
        widget.disabled = is_disabled

def transcribe_audio(btn):
    """Transcribe the uploaded file, or else the last recording, with Whisper.

    An uploaded file takes precedence over ``recording.webm``. All progress,
    the transcript, and any error are printed to ``output_area``.

    Args:
        btn: The clicked button (unused; required by the ``on_click`` signature).
    """
    import tempfile  # local import keeps this handler self-contained

    with output_area:
        clear_output()
        print("⏳ Transcribing, please wait...")
        file_path = None
        is_temp_copy = False

        # Upload flow
        if upload_btn.value:
            filename = next(iter(upload_btn.value))
            content = upload_btn.value[filename]['content']
            # Write to a private temp file instead of a path built from the
            # user-supplied name; keep the extension so the audio format is
            # still recognizable from the file name.
            suffix = os.path.splitext(filename)[1]
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
                f.write(content)
                file_path = f.name
            is_temp_copy = True
        # Recorded audio
        elif os.path.exists("recording.webm"):
            file_path = "recording.webm"
        else:
            print("⚠️ No audio file found.")
            return

        try:
            with open(file_path, "rb") as audio_file:
                transcript = openai.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file
                )
            print("📝 Transcription:\n", transcript.text)
        except Exception as e:
            print("❌ Error during transcription:", e)
        finally:
            # Only the temporary copy of an upload is removed; the recording
            # stays until the user presses Clear.
            if is_temp_copy and os.path.exists(file_path):
                os.remove(file_path)

def clear_all(btn):
    """Reset the uploader, the buttons, the saved recording, and the output panel.

    Args:
        btn: The clicked button (unused).
    """
    uploaded = upload_btn.value
    uploaded.clear()
    upload_btn._counter = 0

    # Back to the initial state: only Record (and Clear) are clickable.
    transcribe_btn.disabled = True
    record_btn.disabled = False
    stop_btn.disabled = True

    recording_path = "recording.webm"
    if os.path.exists(recording_path):
        os.remove(recording_path)

    with output_area:
        clear_output()
        print("🧹 Cleared. Upload or record new audio.")

# Bind Events
def _enable_transcribe_on_upload(change):
    """Enable the Transcribe button once the upload widget holds a file.

    Without this, Transcribe is only ever enabled after a recording, so an
    uploaded file could never be transcribed.

    Args:
        change: Trait-change dict from ``observe``; ``change["new"]`` is the new value.
    """
    if change["new"]:
        transcribe_btn.disabled = False


upload_btn.observe(_enable_transcribe_on_upload, names='value')
record_btn.on_click(start_recording)
stop_btn.on_click(stop_recording)
transcribe_btn.on_click(transcribe_audio)
clear_btn.on_click(clear_all)


VBox(children=(Label(value='🔊 Audio Transcription - Upload or Record'), FileUpload(value={}, accept='.mp3,.m4a…

<IPython.core.display.Javascript object>

## Mode 5 - **Text-to-Audio**: Text-to-Speech with TTS

In [9]:
import openai
import ipywidgets as ipy
from IPython.display import display, Audio, clear_output
import os

# Widgets: text box, action buttons, and a panel for results.
text_layout = ipy.Layout(width="100%", height="100px")
text_input = ipy.Textarea(
    description="Text:",
    value="",
    placeholder="Enter text to synthesize...",
    layout=text_layout,
)
generate_btn = ipy.Button(description="Generate", button_style="success")
# Download starts out disabled.
download_btn = ipy.Button(
    description="Download",
    button_style="info",
    disabled=True,
)
clear_btn = ipy.Button(description="Clear", button_style="danger")
output_area = ipy.Output()

# Title, text box, one row of buttons, then the output panel.
display(
    ipy.VBox(
        [
            ipy.Label("🗣️ Text-to-Speech"),
            text_input,
            ipy.HBox([generate_btn, clear_btn, download_btn]),
            output_area,
        ]
    )
)

# Path the synthesized speech is written to.
output_file = "output.mp3"

# Generate button handler
def generate_tts(btn):
    """Synthesize the entered text to ``output_file`` and play it back.

    Everything is printed to ``output_area``; on success the Download button
    is enabled.

    Args:
        btn: The clicked button (unused).
    """
    with output_area:
        clear_output()

        text = text_input.value
        if text.strip() == "":
            print("⚠️ Please enter some text.")
            return

        print("🔄 Generating, please wait...")
        try:
            speech = openai.audio.speech.create(
                model="tts-1",
                voice="nova",
                input=text,
            )
            with open(output_file, "wb") as audio_out:
                audio_out.write(speech.content)
            print("✅ Audio generated.")
            display(Audio(output_file))
            download_btn.disabled = False
        except Exception as e:
            print("❌ Error during generation:", e)

# Download button handler
def download_audio(btn):
    """Trigger a browser download of ``output_file`` if it exists.

    Args:
        btn: The clicked button (unused).
    """
    from google.colab import files

    if not os.path.exists(output_file):
        return
    files.download(output_file)

# Clear button handler
def clear_all(btn):
    """Empty the text box, reset the panel, and delete the generated audio.

    Args:
        btn: The clicked button (unused).
    """
    text_input.value = ""

    with output_area:
        clear_output()
        print("🧹 Cleared. Enter new text to synthesize.")

    download_btn.disabled = True

    audio_exists = os.path.exists(output_file)
    if audio_exists:
        os.remove(output_file)

# Bind events
# Wire each button to its click handler.
for button, handler in (
    (generate_btn, generate_tts),
    (clear_btn, clear_all),
    (download_btn, download_audio),
):
    button.on_click(handler)


VBox(children=(Label(value='🗣️ Text-to-Speech'), Textarea(value='', description='Text:', layout=Layout(height=…

## Mode 6 - **Audio-to-Audio**: Full Voice Interaction (Ask with audio, respond with voice)

In [10]:
import base64, openai, os
from IPython.display import display, HTML, Javascript, Audio, clear_output
import ipywidgets as ipy
from google.colab import output as colab_output

# Widgets: recorder controls, a "generate" action, and a panel for results.
record_button = ipy.Button(
    description="🎙️ Start Recording",
    button_style="primary",
)
# Stop and Generate start out disabled.
stop_button = ipy.Button(
    description="⏹️ Stop",
    button_style="warning",
    disabled=True,
)
generate_button = ipy.Button(
    description="🧠 Generate Response",
    button_style="success",
    disabled=True,
)
clear_button = ipy.Button(description="🧹 Clear", button_style="danger")
output_area = ipy.Output()

# One row of buttons above the output panel.
display(
    ipy.VBox(
        [
            ipy.HBox([record_button, stop_button, generate_button, clear_button]),
            output_area,
        ]
    )
)

# HTML/JavaScript snippet for the in-browser recorder, rendered via display(HTML(...)).
# - Draws the live microphone signal as a waveform on a <canvas>.
# - Does not start on its own: it publishes window.startRecording and
#   window.stopRecording for later Javascript(...) calls.
# - When the MediaRecorder stops, the recorded blob is base64-encoded and passed to
#   the kernel callback 'notebook.onAudioCaptured'.
# NOTE(review): the script declares top-level `const`/`let` names; if the output frame
# keeps its JS context between renders, a second recording may fail on redeclaration
# - confirm in Colab.
RECORD_HTML = """
<div id="recorder">
  <canvas id="waveform" width="300" height="60" style="background: #f0f0f0; border-radius: 5px;"></canvas>
  <script>
    const canvas = document.getElementById('waveform');
    const ctx = canvas.getContext('2d');
    let animationId;
    let mediaRecorder, audioChunks = [], audioStream, analyser, dataArray;

    async function startRecording() {
      audioStream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const audioContext = new AudioContext();
      const source = audioContext.createMediaStreamSource(audioStream);
      analyser = audioContext.createAnalyser();
      source.connect(analyser);
      analyser.fftSize = 256;
      const bufferLength = analyser.frequencyBinCount;
      dataArray = new Uint8Array(bufferLength);

      function draw() {
        animationId = requestAnimationFrame(draw);
        analyser.getByteTimeDomainData(dataArray);
        ctx.fillStyle = '#f0f0f0';
        ctx.fillRect(0, 0, canvas.width, canvas.height);
        ctx.lineWidth = 2;
        ctx.strokeStyle = '#007bff';
        ctx.beginPath();
        const sliceWidth = canvas.width * 1.0 / bufferLength;
        let x = 0;
        for(let i = 0; i < bufferLength; i++) {
          let v = dataArray[i] / 128.0;
          let y = v * canvas.height / 2;
          if(i === 0) ctx.moveTo(x, y);
          else ctx.lineTo(x, y);
          x += sliceWidth;
        }
        ctx.lineTo(canvas.width, canvas.height/2);
        ctx.stroke();
      }

      draw();
      audioChunks = [];
      mediaRecorder = new MediaRecorder(audioStream);
      mediaRecorder.ondataavailable = e => audioChunks.push(e.data);
      mediaRecorder.onstop = async () => {
        cancelAnimationFrame(animationId);
        ctx.clearRect(0, 0, canvas.width, canvas.height);
        audioStream.getTracks().forEach(track => track.stop());

        const blob = new Blob(audioChunks, { type: 'audio/webm' });
        const reader = new FileReader();
        reader.onloadend = () => {
          const base64Audio = reader.result.split(',')[1];
          google.colab.kernel.invokeFunction('notebook.onAudioCaptured', [base64Audio], {});
        };
        reader.readAsDataURL(blob);
      };
      mediaRecorder.start();
    }

    function stopRecording() {
      if (mediaRecorder && mediaRecorder.state !== "inactive") {
        mediaRecorder.stop();
      }
    }

    window.startRecording = startRecording;
    window.stopRecording = stopRecording;
  </script>
</div>
"""

# Audio capture callback
def on_audio_captured(b64_audio):
    """Save a browser recording to ``user_audio.webm`` and unlock response generation.

    Args:
        b64_audio: Base64 text of the recorded audio, as sent by the JS recorder.
    """
    audio_bytes = base64.b64decode(b64_audio)
    with open("user_audio.webm", "wb") as recording_file:
        recording_file.write(audio_bytes)

    with output_area:
        clear_output()
        print("✅ Recording complete. You can now generate a response.")
        display(Audio("user_audio.webm"))

    generate_button.disabled = False


# Expose the callback under the name the JavaScript recorder invokes.
colab_output.register_callback('notebook.onAudioCaptured', on_audio_captured)

# Button actions
def start_recording(btn):
    """Render the recorder, start it, and switch the buttons into "recording" state.

    Args:
        btn: The clicked button (unused).
    """
    recorder_markup = HTML(RECORD_HTML)
    start_call = Javascript("startRecording();")
    with output_area:
        clear_output()
        print("🎙️ Recording... Speak now!")
        # The markup must be rendered first: it defines startRecording().
        display(recorder_markup)
        display(start_call)

    # Only Stop stays clickable while a recording is in progress.
    for widget, is_disabled in (
        (record_button, True),
        (stop_button, False),
        (generate_button, True),
    ):
        widget.disabled = is_disabled

def stop_recording(btn):
    """Ask the browser recorder to stop and re-enable the Record button.

    Args:
        btn: The clicked button (unused).
    """
    stop_call = Javascript("stopRecording();")
    display(stop_call)

    for widget, is_disabled in ((record_button, False), (stop_button, True)):
        widget.disabled = is_disabled

def generate_response(btn):
    """Run the voice pipeline: transcribe the recording, ask GPT-4o, speak the answer.

    Reads ``user_audio.webm``, writes the spoken answer to ``gpt_response.mp3``,
    and prints every step (or the error that stopped it) to ``output_area``.

    Args:
        btn: The clicked button (unused; required by the ``on_click`` signature).
    """
    with output_area:
        clear_output()
        if not os.path.exists("user_audio.webm"):
            print("⚠️ No audio file found.")
            return

        # Any of the three API calls can fail; report it in the panel like the
        # other handlers do instead of letting the callback raise.
        try:
            print("⏳ Transcribing...")
            with open("user_audio.webm", "rb") as f:
                transcript = openai.audio.transcriptions.create(model="whisper-1", file=f)
            question = transcript.text
            print("🗣️ You said:", question)
            if not question.strip():
                # Nothing was understood, so there is nothing to answer or speak.
                print("⚠️ No speech detected. Please record again.")
                return

            print("💬 GPT-4o responding...")
            chat = openai.chat.completions.create(
                model="gpt-4o",
                messages=[{"role": "user", "content": question}]
            )
            # `content` may be None, so guard before stripping.
            answer = (chat.choices[0].message.content or "").strip()
            print("🤖 GPT-4o:", answer)

            print("🔊 Generating speech...")
            tts = openai.audio.speech.create(model="tts-1", voice="nova", input=answer)
            with open("gpt_response.mp3", "wb") as f:
                f.write(tts.content)
            display(Audio("gpt_response.mp3"))
        except Exception as e:
            print("❌ Error during voice interaction:", e)

def clear_all(btn):
    """Delete the recorded and generated audio and reset the UI.

    Args:
        btn: The clicked button (unused).
    """
    leftovers = ("user_audio.webm", "gpt_response.mp3")
    for path in filter(os.path.exists, leftovers):
        os.remove(path)

    with output_area:
        clear_output()
        print("🧹 Cleared. Ready to start a new voice interaction.")

    # Back to the initial state: only Record (and Clear) are clickable.
    record_button.disabled = False
    stop_button.disabled = True
    generate_button.disabled = True

# Button bindings
# Wire each button to its click handler.
for button, handler in (
    (record_button, start_recording),
    (stop_button, stop_recording),
    (generate_button, generate_response),
    (clear_button, clear_all),
):
    button.on_click(handler)


VBox(children=(HBox(children=(Button(button_style='primary', description='🎙️ Start Recording', style=ButtonSty…

<IPython.core.display.Javascript object>

## Mode 7 - **File-to-Text**: Document Q&A over an Uploaded PDF/XLSX/DOCX File

In [11]:
!pip install streamlit python-docx PyPDF2 pandas openai fpdf


Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m113.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [13]:
import os, openai, docx, PyPDF2, pandas as pd
import ipywidgets as ipy
from IPython.display import display, clear_output
from fpdf import FPDF

# Set your OpenAI key only if it is not configured yet. Assigning "" here would
# wipe a key set earlier in the session and hard-code a secret slot in the notebook.
if not openai.api_key:
    from getpass import getpass

    openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# Widgets: document picker, instruction box, action buttons, and a result panel.
upload_btn = ipy.FileUpload(accept=".pdf,.docx,.xlsx", multiple=False)
# Generate starts out disabled.
generate_btn = ipy.Button(
    description="💬 Generate",
    button_style="success",
    disabled=True,
)
clear_btn = ipy.Button(description="🧹 Clear", button_style="danger")
instruction_input = ipy.Textarea(
    placeholder="Type your instruction or question here...",
    layout=ipy.Layout(width="100%", height="100px"),
)
output_area = ipy.Output()

# App state: text extracted from the current upload, shared by the handlers.
doc_text = {"content": ""}

# Title, picker, instruction box, one row of buttons, then the output panel.
display(
    ipy.VBox(
        [
            ipy.Label("📄 Document Q&A - Upload and Ask"),
            upload_btn,
            instruction_input,
            ipy.HBox([generate_btn, clear_btn]),
            output_area,
        ]
    )
)

# Utilities
def extract_text_from_upload(file_info):
    """Extract plain text from the first file in a FileUpload value.

    The file is parsed from memory, so nothing is written to disk and the
    user-supplied file name is never used as a path.

    Args:
        file_info: Mapping of file name to a dict whose ``'content'`` entry
            holds the file's bytes (the shape read from ``upload_btn.value``).

    Returns:
        The extracted text: one line per PDF page or DOCX paragraph, or the
        string form of the sheet read by ``pd.read_excel`` for XLSX. For any
        other extension the literal ``"Unsupported file format."`` is returned.
    """
    import io  # local import keeps this helper self-contained

    filename = next(iter(file_info))
    content = file_info[filename]['content']
    ext = os.path.splitext(filename)[1].lower()
    stream = io.BytesIO(content)

    if ext == ".pdf":
        reader = PyPDF2.PdfReader(stream)
        # A page may yield no text at all; treat that as an empty line rather
        # than failing on None + str.
        return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
    if ext == ".docx":
        return "".join(para.text + "\n" for para in docx.Document(stream).paragraphs)
    if ext == ".xlsx":
        return pd.read_excel(stream).to_string()
    return "Unsupported file format."

# Handlers
def handle_upload(change):
    """Extract text from a newly uploaded document and show a preview.

    On success the text is stored in ``doc_text["content"]``, its first 500
    characters are printed, and the Generate button is enabled.

    Args:
        change: Trait-change notification from ``upload_btn`` (unused).
    """
    if not upload_btn.value:
        return

    with output_area:
        clear_output()
        print("📤 File uploaded. Extracting text...")

    try:
        extracted = extract_text_from_upload(upload_btn.value)
        doc_text["content"] = extracted
        with output_area:
            clear_output()
            print("✅ Extracted text preview:\n")
            print(extracted[:500])
        generate_btn.disabled = False
    except Exception as e:
        with output_area:
            clear_output()
            print("❌ Error reading file:", e)

def handle_generate(btn):
    """Answer the user's instruction about the uploaded document and export it as PDF.

    Only the first 3000 characters of the extracted text are sent to the model.
    The answer is printed to ``output_area``; a PDF copy is then written to
    ``/tmp/response_output.pdf`` and linked below it. A failure while building
    the PDF is reported without discarding the answer already shown.

    Args:
        btn: The clicked button (unused; required by the ``on_click`` signature).
    """
    from IPython.display import FileLink

    if not doc_text["content"]:
        with output_area:
            clear_output()
            print("⚠️ Please upload a document first.")
        return
    if not instruction_input.value.strip():
        with output_area:
            clear_output()
            print("⚠️ Please enter an instruction or question.")
        return

    with output_area:
        clear_output()
        print("⏳ Generating response...")
    try:
        struct = [
            {"role": "system", "content": "You are a helpful assistant that works with uploaded documents."},
            {"role": "user", "content": f"Here is the document content:\n{doc_text['content'][:3000]}\n\n{instruction_input.value}"}
        ]
        response = openai.chat.completions.create(model="gpt-4o", messages=struct)
        # `content` may be None, so guard before stripping.
        result = (response.choices[0].message.content or "").strip()
    except Exception as e:
        with output_area:
            clear_output()
            print("❌ Error generating response:", e)
        return

    with output_area:
        clear_output()
        print("🤖 AI Response:\n", result)

    # Save to PDF. This is kept separate from the request above so that an
    # export problem cannot erase the answer the user is already reading.
    try:
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=12)
        for line in result.split("\n"):
            # The built-in Arial font is limited to latin-1; replace anything
            # else (emoji, curly quotes, ...) so writing the PDF cannot fail on it.
            safe_line = line.encode("latin-1", "replace").decode("latin-1")
            pdf.multi_cell(0, 10, safe_line)
        pdf_path = "/tmp/response_output.pdf"
        pdf.output(pdf_path)

        # Show the link inside the panel, below the answer.
        with output_area:
            display(FileLink(pdf_path, result_html_prefix="📄 Download: "))
    except Exception as e:
        with output_area:
            print("⚠️ Could not create the PDF:", e)

def handle_clear(btn):
    """Reset the uploader, the instruction box, the stored text, and the panel.

    Args:
        btn: The clicked button (unused).
    """
    uploaded = upload_btn.value
    uploaded.clear()
    upload_btn._counter = 0

    instruction_input.value = ""
    generate_btn.disabled = True
    doc_text["content"] = ""

    with output_area:
        clear_output()
        print("🧹 Cleared. You can upload a new document.")

# Bind events
# Extract text whenever the upload widget's value changes.
upload_btn.observe(handle_upload, names='value')

# Wire each button to its click handler.
for button, handler in ((generate_btn, handle_generate), (clear_btn, handle_clear)):
    button.on_click(handler)


VBox(children=(Label(value='📄 Document Q&A - Upload and Ask'), FileUpload(value={}, accept='.pdf,.docx,.xlsx',…