In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Image to Verse Retrieval Demo\n",
    "\n",
    "This notebook demonstrates how to find relevant verses from Truyện Kiều that match input images."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import sys\n",
    "import unicodedata\n",
    "from io import BytesIO\n",
    "\n",
    "sys.path.append('..')\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import requests\n",
    "import torch\n",
    "from IPython.display import display, HTML\n",
    "from PIL import Image\n",
    "\n",
    "from src.multimodal_retriever import ImageToVerseRetriever\n",
    "from src.preprocessor import KieuPreprocessor\n",
    "from src.image_generator import VerseToImageGenerator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Setup Verse Retriever\n",
    "\n",
    "First, we'll initialize the verse retriever with CLIP."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prefer the GPU whenever PyTorch can see one; otherwise stay on the CPU\n",
    "device = \"cpu\"\n",
    "if torch.cuda.is_available():\n",
    "    device = \"cuda\"\n",
    "print(f\"Using device: {device}\")\n",
    "\n",
    "# Make sure the folder for the cached CLIP index exists\n",
    "os.makedirs(\"../models/clip_index\", exist_ok=True)\n",
    "\n",
    "try:\n",
    "    # Build the retriever that the later cells rely on\n",
    "    retriever = ImageToVerseRetriever()\n",
    "except ImportError as e:\n",
    "    # Report the failed import together with the install hint\n",
    "    print(f\"Error: {e}\")\n",
    "    print(\"Please install the required packages: pip install transformers torch\")\n",
    "else:\n",
    "    print(\"Verse retriever initialized successfully\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load and Index Verses from Truyện Kiều\n",
    "\n",
    "Next, we'll load all verses and create searchable embeddings."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the verses of the poem from the text file\n",
    "preprocessor = KieuPreprocessor()\n",
    "verses = preprocessor.load_poem('../data/truyen_kieu.txt')\n",
    "print(f\"Loaded {len(verses)} verses from Truyện Kiều\")\n",
    "\n",
    "# Location of the cached verse index\n",
    "index_path = \"../models/clip_index/kieu_verses.pt\"\n",
    "\n",
    "if not os.path.exists(index_path):\n",
    "    # No cached index yet: build it from the verses, then persist it\n",
    "    print(\"Creating new verse index (this may take a while)...\")\n",
    "    retriever.index_verses(verses)\n",
    "\n",
    "    print(\"Saving verse index...\")\n",
    "    retriever.save_index(index_path)\n",
    "else:\n",
    "    # Reuse the index saved by an earlier run\n",
    "    print(\"Loading verse index from file...\")\n",
    "    retriever.load_index(index_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Test Image-to-Verse Retrieval with Sample Images\n",
    "\n",
    "Let's try retrieving verses for some sample images."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def display_image_and_verses(image, results):\n",
    "    \"\"\"Show the input image next to a ranked list of its matching verses.\n",
    "\n",
    "    Args:\n",
    "        image: Image drawn in the left panel (passed straight to `imshow`).\n",
    "        results: Iterable of dicts; each is read for a numeric 'score'\n",
    "            and a 'verse' entry.\n",
    "    \"\"\"\n",
    "    fig, (image_ax, verse_ax) = plt.subplots(1, 2, figsize=(12, 6))\n",
    "\n",
    "    # Left panel: the picture itself, without axes\n",
    "    image_ax.imshow(image)\n",
    "    image_ax.axis('off')\n",
    "    image_ax.set_title(\"Input Image\")\n",
    "\n",
    "    # Right panel: a blank canvas that only carries text\n",
    "    verse_ax.axis('off')\n",
    "\n",
    "    # One numbered \"rank. [score] verse\" entry per result\n",
    "    entries = [\n",
    "        f\"{rank}. [{item['score']:.3f}] {item['verse']}\\n\\n\"\n",
    "        for rank, item in enumerate(results, start=1)\n",
    "    ]\n",
    "    verse_text = \"Matching Verses:\\n\\n\" + \"\".join(entries)\n",
    "    verse_ax.text(0, 0.5, verse_text, fontsize=10, verticalalignment='center')\n",
    "\n",
    "    fig.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "# Sample images of Vietnamese landscapes, nature, and cultural scenes\n",
    "sample_image_urls = [\n",
    "    \"https://upload.wikimedia.org/wikipedia/commons/thumb/0/0c/Ha_Long_Bay_on_a_sunny_day.jpg/1200px-Ha_Long_Bay_on_a_sunny_day.jpg\",  # Ha Long Bay\n",
    "    \"https://upload.wikimedia.org/wikipedia/commons/thumb/b/bc/A_lotus_leaf%2C_flower_and_dragonfly.jpg/1200px-A_lotus_leaf%2C_flower_and_dragonfly.jpg\",  # Lotus flower\n",
    "    \"https://upload.wikimedia.org/wikipedia/commons/thumb/e/e8/Bright_full_moon_%2840786297932%29.jpg/1200px-Bright_full_moon_%2840786297932%29.jpg\"  # Full moon\n",
    "]\n",
    "\n",
    "# Seconds to wait for the image server before giving up on a download\n",
    "DOWNLOAD_TIMEOUT = 30\n",
    "\n",
    "# Process each sample image\n",
    "for url in sample_image_urls:\n",
    "    try:\n",
    "        # Download image; the timeout keeps a stalled server from hanging the\n",
    "        # notebook, and raise_for_status() stops an HTTP error page from being\n",
    "        # handed to PIL as if it were image bytes\n",
    "        response = requests.get(url, timeout=DOWNLOAD_TIMEOUT)\n",
    "        response.raise_for_status()\n",
    "        image = Image.open(BytesIO(response.content)).convert('RGB')\n",
    "\n",
    "        # Find matching verses\n",
    "        results = retriever.find_matching_verses(image, top_k=5)\n",
    "\n",
    "        # Display image and verses\n",
    "        display_image_and_verses(image, results)\n",
    "\n",
    "        # Get explanation for top match\n",
    "        if results:\n",
    "            explanation = retriever.explain_match(image, results[0]['index'])\n",
    "            print(\"Match explanation:\")\n",
    "            print(f\"Key imagery: {', '.join(explanation['key_imagery'])}\")\n",
    "            print()\n",
    "\n",
    "    except Exception as e:\n",
    "        # Best effort: report the failure and continue with the next image\n",
    "        print(f\"Error processing image {url}: {e}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Interactive Image-to-Verse Retrieval\n",
    "\n",
    "Let's create an interactive tool for retrieving verses from uploaded images."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ipywidgets import widgets, interact, fixed\n",
    "import ipywidgets as widgets\n",
    "\n",
    "def retrieve_verses_from_upload(upload_widget):\n",
    "    \"\"\"Find and display the verses that best match an uploaded image.\n",
    "\n",
    "    Prints a prompt and returns early when nothing has been uploaded yet.\n",
    "\n",
    "    Args:\n",
    "        upload_widget: A `FileUpload` widget whose `value` holds the upload.\n",
    "    \"\"\"\n",
    "    uploaded = upload_widget.value\n",
    "    if not uploaded:\n",
    "        print(\"Please upload an image first.\")\n",
    "        return\n",
    "\n",
    "    # ipywidgets 8 stores uploads as a tuple of dicts, while ipywidgets 7\n",
    "    # stores a dict keyed by file name; support both layouts\n",
    "    if isinstance(uploaded, dict):\n",
    "        first_file = next(iter(uploaded.values()))\n",
    "    else:\n",
    "        first_file = uploaded[0]\n",
    "\n",
    "    # Convert to RGB so uploads (e.g. RGBA or palette PNGs) are handled the\n",
    "    # same way as the downloaded sample images\n",
    "    image = Image.open(BytesIO(first_file['content'])).convert('RGB')\n",
    "\n",
    "    # Find matching verses\n",
    "    results = retriever.find_matching_verses(image, top_k=5)\n",
    "\n",
    "    # Display image and verses\n",
    "    display_image_and_verses(image, results)\n",
    "\n",
    "    # Get explanation for top match\n",
    "    if results:\n",
    "        explanation = retriever.explain_match(image, results[0]['index'])\n",
    "        print(\"Match explanation:\")\n",
    "        print(f\"Key imagery: {', '.join(explanation['key_imagery'])}\")\n",
    "\n",
    "        # Also find complementary verses\n",
    "        print(\"\\nComplementary verses (that add context rather than directly match):\")\n",
    "        complement_results = retriever.find_complementary_verses(image, top_k=3)\n",
    "        for i, result in enumerate(complement_results):\n",
    "            print(f\"{i+1}. {result['verse']}\")\n",
    "\n",
    "# Create upload widget\n",
    "upload = widgets.FileUpload(\n",
    "    accept='image/*',  # Accept only images\n",
    "    multiple=False,    # Only one file at a time\n",
    "    description='Upload Image:'\n",
    ")\n",
    "\n",
    "# Create button to trigger processing\n",
    "button = widgets.Button(\n",
    "    description='Find Matching Verses',\n",
    "    button_style='primary',\n",
    "    tooltip='Click to find verses that match the image'\n",
    ")\n",
    "\n",
    "# Output area that receives everything the callback prints or plots;\n",
    "# without it, output produced inside a widget callback may not be shown\n",
    "# below the cell (e.g. in JupyterLab)\n",
    "output = widgets.Output()\n",
    "\n",
    "# Define button click action\n",
    "def on_button_clicked(b):\n",
    "    \"\"\"Run the retrieval for the current upload, replacing earlier output.\"\"\"\n",
    "    with output:\n",
    "        # wait=True defers clearing until new output arrives, avoiding flicker\n",
    "        output.clear_output(wait=True)\n",
    "        retrieve_verses_from_upload(upload)\n",
    "\n",
    "button.on_click(on_button_clicked)\n",
    "\n",
    "# Display widgets\n",
    "display(widgets.VBox([upload, button, output]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Testing with Generated Images\n",
    "\n",
    "Let's test the retrieval system with images generated from verses, to see if it can match them back to the original or similar verses."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the image generator\n",
    "try:\n",
    "    image_generator = VerseToImageGenerator()\n",
    "    print(\"Image generator initialized successfully\")\n",
    "    \n",
    "    # Select some verses with rich imagery\n",
    "    test_verses = [\n",
    "        \"Vầng trăng vằng vặc giữa trời,\",\n",
    "        \"Mây trôi bèo nổi thiếu gì là nơi!\"\n",
    "    ]\n",
    "    \n",
    "    for verse in test_verses:\n",
    "        print(f\"\\nGenerating image for: {verse}\")\n",
    "        \n",
    "        # Generate image from verse\n",
    "        image = image_generator.generate_image(verse)\n",
    "        \n",
    "        # Now retrieve verses for this generated image\n",
    "        results = retriever.find_matching_verses(image, top_k=5)\n",
    "        \n",
    "        # Display results\n",
    "        display_image_and_verses(image, results)\n",
    "        \n",
    "        # Check if original verse is in results\n",
    "        original_in_results = any(verse in result['verse'] for result in results)\n",
    "        if original_in_results:\n",
    "            print(\"✓ Original verse found in results!\")\n",
    "        else:\n",
    "            print(\"✗ Original verse not found in top results\")\n",
    "except ImportError:\n",
    "    print(\"Image generator not available. Please install required packages.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Batch Processing of Multiple Images\n",
    "\n",
    "Let's demonstrate how to process multiple images in batch mode."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def batch_process_demo(timeout=30):\n",
    "    \"\"\"Download sample images, batch-match them to verses, and print the top hits.\n",
    "\n",
    "    Args:\n",
    "        timeout: Seconds to wait for each image download before giving up.\n",
    "    \"\"\"\n",
    "    # Create a temporary directory for images\n",
    "    temp_dir = \"../temp_images\"\n",
    "    os.makedirs(temp_dir, exist_ok=True)\n",
    "\n",
    "    # Sample image URLs\n",
    "    image_urls = [\n",
    "        \"https://upload.wikimedia.org/wikipedia/commons/thumb/0/01/Han_River_Bridge_in_Da_Nang.jpg/1200px-Han_River_Bridge_in_Da_Nang.jpg\",  # Bridge\n",
    "        \"https://upload.wikimedia.org/wikipedia/commons/thumb/e/e2/Vietnamese_woman_with_conical_leaf_hat.jpg/800px-Vietnamese_woman_with_conical_leaf_hat.jpg\",  # Woman with hat\n",
    "        \"https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Thuy_Ta_restaurant_by_Hoan_Kiem_Lake_Hanoi.jpg/1200px-Thuy_Ta_restaurant_by_Hoan_Kiem_Lake_Hanoi.jpg\"  # Lake\n",
    "    ]\n",
    "\n",
    "    # Download images\n",
    "    for i, url in enumerate(image_urls):\n",
    "        try:\n",
    "            response = requests.get(url, timeout=timeout)\n",
    "            # Never save an HTTP error page under a .jpg name\n",
    "            response.raise_for_status()\n",
    "            image_path = os.path.join(temp_dir, f\"image_{i}.jpg\")\n",
    "            with open(image_path, 'wb') as f:\n",
    "                f.write(response.content)\n",
    "        except Exception as e:\n",
    "            # Best effort: report the failure and continue with the next URL\n",
    "            print(f\"Error downloading {url}: {e}\")\n",
    "\n",
    "    # Process batch; the results file is written next to the images\n",
    "    results_file = os.path.join(temp_dir, \"results.json\")\n",
    "    retriever.batch_process_images(temp_dir, results_file)\n",
    "\n",
    "    # Display results\n",
    "    with open(results_file, 'r', encoding='utf-8') as f:\n",
    "        batch_results = json.load(f)\n",
    "\n",
    "    for image_file, matches in batch_results.items():\n",
    "        print(f\"\\nImage: {image_file}\")\n",
    "        print(\"Top matching verses:\")\n",
    "        for i, match in enumerate(matches[:3], 1):  # Show top 3\n",
    "            print(f\"{i}. [{match['score']:.3f}] {match['verse']}\")\n",
    "\n",
    "# Run batch processing demo\n",
    "batch_process_demo()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Evaluation of Verse-Image Matching\n",
    "\n",
    "Let's create a simple evaluation framework to assess how well our system matches images to verses."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_matching(eval_verses=None, top_k=10):\n",
    "    \"\"\"Round-trip evaluation: verse -> generated image -> retrieved verses.\n",
    "\n",
    "    For each verse an image is generated, verses are retrieved for that image,\n",
    "    and the rank of the original verse among them is recorded. The outcome is\n",
    "    shown as an HTML table followed by the overall success rate.\n",
    "\n",
    "    Args:\n",
    "        eval_verses: Verses to evaluate; defaults to five verses with clear\n",
    "            visual elements.\n",
    "        top_k: Number of retrieved verses searched for the original verse.\n",
    "    \"\"\"\n",
    "    if eval_verses is None:\n",
    "        # List of verses with clear visual elements\n",
    "        eval_verses = [\n",
    "            \"Vầng trăng vằng vặc giữa trời,\",  # Moon\n",
    "            \"Lạnh lùng bóng nguyệt rọi thềm,\",  # Moonlight\n",
    "            \"Hoa trôi giạt mái sông xuôi,\",  # Floating flowers\n",
    "            \"Mây trôi nước chảy xuôi dòng,\",  # Clouds and water\n",
    "            \"Cỏ non xanh tận chân trời,\"  # Green grass\n",
    "        ]\n",
    "\n",
    "    results = []\n",
    "\n",
    "    # For each verse, generate image and then retrieve verses\n",
    "    for verse in eval_verses:\n",
    "        try:\n",
    "            image = image_generator.generate_image(verse)\n",
    "            matches = retriever.find_matching_verses(image, top_k=top_k)\n",
    "\n",
    "            # 1-based rank of the first match containing the original verse,\n",
    "            # or -1 when absent. Both sides are compared in NFC form because\n",
    "            # Vietnamese diacritics can be stored precomposed or decomposed\n",
    "            verse_nfc = unicodedata.normalize('NFC', verse)\n",
    "            position = next(\n",
    "                (\n",
    "                    rank\n",
    "                    for rank, match in enumerate(matches, start=1)\n",
    "                    if verse_nfc in unicodedata.normalize('NFC', match['verse'])\n",
    "                ),\n",
    "                -1,\n",
    "            )\n",
    "\n",
    "            results.append({\n",
    "                'verse': verse,\n",
    "                'found': position > 0,\n",
    "                'position': position if position > 0 else \"Not found\",\n",
    "                'top_match': matches[0]['verse'] if matches else \"No matches\"\n",
    "            })\n",
    "\n",
    "        except Exception as e:\n",
    "            # Best effort: report the failure and continue with the next verse\n",
    "            print(f\"Error processing {verse}: {e}\")\n",
    "\n",
    "    # Display results as a table\n",
    "    table_parts = [\n",
    "        \"<table border='1' style='border-collapse: collapse; width: 100%;'>\\n\",\n",
    "        \"<tr><th>Original Verse</th><th>Found?</th><th>Position</th><th>Top Match</th></tr>\\n\",\n",
    "    ]\n",
    "    for result in results:\n",
    "        color = \"green\" if result['found'] else \"red\"\n",
    "        table_parts.append(\n",
    "            \"<tr>\\n\"\n",
    "            f\"<td>{result['verse']}</td>\\n\"\n",
    "            f\"<td style='color: {color};'>{result['found']}</td>\\n\"\n",
    "            f\"<td>{result['position']}</td>\\n\"\n",
    "            f\"<td>{result['top_match']}</td>\\n\"\n",
    "            \"</tr>\\n\"\n",
    "        )\n",
    "    table_parts.append(\"</table>\")\n",
    "    display(HTML(\"\".join(table_parts)))\n",
    "\n",
    "    # Calculate success rate (0 when no verse could be evaluated)\n",
    "    success_count = sum(1 for r in results if r['found'])\n",
    "    success_rate = success_count / len(results) if results else 0\n",
    "    print(f\"\\nSuccess rate: {success_count}/{len(results)} ({success_rate*100:.1f}%)\")\n",
    "\n",
    "# Run evaluation\n",
    "try:\n",
    "    evaluate_matching()\n",
    "except Exception as e:\n",
    "    print(f\"Error during evaluation: {e}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Conclusion\n",
    "\n",
    "In this notebook, we've demonstrated how to find verses from Truyện Kiều that match input images. We've shown:\n",
    "\n",
    "1. Setting up the image-to-verse retrieval system and loading and indexing the verses\n",
    "2. Testing retrieval with sample images\n",
    "3. Interactive image-to-verse retrieval\n",
    "4. Testing with generated images (round-trip verification)\n",
    "5. Batch processing of multiple images\n",
    "6. Evaluation of the matching system\n",
    "\n",
    "This completes the multimodal aspect of our Truyện Kiều project, allowing us to connect visual content with textual verses. The performance of the system depends on how well the CLIP model understands Vietnamese cultural contexts, and how descriptive the verses are. The cultural context enhancements help bridge some of these gaps."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}