In [None]:
{
 "cells": [
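  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Forest Fire Detection - Exploratory Data Analysis\n",
    "\n",
    "This notebook explores the satellite imagery and the accompanying metadata of the forest fire detection dataset.\n",
    "We'll examine:\n",
    "\n",
    "1. Image metadata and structure\n",
    "2. Visual inspection of sample images and fire spots\n",
    "3. Coordinate mapping validation and automatic fire detection\n",
    "4. Spatial distribution of detected fire hotspots\n",
    "5. Fire activity across multiple acquisition dates\n",
    "6. Data quality assessment"
   ]
  },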
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Make the project sources importable. The membership test keeps re-runs of\n",
    "# this cell from appending the same entry to sys.path over and over.\n",
    "if '../src' not in sys.path:\n",
    "    sys.path.append('../src')\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from pathlib import Path\n",
    "import cv2\n",
    "from datetime import datetime\n",
    "\n",
    "# Our custom modules\n",
    "# NOTE(review): the local package name `io` collides with Python's standard-library\n",
    "# `io` module, which is normally already imported when this cell runs, so these\n",
    "# imports are expected to fail with \"'io' is not a package\". Confirm the layout\n",
    "# under ../src and rename the local package (or import it under another name).\n",
    "from io.readers import MetadataReader, GeoTIFFReader, ImageMetadata\n",
    "from io.geo import GeoTransformer, FirePointMapper, detect_fire_pixels_auto\n",
    "\n",
    "# Set up plotting\n",
    "# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*';\n",
    "# fall back to the legacy name so older installs do not raise here.\n",
    "plot_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'\n",
    "plt.style.use(plot_style)\n",
    "plt.rcParams['figure.figsize'] = (12, 8)\n",
    "\n",
    "%matplotlib inline\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Load Metadata and Explore Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate the readers for the metadata sheet and the GeoTIFF archive\n",
    "metadata_reader = MetadataReader(\"../data/raw/WorldView Metadata - Sheet1.csv\")\n",
    "geotiff_reader = GeoTIFFReader(\"../data/raw/forest_fire_dataset\")\n",
    "\n",
    "# Quick overview of what the metadata sheet contains\n",
    "meta_df = metadata_reader.df\n",
    "date_col = meta_df['Date']\n",
    "print(f\"Total images in metadata: {len(meta_df)}\")\n",
    "print(f\"States: {meta_df['State'].unique()}\")\n",
    "print(f\"Date range: {date_col.min()} to {date_col.max()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the first five metadata rows to see the available columns\n",
    "metadata_reader.df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ontario-specific analysis\n",
    "ontario_files = metadata_reader.get_state_files(\"Ontario\")\n",
    "print(f\"Ontario images: {len(ontario_files)}\")\n",
    "\n",
    "# Build a date-sorted frame of the Ontario rows, with 'Date' parsed to timestamps\n",
    "is_ontario = metadata_reader.df['State'] == 'Ontario'\n",
    "ontario_df = (\n",
    "    metadata_reader.df[is_ontario]\n",
    "    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))\n",
    "    .sort_values('Date')\n",
    ")\n",
    "\n",
    "print(\"\\nOntario dates:\")\n",
    "print(ontario_df[['File Name', 'Date']].head(10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Load and Visualize Sample Images"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the first Ontario file as the sample\n",
    "sample_filename = ontario_files[0]\n",
    "print(f\"Loading sample image: {sample_filename}\")\n",
    "\n",
    "# Look up the metadata record that belongs to the sample\n",
    "img_metadata = metadata_reader.get_metadata(sample_filename)\n",
    "print(f\"Metadata: {img_metadata}\")\n",
    "print(f\"Bounds: {img_metadata.get_bounds()}\")\n",
    "\n",
    "# Read the raster itself; if anything goes wrong, report it and list the\n",
    "# files that discover_images finds for Ontario instead\n",
    "try:\n",
    "    image, raster_metadata = geotiff_reader.read_image(sample_filename, \"Ontario\")\n",
    "    print(f\"Image shape: {image.shape}\")\n",
    "    print(f\"Image dtype: {image.dtype}\")\n",
    "    print(f\"Image value range: {image.min()} - {image.max()}\")\n",
    "except Exception as load_error:\n",
    "    print(f\"Error loading image: {load_error}\")\n",
    "    # Try to discover actual files\n",
    "    from io.readers import discover_images\n",
    "    actual_files = discover_images(\"../data/raw/forest_fire_dataset\", \"Ontario\")\n",
    "    print(f\"Actual files found: {actual_files[:5]}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the sample image side by side with its pixel-value histogram\n",
    "if 'image' in locals():\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(15, 7))\n",
    "    ax_image, ax_hist = axes\n",
    "\n",
    "    # Left panel: the image itself\n",
    "    ax_image.imshow(image)\n",
    "    ax_image.set_title(f\"Full Image: {sample_filename}\")\n",
    "    ax_image.axis('off')\n",
    "\n",
    "    # Right panel: histogram over every pixel value (all channels pooled)\n",
    "    ax_hist.hist(image.flatten(), bins=50, alpha=0.7, color='blue')\n",
    "    ax_hist.set_title(\"Pixel Value Distribution\")\n",
    "    ax_hist.set_xlabel(\"Pixel Value\")\n",
    "    ax_hist.set_ylabel(\"Frequency\")\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "    # Per-channel summary, only when the array has exactly three channels\n",
    "    if image.ndim == 3 and image.shape[2] == 3:\n",
    "        print(\"\\nRGB Channel Statistics:\")\n",
    "        for band_index, band_name in enumerate(('Red', 'Green', 'Blue')):\n",
    "            band = image[:, :, band_index]\n",
    "            print(f\"{band_name}: mean={band.mean():.1f}, std={band.std():.1f}, \"\n",
    "                  f\"min={band.min()}, max={band.max()}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Fire Detection and Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the lat/lon <-> pixel transformer for the sample image\n",
    "if 'image' in locals() and 'img_metadata' in locals():\n",
    "    bounds = img_metadata.get_bounds()\n",
    "    geo_transformer = GeoTransformer(bounds, image.shape[:2])\n",
    "\n",
    "    print(f\"Image bounds: {bounds}\")\n",
    "    print(f\"Pixel size: {geo_transformer.pixel_size_x:.6f}° x {geo_transformer.pixel_size_y:.6f}°\")\n",
    "    print(f\"Pixel area: {geo_transformer.get_pixel_area_km2():.6f} km²\")\n",
    "\n",
    "    # Sanity check: convert the midpoint of the bounds to pixel coordinates.\n",
    "    # The pairing below uses bounds[0]/bounds[2] for longitude and\n",
    "    # bounds[1]/bounds[3] for latitude.\n",
    "    center_lon = (bounds[0] + bounds[2]) / 2\n",
    "    center_lat = (bounds[1] + bounds[3]) / 2\n",
    "\n",
    "    center_row, center_col = geo_transformer.latlon_to_pixel(center_lat, center_lon)\n",
    "    print(f\"Image center: lat={center_lat:.4f}, lon={center_lon:.4f} -> pixel=({center_row}, {center_col})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Automatic fire detection\n",
    "if 'image' in locals():\n",
    "    print(\"Attempting automatic fire detection...\")\n",
    "    \n",
    "    # Try different thresholds\n",
    "    fire_pixels_conservative = detect_fire_pixels_auto(image, red_threshold=220, orange_ratio=1.2)\n",
    "    fire_pixels_moderate = detect_fire_pixels_auto(image, red_threshold=200, orange_ratio=0.8)\n",
    "    fire_pixels_liberal = detect_fire_pixels_auto(image, red_threshold=180, orange_ratio=0.6)\n",
    "    \n",
    "    print(f\"Fire pixels detected:\")\n",
    "    print(f\"  Conservative: {len(fire_pixels_conservative)}\")\n",
    "    print(f\"  Moderate: {len(fire_pixels_moderate)}\")\n",
    "    print(f\"  Liberal: {len(fire_pixels_liberal)}\")\n",
    "    \n",
    "    # Create masks. Only this part needs the coordinate transformer, so check for\n",
    "    # it explicitly instead of raising NameError when that cell was skipped.\n",
    "    if 'geo_transformer' in locals():\n",
    "        fire_mapper = FirePointMapper(geo_transformer)\n",
    "        \n",
    "        # len() rather than bare truthiness, so an array-like detection result\n",
    "        # (whose truth value would be ambiguous) is handled as well.\n",
    "        if len(fire_pixels_moderate) > 0:\n",
    "            # Convert pixel coordinates to lat/lon for mask creation\n",
    "            # (limited to the first 100 detections for performance)\n",
    "            fire_latlons = [\n",
    "                tuple(geo_transformer.pixel_to_latlon(row, col))\n",
    "                for row, col in fire_pixels_moderate[:100]\n",
    "            ]\n",
    "            \n",
    "            fire_mask = fire_mapper.create_binary_mask(fire_latlons, radius=3)\n",
    "            print(f\"Fire mask created with {np.sum(fire_mask)} pixels\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Four-panel view of the detection results\n",
    "if 'image' in locals() and fire_pixels_moderate:\n",
    "    fig, axes = plt.subplots(2, 2, figsize=(15, 12))\n",
    "    (ax_original, ax_detections), (ax_mask, ax_overlay) = axes\n",
    "\n",
    "    # Top-left: untouched image\n",
    "    ax_original.imshow(image)\n",
    "    ax_original.set_title(\"Original Image\")\n",
    "    ax_original.axis('off')\n",
    "\n",
    "    # Top-right: filled red dots on a copy for the first 500 detections,\n",
    "    # skipping any coordinate that falls outside the image\n",
    "    image_with_fires = image.copy()\n",
    "    n_rows, n_cols = image.shape[0], image.shape[1]\n",
    "    for row, col in fire_pixels_moderate[:500]:\n",
    "        if not (0 <= row < n_rows and 0 <= col < n_cols):\n",
    "            continue\n",
    "        cv2.circle(image_with_fires, (col, row), 2, (255, 0, 0), -1)\n",
    "\n",
    "    ax_detections.imshow(image_with_fires)\n",
    "    ax_detections.set_title(f\"Detected Fire Pixels ({len(fire_pixels_moderate)})\")\n",
    "    ax_detections.axis('off')\n",
    "\n",
    "    # Bottom row: only drawn when a mask exists\n",
    "    if 'fire_mask' in locals():\n",
    "        ax_mask.imshow(fire_mask, cmap='Reds')\n",
    "        ax_mask.set_title(\"Fire Mask\")\n",
    "        ax_mask.axis('off')\n",
    "\n",
    "        # Paint every masked pixel red on a copy of the image\n",
    "        overlay = image.copy()\n",
    "        overlay[fire_mask > 0] = [255, 0, 0]  # Red overlay\n",
    "        ax_overlay.imshow(overlay)\n",
    "        ax_overlay.set_title(\"Fire Mask Overlay\")\n",
    "        ax_overlay.axis('off')\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Analyze Fire Hotspots and Patterns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze fire hotspot distribution\n",
    "# Check that the variables exist (as the other cells do) so a failed image load\n",
    "# skips this cell instead of raising NameError. len() is used rather than bare\n",
    "# truthiness so an array-like detection result is handled as well.\n",
    "if 'image' in locals() and 'fire_pixels_moderate' in locals() and len(fire_pixels_moderate) > 0:\n",
    "    # Convert to numpy arrays for analysis\n",
    "    fire_rows = np.array([pixel[0] for pixel in fire_pixels_moderate])\n",
    "    fire_cols = np.array([pixel[1] for pixel in fire_pixels_moderate])\n",
    "    \n",
    "    # Plot fire distribution\n",
    "    fig, axes = plt.subplots(1, 3, figsize=(18, 5))\n",
    "    \n",
    "    # Spatial distribution (y-axis inverted so row 0 is at the top, like the image)\n",
    "    axes[0].scatter(fire_cols, fire_rows, alpha=0.6, s=1)\n",
    "    axes[0].set_xlim(0, image.shape[1])\n",
    "    axes[0].set_ylim(image.shape[0], 0)  # Invert y-axis\n",
    "    axes[0].set_title(\"Fire Pixel Distribution\")\n",
    "    axes[0].set_xlabel(\"Column (pixels)\")\n",
    "    axes[0].set_ylabel(\"Row (pixels)\")\n",
    "    \n",
    "    # Row distribution\n",
    "    axes[1].hist(fire_rows, bins=50, orientation='horizontal', alpha=0.7)\n",
    "    axes[1].set_title(\"Row Distribution\")\n",
    "    axes[1].set_ylabel(\"Row (pixels)\")\n",
    "    axes[1].set_xlabel(\"Count\")\n",
    "    axes[1].invert_yaxis()\n",
    "    \n",
    "    # Column distribution\n",
    "    axes[2].hist(fire_cols, bins=50, alpha=0.7)\n",
    "    axes[2].set_title(\"Column Distribution\")\n",
    "    axes[2].set_xlabel(\"Column (pixels)\")\n",
    "    axes[2].set_ylabel(\"Count\")\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "    \n",
    "    # Print statistics\n",
    "    print(f\"\\nFire hotspot statistics:\")\n",
    "    print(f\"Total detected pixels: {len(fire_pixels_moderate)}\")\n",
    "    print(f\"Row range: {fire_rows.min()} - {fire_rows.max()}\")\n",
    "    print(f\"Column range: {fire_cols.min()} - {fire_cols.max()}\")\n",
    "    print(f\"Centroid: ({fire_rows.mean():.1f}, {fire_cols.mean():.1f})\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Multi-day Analysis (if multiple images available)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze multiple days if available\n",
    "print(f\"Available Ontario files: {len(ontario_files)}\")\n",
    "\n",
    "if len(ontario_files) > 1:\n",
    "    # Compare up to the first four Ontario images (the sample image included)\n",
    "    sample_files = ontario_files[:4]\n",
    "    \n",
    "    fire_counts_by_date = []\n",
    "    \n",
    "    for filename in sample_files:\n",
    "        try:\n",
    "            print(f\"\\nProcessing {filename}...\")\n",
    "            img_meta = metadata_reader.get_metadata(filename)\n",
    "            img, _ = geotiff_reader.read_image(filename, \"Ontario\")\n",
    "            \n",
    "            # Same thresholds as the 'moderate' setting used for the sample image\n",
    "            fire_pixels = detect_fire_pixels_auto(img, red_threshold=200, orange_ratio=0.8)\n",
    "            \n",
    "            fire_counts_by_date.append({\n",
    "                'filename': filename,\n",
    "                'date': img_meta.date,\n",
    "                'fire_pixels': len(fire_pixels)\n",
    "            })\n",
    "            \n",
    "            print(f\"  Date: {img_meta.date.date()}\")\n",
    "            print(f\"  Fire pixels: {len(fire_pixels)}\")\n",
    "            \n",
    "        except Exception as e:\n",
    "            # Best effort: report the failure and move on to the next file\n",
    "            print(f\"  Error: {e}\")\n",
    "    \n",
    "    # Plot temporal trend (needs at least two successfully processed images)\n",
    "    if len(fire_counts_by_date) > 1:\n",
    "        df_fires = pd.DataFrame(fire_counts_by_date)\n",
    "        df_fires = df_fires.sort_values('date')\n",
    "        \n",
    "        plt.figure(figsize=(12, 6))\n",
    "        plt.plot(df_fires['date'], df_fires['fire_pixels'], 'o-', linewidth=2, markersize=8)\n",
    "        plt.title(\"Fire Activity Over Time\")\n",
    "        plt.xlabel(\"Date\")\n",
    "        plt.ylabel(\"Number of Fire Pixels\")\n",
    "        plt.xticks(rotation=45)\n",
    "        plt.grid(True, alpha=0.3)\n",
    "        plt.tight_layout()\n",
    "        plt.show()\n",
    "        \n",
    "        print(\"\\nTemporal analysis:\")\n",
    "        print(df_fires.to_string(index=False))\n",
    "else:\n",
    "    print(\"Only one image available for temporal analysis\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Data Quality Assessment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probe the first few Ontario files and sort them into readable / unreadable\n",
    "print(\"Data Quality Assessment:\")\n",
    "print(\"=\" * 30)\n",
    "\n",
    "valid_files = []\n",
    "invalid_files = []\n",
    "\n",
    "for filename in ontario_files[:10]:  # Check first 10 files\n",
    "    try:\n",
    "        info = geotiff_reader.get_image_info(filename, \"Ontario\")\n",
    "        # Keep only the fields of interest, in a fixed column order\n",
    "        record = {'filename': filename}\n",
    "        record.update({key: info[key] for key in ('width', 'height', 'bands')})\n",
    "        record['dtype'] = str(info['dtype'])\n",
    "        valid_files.append(record)\n",
    "    except Exception as err:\n",
    "        # Remember the failure so it can be listed at the end\n",
    "        invalid_files.append({'filename': filename, 'error': str(err)})\n",
    "\n",
    "print(f\"Valid files: {len(valid_files)}\")\n",
    "print(f\"Invalid files: {len(invalid_files)}\")\n",
    "\n",
    "if valid_files:\n",
    "    df_valid = pd.DataFrame(valid_files)\n",
    "    print(\"\\nValid file statistics:\")\n",
    "    print(df_valid.describe())\n",
    "\n",
    "if invalid_files:\n",
    "    print(\"\\nInvalid files:\")\n",
    "    for entry in invalid_files:\n",
    "        print(f\"  {entry['filename']}: {entry['error']}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Summary and Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"EXPLORATORY DATA ANALYSIS SUMMARY\")\n",
    "print(\"=\" * 40)\n",
    "\n",
    "# Parse the dates before calling .date(): the raw 'Date' column may hold plain\n",
    "# strings (the Ontario cell has to run pd.to_datetime on it), and str has no\n",
    "# .date(). An already-parsed datetime column passes through unchanged.\n",
    "summary_dates = pd.to_datetime(metadata_reader.df['Date'])\n",
    "\n",
    "print(f\"📊 Dataset Overview:\")\n",
    "print(f\"   • Total images: {len(metadata_reader.df)}\")\n",
    "print(f\"   • Ontario images: {len(ontario_files)}\")\n",
    "print(f\"   • Date range: {summary_dates.min().date()} to {summary_dates.max().date()}\")\n",
    "\n",
    "if 'image' in locals():\n",
    "    print(f\"\\n🖼️  Image Properties:\")\n",
    "    print(f\"   • Dimensions: {image.shape}\")\n",
    "    print(f\"   • Data type: {image.dtype}\")\n",
    "    print(f\"   • Value range: {image.min()} - {image.max()}\")\n",
    "\n",
    "if 'geo_transformer' in locals():\n",
    "    print(f\"\\n🌍 Geospatial Properties:\")\n",
    "    print(f\"   • Pixel area: {geo_transformer.get_pixel_area_km2():.6f} km²\")\n",
    "    print(f\"   • Total image area: {geo_transformer.get_pixel_area_km2() * image.shape[0] * image.shape[1]:.2f} km²\")\n",
    "\n",
    "if 'fire_pixels_moderate' in locals():\n",
    "    print(f\"\\n🔥 Fire Detection:\")\n",
    "    print(f\"   • Detected fire pixels: {len(fire_pixels_moderate)}\")\n",
    "    # The area estimate needs the transformer; skip it instead of raising\n",
    "    # NameError when the coordinate-transformer cell did not run.\n",
    "    if 'geo_transformer' in locals():\n",
    "        print(f\"   • Fire area (estimated): {len(fire_pixels_moderate) * geo_transformer.get_pixel_area_km2():.2f} km²\")\n",
    "\n",
    "print(f\"\\n✅ Next Steps:\")\n",
    "print(f\"   1. Implement manual fire annotation tool\")\n",
    "print(f\"   2. Create tiling pipeline for training\")\n",
    "print(f\"   3. Set up data augmentation\")\n",
    "print(f\"   4. Build PyTorch dataset class\")\n",
    "print(f\"   5. Start U-Net model development\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}