In [1]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install requests numpy pandas scikit-learn geopy tqdm joblib nominatim

Note: you may need to restart the kernel to use updated packages.


In [3]:
import requests
import numpy as np
import pandas as pd
import os
import time
import csv
import io
import json
from datetime import datetime, timedelta
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import geopy.distance
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
from tqdm import tqdm
import joblib
import warnings
import traceback 
warnings.filterwarnings('ignore')



class FireHotspotAnalyzer:
    def __init__(self):
        """Initializes the analyzer, loads/trains the model."""
        self.model = None
        self.scaler = None
        self.nasa_base_url_template = "https://firms.modaps.eosdis.nasa.gov/api/area/csv/{api_key}/{source}/{area}/{day_range}/{date}"
        self.nasa_api_key = "6504a8db9e133f9b5b02786e7738c49f" 
        self.ca_bounds = { 'min_lat': 32.5343, 'max_lat': 42.0095, 'min_lon': -124.4096, 'max_lon': -114.1308 }
        self.land_cover_categories = { 'urban': [21, 22, 23, 24], 'forest': [41, 42, 43], 'shrubland': [51, 52], 'grassland': [71, 72, 73, 74], 'cropland': [81, 82], 'wetland': [90, 95], 'barren': [11, 12, 31] }
        self.land_cover_category_to_int = { 'urban': 1, 'cropland': 2, 'grassland': 3, 'forest': 4, 'shrubland': 5, 'wetland': 6, 'barren': 7, 'other': 8 }
        self.features = ['bright_ti4', 'frp', 'confidence', 'hour', 'month', 'is_fire_season', 'land_cover', 'is_day']
        self.model_ready = False
        self.load_or_train_model()

    def zipcode_to_coordinates(self, zipcode):
        """Convert zipcode to lat/long coordinates using geopy (Nominatim)."""
        print(f"Attempting to geocode zipcode: {zipcode} using Nominatim (OSM)")
        geolocator = Nominatim(user_agent="ca_fire_analyzer_app/1.0")
        location = None
        try:
            query = f"{zipcode}, CA, USA"; location = geolocator.geocode(query, exactly_one=True, timeout=10)
            if location and location.latitude is not None and location.longitude is not None:
                lat, lon = location.latitude, location.longitude; print(f"Geocoding successful: {lat:.4f}, {lon:.4f}")
                if (self.ca_bounds['min_lat'] <= lat <= self.ca_bounds['max_lat'] and self.ca_bounds['min_lon'] <= lon <= self.ca_bounds['max_lon']): return lat, lon
                else: print(f"Warning: Geocoded Zipcode {zipcode} ({lat:.4f}, {lon:.4f}) outside CA."); print("Using Sacramento default."); return 38.5816, -121.4944
            else: print(f"Could not find coordinates for zipcode {zipcode} using Nominatim."); return 38.5816, -121.4944
        except GeocoderTimedOut: print("Error: Nominatim timed out."); print("Using Sacramento default."); return 38.5816, -121.4944
        except GeocoderServiceError as e: print(f"Error: Nominatim service error: {e}"); print("Using Sacramento default."); return 38.5816, -121.4944
        except Exception as e: print(f"Unexpected geocoding error: {e}"); print("Using Sacramento default."); return 38.5816, -121.4944

    def get_live_fire_data(self, lat, lon, radius_miles=20):
        """Get live (last 24h) fire data from NASA FIRMS API."""
        if self.nasa_api_key == "YOUR_NASA_FIRMS_API_KEY": print("\nERROR: NASA FIRMS API Key needs to be set.\n"); return pd.DataFrame()
        try:
            radius_deg = radius_miles / 69.0; end_date_utc = datetime.utcnow(); start_date_utc = end_date_utc - timedelta(days=1)
            min_lat = max(lat - radius_deg, self.ca_bounds['min_lat']); max_lat = min(lat + radius_deg, self.ca_bounds['max_lat']); min_lon = max(lon - radius_deg, self.ca_bounds['min_lon']); max_lon = min(lon + radius_deg, self.ca_bounds['max_lon'])
            area_coords = f"{min_lon:.4f},{min_lat:.4f},{max_lon:.4f},{max_lat:.4f}"; source = 'VIIRS_SNPP_NRT'; day_range = 1; query_date = start_date_utc.strftime('%Y-%m-%d')
            api_endpoint = self.nasa_base_url_template.format(api_key=self.nasa_api_key, source=source, area=area_coords, day_range=day_range, date=query_date)
            print(f"Requesting fire data using URL: {api_endpoint}"); response = requests.get(api_endpoint, timeout=45)
            if response.status_code == 200:
                csv_data = response.text
                if not csv_data or csv_data.strip() == "" or csv_data.startswith(("No hotspots detected", "No FIRMS data available")) or "<!DOCTYPE html>" in csv_data.lower() or "error" in csv_data.lower()[:100] or "invalid map key" in csv_data.lower() or "invalid coordinates" in csv_data.lower() or "Invalid date" in csv_data: print("No fire hotspots found or API returned non-CSV data."); return pd.DataFrame()
                try: df = pd.read_csv(io.StringIO(csv_data))
                except Exception as e: print(f"Error parsing/reading FIRMS CSV: {e}"); print("--- Response ---\n", csv_data, "\n--- End Response ---"); return pd.DataFrame()

                print(f"Received {len(df)} hotspots from FIRMS API for the bounding box.")

                column_mapping = { 'latitude': 'lat', 'longitude': 'lon', 'bright_ti4': 'bright_ti4', 'brightness': 'bright_ti4', 'bright_ti5': 'bright_ti5', 'acq_date': 'acq_date', 'acq_time': 'acq_time', 'satellite': 'satellite', 'instrument': 'instrument', 'confidence': 'confidence', 'version': 'version', 'bright_t31': 'bright_t31', 'frp': 'frp', 'daynight': 'daynight' }
                df.columns = df.columns.str.lower().str.strip(); df = df.rename(columns=column_mapping, errors='ignore')
                required_columns = ['lat', 'lon'];
                if not all(col in df.columns for col in required_columns): print(f"Error: Missing essential columns. Found: {df.columns.tolist()}"); return pd.DataFrame()
                # Ensure a brightness column exists, prioritizing ti4 -> ti5 -> t31
                if 'bright_ti4' not in df.columns or df['bright_ti4'].isnull().all():
                    if 'bright_ti5' in df.columns and not df['bright_ti5'].isnull().all():
                        df['bright_ti4'] = df['bright_ti5']
                        print("Using bright_ti5 for brightness.")
                    elif 'bright_t31' in df.columns and not df['bright_t31'].isnull().all():
                        df['bright_ti4'] = df['bright_t31']
                        print("Using bright_t31 for brightness.")
                    else:
                        print("Error: No usable brightness column (ti4, ti5, t31). Cannot process live data."); return pd.DataFrame()

                numeric_cols = ['lat', 'lon', 'bright_ti4', 'frp', 'confidence']
                for col in numeric_cols:
                    if col in df.columns:
                        if col == 'confidence':
                            conf_map = {'l': 30, 'n': 70, 'h': 90, 'low': 30, 'nominal': 70, 'high': 90}
                            df['confidence'] = df['confidence'].apply(lambda x: conf_map.get(str(x).lower(), x))
                            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(50)
                        else:
                            df[col] = pd.to_numeric(df[col], errors='coerce')
                    if col == 'frp': df['frp'].fillna(0.0, inplace=True)
                    elif col == 'confidence': df['confidence'].fillna(50.0, inplace=True)
                    elif col == 'bright_ti4': df['bright_ti4'].fillna(300.0, inplace=True)

                df.dropna(subset=['lat', 'lon', 'bright_ti4'], inplace=True)
                if df.empty: print("No valid hotspots remaining after cleaning."); return pd.DataFrame()

                user_coords = (lat, lon)
                df['distance_miles'] = df.apply(lambda row: geopy.distance.distance(user_coords, (row['lat'], row['lon'])).miles if pd.notna(row['lat']) and pd.notna(row['lon']) else np.nan, axis=1)
                df_filtered = df[df['distance_miles'] <= radius_miles].copy()
                print(f"Filtered down to {len(df_filtered)} hotspots within the specified radius of {radius_miles:.1f} miles.")
                return df_filtered.reset_index(drop=True)
            elif response.status_code in [401, 403]: print(f"Error: NASA FIRMS API returned {response.status_code}. Check API Key."); print(f"Response: {response.text[:500]}")
            elif response.status_code == 400: print(f"Error: NASA FIRMS API returned {response.status_code} (Bad Request). Check params."); print(f"Response: {response.text[:500]}")
            elif response.status_code == 404: print(f"Error: NASA FIRMS API returned {response.status_code} (Not Found). Check URL."); print(f"Response: {response.text[:500]}")
            elif response.status_code == 429: print(f"Error: NASA FIRMS API returned {response.status_code} (Too Many Requests).")
            else: print(f"Error: NASA FIRMS API status {response.status_code}"); print(f"Response: {response.text[:500]}")
            return pd.DataFrame()
        except requests.exceptions.Timeout: print("Error: Request to NASA FIRMS API timed out."); return pd.DataFrame()
        except requests.exceptions.RequestException as e: print(f"Error fetching fire data (RequestException): {e}"); return pd.DataFrame()
        except Exception as e: print(f"Unexpected error in get_live_fire_data: {e}"); traceback.print_exc(); return pd.DataFrame()

    # ************************************************************************
    # *** MODIFIED FUNCTION: Uses hardcoded rules instead of API           ***
    # ************************************************************************
    def get_land_cover_data(self, lat, lon):
        if not (isinstance(lat, (int, float)) and isinstance(lon, (int, float))):
            return 'other'

        ca_min_lat, ca_max_lat = 32.4, 42.1
        ca_min_lon, ca_max_lon = -124.5, -114.0
        if not (ca_min_lat <= lat <= ca_max_lat and ca_min_lon <= lon <= ca_max_lon):
             if (33.3 <= lat <= 33.5) and (-118.6 <= lon <= -118.3): return 'shrubland'
             if (32.8 <= lat <= 33.1) and (-118.6 <= lon <= -118.3): return 'barren'
             return 'other'

        if (32.7 <= lat <= 33.1) and (-115.2 <= lon <= -114.7): return 'barren'
        if (36.1 <= lat <= 36.6) and (-117.1 <= lon <= -116.7): return 'barren'
        if (35.6 <= lat <= 35.75) and (-116.3 <= lon <= -116.1): return 'barren'
        if (36.05 <= lat <= 36.2) and (-117.4 <= lon <= -117.25): return 'barren'
        if (36.2 <= lat <= 36.5) and (-118.1 <= lon <= -117.8): return 'barren'
        if (34.8 <= lat <= 35.05) and (-117.95 <= lon <= -117.7): return 'barren'
        if (34.58 <= lat <= 34.68) and (-117.63 <= lon <= -117.55): return 'barren'

        if (37.8 <= lat <= 38.3) and (-121.9 <= lon <= -121.3): return 'wetland'
        if (38.05 <= lat <= 38.25) and (-122.15 <= lon <= -119.9): return 'wetland'
        if (37.4 <= lat <= 37.6) and (-122.2 <= lon <= -121.95): return 'wetland'
        if (38.0 <= lat <= 38.15) and (-122.5 <= lon <= -122.2): return 'wetland'
        if (40.7 <= lat <= 40.9) and (-124.25 <= lon <= -124.05): return 'wetland'
        if (35.3 <= lat <= 35.4) and (-120.9 <= lon <= -120.8): return 'wetland'
        if (36.78 <= lat <= 36.85) and (-121.8 <= lon <= -121.7): return 'wetland'
        if (41.7 <= lat <= 42.0) and (-121.9 <= lon <= -121.3): return 'wetland' 

        if (33.5 <= lat <= 34.4) and (-119.3 <= lon <= -117.0): return 'urban'
        if (37.0 <= lat <= 38.3) and (-122.7 <= lon <= -121.4): return 'urban'
        if (32.5 <= lat <= 33.3) and (-117.5 <= lon <= -116.8): return 'urban'
        if (38.3 <= lat <= 38.9) and (-121.7 <= lon <= -121.0): return 'urban'
        if (33.8 <= lat <= 34.3) and (-117.6 <= lon <= -116.8): return 'urban'
        if (36.6 <= lat <= 37.0) and (-120.0 <= lon <= -119.5): return 'urban'
        if (35.1 <= lat <= 35.6) and (-119.25 <= lon <= -118.7): return 'urban'
        if (37.8 <= lat <= 38.2) and (-121.5 <= lon <= -121.0): return 'urban'
        if (37.4 <= lat <= 37.8) and (-121.2 <= lon <= -120.7): return 'urban'
        if (34.35 <= lat <= 34.55) and (-120.0 <= lon <= -119.4): return 'urban'
        if (34.05 <= lat <= 34.35) and (-119.4 <= lon <= -118.9): return 'urban'
        if (38.2 <= lat <= 38.6) and (-122.9 <= lon <= -122.5): return 'urban'
        if (38.05 <= lat <= 38.4) and (-122.3 <= lon <= -121.8): return 'urban'
        if (37.9 <= lat <= 38.1) and (-122.0 <= lon <= -121.6): return 'urban'
        if (40.4 <= lat <= 40.7) and (-122.5 <= lon <= -122.1): return 'urban'
        if (39.6 <= lat <= 39.85) and (-121.95 <= lon <= -121.65): return 'urban'
        if (34.5 <= lat <= 34.85) and (-118.35 <= lon <= -117.75): return 'urban'
        if (34.4 <= lat <= 34.7) and (-117.5 <= lon <= -117.1): return 'urban'
        if (33.4 <= lat <= 33.7) and (-117.3 <= lon <= -117.0): return 'urban'
        if (36.9 <= lat <= 37.05) and (-122.1 <= lon <= -121.85): return 'urban'
        if (36.5 <= lat <= 36.75) and (-121.9 <= lon <= -121.5): return 'urban'
        if (37.2 <= lat <= 37.4) and (-120.6 <= lon <= -120.3): return 'urban'
        if (36.1 <= lat <= 36.4) and (-119.5 <= lon <= -119.1): return 'urban'
        if (39.05 <= lat <= 39.25) and (-121.7 <= lon <= -121.5): return 'urban'
        if (40.7 <= lat <= 40.9) and (-124.2 <= lon <= -123.95): return 'urban'
        if (32.6 <= lat <= 32.9) and (-115.7 <= lon <= -115.3): return 'urban'
        if (38.85 <= lat <= 38.98) and (-120.05 <= lon <= -119.88): return 'urban'

        if (35.7 <= lat <= 37.2) and (-118.0 <= lon <= -116.5): return 'barren'
        if (34.5 <= lat <= 35.8) and (lon >= -116.5): return 'barren'
        if (32.8 <= lat <= 33.5) and (-116.6 <= lon <= -115.9): return 'barren'
        if (33.6 <= lat <= 34.2) and (-116.5 <= lon <= -115.5): return 'barren'
        if (36.3 <= lat <= 38.5) and (-119.2 <= lon <= -118.0):
             return 'barren'
        if (36.8 <= lat <= 38.0) and (-118.3 <= lon <= -117.8): return 'barren'
        if (32.6 <= lat <= 33.4) and (-115.9 <= lon <= -115.0): return 'cropland' 
        if (33.5 <= lat <= 33.8) and (-116.3 <= lon <= -115.8): return 'cropland'
        if (33.4 <= lat <= 33.8) and (-114.8 <= lon <= -114.4): return 'cropland'
        if (36.0 <= lat <= 36.9) and (-121.8 <= lon <= -120.8): return 'cropland'
        if (34.6 <= lat <= 35.1) and (-120.7 <= lon <= -120.2): return 'cropland'
        if (35.0 <= lat <= 37.0) and (-120.8 <= lon <= -118.8): return 'cropland'
        if (37.0 <= lat <= 38.0) and (-121.5 <= lon <= -119.4): return 'cropland'
        if (38.0 <= lat <= 40.0) and (-122.4 <= lon <= -121.3): return 'cropland'
        if (41.4 <= lat <= 41.7) and (-123.0 <= lon <= -122.6): return 'cropland'
        if (41.1 <= lat <= 41.7) and (-120.4 <= lon <= -120.0): return 'cropland'
        if (41.8 <= lat <= 42.0) and (-121.6 <= lon <= -121.2): return 'cropland'

        if (35.0 <= lat <= 35.4) and (-119.9 <= lon <= -119.2): return 'grassland'
        if (34.8 <= lat <= 35.2) and (-119.0 <= lon <= -118.4): return 'grassland'
        if (35.5 <= lat <= 37.5) and (-121.0 <= lon <= -120.0): return 'grassland' 
        if (36.0 <= lat <= 39.5) and (-121.2 <= lon <= -120.0): return 'grassland'
        if (32.8 <= lat <= 33.4) and (-117.0 <= lon <= -116.5): return 'grassland'

        if (lat >= 37.0) and (lon <= -123.4) and (lon >= -124.4): return 'forest'
        if (39.0 <= lat <= 42.0) and (-123.8 <= lon <= -121.2): return 'forest'
        if (lat >= 41.0) and (lon >= -121.8): return 'forest'
        if (40.0 <= lat <= 41.5) and (-121.8 <= lon <= -120.8): return 'forest'
        if (35.5 <= lat <= 40.0) and (-121.0 <= lon <= -118.5): return 'forest'
        if (38.8 <= lat <= 39.4) and (-120.3 <= lon <= -119.7): return 'forest'
        if (33.5 <= lat <= 35.0) and (-118.8 <= lon <= -116.2): return 'forest'
        if (34.5 <= lat <= 36.8) and (-121.5 <= lon <= -119.8): return 'forest'
        if (34.8 <= lat <= 35.2) and (-119.0 <= lon <= -118.2): return 'forest'

        if (32.6 <= lat <= 34.5) and (-119.0 <= lon <= -116.5): return 'shrubland'
        if (34.5 <= lat <= 37.5) and (-121.8 <= lon <= -119.0): return 'shrubland'
        if (35.5 <= lat <= 40.0) and (-121.5 <= lon <= -119.5): return 'shrubland'
        if (lat >= 36.5) and (lon >= -120.0): 
             return 'shrubland'
        if (38.5 <= lat <= 40.0) and (-123.0 <= lon <= -122.0): return 'shrubland'

        return 'shrubland'

    def get_land_cover_category(self, code):
        """Convert NLCD land cover code to our category string (Kept for potential future use/consistency)"""
        for category, codes in self.land_cover_categories.items():
            if code in codes: return category
        return 'other'

    # --- REFINED: Build training data from LOCAL FILE ---
    def build_training_dataset_from_local_file(self):
        """
        Builds training dataset by reading a pre-downloaded historical FIRMS CSV file,
        processing it robustly, and inferring labels. Uses hardcoded land cover.
        """
        training_file = 'historical_training_data.csv'
        print(f"--- Building Training Data from: {training_file} ---")
        print("Ensure this file contains historical FIRMS data (VIIRS or MODIS).")
        print("Download Tool: https://firms.modaps.eosdis.nasa.gov/download/")

        if not os.path.exists(training_file):
            print(f"Error: Training file '{training_file}' not found.")
            return pd.DataFrame()

        col_map = { 'latitude': 'lat', 'longitude': 'lon', 'acq_date': 'acq_date', 'acq_time': 'acq_time', 'confidence': 'confidence', 'frp': 'frp', 'daynight': 'daynight' }
        brightness_cols = ['bright_ti4', 'bright_t31', 'brightness']
        cols_to_load = list(col_map.keys()) + brightness_cols

        try:
            print(f"Loading essential columns from {training_file}...")
            df_hist = pd.read_csv(training_file, usecols=lambda c: c.lower() in cols_to_load, low_memory=False)
            df_hist.columns = df_hist.columns.str.lower()
            print(f"Loaded {len(df_hist)} records.")
            df_processed = df_hist.rename(columns=col_map, errors='ignore')

            selected_brightness_col = None
            for b_col in brightness_cols:
                if b_col in df_processed.columns and not df_processed[b_col].isnull().all():
                    selected_brightness_col = b_col
                    break
            if selected_brightness_col:
                print(f"Using '{selected_brightness_col}' as primary brightness source.")
                if selected_brightness_col != 'bright_ti4':
                    df_processed['bright_ti4'] = df_processed[selected_brightness_col]
            else:
                print("Warning: No usable brightness column found. Assigning default 300K.")
                df_processed['bright_ti4'] = 300.0
            cols_to_drop = [b_col for b_col in brightness_cols if b_col != 'bright_ti4' and b_col in df_processed.columns]
            if cols_to_drop: df_processed = df_processed.drop(columns=cols_to_drop)

            essential_cols = ['lat', 'lon', 'acq_date', 'acq_time', 'bright_ti4', 'confidence', 'frp', 'daynight']
            missing_essentials = [c for c in essential_cols if c not in df_processed.columns]
            if missing_essentials:
                if 'frp' in missing_essentials: df_processed['frp'] = 0.0; print("Added missing 'frp' column with default 0.")
                if 'confidence' in missing_essentials: df_processed['confidence'] = 50.0; print("Added missing 'confidence' column with default 50.")
                if 'daynight' in missing_essentials: df_processed['daynight'] = 'D'; print("Added missing 'daynight' column with default 'D'.")
                missing_essentials = [c for c in essential_cols if c not in df_processed.columns]
                if missing_essentials: print(f"ERROR: Essential columns still missing: {missing_essentials}. Check CSV."); return pd.DataFrame()

        except FileNotFoundError: print(f"Error: Training file '{training_file}' not found."); return pd.DataFrame()
        except Exception as e: print(f"Error reading CSV structure: {e}"); traceback.print_exc(); return pd.DataFrame()

        print("Cleaning data and converting types...")
        try:
            df_processed['lat'] = pd.to_numeric(df_processed['lat'], errors='coerce')
            df_processed['lon'] = pd.to_numeric(df_processed['lon'], errors='coerce')
            df_processed['bright_ti4'] = pd.to_numeric(df_processed['bright_ti4'], errors='coerce')
            df_processed['frp'] = pd.to_numeric(df_processed['frp'], errors='coerce')
            conf_map = {'low': 30, 'nominal': 70, 'high': 90, 'l': 30, 'n': 70, 'h': 90}
            if pd.api.types.is_object_dtype(df_processed['confidence']):
                df_processed['confidence'] = df_processed['confidence'].astype(str).str.lower().map(conf_map).fillna(df_processed['confidence'])
            df_processed['confidence'] = pd.to_numeric(df_processed['confidence'], errors='coerce')

            df_processed['lat'].fillna(0, inplace=True); df_processed['lon'].fillna(0, inplace=True)
            df_processed['bright_ti4'].fillna(300.0, inplace=True); df_processed['frp'].fillna(0.0, inplace=True)
            df_processed['confidence'].fillna(50.0, inplace=True)
            df_processed.dropna(subset=['acq_date', 'acq_time'], inplace=True)
            df_processed = df_processed[(df_processed['lat'] != 0) & (df_processed['lon'] != 0)]
            if df_processed.empty: print("No valid records after cleaning."); return pd.DataFrame()
        except Exception as e: print(f"Error during cleaning/conversion: {e}"); traceback.print_exc(); return pd.DataFrame()

        print("Adding time features...")
        try:
            df_processed['acq_time'] = df_processed['acq_time'].astype(str).str.replace(r'\.0$', '', regex=True).str.zfill(4)
            df_processed['datetime'] = pd.to_datetime(df_processed['acq_date'] + ' ' + df_processed['acq_time'], format='%Y-%m-%d %H%M', errors='coerce')
            df_processed.dropna(subset=['datetime'], inplace=True)
            df_processed['hour'] = df_processed['datetime'].dt.hour; df_processed['month'] = df_processed['datetime'].dt.month
            df_processed['is_fire_season'] = df_processed['month'].between(6, 11).astype(int)
            df_processed['is_day'] = df_processed['daynight'].apply(lambda x: 1 if str(x).upper() == 'D' else 0) if 'daynight' in df_processed.columns else df_processed['hour'].between(7, 18).astype(int)
            if df_processed.empty: print("No valid records after adding time."); return pd.DataFrame()
        except Exception as e:
            print(f"Error adding time features: {e}.");
            if 'hour' not in df_processed: df_processed['hour'] = 12
            if 'month' not in df_processed: df_processed['month'] = 6
            if 'is_fire_season' not in df_processed: df_processed['is_fire_season'] = 1
            if 'is_day' not in df_processed: df_processed['is_day'] = 1
            # Ensure datetime is present even if derived features failed
            if 'datetime' not in df_processed or df_processed['datetime'].isnull().all():
                print("FATAL: Cannot proceed without valid datetime information.")
                return pd.DataFrame()
            df_processed.dropna(subset=['datetime'], inplace=True) # Drop rows where datetime conversion failed
            if df_processed.empty: print("No valid records after time error handling."); return pd.DataFrame()


        print("Adding land cover (using hardcoded rules)...")
        try:
            land_covers_int = []
            # No tqdm here as it should be faster
            for _, row in df_processed.iterrows():
                land_cover_str = self.get_land_cover_data(row['lat'], row['lon'])
                land_covers_int.append(self.land_cover_category_to_int.get(land_cover_str, 8)) 
            df_processed['land_cover'] = land_covers_int
            df_processed['land_cover'] = df_processed['land_cover'].astype(int)
        except Exception as e:
            print(f"Error adding land cover using hardcoded rules: {e}. Assigning default 'other'.")
            traceback.print_exc()
            df_processed['land_cover'] = self.land_cover_category_to_int['other']

        print("Inferring 'actual_fire' labels (adjusted rules)...")
        try:
            vegetation_mask = df_processed['land_cover'].isin([2, 3, 4, 5]) 
            high_conf_mask = df_processed['confidence'] >= 75
            nominal_conf_mask = (df_processed['confidence'] >= 50) & (df_processed['confidence'] < 75)
            moderate_energy_mask = (df_processed['frp'] >= 5) & (df_processed['bright_ti4'] >= 320)
            df_processed['actual_fire'] = 0
            df_processed.loc[ (high_conf_mask & vegetation_mask) | (nominal_conf_mask & moderate_energy_mask & vegetation_mask), 'actual_fire' ] = 1
            label_distribution = df_processed['actual_fire'].value_counts().to_dict()
            print(f"Inferred label distribution: {label_distribution}")
            if len(label_distribution) < 2: print("\nWARNING: Only one inferred class found. Check CSV content/inference rules.");
        except Exception as e: print(f"Error during label inference: {e}"); traceback.print_exc(); return pd.DataFrame()

        print("Filtering and balancing dataset...")
        try:
            low_conf_non_fire_mask = (df_processed['actual_fire'] == 0) & (df_processed['confidence'] < 30)
            df_filtered = df_processed[~low_conf_non_fire_mask].copy()
            print(f"Filtered to {len(df_filtered)} points.")
            label_counts = df_filtered['actual_fire'].value_counts()
            if not label_counts.empty and len(label_counts) > 1:
                min_count = label_counts.min(); min_sample_size = max(100, min_count)
                print(f"Attempting to balance classes to approx {min_sample_size} samples each.")
                n_fire = label_counts.get(1, 0)
                n_non_fire = label_counts.get(0, 0)
                sample_fire = min(min_sample_size, n_fire) if n_fire > 0 else 0
                sample_non_fire = min(min_sample_size, n_non_fire) if n_non_fire > 0 else 0

                df_balanced_list = []
                if sample_fire > 0:
                    df_balanced_list.append(df_filtered[df_filtered['actual_fire'] == 1].sample(n=sample_fire, random_state=42))
                if sample_non_fire > 0:
                    df_balanced_list.append(df_filtered[df_filtered['actual_fire'] == 0].sample(n=sample_non_fire, random_state=42))

                if df_balanced_list:
                    df_balanced = pd.concat(df_balanced_list)
                    print(f"Balanced dataset to {len(df_balanced)} points.")
                    final_df = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True) 
                else:
                     print("Error: Could not create balanced dataset (insufficient data after filtering).")
                     final_df = pd.DataFrame()

            elif not label_counts.empty: print("Only one class remains. Using filtered data (training might fail)."); final_df = df_filtered
            else: print("Error: No data remaining after filtering."); final_df = pd.DataFrame()
        except Exception as e: print(f"Error during filtering/balancing: {e}"); traceback.print_exc(); return pd.DataFrame()

        if not final_df.empty:
            cols_for_model = self.features + ['actual_fire']
            missing_final_cols = [c for c in cols_for_model if c not in final_df.columns]
            if missing_final_cols: print(f"ERROR: Final dataset missing model columns: {missing_final_cols}"); return pd.DataFrame()
            final_df = final_df[cols_for_model].reset_index(drop=True)
            if final_df.isnull().any().any():
                print("Warning: NaNs found in final data. Filling with median/mode.")
                for col in final_df.columns:
                    if final_df[col].isnull().any():
                        try:
                            fill_value = final_df[col].median() if pd.api.types.is_numeric_dtype(final_df[col]) else final_df[col].mode()[0]
                            final_df[col].fillna(fill_value, inplace=True)
                        except IndexError: 
                             print(f"Could not determine mode for column {col}. Filling with 0 or 'Unknown'.")
                             fill_value = 0 if pd.api.types.is_numeric_dtype(final_df[col]) else 'Unknown'
                             final_df[col].fillna(fill_value, inplace=True)

            print(f"Prepared final training dataset with {len(final_df)} records.")
            print(f"Final columns: {final_df.columns.tolist()}")
            print(f"Final label distribution:\n{final_df['actual_fire'].value_counts()}")
        else: print("Error: Final training dataset is empty.")
        print("--- Finished Building Training Data ---")
        return final_df

    def load_or_train_model(self):
        """Load existing model/scaler or train new one using LOCAL historical data file."""
        model_path = 'wildfire_classifier.pkl'; scaler_path = 'feature_scaler.pkl'
        if os.path.exists(model_path) and os.path.exists(scaler_path):
            try:
                print("Loading existing model and scaler..."); self.model = joblib.load(model_path); self.scaler = joblib.load(scaler_path)
                if hasattr(self.model, 'n_features_in_') and self.model.n_features_in_ != len(self.features):
                   raise ValueError(f"Model feature mismatch. Expected {self.model.n_features_in_}, need {len(self.features)} based on current config.")
                if hasattr(self.scaler, 'n_features_in_') and self.scaler.n_features_in_ != len(self.features):
                    raise ValueError(f"Scaler feature mismatch. Expected {self.scaler.n_features_in_}, need {len(self.features)} based on current config.")

                if self.model is None or self.scaler is None: raise ValueError("Loaded model or scaler is None.")
                print("Model and scaler loaded successfully!"); self.model_ready = True; return
            except Exception as e:
                print(f"Error loading model/scaler or feature mismatch: {e}. Forcing retrain.")
                self.model = None; self.scaler = None; self.model_ready = False;
                try:
                    if os.path.exists(model_path): os.remove(model_path)
                    if os.path.exists(scaler_path): os.remove(scaler_path)
                    print("Removed potentially incompatible model/scaler files.")
                except OSError as oe: print(f"Warning: Could not remove old model/scaler file: {oe}")

        print("\n--- Training New Model ---")
        try:
            df_train = self.build_training_dataset_from_local_file()
            if df_train is None or df_train.empty: raise SystemExit("Model training failed: No training data generated or returned.")
            if len(df_train) < 50: raise SystemExit(f"Model training failed: Insufficient training data ({len(df_train)} records). Needs at least 50.")
            missing_cols = [col for col in self.features if col not in df_train.columns]
            if missing_cols: raise SystemExit(f"Model training failed: Missing required feature columns in final training data: {missing_cols}")

            X = df_train[self.features]; y = df_train['actual_fire']

            if y.nunique() < 2:
                print(f"\nERROR: Training data contains only one class: {y.unique().tolist()}. Cannot train classifier.")
                raise SystemExit("Model training failed: Training data only contains one class label.")

            print(f"Using {len(df_train)} records for training. Class distribution:\n{y.value_counts(normalize=True)}")
            if y.value_counts().min() < 2: 
                 print("Warning: Very few samples of one class, using standard train/test split instead of stratified.")
                 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
            else:
                 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

            self.scaler = StandardScaler(); X_train_scaled = self.scaler.fit_transform(X_train); X_test_scaled = self.scaler.transform(X_test)
            self.model = RandomForestClassifier(n_estimators=150, max_depth=12, min_samples_split=5, min_samples_leaf=3, random_state=42, class_weight='balanced', n_jobs=-1)
            self.model.fit(X_train_scaled, y_train)
            train_score = self.model.score(X_train_scaled, y_train); test_score = self.model.score(X_test_scaled, y_test)
            print(f"\nModel training complete! Train Acc: {train_score:.4f}, Test Acc: {test_score:.4f}")

            try:
                from sklearn.metrics import classification_report
                y_pred_test = self.model.predict(X_test_scaled)
                print("\nClassification Report (Test Set):\n", classification_report(y_test, y_pred_test, target_names=['Non-Wildfire', 'Wildfire'], zero_division=0))
            except ImportError: pass
            except Exception as cr_e: print(f"Could not generate classification report: {cr_e}")

            feature_importance = sorted(zip(self.features, self.model.feature_importances_), key=lambda x: x[1], reverse=True)
            print("\nFeature Importance:"); [print(f"- {f}: {imp:.4f}") for f, imp in feature_importance]
            joblib.dump(self.model, model_path); joblib.dump(self.scaler, scaler_path); print(f"\nModel saved to {model_path}, Scaler saved to {scaler_path}"); self.model_ready = True

        except SystemExit as se: print(f"\nExiting due to training failure: {se}"); self.model_ready = False
        except Exception as e: print(f"\nAn unexpected error occurred during model training: {e}"); traceback.print_exc(); self.model = None; self.scaler = None; self.model_ready = False

    def enrich_fire_data(self, fire_df):
        if fire_df.empty: return fire_df
        if not self.model_ready: print("Error: Model not ready."); return pd.DataFrame()

        enriched_df = fire_df.copy()
        now_utc = datetime.utcnow() 

        if 'acq_date' in enriched_df.columns and 'acq_time' in enriched_df.columns:
             try:
                enriched_df['acq_time_str'] = enriched_df['acq_time'].astype(str).str.replace(r'\.0$', '', regex=True).str.zfill(4)
                enriched_df['datetime_utc'] = pd.to_datetime(enriched_df['acq_date'] + ' ' + enriched_df['acq_time_str'], format='%Y-%m-%d %H%M', errors='coerce')
                enriched_df['datetime_utc'].fillna(now_utc, inplace=True)
             except Exception as dt_err:
                 print(f"Warning: Error processing FIRMS acq_date/time: {dt_err}. Using current time.")
                 enriched_df['datetime_utc'] = now_utc
        else:
             print("Warning: FIRMS acq_date/time not found. Using current time for enrichment.")
             enriched_df['datetime_utc'] = now_utc

        enriched_df['hour'] = enriched_df['datetime_utc'].dt.hour
        enriched_df['month'] = enriched_df['datetime_utc'].dt.month
        enriched_df['is_fire_season'] = enriched_df['month'].between(6, 11).astype(int)

        if 'daynight' in enriched_df.columns:
             enriched_df['is_day'] = enriched_df['daynight'].apply(lambda x: 1 if str(x).upper() == 'D' else 0)
        else:
             enriched_df['is_day'] = enriched_df['hour'].between(7, 18).astype(int) # Approx. daytime hours

        print("Enriching live data with land cover (using hardcoded rules)...")
        land_covers_int = []
        for _, row in enriched_df.iterrows():
            land_cover_str = self.get_land_cover_data(row['lat'], row['lon'])
            land_covers_int.append(self.land_cover_category_to_int.get(land_cover_str, 8)) # 8 = other
        enriched_df['land_cover'] = land_covers_int
        enriched_df['land_cover'] = enriched_df['land_cover'].astype(int)

        if 'frp' not in enriched_df.columns or enriched_df['frp'].isnull().any():
            if 'frp' not in enriched_df.columns: enriched_df['frp'] = 0.0
            enriched_df['frp'] = pd.to_numeric(enriched_df['frp'], errors='coerce').fillna(0.0)
            print("Filled missing/invalid FRP values in live data with 0.")

        if 'confidence' not in enriched_df.columns or enriched_df['confidence'].isnull().any():
             if 'confidence' not in enriched_df.columns: enriched_df['confidence'] = 50.0 # Assign default if column missing
             conf_map = {'l': 30, 'n': 70, 'h': 90, 'low': 30, 'nominal': 70, 'high': 90}
             if pd.api.types.is_object_dtype(enriched_df['confidence']):
                enriched_df['confidence'] = enriched_df['confidence'].astype(str).str.lower().map(conf_map).fillna(enriched_df['confidence'])
             enriched_df['confidence'] = pd.to_numeric(enriched_df['confidence'], errors='coerce').fillna(50.0) # Default missing to 50
             print("Filled/Standardized confidence values in live data (defaulting missing to 50).")

        if 'bright_ti4' not in enriched_df.columns or enriched_df['bright_ti4'].isnull().any():
             if 'bright_ti4' not in enriched_df.columns: enriched_df['bright_ti4'] = 300.0 # Assign default if column missing
             enriched_df['bright_ti4'] = pd.to_numeric(enriched_df['bright_ti4'], errors='coerce').fillna(300.0) # Default missing to 300
             print("Filled missing/invalid bright_ti4 values in live data with 300K.")

        return enriched_df


    def predict_fire_types(self, fire_df):
        if fire_df is None or fire_df.empty: return pd.DataFrame()
        if not self.model_ready: print("Error: Model not ready."); return pd.DataFrame()

        enriched_df = self.enrich_fire_data(fire_df)
        if enriched_df.empty: print("Enrichment resulted in empty dataframe."); return pd.DataFrame()

        missing_features = [f for f in self.features if f not in enriched_df.columns]
        if missing_features: print(f"ERROR: Features missing for prediction: {missing_features}. Check enrichment process."); return pd.DataFrame()

        # Final check for NaNs before scaling
        if enriched_df[self.features].isnull().any().any():
            print("Warning: NaNs detected before scaling/prediction. Filling again.");
            for feature in self.features:
                if enriched_df[feature].isnull().any():
                    try:
                         fill_value = enriched_df[feature].median() if pd.api.types.is_numeric_dtype(enriched_df[feature]) else enriched_df[feature].mode()[0]
                         enriched_df[feature].fillna(fill_value, inplace=True)
                    except IndexError: # Handle cases where mode() is empty
                         print(f"Could not determine mode for feature {feature}. Filling with 0 or 'Unknown'.")
                         fill_value = 0 if pd.api.types.is_numeric_dtype(enriched_df[feature]) else 'Unknown'
                         enriched_df[feature].fillna(fill_value, inplace=True)


        X_live = enriched_df[self.features]
        try:
             X_live_scaled = self.scaler.transform(X_live)
        except ValueError as ve:
             print(f"Error scaling features: {ve}. Data shape: {X_live.shape}, Scaler expects {self.scaler.n_features_in_} features.")
             print(f"Columns being scaled: {X_live.columns.tolist()}")
             return pd.DataFrame()
        except Exception as e:
             print(f"Unexpected error during scaling: {e}"); traceback.print_exc(); return pd.DataFrame()

        try:
             y_prob = self.model.predict_proba(X_live_scaled)[:, 1]
        except Exception as e:
             print(f"Error predicting probabilities: {e}"); traceback.print_exc(); return pd.DataFrame()

        enriched_df['wildfire_probability'] = y_prob
        wildfire_threshold = 0.55 # Keep threshold consistent
        enriched_df['is_wildfire_pred'] = (y_prob >= wildfire_threshold).astype(int)

        def get_fire_type(row):
            if row['is_wildfire_pred'] == 1: return 'Wildfire (Predicted)'
            elif row['land_cover'] == 2 and row['bright_ti4'] < 340 and row['frp'] < 50:
                 return 'Agricultural Burning'
            elif row['confidence'] < 40 and row['frp'] < 5 and row['bright_ti4'] < 320:
                 return 'Other Thermal Anomaly (Low Confidence)'
            else: return 'Other Heat Source'

        enriched_df['predicted_type'] = enriched_df.apply(get_fire_type, axis=1)
        return enriched_df


    def get_reason_for_classification(self, row):
        int_to_land_cover_name = {v: k for k, v in self.land_cover_category_to_int.items()}
        land_cover_type = int_to_land_cover_name.get(row.get('land_cover', 8), 'other').capitalize()

        prob = row.get('wildfire_probability', 0) * 100
        pred_type = row.get('predicted_type', 'Unknown')
        bright = row.get('bright_ti4', 0)
        frp = row.get('frp', 0)
        conf = row.get('confidence', 0)

        reason = f"Located in {land_cover_type} terrain. "

        if pred_type == 'Wildfire (Predicted)':
            factors = []
            if bright > 335: factors.append(f"high heat ({bright:.0f}K)")
            if frp > 20: factors.append(f"significant energy release ({frp:.1f} MW)") # Lowered FRP threshold slightly
            if conf > 70: factors.append(f"high confidence ({conf:.0f}%)")
            if row.get('land_cover') in [3, 4, 5]: factors.append("typical wildfire environment")

            if factors:
                reason += f"Classified as potential Wildfire (Prob: {prob:.1f}%) due to factors like: {', '.join(factors)}."
            else:
                reason += f"Classified as potential Wildfire based on overall model prediction (Prob: {prob:.1f}%)."

        elif pred_type == 'Agricultural Burning':
            reason += f"Likely Agricultural burning. Reasons: Location classified as Cropland, thermal signature within expected range (Heat: {bright:.0f}K, FRP: {frp:.1f} MW), low predicted wildfire probability ({prob:.1f}%)."

        elif pred_type == 'Other Thermal Anomaly (Low Confidence)':
            reason += f"Likely non-fire thermal anomaly. Reasons: Low detection confidence ({conf:.0f}%), low energy ({frp:.1f} MW), low heat ({bright:.0f}K), very low wildfire probability ({prob:.1f}%)."

        else: # Other Heat Source
            reason += f"Classified as Other Heat Source (non-wildfire). Reasons: Does not match typical wildfire profile (Prob: {prob:.1f}%), characteristics (Heat: {bright:.0f}K, FRP: {frp:.1f} MW, Conf: {conf:.0f}%) inconsistent with wildfire or specific other types like Ag burning."

        return reason


    def get_direction(self, from_lat, from_lon, to_lat, to_lon):
        """Name the 8-point compass direction from one coordinate to another.

        Computes the initial bearing in degrees from (``from_lat``, ``from_lon``)
        to (``to_lat``, ``to_lon``) and snaps it to the nearest 45-degree sector.

        Returns:
            One of 'North', 'Northeast', 'East', 'Southeast', 'South',
            'Southwest', 'West', 'Northwest', or "Unknown" when the computation
            raises ``TypeError`` or ``ValueError`` (e.g. a ``None`` input).
        """
        compass = ('North', 'Northeast', 'East', 'Southeast', 'South', 'Southwest', 'West', 'Northwest')
        try:
            lon_diff = np.radians(to_lon - from_lon)
            lat_a = np.radians(from_lat)
            lat_b = np.radians(to_lat)
            # Components passed to arctan2 to obtain the bearing.
            east = np.sin(lon_diff) * np.cos(lat_b)
            north = np.cos(lat_a) * np.sin(lat_b) - \
                np.sin(lat_a) * np.cos(lat_b) * np.cos(lon_diff)
            heading = (np.degrees(np.arctan2(east, north)) + 360) % 360
            # The modulo wraps headings just below 360 degrees back to index 0.
            return compass[round(heading / 45) % 8]
        except (TypeError, ValueError):
            return "Unknown"

    def run_interactive_detection(self):
        """Runs the main interactive loop for fire detection."""
        print("\n" + "="*40 + "\n🔥 CA FIRE HOTSPOT DETECTION SYSTEM 🔥\n" + "="*40); print("Detects & classifies heat hotspots in California."); print("Uses NASA FIRMS, Geopy (OSM), Hardcoded Land Cover & ML model."); print("-" * 40) # Updated description
        if self.nasa_api_key == "YOUR_NASA_FIRMS_API_KEY" or not self.nasa_api_key: print("\nERROR: NASA FIRMS API Key not set in the script.\n"); return
        if not self.model_ready: print("\nERROR: ML model not ready. Check loading/training logs.\n"); return

        while True:
            print("\n--- Location Input ---"); print("1. Enter California Zipcode"); print("2. Enter Latitude/Longitude"); print("3. Exit")
            choice = input("Select option (1-3): ").strip(); lat, lon = None, None
            if choice == '3': print("\nExiting system. Stay safe!"); break
            elif choice == '1':
                zipcode = input("Enter CA zipcode: ").strip()
                if not zipcode.isdigit() or len(zipcode) != 5: print("Invalid zipcode format."); continue
                lat, lon = self.zipcode_to_coordinates(zipcode)
                # Check if default Sacramento coords were returned AND it wasn't a Sac zipcode
                if lat == 38.5816 and lon == -121.4944 and not (zipcode.startswith('958') or zipcode.startswith('956')): print("Warning: Using default location (Sacramento) due to geocoding failure or zipcode outside CA.")
                else: print(f"Using location for {zipcode}: {lat:.4f}, {lon:.4f}")
            elif choice == '2':
                try:
                     lat_str = input("Enter Latitude (e.g., 34.05): ").strip(); lon_str = input("Enter Longitude (e.g., -118.24): ").strip();
                     lat = float(lat_str); lon = float(lon_str)
                except ValueError: print("Invalid coordinate format. Please enter numbers."); continue
                if not (self.ca_bounds['min_lat'] <= lat <= self.ca_bounds['max_lat'] and self.ca_bounds['min_lon'] <= lon <= self.ca_bounds['max_lon']):
                    print("Coordinates are outside the approximate bounds of California. Please enter valid CA coordinates."); continue
            else: print("Invalid choice."); continue

            try:
                 radius_str = input("Enter search radius in km (e.g., 50-250, default 100): ").strip();
                 radius_km = float(radius_str) if radius_str else 100.0;
                 radius_km = max(20.0, min(300.0, radius_km)) # Adjusted bounds slightly
            except ValueError: print("Invalid radius format. Using default 100 km."); radius_km = 100.0
            radius_miles = radius_km * 0.621371

            print(f"\n--- Searching ---"); print(f"Searching within {radius_km:.1f} km ({radius_miles:.1f} miles) of ({lat:.4f}, {lon:.4f})...")
            fire_df = self.get_live_fire_data(lat, lon, radius_miles=radius_miles)

            if fire_df is None or fire_df.empty:
                 print(f"\n--- Results ---\n✅ No active fire hotspots detected/retrieved within {radius_km:.1f} km.")
            else:
                print("Analyzing detected hotspots...")
                results_df = self.predict_fire_types(fire_df)
                if results_df is None or results_df.empty:
                     print("\n--- Results ---\n❌ Analysis failed after retrieving data. Check logs for errors during enrichment or prediction.")
                else:
                    if 'distance_miles' in results_df.columns and results_df['distance_miles'].notna().all():
                         results_df['distance_km'] = results_df['distance_miles'] / 0.621371
                    else:
                         results_df['distance_km'] = np.nan 
                    results_df['reason'] = results_df.apply(self.get_reason_for_classification, axis=1)
                    results_df['direction'] = results_df.apply(lambda row: self.get_direction(lat, lon, row.get('lat'), row.get('lon')), axis=1)
                    results_df = results_df.sort_values('distance_miles', na_position='last') # Sort by distance

                    print("\n" + "="*25 + " RESULTS " + "="*25); print(f"Found {len(results_df)} thermal hotspots within {radius_km:.1f} km.")
                    type_counts = results_df['predicted_type'].value_counts();
                    print("\nSummary:"); [print(f"- {count} classified as: {ftype}") for ftype, count in type_counts.items()]

                    wildfires = results_df[results_df['predicted_type'] == 'Wildfire (Predicted)']
                    if not wildfires.empty:
                         print("\n" + "!"*15 + " ⚠️ WILDFIRE ALERT ⚠️ " + "!"*15);
                         print(f"🚨 {len(wildfires)} hotspot(s) classified as potential WILDFIRES!");
                         closest_wildfire = wildfires.iloc[0]
                         dist_km_str = f"{closest_wildfire.get('distance_km', 'N/A'):.1f}" if pd.notna(closest_wildfire.get('distance_km')) else "N/A"
                         dist_mi_str = f"{closest_wildfire.get('distance_miles', 'N/A'):.1f}" if pd.notna(closest_wildfire.get('distance_miles')) else "N/A"
                         direction_str = closest_wildfire.get('direction', '')
                         print(f"   Closest: {dist_km_str} km ({dist_mi_str} mi) {direction_str}.")
                         print("!"*50)

                    print("\n--- Detailed Hotspot Information (Closest First) ---")
                    for i, (_, hotspot) in enumerate(results_df.iterrows(), 1):
                         print(f"\n[{i}] Details:"); print(f"  Type: {hotspot.get('predicted_type', 'N/A').upper()}")
                         dist_km_str = f"{hotspot.get('distance_km', 'N/A'):.1f}" if pd.notna(hotspot.get('distance_km')) else "N/A"
                         dist_mi_str = f"{hotspot.get('distance_miles', 'N/A'):.1f}" if pd.notna(hotspot.get('distance_miles')) else "N/A"
                         direction_str = hotspot.get('direction', '')
                         print(f"  Dist: {dist_km_str} km ({dist_mi_str} mi) {direction_str}")
                         lat_str = f"{hotspot.get('lat', 'N/A'):.4f}" if pd.notna(hotspot.get('lat')) else "N/A"
                         lon_str = f"{hotspot.get('lon', 'N/A'):.4f}" if pd.notna(hotspot.get('lon')) else "N/A"
                         print(f"  Loc:  {lat_str}, {lon_str}")
                         bright_str = f"{hotspot.get('bright_ti4', 'N/A'):.1f} K" if pd.notna(hotspot.get('bright_ti4')) else "Heat: N/A"
                         frp_str = f" | FRP: {hotspot['frp']:.1f} MW" if 'frp' in hotspot and pd.notna(hotspot['frp']) else ""
                         conf_str = f" | Conf: {hotspot['confidence']:.0f}%" if 'confidence' in hotspot and pd.notna(hotspot['confidence']) else ""
                         print(f"  Sensor: {bright_str}{frp_str}{conf_str}")
                         prob_str = f"{hotspot['wildfire_probability']*100:.1f}%" if 'wildfire_probability' in hotspot and pd.notna(hotspot['wildfire_probability']) else "N/A"
                         print(f"  Wildfire Prob: {prob_str}")
                         print(f"  Assessment: {hotspot.get('reason', 'N/A')}")

                    if not wildfires.empty:
                        print("\n--- ⚠️ Safety Recommendations (Wildfire Predicted) ⚠️ ---");
                        print("1. VERIFY via official sources (CAL FIRE, county alerts, news). This is a prediction.");
                        print("2. MONITOR the situation closely for changes.");
                        print("3. REVIEW your evacuation plan & ensure your 'go bag' is ready.");
                        closest_dist_miles = wildfires.iloc[0].get('distance_miles', float('inf')) # Use infinity if distance is missing
                        if closest_dist_miles < 5: print("4. ❗ URGENT: Check for IMMEDIATE evacuation orders/warnings (<5 miles).")
                        elif closest_dist_miles < 15: print("4. ❗ WARNING: PREPARE for potential evacuation (<15 miles). Stay highly vigilant.")
                        else: print("4. CAUTION: Stay alert and informed. Conditions can change rapidly.")
                        print("5. CHECK current air quality (e.g., AirNow.gov)."); print("-" * 60)

            again = input("\nSearch another location? (y/n): ").strip().lower()
            if again != 'y': print("\nExiting system. Stay safe!"); break


if __name__ == "__main__":
    # Script entry point: build the analyzer (which loads or trains the model in
    # its constructor) and start the interactive session only if the model is ready.
    try:
        analyzer = FireHotspotAnalyzer()
        if not analyzer.model_ready:
            # Explain the likely causes instead of entering the interactive loop.
            for message in (
                "\nInitialization failed (Model not ready). Cannot run detection.",
                "Please check the errors above, especially during model loading or training.",
                "Ensure 'historical_training_data.csv' exists, is valid FIRMS data,",
                "and contains data that allows for inferring both fire/non-fire examples.",
                "Also ensure 'feature_scaler.pkl' and 'wildfire_classifier.pkl' are compatible if loading.",
            ):
                print(message)
        else:
            analyzer.run_interactive_detection()
    except SystemExit as se:
        print(f"\nSystem exit during initialization or training: {se}")
    except KeyboardInterrupt:
        print("\nExecution interrupted by user.")
    except Exception:
        # Last-resort handler: report the full traceback rather than die silently.
        print("\nAn unexpected critical error occurred in the main execution block:")
        traceback.print_exc()


Loading existing model and scaler...
Model and scaler loaded successfully!

🔥 CA FIRE HOTSPOT DETECTION SYSTEM 🔥
Detects & classifies heat hotspots in California.
Uses NASA FIRMS, Geopy (OSM), Hardcoded Land Cover & ML model.
----------------------------------------

--- Location Input ---
1. Enter California Zipcode
2. Enter Latitude/Longitude
3. Exit
Attempting to geocode zipcode: 92336 using Nominatim (OSM)
Error: Nominatim service error: HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=92336%2C+CA%2C+USA&format=json&limit=1 (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997)')))
Using Sacramento default.

--- Searching ---
Searching within 250.0 km (155.3 miles) of (38.5816, -121.4944)...
Requesting fire data using URL: https://firms.modaps.eosdis.nasa.gov/api/area/csv/6504a8db9e133f9b5b02786e7738c49f/VIIRS_SNPP_NRT/