In [2]:
import pandas as pd
import requests
import unicodedata
import time

class MexicanDroughtIndex:
    """Map a latitude/longitude to drought levels from the CONAGUA table.

    The table is loaded once at construction time. Each lookup reverse-geocodes
    the coordinates through the OpenStreetMap Nominatim API, matches the
    returned municipality (and, when possible, state) against the table, and
    converts the drought categories of the two most recent periods into
    integer risk scores.

    Attributes set by ``__init__``:
        df: the loaded table, with extra ``MATCH_KEY`` / ``ENTIDAD_KEY``
            columns holding normalized municipality / state names.
        target_dates: the two most recent date-like column labels.
        risk_map: drought category -> integer score.
    """

    # Address keys tried, in order, to find the municipality in a Nominatim
    # reply. The first three keep the original precedence; the last two are
    # additional fallbacks.
    _MUNICIPALITY_KEYS = ('county', 'city', 'town', 'municipality', 'village')
    # Prefixes the geocoder may put in front of the bare municipality name.
    _NAME_PREFIXES = ("MUNICIPIO DE ", "MUNICIPIO ")
    # File used when the requested Excel workbook cannot be read.
    _FALLBACK_CSV = "MunicipiosSequia.xlsx - MUNICIPIOS.csv"
    # Minimum number of seconds between two geocoding requests.
    _MIN_REQUEST_INTERVAL = 1.0
    # Seconds to wait for the geocoding service before giving up.
    _REQUEST_TIMEOUT = 10

    def __init__(self, msm_path="MunicipiosSequia.xlsx"):
        """
        Initializes the index by loading the official CONAGUA drought table.

        Args:
            msm_path: path to the Excel workbook (sheet ``MUNICIPIOS``) or to
                a CSV export of that sheet.

        Raises:
            ValueError: if the table has fewer than two date-like columns.
        """
        print("Loading Drought Data...")
        self.df = self._load_table(msm_path)

        # --- 1. Dynamic Date Extraction ---
        # Date columns are recognised by a label starting with "20"
        # (e.g. 2025-12-31), whether the label is a string or a datetime.
        date_cols = [c for c in self.df.columns if str(c).strip().startswith('20')]
        if len(date_cols) < 2:
            raise ValueError(
                f"Expected at least two date columns in the drought table, "
                f"found {len(date_cols)}."
            )

        # Sort on the textual form so string and datetime labels can coexist,
        # then keep the LAST TWO available periods.
        self.target_dates = sorted(date_cols, key=lambda c: str(c).strip())[-2:]
        print(f"Tracking Drought for periods: {self.target_dates}")

        # --- 2. Pre-process Names for Matching ---
        # Clean columns for matching (UPPERCASE, NO ACCENTS)
        self.df['MATCH_KEY'] = self.df['NOMBRE_MUN'].apply(self._clean_text)
        self.df['ENTIDAD_KEY'] = self.df['ENTIDAD'].apply(self._clean_text)

        # Mapping D0-D4 to numeric risk (0-5); anything else scores 0.
        self.risk_map = {
            'D0': 1, 'D1': 2, 'D2': 3, 'D3': 4, 'D4': 5,
            'NaN': 0, 'Sin Sequia': 0
        }

    def _load_table(self, msm_path):
        """Read the drought table from ``msm_path``, falling back to the CSV export."""
        if str(msm_path).lower().endswith(".csv"):
            return pd.read_csv(msm_path)
        try:
            return pd.read_excel(msm_path, sheet_name="MUNICIPIOS")
        except Exception as exc:
            # Best-effort fallback if the user converted the sheet to CSV;
            # report why the workbook was skipped instead of hiding it.
            print(
                f"Could not read '{msm_path}' as Excel ({exc}); "
                f"trying '{self._FALLBACK_CSV}'."
            )
            return pd.read_csv(self._FALLBACK_CSV)

    def _clean_text(self, text):
        """Standardizes text: 'Asientos' -> 'ASIENTOS', 'Núñez' -> 'NUNEZ'.

        Missing values (``None`` / NaN) become the empty string.
        """
        if pd.isna(text):
            return ""
        text = str(text)
        # Decompose accented characters, then drop everything non-ASCII
        # (which removes the combining accent marks).
        text = unicodedata.normalize('NFD', text).encode('ascii', 'ignore').decode('ascii')
        return text.upper().strip()

    def _score(self, value):
        """Convert one table cell into its numeric risk score (0 if unknown)."""
        # Normalize both sides so case, accents and stray whitespace in the
        # table do not turn a real drought category into a silent 0.
        lookup = {self._clean_text(key): score for key, score in self.risk_map.items()}
        return lookup.get(self._clean_text(value), 0)

    def get_location_name(self, lat, lon):
        """
        Uses OpenStreetMap API to get Municipality and State from Lat/Lon.
        Rate Limit: 1 request per second (enforced by sleeping when needed).

        Returns:
            ``(municipality, state)`` as strings (either may be empty when the
            reply lacks that field), or ``(None, None)`` if the request failed.
        """
        url = "https://nominatim.openstreetmap.org/reverse"
        params = {'format': 'json', 'lat': lat, 'lon': lon}
        headers = {'User-Agent': 'MexicoDroughtApp/1.0'}  # Required by OSM

        # Space out consecutive calls made through this instance.
        last_call = getattr(self, '_last_request', None)
        if last_call is not None:
            wait = self._MIN_REQUEST_INTERVAL - (time.monotonic() - last_call)
            if wait > 0:
                time.sleep(wait)
        self._last_request = time.monotonic()

        try:
            response = requests.get(
                url, params=params, headers=headers, timeout=self._REQUEST_TIMEOUT
            )
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"API Error: {e}")
            return None, None

        address = data.get('address', {}) if isinstance(data, dict) else {}
        # OSM keys for municipality can vary; take the first one that is set.
        municipality = ''
        for key in self._MUNICIPALITY_KEYS:
            if address.get(key):
                municipality = address[key]
                break
        state = address.get('state', '')

        return municipality, state

    def get_risk_vector(self, lat, lon):
        """
        Main function to get the vector for your ML model.
        Returns: [Drought_Last_Month, Drought_This_Month]

        ``[0, 0]`` is returned when the location cannot be geocoded or the
        municipality is not present in the table.
        """
        # 1. Get Location Name from API
        mun_name, state_name = self.get_location_name(lat, lon)

        if not mun_name:
            return [0, 0]  # API failed or location not found

        # 2. Clean Names
        clean_mun = self._clean_text(mun_name)
        # Remove a leading prefix from the API
        # (e.g., "Municipio de Monterrey" -> "Monterrey")
        for prefix in self._NAME_PREFIXES:
            if clean_mun.startswith(prefix):
                clean_mun = clean_mun[len(prefix):].strip()
                break

        # 3. Lookup in DataFrame
        match = self.df[self.df['MATCH_KEY'] == clean_mun]

        if match.empty:
            print(f"Warning: Could not match '{clean_mun}' in database.")
            return [0, 0]

        # Several municipalities can share a name; narrow by state when the
        # geocoder's state is found among the candidates. If it is not (the
        # two sources may spell states differently), keep the name-only match.
        clean_state = self._clean_text(state_name)
        if clean_state:
            same_state = match[match['ENTIDAD_KEY'] == clean_state]
            if not same_state.empty:
                match = same_state

        # 4. Extract Values
        row = match.iloc[0]
        val1 = row[self.target_dates[0]]  # earlier of the two tracked periods
        val2 = row[self.target_dates[1]]  # most recent tracked period

        # Convert to numeric
        return [self._score(val1), self._score(val2)]

# --- USAGE EXAMPLE ---
# Runs only when the file is executed as a script: builds the index from the
# default workbook, scores one sample coordinate and prints the result.
if __name__ == "__main__":
    # Build the lookup engine from the drought workbook.
    engine = MexicanDroughtIndex("MunicipiosSequia.xlsx")

    # Sample point: Monterrey, NL. The geocoder is expected to return
    # "Monterrey", which is then searched for in the loaded table.
    user_lat, user_lon = 25.6866, -100.3161

    risk_vector = engine.get_risk_vector(user_lat, user_lon)

    print(f"\nPrediction Vector for ({user_lat}, {user_lon}):")
    # One line per tracked period, earlier period first.
    for idx in (0, 1):
        print(f"  > Date {engine.target_dates[idx]}: Level {risk_vector[idx]}")
    print(f"  > Model Input: {risk_vector}")


Loading Drought Data...
Tracking Drought for periods: [datetime.datetime(2025, 12, 31, 0, 0), datetime.datetime(2026, 1, 15, 0, 0)]

Prediction Vector for (25.6866, -100.3161):
  > Date 2025-12-31 00:00:00: Level 0
  > Date 2026-01-15 00:00:00: Level 0
  > Model Input: [0, 0]
