In [None]:
import pandas as pd
import glob
import os

# Define paths
raw_data_path = "data/raw"
processed_data_path = "data/processed"
os.makedirs(processed_data_path, exist_ok=True)

# Text labels accepted as "high confidence". FIRMS VIIRS exports use the
# single-letter codes l/n/h, so "h" is accepted alongside the spelled-out form.
HIGH_CONFIDENCE_LABELS = ("high", "h")


def filter_high_confidence(df, min_confidence=80):
    """Return the rows of *df* whose ``confidence`` marks a high-confidence fire.

    The ``confidence`` column may be numeric, textual, or a mix of both
    (which is what concatenating files from different sensors produces).
    A row is kept when its value parses as a number ``>= min_confidence``
    or, case-insensitively, equals one of ``HIGH_CONFIDENCE_LABELS``.
    Missing values never match.

    Args:
        df: DataFrame with a ``confidence`` column.
        min_confidence: Inclusive numeric threshold (default 80).

    Returns:
        A filtered DataFrame; the original index labels are preserved.

    Raises:
        KeyError: If *df* has no ``confidence`` column.
    """
    if "confidence" not in df.columns:
        raise KeyError("Expected a 'confidence' column in the fire data.")

    confidence = df["confidence"]
    # Coerce instead of branching on dtype: a mixed int/str column has object
    # dtype, and a dtype check would send the numeric rows down the string path.
    numeric = pd.to_numeric(confidence, errors="coerce")
    labels = confidence.astype(str).str.strip().str.lower()
    keep = (numeric >= min_confidence) | labels.isin(HIGH_CONFIDENCE_LABELS)
    return df[keep]


# Step 1: Get all MODIS CSVs (sorted so the combined row order is reproducible)
csv_files = sorted(glob.glob(os.path.join(raw_data_path, "*.csv")))
print(f"Found {len(csv_files)} CSV files.")

if not csv_files:
    # pd.concat([]) would fail with an opaque "No objects to concatenate".
    print(f"No CSV files found in {raw_data_path!r}; nothing to combine.")
else:
    # Step 2: Read and combine
    df_list = [pd.read_csv(file) for file in csv_files]
    fires_df = pd.concat(df_list, ignore_index=True)
    print("✅ Combined data shape:", fires_df.shape)

    # Step 3: Optional - convert acq_date to datetime (unparseable -> NaT)
    if "acq_date" in fires_df.columns:
        fires_df["acq_date"] = pd.to_datetime(fires_df["acq_date"], errors="coerce")

    # Step 4: Filter by confidence (numeric >= 80, or a "high" text label)
    fires_df = filter_high_confidence(fires_df, min_confidence=80)
    print("✅ Filtered high-confidence fires:", fires_df.shape)

    # Step 5: Save to processed
    output_file = os.path.join(processed_data_path, "fires_combined_clean.csv")
    fires_df.to_csv(output_file, index=False)
    print(f"✅ Saved cleaned data to: {output_file}")


In [None]:
# wildfire_data_ingestion.ipynb

## 📦 Imports
import pandas as pd
import requests
from datetime import datetime
import os

from dotenv import load_dotenv
load_dotenv()

# OpenWeatherMap key comes from the environment (populated by load_dotenv()
# above when a .env file is present), so it never has to be hard-coded here.
api_key = os.getenv("OPENWEATHER_API_KEY")
if not api_key:
    # Surface the misconfiguration now rather than as an HTTP error later.
    print("⚠️ OPENWEATHER_API_KEY is not set; weather requests will fail.")

# Create data folder if not exists
os.makedirs("data/raw", exist_ok=True)

## 🔥 Load NASA FIRMS Fire Data (CSV)
# Example: Manually downloaded and placed in `data/raw`
csv_path = "data/raw/MODIS_C6_Global_7d.csv"  # Change this to your actual file name
fires_df = pd.read_csv(csv_path)

print("✅ Fire data loaded:")
# A bare `fires_df.head()` is only rendered when it is the last expression of
# a notebook cell; print it so the preview always follows the banner.
print(fires_df.head())

## 🌡️ OpenWeatherMap API - Real-Time Weather
API_KEY = api_key  # Module-level alias read by get_weather()

def get_weather(city="Los Angeles"):
    """Fetch the current weather for *city* from the OpenWeatherMap API.

    Uses the module-level ``API_KEY`` and requests metric units.

    Args:
        city: City name passed as the ``q`` query parameter.

    Returns:
        A dict with the keys ``city``, ``temperature``, ``humidity``,
        ``wind_speed``, ``weather`` (text description) and ``datetime``
        (the observation time as a naive local-time ``datetime``), or
        ``None`` if the request fails or returns a non-200 status.
    """
    url = "https://api.openweathermap.org/data/2.5/weather"
    # Let requests build the query string so names containing spaces, '&' or
    # non-ASCII characters are encoded correctly.
    params = {"q": city, "appid": API_KEY, "units": "metric"}

    try:
        # HTTPS keeps the API key off the wire in clear text; the timeout
        # stops a stalled connection from hanging the notebook forever.
        response = requests.get(url, params=params, timeout=10)
    except requests.RequestException as exc:
        # Same failure contract as a bad status code: report and return None.
        print("Failed to fetch data:", exc)
        return None

    if response.status_code != 200:
        print("Failed to fetch data:", response.status_code)
        return None

    data = response.json()
    return {
        "city": city,
        "temperature": data["main"]["temp"],
        "humidity": data["main"]["humidity"],
        "wind_speed": data["wind"]["speed"],
        "weather": data["weather"][0]["description"],
        # Kept as a naive local-time value for backward compatibility.
        "datetime": datetime.fromtimestamp(data["dt"]),
    }

# Example usage
weather_data = get_weather("Los Angeles")
print("✅ Current weather:")
print(weather_data)

## 🧹 Quick Preprocessing Example (for FIRMS data)
fires_df_clean = fires_df.dropna()

# Filter high confidence fires. The column may be numeric (0-100) or textual
# depending on the source file, so coerce rather than compare directly:
# `"h" >= 80` would raise a TypeError.
confidence = fires_df_clean["confidence"]
numeric_confidence = pd.to_numeric(confidence, errors="coerce")
confidence_label = confidence.astype(str).str.strip().str.lower()
is_high_confidence = (numeric_confidence >= 80) | confidence_label.isin(["high", "h"])
# .copy() gives an independent frame, so the column assignment below does not
# trigger pandas' SettingWithCopyWarning.
fires_df_clean = fires_df_clean[is_high_confidence].copy()

# Convert date field to datetime if present
if "acq_date" in fires_df_clean.columns:
    fires_df_clean["acq_date"] = pd.to_datetime(fires_df_clean["acq_date"])

print("✅ Cleaned fire data:")
# Printed explicitly: a bare expression is only rendered when it ends a cell.
print(fires_df_clean.head())

## 💾 Save cleaned data
# Make sure the target folder exists so this cell works on a fresh checkout.
os.makedirs("data/processed", exist_ok=True)
fires_df_clean.to_csv("data/processed/fires_cleaned.csv", index=False)
