# Extracting Non-Wildfire data

In [5]:
import pandas as pd
import numpy as np

# =========================
# Configurable Parameters
# =========================
# CSV of fire incidents passed to load_data() by main(); relative paths
# resolve against the current working directory.
INPUT_FIRE_DATA = '../data/raw/mapdataall.csv'
# Destination CSV that main() hands to save_non_fire_events().
OUTPUT_NON_FIRE_EVENTS = '../data/processed/non_fire_events.csv'
# Names of the input columns forwarded to process_fire_data().
DATE_COLUMN = 'incident_dateonly_created'
LAT_COLUMN = 'incident_latitude'
LON_COLUMN = 'incident_longitude'
# Decimal places kept when rounding latitude/longitude; rows whose rounded
# coordinates match are treated as the same location downstream.
ROUND_DECIMALS = 2

# =========================
# Data Loading Functions
# =========================
def load_data(filepath):
    """
    Read the fire-incident CSV at `filepath` into a DataFrame.

    Parameters:
    - filepath: Path (or any source accepted by pandas.read_csv) of the CSV.

    Returns:
    - DataFrame parsed with pandas' default read_csv settings.
    """
    fire_frame = pd.read_csv(filepath)
    return fire_frame

# =========================
# Data Processing Functions
# =========================
def process_fire_data(df, date_col, lat_col, lon_col, round_decimals):
    """
    Convert the date column to datetime and round latitude and longitude columns.

    The input DataFrame is left untouched; the derived columns are added to a
    new DataFrame so that callers holding a reference to `df` (or passing a
    slice of a larger frame) do not see it change underneath them.

    Parameters:
    - df: DataFrame with fire data.
    - date_col: Name of the date column.
    - lat_col: Name of the latitude column.
    - lon_col: Name of the longitude column.
    - round_decimals: Number of decimals for rounding coordinates.

    Returns:
    - New DataFrame containing every original column plus 'date',
      'lat_rnd', and 'lon_rnd'.

    Raises:
    - KeyError: if any of the named columns is missing from `df`.
    """
    # assign() returns a fresh frame, so nothing is written back into `df`.
    return df.assign(
        date=pd.to_datetime(df[date_col]),
        lat_rnd=df[lat_col].round(round_decimals),
        lon_rnd=df[lon_col].round(round_decimals),
    )

def get_unique_dates_and_locations(df):
    """
    Collect the distinct dates and distinct rounded coordinates in `df`.

    Parameters:
    - df: DataFrame that already has 'date', 'lat_rnd' and 'lon_rnd' columns.

    Returns:
    - unique_dates: Series of distinct 'date' values in ascending order.
    - unique_locations: DataFrame with one row per distinct
      ('lat_rnd', 'lon_rnd') pair, first occurrence kept.
    """
    date_values = df['date']
    first_seen_dates = date_values[~date_values.duplicated()]
    unique_dates = first_seen_dates.sort_values()

    coords = df[['lat_rnd', 'lon_rnd']]
    unique_locations = coords[~coords.duplicated()]

    return unique_dates, unique_locations

def generate_non_fire_events(df, unique_dates, unique_locations):
    """
    Generate a DataFrame of non-fire events by subtracting fire locations from all unique locations.

    For each date, every location in `unique_locations` that has no fire row
    in `df` on that date becomes one output row.

    Parameters:
    - df: Processed fire DataFrame with 'date', 'lat_rnd' and 'lon_rnd' columns.
    - unique_dates: Iterable of datetime-like dates to generate rows for.
    - unique_locations: DataFrame of candidate 'lat_rnd'/'lon_rnd' pairs.

    Returns:
    - DataFrame with columns 'date' (formatted as YYYY-MM-DD), 'lat' and
      'lon'. Rows follow the order of `unique_dates`, and are sorted by
      (lat, lon) within each date so repeated runs produce identical output.
      An empty frame with the same three columns is returned when there is
      nothing to emit.
    """
    # Create a set of all possible location tuples (lat, lon)
    all_locs_set = set(zip(unique_locations['lat_rnd'], unique_locations['lon_rnd']))

    # Index the fire locations by date once, instead of re-filtering the whole
    # frame for every date in the loop below.
    fire_locs_by_date = {
        day: set(zip(group['lat_rnd'], group['lon_rnd']))
        for day, group in df.groupby('date')
    }
    no_fires = frozenset()

    out_dates, out_lats, out_lons = [], [], []
    for date in unique_dates:
        # A missing date (NaT) cannot be formatted or matched to any row.
        if pd.isna(date):
            continue
        # Non-fire locations: those present in the overall set but not on this date
        non_fire_locs = sorted(all_locs_set - fire_locs_by_date.get(date, no_fires))
        out_dates.extend([date.strftime('%Y-%m-%d')] * len(non_fire_locs))
        out_lats.extend(lat for lat, _ in non_fire_locs)
        out_lons.extend(lon for _, lon in non_fire_locs)

    # Building a single frame also covers the empty case, where concatenating
    # an empty list of chunks would raise.
    return pd.DataFrame({'date': out_dates, 'lat': out_lats, 'lon': out_lons})

# =========================
# Output Functions
# =========================
def save_non_fire_events(df, output_filepath):
    """
    Write the non-fire events DataFrame to `output_filepath` as CSV.

    Parameters:
    - df: DataFrame to write.
    - output_filepath: Destination path of the CSV file.

    The row index is not written to the file.
    """
    csv_options = {'index': False}
    df.to_csv(output_filepath, **csv_options)

# =========================
# Main Execution Function
# =========================
def main():
    """
    Run the full extraction: load the fire data, derive rounded locations
    and dates, generate the non-fire events, write them to
    OUTPUT_NON_FIRE_EVENTS and print a short verification summary.
    """
    # Load the fire data
    df_fire = load_data(INPUT_FIRE_DATA)

    # Process the fire data (convert dates and round coordinates)
    df_fire = process_fire_data(df_fire, DATE_COLUMN, LAT_COLUMN, LON_COLUMN, ROUND_DECIMALS)

    # Get unique dates and locations from the fire data
    unique_dates, unique_locations = get_unique_dates_and_locations(df_fire)

    # Generate non-fire events (locations without fire on each date)
    non_fire_events_df = generate_non_fire_events(df_fire, unique_dates, unique_locations)

    # Save the non-fire events to the output CSV file
    save_non_fire_events(non_fire_events_df, OUTPUT_NON_FIRE_EVENTS)
    print(f"Non-fire events saved to {OUTPUT_NON_FIRE_EVENTS}")

    # After processing, verify counts. The non-fire count is taken from the
    # file that was just written (not from the in-memory frame) so it reflects
    # what actually landed on disk; the handle is closed as soon as the lines
    # have been counted.
    fire_count = len(df_fire)
    with open(OUTPUT_NON_FIRE_EVENTS) as written_file:
        non_fire_count = sum(1 for _ in written_file) - 1  # minus header

    print("====== Verification ======")
    print(f"Fire events: {fire_count}")
    print(f"Non-fire events: {non_fire_count}")
    print(f"Total locations: {len(unique_locations)}")
    print("==========================")

# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()


Non-fire events saved to ../data/processed/non_fire_events.csv
Fire events: 2837
Non-fire events: 3620528
Total locations: 2680
