In [None]:
import pandas as pd
!pip install -q -U google-generativeai

In [None]:
# Load the emission audit records from the Parquet file into a DataFrame.
audit_parquet_path = "/content/su_emission_audit.parquet"
emissions = pd.read_parquet(audit_parquet_path)

# "Explain the Emission" button

In [None]:
import os
import pandas as pd
import google.generativeai as genai
from datetime import datetime, timezone
from google.colab import userdata

# Configure the Gemini client with the API key read from Colab secrets.
try:
    genai.configure(api_key=userdata.get('GEMINI_API_KEY'))
except Exception as e:
    print(f"ERROR: Failed to configure Gemini API. Ensure GEMINI_API_KEY is set in Colab secrets. Details: {e}")
    # `exit()` is the interactive-shell helper installed by `site`; raising
    # instead stops execution with a traceback that keeps the original cause.
    raise RuntimeError("Gemini API configuration failed.") from e


# --- Data Processing Functions ---

def format_duration(start_str, end_str):
    """
    Return a human-readable duration between two timestamps.

    Both arguments are parsed with ``pd.to_datetime``. Durations longer than
    72 hours are reported in whole days; shorter ones in hours and/or minutes.

    Args:
        start_str: Start timestamp (anything ``pd.to_datetime`` accepts).
        end_str: End timestamp (anything ``pd.to_datetime`` accepts).

    Returns:
        A phrase such as "2 hours and 30 minutes", "45 minutes" or "10 days",
        or "an unknown duration" when either timestamp is missing or
        unparseable, or when the end precedes the start.
    """
    unknown = "an unknown duration"

    def _count(value, unit):
        # Singular for exactly one ("1 hour"), plural otherwise ("0 minutes").
        return f"{value} {unit}" if value == 1 else f"{value} {unit}s"

    try:
        start_time = pd.to_datetime(start_str)
        end_time = pd.to_datetime(end_str)
        duration_delta = end_time - start_time
        total_seconds = duration_delta.total_seconds()
        # A missing timestamp gives a NaN here, and int(NaN) raises ValueError.
        total_minutes = int(total_seconds / 60)
    except (ValueError, TypeError):
        return unknown

    # An end time before the start time is bad data; floor-division on a
    # negative value would otherwise turn it into a positive-looking duration.
    if total_seconds < 0:
        return unknown

    hours, minutes = divmod(total_minutes, 60)

    # Durations greater than 72 hours are summarised in whole days.
    if hours > 72:
        return _count(hours // 24, "day")

    if hours > 0 and minutes > 0:
        return f"{_count(hours, 'hour')} and {_count(minutes, 'minute')}"
    if hours > 0:
        return _count(hours, "hour")
    return _count(minutes, "minute")

def get_emission_context(current_rate, historical_rate):
    """
    Compare the current rate to the historical average and describe the result.

    Args:
        current_rate: Emission rate of the event being explained.
        historical_rate: Historical average rate to compare against.

    Returns:
        A dict with two string entries:
        - "descriptor": "high-rate" when the current rate is more than 25%
          above the average, "low-rate" when more than 25% below, else "".
        - "comparison": e.g. "40% higher" / "86% lower"; "in line with the
          historical average" when the difference rounds to 0%; or a
          "historical data ..." message when no comparison can be made.
    """
    if pd.isna(historical_rate) or historical_rate == 0:
        return {"descriptor": "", "comparison": "historical data is not available"}

    not_comparable = {"descriptor": "", "comparison": "historical data could not be compared"}

    try:
        percent_diff = ((current_rate - historical_rate) / historical_rate) * 100
        # A missing current rate propagates as NaN/NA; without this check it
        # would be formatted as "nan%" and fall through to "lower".
        if pd.isna(percent_diff):
            return not_comparable
        magnitude = abs(percent_diff)
        rounds_to_zero = round(magnitude) == 0
        comparison_text = f"{magnitude:.0f}%"
    except (ValueError, TypeError):
        return not_comparable

    # Neither "0% higher" nor "0% lower" is meaningful when the rates match.
    if rounds_to_zero:
        return {"descriptor": "", "comparison": "in line with the historical average"}

    if percent_diff > 25:
        descriptor = "high-rate"
    elif percent_diff < -25:
        descriptor = "low-rate"
    else:
        descriptor = ""

    comparison_text += " higher" if percent_diff > 0 else " lower"
    return {"descriptor": descriptor, "comparison": comparison_text}

# --- Main Gemini API Function ---

def _is_missing(value):
    """Return True when *value* is None, a blank string, or a scalar NaN/NaT/NA."""
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return bool(pd.api.types.is_scalar(value) and pd.isna(value))


def explain_emission(emission_data):
    """
    Generate a narrative paragraph for one emission event via the Gemini API.

    Args:
        emission_data: Dict of event fields. Keys read here: "ee_startTime",
            "ee_endTime", "ee_emissionsRate", "historical_average_rate",
            "ee_emissionsVolume", "a_name", "eo_detectionSource",
            "ee_sourceCategory" and "ee_sourceEmissionsCause".

    Returns:
        The text of the model's response.
    """
    duration = format_duration(emission_data.get("ee_startTime"), emission_data.get("ee_endTime"))
    context = get_emission_context(emission_data.get("ee_emissionsRate"), emission_data.get("historical_average_rate"))

    # A missing or unparseable start time must not crash the whole request;
    # format_duration already tolerates the same input.
    try:
        detection_date_str = pd.to_datetime(emission_data.get("ee_startTime")).strftime('%b %d, %Y')
    except (ValueError, TypeError, AttributeError):
        detection_date_str = 'Not specified'

    # A dict built from a DataFrame row always contains the key, so a
    # `.get(key, default)` fallback never fires for None/NaN values.
    specific_cause = emission_data.get('ee_sourceEmissionsCause')
    if _is_missing(specific_cause):
        specific_cause = 'Not specified'

    # Prompt instructions are conditional on whether historical data exists.
    prompt = f"""
    You are an expert assistant for an emissions management platform. Your task is to translate raw emission data into a concise, human-readable narrative paragraph.

    Follow these rules for the narrative:
    1. Start with the emission rate, severity (if applicable), and asset name.
    2. State the duration and the total resulting volume.
    3. If historical data is provided in the data section below (i.e., not 'not available'), provide context by comparing the current rate to the historical average. Otherwise, do not mention historical data at all.
    4. Mention how and when the emission was first detected.
    5. Conclude with the likely source or cause. If a specific cause is provided, use it.

    Here is the data for the emission event:
    - Emission Rate: {emission_data.get('ee_emissionsRate')} kg/h
    - Total Volume: {emission_data.get('ee_emissionsVolume')} kg
    - Asset Name: '{emission_data.get('a_name')}'
    - Duration: {duration}
    - Detection Method: {emission_data.get('eo_detectionSource')}
    - Detection Date: {detection_date_str}
    - Source Category: {emission_data.get('ee_sourceCategory')}
    - Specific Cause: {specific_cause}
    - Severity Descriptor: '{context.get('descriptor')}'
    - Historical Comparison: '{context.get('comparison')}'

    Now, generate the single paragraph explanation based on this data.
    """

    model = genai.GenerativeModel('models/gemini-1.5-flash-latest')
    response = model.generate_content(prompt)

    return response.text

# --- Main Execution Logic ---

# 1. Calculate historical average rate for each asset (using a_FLOC as unique ID).
#    NOTE(review): the mean is taken over every row of `emissions`, so it
#    includes the event being explained itself — confirm that is intended.
historical_averages = emissions.groupby('a_FLOC')['ee_emissionsRate'].mean().reset_index()
historical_averages = historical_averages.rename(columns={'ee_emissionsRate': 'historical_average_rate'})

# 2. Merge historical averages back into the main DataFrame
emissions_with_history = pd.merge(emissions, historical_averages, on='a_FLOC', how='left')

# 3. Select a single event to explain
#    change this to get data from the current viewing record (button action record)
event_index_to_explain = 10

# 4. Convert the row (Pandas Series) to a dictionary for our function.
#    `.iloc` raises IndexError for an out-of-range position, so catch it here;
#    otherwise the "could not retrieve" branch below can never be reached.
try:
    single_emission_event = emissions_with_history.iloc[event_index_to_explain]
except IndexError:
    data_for_gemini = {}
else:
    data_for_gemini = single_emission_event.to_dict()

# 5. Generate the story and print it
if not data_for_gemini:
    print(f"Could not retrieve data for event at index {event_index_to_explain}.")
else:
    print(f"--- Generating story for event at index: {event_index_to_explain} ---")
    emission_story = explain_emission(data_for_gemini)
    print(emission_story)

--- Generating story for event at index: 10 ---
CHAPMAN STATE BAT experienced a low-rate emission event at a rate of 2.74 kg/h, resulting in a total volume of 12,802.84 kg over 194 days.  Detected by Bridger Photonics on August 10, 2021, this emission rate is 86% lower than the historical average.  The specific cause remains undetermined.

