In [2]:
import csv
import json

import pandas as pd
import requests
import streamlit as st

# Base URL of the raster API; generate_stats posts to its /cog/statistics endpoint.
RASTER_API_URL = "https://earth.gov/ghgcenter/api/raster"
# Base URL of the STAC API; fetch_collection_items reads /collections/<name>/items from it.
STAC_API_URL = "https://earth.gov/ghgcenter/api/stac"

ModuleNotFoundError: No module named 'pandas'

In [6]:
############ FUNCTIONS ###############


def fetch_collection_items(collection_name, limit=100):
    """
    Fetch a single page of items from a STAC collection.

    Issues one GET request to
    ``{STAC_API_URL}/collections/{collection_name}/items`` and passes
    ``limit`` through as the ``limit`` query parameter. Pagination links in
    the response are not followed, so only the items contained in that one
    response are returned.

    Parameters
    ----------
    collection_name : str
        Identifier of the STAC collection to query.
    limit : int, optional
        Value sent as the ``limit`` query parameter (default 100).

    Returns
    -------
    list
        The ``features`` array of the JSON response (empty if the key is
        absent), or an empty list when the request fails or the server
        answers with a non-200 status. Failures are reported via ``print``.
    """
    url = f"{STAC_API_URL}/collections/{collection_name}/items"
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection and the notebook cell never finishes.
        response = requests.get(url, params={"limit": limit}, timeout=60)
    except requests.RequestException as exc:
        # Same best-effort contract as the non-200 branch below.
        print(f"Failed to fetch items. Request error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch items. Status code: {response.status_code}")
        return []

    items = response.json().get("features", [])
    print(f"Found {len(items)} items for collection: {collection_name}")
    return items


def generate_stats(item, geojson, asset_name, i):
    """
    Request raster statistics for one STAC item over a GeoJSON region.

    Posts ``geojson`` to ``{RASTER_API_URL}/cog/statistics`` with the href of
    the item's ``asset_name`` asset as the ``url`` query parameter.

    Parameters
    ----------
    item : dict
        STAC item; must provide ``assets[asset_name]["href"]`` and a
        ``properties`` mapping with ``datetime`` or ``start_datetime``.
    geojson : dict
        GeoJSON feature sent as the request body.
    asset_name : str
        Key of the asset within ``item["assets"]`` to analyse.
    i : int
        Zero-based position of the item, used only in the progress message.

    Returns
    -------
    dict
        The ``properties`` of the API response plus a ``start_datetime``
        entry holding the first 7 characters of the item's timestamp
        (``YYYY-MM`` for ISO-8601 strings). Returns ``{}`` if anything fails;
        the error is printed rather than raised.
    """
    try:
        response = requests.post(
            f"{RASTER_API_URL}/cog/statistics",
            params={"url": item["assets"][asset_name]["href"]},
            json=geojson,
            # Generous bound: avoids hanging forever on a stalled request.
            timeout=300,
        )
        # Surface HTTP errors with their status instead of a later KeyError
        # on the missing "properties" key of an error payload.
        response.raise_for_status()
        result = response.json()

        print(f">> DONE FETCHING STATS - {i+1}")
        print("RESULT: ", result)

        # STAC items may carry a null "datetime" when they describe a time
        # range; fall back to "start_datetime" in that case.
        item_props = item["properties"]
        timestamp = item_props.get("datetime") or item_props.get("start_datetime")
        if not timestamp:
            raise ValueError("item has neither 'datetime' nor 'start_datetime'")

        return {
            **result["properties"],
            # Key name matches the column clean_stats parses into "date".
            "start_datetime": timestamp[:7],
        }
    except Exception as e:
        # Best effort: one failing item must not abort the whole batch.
        print(f"Error generating stats: {e}")
        return {}


def clean_stats(stats_json):
    """
    Flatten statistics records into a DataFrame with a parsed ``date`` column.

    Parameters
    ----------
    stats_json : list of dict or dict
        Records as returned by ``generate_stats``. Empty records (the ``{}``
        that ``generate_stats`` returns on failure) are skipped.

    Returns
    -------
    pandas.DataFrame
        One row per non-empty record. Nested keys are flattened with dots and
        the ``statistics.b1.`` prefix is stripped from column names. A
        ``date`` column holds the parsed timestamp.

    Raises
    ------
    KeyError
        If none of the known timestamp columns is present (including the
        case where every record was empty).
    """
    if isinstance(stats_json, dict):
        records = [stats_json]
    else:
        # Failed items would otherwise turn into all-NaN rows.
        records = [record for record in stats_json if record]

    df = pd.json_normalize(records)
    df.columns = [col.replace("statistics.b1.", "") for col in df.columns]

    # The timestamp may arrive under any of these keys depending on how the
    # records were produced; use the first one that exists.
    timestamp_candidates = ("start_datetime", "datetime", "ddatetime")
    date_column = next(
        (name for name in timestamp_candidates if name in df.columns), None
    )
    if date_column is None:
        raise KeyError(
            f"No timestamp column found; expected one of {timestamp_candidates}"
        )

    df["date"] = pd.to_datetime(df[date_column])
    return df


def save_stats_to_csv(df, collection_name):
    """
    Write the cleaned DataFrame to ``streamlit/data/<collection_name>.csv``.

    The path is relative to the current working directory. The target
    directory is created when it does not exist yet.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to write; the index is not included in the file.
    collection_name : str
        Used as the CSV file name (without extension).

    Returns
    -------
    str
        Path of the written CSV file.
    """
    import os

    csv_file = f"streamlit/data/{collection_name}.csv"
    # to_csv does not create missing parent directories.
    os.makedirs(os.path.dirname(csv_file), exist_ok=True)
    df.to_csv(csv_file, index=False)
    print(f"Data saved to {csv_file}")
    return csv_file


def get_all_assets(items):
    """
    List the asset names of the first item, excluding the last one.

    Parameters
    ----------
    items : list of dict
        STAC items; only ``items[0]["assets"]`` is inspected.

    Returns
    -------
    list of str
        Asset keys of the first item in their stored order, minus the final
        key. Returns an empty list when ``items`` is empty or the first item
        has no usable ``assets`` mapping.
    """
    try:
        # NOTE(review): the trailing asset is dropped deliberately by the
        # original code; presumably it is a non-data asset - confirm.
        return list(items[0]["assets"].keys())[:-1]
    except (IndexError, KeyError, TypeError, AttributeError):
        # Only lookup failures are treated as "no assets"; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        return []


In [3]:
############ CUSTOMIZABLE PARAMS ###############

# Identifier of the STAC collection whose items are analysed.
collection_name = "epa-ch4emission-yeargrid-v2express"

# GeoJSON feature with a single closed polygon ring from (-180, -90) to
# (180, 90); the first and last coordinate pairs are identical.
world_geojson = dict(
    type="Feature",
    properties=dict(name="World"),
    geometry=dict(
        type="Polygon",
        coordinates=[
            [
                [-180.0, -90.0],
                [180.0, -90.0],
                [180.0, 90.0],
                [-180.0, 90.0],
                [-180.0, -90.0],
            ]
        ],
    ),
)

In [4]:
# Fetch items from the collection
items = fetch_collection_items(collection_name, limit=10)
assets = get_all_assets(items)

print(assets)

# Both helpers return an empty list on failure; stop here with a clear
# message instead of an opaque IndexError on assets[0].
if not assets:
    raise RuntimeError(f"No assets found for collection: {collection_name}")

asset_name = assets[0]  # customizable

Found 9 items for collection: epa-ch4emission-yeargrid-v2express
['dwtd-waste', 'iwtd-waste', 'post-meter', 'refining-ps', 'total-other', 'total-waste', 'surface-coal', 'transport-ps', 'abn-ong-other', 'field-burning', 'production-ps', 'total-methane', 'exploration-ps', 'processing-ngs', 'production-ngs', 'exploration-ngs', 'composting-waste', 'distribution-ngs', 'rice-cultivation', 'total-coal-mines', 'underground-coal', 'manure-management', 'total-agriculture', 'msw-landfill-waste', 'abn-underground-coal', 'enteric-fermentation', 'petro-production-other', 'mobile-combustion-other', 'total-petroleum-systems', 'transmission-storage-ngs', 'industrial-landfill-waste', 'total-natural-gas-systems', 'ferroalloy-production-other', 'stationary-combustion-other']


In [7]:
# Request statistics over world_geojson for every fetched item, keeping the
# results in item order; the position is passed along for progress output.
stats = [
    generate_stats(feature, world_geojson, asset_name, position)
    for position, feature in enumerate(items)
]

print(stats[0])

>> DONE FETCHING STATS - 1
RESULT:  {'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-180.0, -90.0], [180.0, -90.0], [180.0, 90.0], [-180.0, 90.0], [-180.0, -90.0]]]}, 'properties': {'statistics': {'b1': {'min': 1.3621397343399622e-08, 'max': 250.26608276367188, 'mean': 0.0640193372964859, 'count': 75972.0, 'sum': 4863.67724609375, 'std': 1.3164696381020087, 'median': 0.0019118647323921323, 'majority': 8.104947823994735e-07, 'minority': 1.3621397343399622e-08, 'unique': 75802.0, 'histogram': [[75965.0, 1.0, 2.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 1.0], [1.3621397343399622e-08, 25.026607513427734, 50.05321502685547, 75.07981872558594, 100.10643005371094, 125.13304138183594, 150.15963745117188, 175.18624877929688, 200.21286010742188, 225.23947143554688, 250.26608276367188]], 'valid_percent': 1.17, 'masked_pixels': 6407628.0, 'valid_pixels': 75972.0, 'percentile_2': 2.01231409846514e-06, 'percentile_98': 0.5368320941925049}}, 'name': 'World'}}
>> DONE FETCHING STATS - 2
RE

In [None]:
# Clean the statistics data: flatten the per-item records into a DataFrame
# and add a parsed "date" column.
df = clean_stats(stats)
# Last expression of the cell: displays the resulting column names.
df.columns

In [None]:
# Preview the first rows of the cleaned statistics.
df.head()

In [None]:
# Save the cleaned data to a CSV file named after the collection;
# csv_file holds the path that was written.
csv_file = save_stats_to_csv(df, collection_name)