In [None]:
import pandas as pd
import geopandas as gpd
from tqdm.auto import tqdm

In [None]:
from coal_emissions_monitoring.satellite_imagery import (
    create_aoi_for_plants,
    get_image_metadata_for_plants,
    get_image_from_cog
)
from coal_emissions_monitoring.data_viz import view_satellite_image

In [None]:
# Load the labeled geospatial observations from CSV and display the resulting frame.
# NOTE(review): this is an absolute, machine-specific path; the notebook only runs
# as-is where this directory layout exists.
labeled_data_path = "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/labeled_geospatial_data.csv"
df = pd.read_csv(labeled_data_path)
df

In [None]:
# Tally how many rows carry each value of the is_powered_on label.
df["is_powered_on"].value_counts()

In [None]:
# Map every distinct (lat, lon) pair to its position among the de-duplicated pairs,
# numbered from 0 in the order the pairs first appear in df.
coord_pairs = df[["lat", "lon"]].drop_duplicates()
unique_coords = {
    (lat, lon): position
    for position, (lat, lon) in enumerate(
        coord_pairs.itertuples(index=False, name=None)
    )
}
unique_coords

In [None]:
# Assign an integer facility_id to each row: every distinct (lat, lon) pair gets one id,
# numbered from 0 in the order the pairs first appear in df (sort=False keeps that order).
# These are the same ids the unique_coords lookup holds, but computed in one vectorized
# pass instead of a Python-level dict lookup per row.
# NOTE(review): rows with a missing lat or lon are excluded from grouping by groupby's
# default dropna behaviour and will not receive a valid id — confirm the data has none.
df["facility_id"] = df.groupby(["lat", "lon"], sort=False).ngroup()
df

In [None]:
# Count how many rows belong to each facility_id.
df["facility_id"].value_counts()

In [None]:
# Switch the coordinate columns to their long-form names.
coordinate_names = {"lat": "latitude", "lon": "longitude"}
df = df.rename(columns=coordinate_names)
df

In [None]:
# Shorten the timestamp column name to "ts", convert it to pandas datetimes,
# and show the column dtypes to confirm the conversion.
df = df.rename(columns={"timestamp": "ts"})
df["ts"] = pd.to_datetime(df["ts"])
df.dtypes

In [None]:
# Build one point per row from (longitude, latitude) and wrap df in a GeoDataFrame
# whose coordinates are declared as EPSG:4326.
plant_points = gpd.points_from_xy(df["longitude"], df["latitude"])
gdf = gpd.GeoDataFrame(df, geometry=plant_points, crs="EPSG:4326")
gdf

In [None]:
# Pass the plant GeoDataFrame through create_aoi_for_plants and keep the result under
# the same name.
# NOTE(review): presumably this replaces each point geometry with an area-of-interest
# shape around the plant — confirm in coal_emissions_monitoring.satellite_imagery.
gdf = create_aoi_for_plants(gdf)
gdf

In [None]:
# Show the active geometry column on geopandas' interactive map as a visual sanity check.
gdf.geometry.explore()

In [None]:
# Load the image metadata from its CSV cache and convert the "ts" column to datetimes.
# The commented-out call below is kept as a record of the alternative: fetching the
# metadata with get_image_metadata_for_plants instead of reading the cached file.
# image_metadata_df = get_image_metadata_for_plants(
#     gdf,
#     start_date=gdf.ts.min(),
#     end_date=gdf.ts.max(),
#     max_cloud_cover_prct=50,
# )
image_metadata_path = "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/image_metadata.csv"
image_metadata_df = pd.read_csv(image_metadata_path)
image_metadata_df["ts"] = pd.to_datetime(image_metadata_df["ts"])
image_metadata_df

In [None]:
# Keep only the image metadata rows whose (facility_id, calendar day) also occurs in gdf.
# Both frames get a helper "date" column derived from their "ts" timestamps.
image_metadata_df["date"] = image_metadata_df.ts.dt.date
gdf["date"] = gdf.ts.dt.date

# De-duplicate the keys before merging: if gdf holds several rows for the same facility
# and day, merging against the raw key columns would repeat every matching metadata row
# once per gdf row, and re-running this cell would multiply those repeats again.
observed_days = gdf[["facility_id", "date"]].drop_duplicates()
image_metadata_df = image_metadata_df.merge(
    observed_days, on=["facility_id", "date"]
)
image_metadata_df

In [None]:
# Write the current image metadata frame to CSV without the index column.
# NOTE(review): this is the same path the metadata was read from, so the file on disk
# is overwritten with whatever subset image_metadata_df holds at this point.
image_metadata_out_path = "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/image_metadata.csv"
image_metadata_df.to_csv(image_metadata_out_path, index=False)

In [None]:
# Join every image metadata column except "ts" onto gdf by facility and day, then
# export the combined table to CSV. gdf itself is left unchanged by this cell.
metadata_without_ts = image_metadata_df.drop(columns=["ts"])
all_urls_df = gdf.merge(metadata_without_ts, on=["facility_id", "date"])
all_urls_df.to_csv(
    "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/all_urls_dataset.csv",
    index=False,
)

In [None]:
# Attach cloud_cover and the "visual" column (renamed to cog_url) to gdf by facility
# and day, then drop the helper "date" column that was only needed as a join key.
metadata_columns = ["facility_id", "date", "cloud_cover", "visual"]
gdf = (
    gdf.merge(image_metadata_df[metadata_columns], on=["facility_id", "date"])
    .rename(columns={"visual": "cog_url"})
    .drop(columns=["date"])
)
gdf

In [None]:
# Order the rows by facility and then by timestamp, and export the result to CSV
# without the index column.
gdf = gdf.sort_values(by=["facility_id", "ts"])
final_dataset_path = "/Users/adminuser/GitHub/ccai-ss23-ai-monitoring-tutorial/data/google/final_dataset.csv"
gdf.to_csv(final_dataset_path, index=False)

In [None]:
# Fetch the image for the first row of gdf, passing that row's cog_url and geometry
# to get_image_from_cog with size=64, then show the shape of what came back.
row = gdf.iloc[0]
image = get_image_from_cog(
    cog_url=row["cog_url"],
    geometry=row["geometry"],
    size=64,
)
image.shape

In [None]:
# Display the image fetched in the previous cell using the project's viewing helper.
view_satellite_image(image)