# Build (Extract, Transform, and Load)
Builds the `etl` table from the `ee`, `gegd`, and `mgp` tables.

### Import libraries

In [1]:
# Basic stack
from datetime import datetime

# Web Stack
import json
import requests

# Database stack
import sqlite3

# Data Science stack
import shapely.wkt
import pandas as pd
import geopandas as gpd
import folium

### User defined variables

In [2]:
# Path to the SQLite database file; the cells below pass it to sqlite3.connect().
db = "C:/gis/gaia/data/databases/gaia.db"

### Drop table
Dropping any existing `etl` table first lets the `CREATE TABLE` cell below be re-run; this is for demonstration purposes.

In [3]:
# Remove any previous `etl` table so that the CREATE TABLE cell below starts clean.
conn = sqlite3.connect(db)
try:
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")

    conn.execute("DROP TABLE IF EXISTS etl")
    conn.commit()
finally:
    # Close even when the extension fails to load or the DROP raises; otherwise
    # the open handle lingers in the kernel and keeps the database file busy.
    conn.close()

### Create `etl` table from selection of `ee`, `gegd`, and `mgp`
**TODO:** consider adding further fields such as cloud cover, satellite, sensor, and number of bands.

In [4]:
# Build `etl` as the union of the three source catalogs, de-duplicated by ID
# with priority EE > GEGD > MGP:
#   * every `ee` row is kept;
#   * a `gegd` row is kept only when its legacy_id matches no ee.catalog_id;
#   * an `mgp` row is kept only when its id matches neither ee.catalog_id
#     nor gegd.legacy_id.
# Geometries are stored as WKT text (AsText) so they can be parsed downstream.
#
# NOTE(review): `m.datetime` assumes the `datetime` column belongs to `mgp`
# (it was the only unqualified column in the original query) - confirm.
create_etl_sql = '''
    CREATE TABLE etl AS
        SELECT 'EE' AS table_name,
               e.aoi_id AS aoi_id,
               e.catalog_id AS id,
               e.vendor_id AS vendor_id,
               e.entity_id AS entity_id,
               e.vendor AS vendor,
               e.satellite AS platform,
               e.pixel_size_x AS pixel_size_x,
               e.pixel_size_y AS pixel_size_y,
               e.acquisition_date AS date,
               Date(e.publish_date) AS publish_date,
               AsText(e.bounds) AS geometry
           FROM ee e

        UNION

        SELECT 'GEGD' AS table_name,
               g.aoi_id AS aoi_id,
               g.legacy_id AS id,
               NULL AS vendor_id,
               NULL AS entity_id,
               g.company_name AS vendor,
               g.source AS platform,
               g.per_pixel_x AS pixel_size_x,
               g.per_pixel_y AS pixel_size_y,
               Date(g.acquisition_date) AS date,
               NULL AS publish_date,
               AsText(g.geometry) AS geometry
           FROM gegd g
        LEFT JOIN ee e ON g.legacy_id = e.catalog_id
            WHERE
                e.catalog_id IS NULL

        UNION

        SELECT 'MGP' AS table_name,
               m.aoi_id AS aoi_id,
               m.id AS id,
               NULL AS vendor_id,
               NULL AS entity_id,
               'Maxar' AS vendor,
               m.platform AS platform,
               m.gsd AS pixel_size_x,
               m.gsd AS pixel_size_y,
               Date(m.datetime) AS date,
               NULL AS publish_date,
               AsText(m.bbox) AS geometry
           FROM mgp m
        LEFT JOIN ee e ON m.id = e.catalog_id
        LEFT JOIN gegd g ON m.id = g.legacy_id
            WHERE
                e.catalog_id IS NULL AND g.legacy_id IS NULL;
'''

# Empty annotation columns added after the table is created.
# SQLite derives a column's affinity from its declared type name: the type must
# contain "CHAR" to get TEXT affinity, so the spelling VARCHAR matters here.
annotation_columns_sql = (
    "ALTER TABLE etl ADD COLUMN sea_state_qual VARCHAR(15)",
    "ALTER TABLE etl ADD COLUMN sea_state_quant NUMERIC(2, 2)",
    "ALTER TABLE etl ADD COLUMN shareable VARCHAR(3)",
)

conn = sqlite3.connect(db)
try:
    conn.enable_load_extension(True)
    # SpatiaLite provides the AsText() function used in the query above.
    conn.execute("SELECT load_extension('mod_spatialite')")

    conn.execute(create_etl_sql)
    for statement in annotation_columns_sql:
        conn.execute(statement)

    conn.commit()
finally:
    # Release the database file even if any statement above fails.
    conn.close()

### Select new table and show it

In [5]:
# Load the freshly built `etl` table, parse its WKT geometries, and report how
# many rows each source catalog contributed.
# `db` comes from the "User defined variables" cell, so the path is set in one place.
conn = sqlite3.connect(db)
try:
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")

    df = pd.read_sql_query("SELECT * FROM etl", conn)
finally:
    # Read-only query: nothing to commit, just release the file.
    conn.close()

# Vectorised WKT parsing; the Series index is preserved.
df['geometry'] = gpd.GeoSeries.from_wkt(df['geometry'])
gdf = gpd.GeoDataFrame(df, geometry='geometry')

df_ee = df[df['table_name'] == 'EE']
n_gegd = len(df[df['table_name'] == 'GEGD'])
n_mgp = len(df[df['table_name'] == 'MGP'])

print(f"Your database shape is: {df.shape}\n")
print(f"Your database has {len(df_ee)} records from EarthExplorer!")
print(f"\tYour EarthExplorer records have {len(df_ee['id'].unique())} unique Catalog IDs!")
print(f"\tYour EarthExplorer records have {len(df_ee['entity_id'].unique())} unique Entity IDs!")
print(f"Your database has {n_gegd} records from GEGD!")
print(f"Your database has {n_mgp} records from MGP!\n")

df.head()

Your database shape is: (4160, 15)

Your database has 4104 records from EarthExplorer!
	Your EarthExplorer records have 303 unique Catalog IDs!
	Your EarthExplorer records have 4104 unique Entity IDs!
Your database has 3 records from GEGD!
Your database has 53 records from MGP!



Unnamed: 0,table_name,aoi_id,id,vendor_id,entity_id,vendor,platform,pixel_size_x,pixel_size_y,date,publish_date,geometry,sea_state_qual,sea_state_quant,shareable
0,EE,1,1030010031782D00,14MAY21213601-M1BS-500123972010_01_P001,WV220140521213601M00,DIGITAL GLOBE,WORLDVIEW-2,2.0,2.0,2014-05-21,,"POLYGON ((-151.360419 60.335807, -151.360419 6...",,,
1,EE,1,1030010031782D00,14MAY21213601-P1BS-500123972010_01_P001,WV220140521213601P00,DIGITAL GLOBE,WORLDVIEW-2,0.5,0.5,2014-05-21,,"POLYGON ((-151.360034 60.335807, -151.360034 6...",,,
2,EE,1,10300100335E4200,14JUN03215705-M1BS-500127230010_01_P001,WV220140603215705M00,DIGITAL GLOBE,WORLDVIEW-2,2.0,2.0,2014-06-03,,"POLYGON ((-151.393333 60.318514, -151.393333 6...",,,
3,EE,1,10300100335E4200,14JUN03215705-P1BS-500127230010_01_P001,WV220140603215705P00,DIGITAL GLOBE,WORLDVIEW-2,0.5,0.5,2014-06-03,,"POLYGON ((-151.392926 60.318529, -151.392926 6...",,,
4,EE,1,1030010082605B00,18AUG24220947-M1BS-506798099050_01_P006,WV220180824220947M00,MAXAR TECHNOLOGIES,WORLDVIEW-2,2.0,2.0,2018-08-24,,"POLYGON ((-151.789722 60.659444, -151.789722 6...",,,


### Plot Images on an Interactive Map

In [6]:
def style_function(hex_value):
    """Return a folium style dict: outline in `hex_value`, with a transparent fill."""
    return {'color': hex_value, 'fillOpacity': 0}


# Outline colour used for each source catalog's image footprints.
source_colors = {
    'EE': '#0000FF',
    'GEGD': '#037c6e',
    'MGP': '#DAA520',
}

# Named `image_map` rather than `map` so the builtin map() is not shadowed for
# the rest of the kernel session.
image_map = folium.Map()
folium.TileLayer('openstreetmap').add_to(image_map)

# Add one GeoJson layer of footprints per source catalog.
for source, hex_value in source_colors.items():
    footprints = gdf.loc[gdf['table_name'] == source, 'geometry']
    if footprints.empty:
        # Nothing to draw for this source.
        continue
    folium.GeoJson(
        footprints.to_json(),
        # Bind the colour as a default argument so each layer keeps its own
        # colour instead of the loop variable's final value.
        style_function=lambda feature, hex_value=hex_value: style_function(hex_value),
    ).add_to(image_map)

# Zoom to collected images
image_map.fit_bounds(image_map.get_bounds(), padding=(100, 100))

# Display the map
image_map

### Add additional fields to `etl` for processing steps
- Assume there is a linear, non-experimental, production chain for this work.
- The exact production chain is still undecided; Schaeffer et al. (2022) and Lebrasse et al. (2022) were used as references.
- See PGC Tools: https://github.com/PolarGeospatialCenter/imagery_utils
     - Correct for terrain and radiometry
     - Pansharpen (only GDAL Brovey at this time)
     - Generate indices


**Example Extract, Transform, Load, and Show Workflow**
``` mermaid
flowchart LR
    subgraph id2[Microsoft Azure]
        direction TB
        id4["💻 Virtual Machine"]-- Update ---> id5[(GAIA Database)]
        id4["💻 Virtual Machine"]-- Upload --> id6[(Blob Store Container)]
    end
    
    id1[(USGS EarthExplorer)]-- Query via Python ---> id2[Microsoft Azure] --> id3[WHALE Web App]
```

Azure Blob Storage
- admin
- data
     - databases
     - imagery
          - raw
               - ee
                    -  WV320240301151629P00
          - ortho_calib
               - 21APR24154042-P1BS-506967344060_01_P003_u16rd32619.tif
     - rasters
     - shapefiles
     - kmls
- documents
     - pdf
     - pptx
- transfer

In [7]:
# Manual reset (disabled): uncomment to drop the `downloaded` column from `etl`.
# conn = sqlite3.connect(db)
# conn.enable_load_extension(True)
# conn.execute("SELECT load_extension('mod_spatialite')")

# c = conn.cursor()
# c.execute('''ALTER TABLE etl DROP COLUMN downloaded''')
# conn.commit()
# conn.close()

In [8]:
# Bookkeeping columns for the download / processing steps, as
# (column name, column definition) pairs in the order they are added.
tracking_columns = [
    ("downloaded", "BOOLEAN DEFAULT 0 NOT NULL CHECK (downloaded IN (0, 1))"),
    ("raw_storage_location", "VARCHAR(50) DEFAULT NULL"),
    ("processed", "BOOLEAN DEFAULT 0 NOT NULL CHECK (processed IN (0, 1))"),
    ("processing_command", "VARCHAR(500) DEFAULT NULL"),
    ("processed_storage_location", "VARCHAR(50) DEFAULT NULL"),
]

conn = sqlite3.connect(db)
try:
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")

    # PRAGMA table_info yields one row per existing column; field 1 is its name.
    existing_columns = {row[1] for row in conn.execute("PRAGMA table_info(etl)")}

    for column_name, column_definition in tracking_columns:
        if column_name in existing_columns:
            # Already added by an earlier run; re-adding would raise
            # "duplicate column name" and abort the cell.
            continue
        conn.execute(f"ALTER TABLE etl ADD COLUMN {column_name} {column_definition}")

    conn.commit()
finally:
    # Release the database file even if an ALTER fails.
    conn.close()

### Select updated table and show it

In [9]:
# Reload `etl` to show the newly added tracking columns.
# `db` comes from the "User defined variables" cell, so the path is set in one place.
conn = sqlite3.connect(db)
try:
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")

    df = pd.read_sql_query("SELECT * FROM etl", conn)
finally:
    # Read-only query: nothing to commit, just release the file.
    conn.close()

# Vectorised WKT parsing; the Series index is preserved.
df['geometry'] = gpd.GeoSeries.from_wkt(df['geometry'])
gdf = gpd.GeoDataFrame(df, geometry='geometry')

df.head()

Unnamed: 0,table_name,aoi_id,id,vendor_id,entity_id,vendor,platform,pixel_size_x,pixel_size_y,date,publish_date,geometry,sea_state_qual,sea_state_quant,shareable,downloaded,raw_storage_location,processed,processing_command,processed_storage_location
0,EE,1,1030010031782D00,14MAY21213601-M1BS-500123972010_01_P001,WV220140521213601M00,DIGITAL GLOBE,WORLDVIEW-2,2.0,2.0,2014-05-21,,"POLYGON ((-151.360419 60.335807, -151.360419 6...",,,,0,,0,,
1,EE,1,1030010031782D00,14MAY21213601-P1BS-500123972010_01_P001,WV220140521213601P00,DIGITAL GLOBE,WORLDVIEW-2,0.5,0.5,2014-05-21,,"POLYGON ((-151.360034 60.335807, -151.360034 6...",,,,0,,0,,
2,EE,1,10300100335E4200,14JUN03215705-M1BS-500127230010_01_P001,WV220140603215705M00,DIGITAL GLOBE,WORLDVIEW-2,2.0,2.0,2014-06-03,,"POLYGON ((-151.393333 60.318514, -151.393333 6...",,,,0,,0,,
3,EE,1,10300100335E4200,14JUN03215705-P1BS-500127230010_01_P001,WV220140603215705P00,DIGITAL GLOBE,WORLDVIEW-2,0.5,0.5,2014-06-03,,"POLYGON ((-151.392926 60.318529, -151.392926 6...",,,,0,,0,,
4,EE,1,1030010082605B00,18AUG24220947-M1BS-506798099050_01_P006,WV220180824220947M00,MAXAR TECHNOLOGIES,WORLDVIEW-2,2.0,2.0,2018-08-24,,"POLYGON ((-151.789722 60.659444, -151.789722 6...",,,,0,,0,,


# End