# Reviewing Data in the Spatialite Database

### Import libraries

In [None]:
# Data Science stack
import sqlite3
import shapely
import pandas as pd
import geopandas as gpd
import folium

# Custom stack
import sys; sys.path.append("../../")
from dba import utilities

### User defined variables

In [2]:
# Path to the SQLite/SpatiaLite database file opened by every query cell below.
db = "../../db.sqlite3"
# Area of Interest identifier used to filter on the `aoi_id_id` column.
aoi_id = 6

### Show distinct Area of Interest Identifiers, count Entity Identifiers

In [3]:
# Count the EarthExplorer entity identifiers stored under each Area of Interest.
# GROUP BY already yields exactly one row per aoi_id_id, so DISTINCT is not needed.
sql_string = '''SELECT aoi_id_id, COUNT(entity_id)
                    FROM whale_earthexplorer
                    GROUP BY aoi_id_id
             '''

conn = sqlite3.connect(db)
try:
    # Load the SpatiaLite extension (mod_spatialite) into this connection.
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")
    df = pd.read_sql_query(sql_string, conn)
finally:
    # Read-only query: nothing to commit, but always release the handle,
    # even when loading the extension or running the query fails.
    conn.close()

df.head()

Unnamed: 0,aoi_id_id,COUNT(entity_id)
0,1,12
1,6,1417
2,24,5


### Show some Catalog Identifiers from Cape Cod Bay

In [4]:
# List the distinct catalog identifiers recorded for AOI 6
# (Cape Cod Bay, per this section's heading).
sql_string = ''' SELECT DISTINCT catalog_id FROM whale_earthexplorer WHERE aoi_id_id = 6'''

conn = sqlite3.connect(db)
try:
    # Load the SpatiaLite extension (mod_spatialite) into this connection.
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")
    df = pd.read_sql_query(sql_string, conn)
finally:
    # Read-only query: nothing to commit, but always release the handle,
    # even when loading the extension or running the query fails.
    conn.close()

df.head()

Unnamed: 0,catalog_id
0,10400100959B5400
1,1040010093625500
2,1040010093A5AA00
3,10400100918AEE00
4,1040010091057E00


### Output the above Cape Cod Bay Catalog Identifiers to a CSV

In [5]:
# Persist the catalog identifiers held in `df`; with pandas' defaults the
# DataFrame index is written as the first (unnamed) CSV column.
df.to_csv(path_or_buf='../outputs/wv_ccb.csv')

### Illustrate that Catalog Identifiers are non-unique

In [6]:
# Pull every EarthExplorer row that shares one catalog identifier, to show that
# a single catalog_id maps to several entity_id records.
sql_string = ''' SELECT * FROM whale_earthexplorer WHERE catalog_id = '10400100674B2100' '''

conn = sqlite3.connect(db)
try:
    # Load the SpatiaLite extension (mod_spatialite) into this connection.
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")
    df = pd.read_sql_query(sql_string, conn)
finally:
    # Read-only query: nothing to commit, but always release the handle,
    # even when loading the extension or running the query fails.
    conn.close()

df.head()

Unnamed: 0,entity_id,catalog_id,acquisition_date,vendor,vendor_id,cloud_cover,satellite,sensor,number_of_bands,map_projection,...,event,event_date,date_entered,center_latitude_dec,center_longitude_dec,thumbnail,publish_date,aoi_id_id,sun_elevation,bounds
0,WV320210424154040M00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154040-M2AS-505250699010_02_P001,0,WORLDVIEW-3,MSI,8,UTM,...,UCDAM,,2021-04-26,42.074845,-70.140402,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:07.395274,6,58.0,b'\x00\x01\xe6\x10\x00\x00\xc0\x06D\x88+\x8fQ\...
1,WV320210424154040M01,10400100674B2100,2021-04-24,MAXAR TECHNOLOGIES,21APR24154040-M1BS-506967344060_01_P001,0,WORLDVIEW-3,MSI,8,GCP,...,UCDAM,,2022-11-23,42.081465,-70.140307,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2022-11-23 16:57:10.688847,6,58.0,b'\x00\x01\xe6\x10\x00\x00a\xc1\xfd\x80\x07\x8...
2,WV320210424154040P00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154040-P2AS-505250699010_02_P001,0,WORLDVIEW-3,PAN,1,UTM,...,UCDAM,,2021-04-26,42.074845,-70.140402,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:04.325017,6,58.0,b'\x00\x01\xe6\x10\x00\x00\xc0\x06D\x88+\x8fQ\...
3,WV320210424154040P01,10400100674B2100,2021-04-24,MAXAR TECHNOLOGIES,21APR24154040-P1BS-506967344060_01_P001,0,WORLDVIEW-3,PAN,1,GCP,...,UCDAM,,2022-11-23,42.081465,-70.140302,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2022-11-23 16:58:59.569377,6,58.0,"b'\x00\x01\xe6\x10\x00\x00\xc9""M\xbc\x03\x8fQ\..."
4,WV320210424154041M00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154041-M2AS-505250699010_02_P002,0,WORLDVIEW-3,MSI,8,UTM,...,UCDAM,,2021-04-26,42.009855,-70.140448,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:07.876252,6,58.0,b'\x00\x01\xe6\x10\x00\x00V\x9e@\xd8)\x8fQ\xc0...


### Export the non-unique Catalog Identifiers to a CSV

In [7]:
# Persist the rows currently held in `df`; with pandas' defaults the DataFrame
# index is written as the first (unnamed) CSV column.
df.to_csv(path_or_buf='../outputs/wv3_10400100674B2100_ids.csv')

### Display the DataFrame

In [8]:
# Show the whole query result held in `df`, rather than only its head().
df

Unnamed: 0,entity_id,catalog_id,acquisition_date,vendor,vendor_id,cloud_cover,satellite,sensor,number_of_bands,map_projection,...,event,event_date,date_entered,center_latitude_dec,center_longitude_dec,thumbnail,publish_date,aoi_id_id,sun_elevation,bounds
0,WV320210424154040M00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154040-M2AS-505250699010_02_P001,0,WORLDVIEW-3,MSI,8,UTM,...,UCDAM,,2021-04-26,42.074845,-70.140402,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:07.395274,6,58.0,b'\x00\x01\xe6\x10\x00\x00\xc0\x06D\x88+\x8fQ\...
1,WV320210424154040M01,10400100674B2100,2021-04-24,MAXAR TECHNOLOGIES,21APR24154040-M1BS-506967344060_01_P001,0,WORLDVIEW-3,MSI,8,GCP,...,UCDAM,,2022-11-23,42.081465,-70.140307,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2022-11-23 16:57:10.688847,6,58.0,b'\x00\x01\xe6\x10\x00\x00a\xc1\xfd\x80\x07\x8...
2,WV320210424154040P00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154040-P2AS-505250699010_02_P001,0,WORLDVIEW-3,PAN,1,UTM,...,UCDAM,,2021-04-26,42.074845,-70.140402,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:04.325017,6,58.0,b'\x00\x01\xe6\x10\x00\x00\xc0\x06D\x88+\x8fQ\...
3,WV320210424154040P01,10400100674B2100,2021-04-24,MAXAR TECHNOLOGIES,21APR24154040-P1BS-506967344060_01_P001,0,WORLDVIEW-3,PAN,1,GCP,...,UCDAM,,2022-11-23,42.081465,-70.140302,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2022-11-23 16:58:59.569377,6,58.0,"b'\x00\x01\xe6\x10\x00\x00\xc9""M\xbc\x03\x8fQ\..."
4,WV320210424154041M00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154041-M2AS-505250699010_02_P002,0,WORLDVIEW-3,MSI,8,UTM,...,UCDAM,,2021-04-26,42.009855,-70.140448,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:07.876252,6,58.0,b'\x00\x01\xe6\x10\x00\x00V\x9e@\xd8)\x8fQ\xc0...
5,WV320210424154041M01,10400100674B2100,2021-04-24,MAXAR TECHNOLOGIES,21APR24154041-M1BS-506967344060_01_P002,0,WORLDVIEW-3,MSI,8,GCP,...,UCDAM,,2022-11-23,42.021665,-70.140328,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2022-11-23 16:57:37.064169,6,58.0,b'\x00\x01\xe6\x10\x00\x00J\xb3y\x1c\x06\x8fQ\...
6,WV320210424154041P00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154041-P2AS-505250699010_02_P002,0,WORLDVIEW-3,PAN,1,UTM,...,UCDAM,,2021-04-26,42.009855,-70.140448,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:22.192448,6,58.0,b'\x00\x01\xe6\x10\x00\x00V\x9e@\xd8)\x8fQ\xc0...
7,WV320210424154041P01,10400100674B2100,2021-04-24,MAXAR TECHNOLOGIES,21APR24154041-P1BS-506967344060_01_P002,0,WORLDVIEW-3,PAN,1,GCP,...,UCDAM,,2022-11-23,42.021666,-70.140322,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2022-11-23 16:59:03.629726,6,58.0,b'\x00\x01\xe6\x10\x00\x00\xb1\x14\xc9W\x02\x8...
8,WV320210424154042M00,10400100674B2100,2021-04-24,DIGITAL GLOBE,21APR24154042-M2AS-505250699010_02_P003,0,WORLDVIEW-3,MSI,8,UTM,...,UCDAM,,2021-04-26,41.944891,-70.140483,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2021-04-26 17:27:20.767136,6,58.0,"b""\x00\x01\xe6\x10\x00\x00c\x9a\xe9^'\x8fQ\xc0..."
9,WV320210424154042M01,10400100674B2100,2021-04-24,MAXAR TECHNOLOGIES,21APR24154042-M1BS-506967344060_01_P003,0,WORLDVIEW-3,MSI,8,GCP,...,UCDAM,,2022-11-23,41.956643,-70.140379,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2022-11-23 16:57:08.369477,6,58.0,b'\x00\x01\xe6\x10\x00\x00\xce\xdf\x84B\x04\x8...


### Select all records associated with a user defined AOI ID, show the table
Use `ee` as an example

In [9]:
# Fetch every EarthExplorer record for the user-defined AOI. The AOI id is
# bound as a query parameter (`?`) instead of being formatted into the SQL text.
sql_string = '''SELECT * FROM whale_earthexplorer WHERE aoi_id_id = ?'''

conn = sqlite3.connect(db)
try:
    # Load the SpatiaLite extension (mod_spatialite) into this connection.
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")
    df = pd.read_sql_query(sql_string, conn, params=(aoi_id,))
finally:
    # Read-only query: nothing to commit, but always release the handle,
    # even when loading the extension or running the query fails.
    conn.close()

print(df.shape)
df.head()

(1417, 28)


Unnamed: 0,entity_id,catalog_id,acquisition_date,vendor,vendor_id,cloud_cover,satellite,sensor,number_of_bands,map_projection,...,event,event_date,date_entered,center_latitude_dec,center_longitude_dec,thumbnail,publish_date,aoi_id_id,sun_elevation,bounds
0,WV320240510151159M00,10400100959B5400,2024-05-10,MAXAR TECHNOLOGIES,24MAY10151159-M1BS-508496072030_01_P001,76,WORLDVIEW-3,MSI,8,GCP,...,UCDAM,,2024-05-21,41.867831,-70.227235,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2024-05-21 13:51:35.011890,6,60.0,b'\x00\x01\xe6\x10\x00\x00\x92\x96\xca\xdb\x11...
1,WV320240510151159P00,10400100959B5400,2024-05-10,MAXAR TECHNOLOGIES,24MAY10151159-P1BS-508496072030_01_P001,76,WORLDVIEW-3,PAN,1,GCP,...,UCDAM,,2024-05-21,41.867839,-70.227219,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2024-05-21 13:51:36.918914,6,60.0,b'\x00\x01\xe6\x10\x00\x00_\x0bzo\x0c\x97Q\xc0...
2,WV320240510151200M00,10400100959B5400,2024-05-10,MAXAR TECHNOLOGIES,24MAY10151200-M1BS-508496072030_01_P002,50,WORLDVIEW-3,MSI,8,GCP,...,UCDAM,,2024-05-21,41.932398,-70.227268,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2024-05-21 13:52:06.426599,6,60.0,b'\x00\x01\xe6\x10\x00\x00\xb2\x9c\x84\xd2\x17...
3,WV320240510151200P00,10400100959B5400,2024-05-10,MAXAR TECHNOLOGIES,24MAY10151200-P1BS-508496072030_01_P002,50,WORLDVIEW-3,PAN,1,GCP,...,UCDAM,,2024-05-21,41.932406,-70.227252,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2024-05-21 13:51:38.676543,6,60.0,b'\x00\x01\xe6\x10\x00\x00\xaf\x95\xd0]\x12\x9...
4,WV320240510151201M00,10400100959B5400,2024-05-10,MAXAR TECHNOLOGIES,24MAY10151201-M1BS-508496072030_01_P003,23,WORLDVIEW-3,MSI,8,GCP,...,UCDAM,,2024-05-21,41.997315,-70.227204,https://ims.cr.usgs.gov/thumbnail/CRSSP/WV/202...,2024-05-21 13:51:37.779161,6,59.0,b'\x00\x01\xe6\x10\x00\x00\x98\x8a\x8dy\x1d\x9...


### Inner Join
Show images that are in both `ee` and `mgp`. Note that the foreign key for `ee` is `catalog_id`, whereas this value corresponds to the primary key `id` in `mgp`. There is an M:1 relationship between these records.

In [10]:
# Inner join: one row per EarthExplorer record whose catalog_id matches an
# MGP id within the selected AOI. The WKT text produced by AsText() is aliased
# to `geometry` in SQL, and the AOI id is bound as a parameter (`?`).
sql_string = '''SELECT a.id, b.catalog_id, AsText(a.bbox) AS geometry
                    FROM whale_maxargeospatialplatform AS a
                    INNER JOIN whale_earthexplorer AS b
                    ON b.catalog_id = a.id
                    WHERE (a.aoi_id_id = ?)
             '''

conn = sqlite3.connect(db)
try:
    # AsText() is provided by SpatiaLite, so the extension must be loaded first.
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")
    df = pd.read_sql_query(sql_string, conn, params=(aoi_id,))
finally:
    # Read-only query: nothing to commit, but always release the handle,
    # even when loading the extension or running the query fails.
    conn.close()

# Parse the WKT column with geopandas' vectorised parser; unlike calling
# shapely.wkt.loads on a whole Series, this does not depend on Shapely 2.x.
df['geometry'] = gpd.GeoSeries.from_wkt(df['geometry'])
gdf = gpd.GeoDataFrame(df, geometry='geometry')

print(gdf.shape)
gdf.head()

(1358, 3)


Unnamed: 0,id,catalog_id,geometry
0,10400100959B5400,10400100959B5400,"POLYGON ((-70.09283 41.80274, -70.09283 42.154..."
1,10400100959B5400,10400100959B5400,"POLYGON ((-70.09283 41.80274, -70.09283 42.154..."
2,10400100959B5400,10400100959B5400,"POLYGON ((-70.09283 41.80274, -70.09283 42.154..."
3,10400100959B5400,10400100959B5400,"POLYGON ((-70.09283 41.80274, -70.09283 42.154..."
4,10400100959B5400,10400100959B5400,"POLYGON ((-70.09283 41.80274, -70.09283 42.154..."


In [11]:
# Number of distinct catalog identifiers among the joined rows in `gdf`.
len(gdf['catalog_id'].unique())

115

### Left Outer Join 1
Select images that are present in `mgp`, but not `ee`

In [12]:
# Anti-join: MGP images in the selected AOI that have no matching
# EarthExplorer catalog_id (the LEFT JOIN leaves b.catalog_id NULL for them).
# AsText() output is aliased to `geometry`; the AOI id is bound as a parameter.
sql_string = '''SELECT a.id, b.catalog_id, a.platform, AsText(a.bbox) AS geometry, a.datetime
                    FROM whale_maxargeospatialplatform AS a
                    LEFT OUTER JOIN whale_earthexplorer AS b
                    ON a.id = b.catalog_id
                    WHERE b.catalog_id IS NULL
                    AND (a.aoi_id_id = ?)
             '''

conn = sqlite3.connect(db)
try:
    # AsText() is provided by SpatiaLite, so the extension must be loaded first.
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")
    df = pd.read_sql_query(sql_string, conn, params=(aoi_id,))
finally:
    # Read-only query: nothing to commit, but always release the handle,
    # even when loading the extension or running the query fails.
    conn.close()

# Parse the WKT column with geopandas' vectorised parser; unlike calling
# shapely.wkt.loads on a whole Series, this does not depend on Shapely 2.x.
df['geometry'] = gpd.GeoSeries.from_wkt(df['geometry'])
gdf_ee = gpd.GeoDataFrame(df, geometry='geometry')

print(gdf_ee.shape)
gdf_ee.head()

(56, 5)


Unnamed: 0,id,catalog_id,platform,geometry,datetime
0,1040010096672600,,worldview-03,"POLYGON ((-69.99209 41.46543, -69.99209 42.029...",2024-05-04 15:18:49.787012
1,104001008D3FC800,,worldview-03,"POLYGON ((-70.28385 41.79629, -70.28385 42.162...",2023-12-19 15:52:23.272514
2,104001008D95DA00,,worldview-03,"POLYGON ((-70.09448 41.80892, -70.09448 42.131...",2023-11-28 15:24:14.394268
3,104001008B104700,,worldview-03,"POLYGON ((-70.49245 41.77276, -70.49245 42.167...",2023-11-02 15:18:43.869752
4,104001008BA6B300,,worldview-03,"POLYGON ((-70.09289 41.80311, -70.09289 42.139...",2023-11-02 15:18:19.920204


### Left Outer Join 2
Select images that are present in `ee`, but not `mgp`

In [13]:
# Anti-join: EarthExplorer records in the selected AOI whose catalog_id has no
# matching MGP id (the LEFT JOIN leaves b.id NULL for them).
# AsText() output is aliased to `geometry`; the AOI id is bound as a parameter.
sql_string = '''SELECT a.catalog_id, b.id, a.satellite, AsText(a.bounds) AS geometry, a.publish_date
                    FROM whale_earthexplorer AS a
                    LEFT OUTER JOIN whale_maxargeospatialplatform AS b
                    ON a.catalog_id = b.id
                    WHERE b.id IS NULL
                    AND (a.aoi_id_id = ?)
             '''

conn = sqlite3.connect(db)
try:
    # AsText() is provided by SpatiaLite, so the extension must be loaded first.
    conn.enable_load_extension(True)
    conn.execute("SELECT load_extension('mod_spatialite')")
    df = pd.read_sql_query(sql_string, conn, params=(aoi_id,))
finally:
    # Read-only query: nothing to commit, but always release the handle,
    # even when loading the extension or running the query fails.
    conn.close()

# Parse the WKT column with geopandas' vectorised parser; unlike calling
# shapely.wkt.loads on a whole Series, this does not depend on Shapely 2.x.
df['geometry'] = gpd.GeoSeries.from_wkt(df['geometry'])
gdf_mgp = gpd.GeoDataFrame(df, geometry='geometry')

print(gdf_mgp.shape)
gdf_mgp.head()

(59, 5)


Unnamed: 0,catalog_id,id,satellite,geometry,publish_date
0,10400100918AEE00,,WORLDVIEW-3,"POLYGON ((-70.47196 42.01803, -70.47196 42.157...",2024-03-14 21:57:20.417209
1,10400100918AEE00,,WORLDVIEW-3,"POLYGON ((-70.47169 42.01807, -70.47169 42.157...",2024-03-14 21:57:36.186486
2,10400100918AEE00,,WORLDVIEW-3,"POLYGON ((-70.4715 41.95334, -70.4715 42.09161...",2024-03-14 21:57:26.632846
3,10400100918AEE00,,WORLDVIEW-3,"POLYGON ((-70.47123 41.95338, -70.47123 42.091...",2024-03-14 21:57:05.046272
4,10400100918AEE00,,WORLDVIEW-3,"POLYGON ((-70.47113 41.88873, -70.47113 42.026...",2024-03-14 21:57:29.089187


### Plot differences

In [14]:
# NOTE(review): this cell originally called `functions.get_aoi`, but the
# notebook only imports `utilities` from `dba`, so `functions` is undefined on
# a fresh kernel (NameError). Presumably `get_aoi` lives in `dba.utilities`;
# confirm against the package before relying on this.
gdf_aoi = utilities.get_aoi(db, aoi_id)


def style_function(hex_value):
    """Return a folium style dict: outline drawn in `hex_value`, no fill."""
    return {'color': hex_value, 'fillOpacity': 0}


# OpenStreetMap is folium's default basemap; request it explicitly once rather
# than stacking a second, identical tile layer on top of the default.
# The map is named `diff_map` so the builtin `map` is not shadowed in the kernel.
diff_map = folium.Map(tiles='OpenStreetMap')

# Blue: footprints in `gdf_ee` (present in `mgp`, but not `ee`)
folium.GeoJson(
    gdf_ee.to_json(),
    style_function=lambda feature: style_function('#0000FF')
).add_to(diff_map)

# Red: footprints in `gdf_mgp` (present in `ee`, but not `mgp`)
folium.GeoJson(
    gdf_mgp.to_json(),
    style_function=lambda feature: style_function('#FF0000')
).add_to(diff_map)

# Black: the Area of Interest geometry
folium.GeoJson(
    gdf_aoi['geometry'].to_json(),
    style_function=lambda feature: style_function('#000000')
).add_to(diff_map)

# Zoom to the extent of everything added to the map
diff_map.fit_bounds(diff_map.get_bounds(), padding=(100, 100))

# Display the map
diff_map