Skip to content

Commit

Permalink
more documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
lillythomas committed May 21, 2024
1 parent d0ec7d9 commit eab6b1f
Showing 1 changed file with 76 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,53 @@
"from src.model_clay_v1 import ClayMAEModule\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f950c34e-2c8a-4d02-9d72-c161321c826e",
"metadata": {},
"outputs": [],
"source": [
"# STAC API endpoint and platform details\n",
"STAC_API = \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n",
"PLATFORM_NAME = \"naip\"\n",
"\n",
"# Directory to save the downloaded data\n",
"save_dir = \"./data_naip_test/ca/2020/sf/\"\n",
"os.makedirs(save_dir, exist_ok=True)\n",
"\n",
"YEAR = 2020\n",
"\n",
"# STAC API search query: items of the PLATFORM_NAME collection that\n",
"# intersect the bounding box within the given YEAR\n",
"search_query = {\n",
"    \"collections\": [PLATFORM_NAME],\n",
"    \"bbox\": [-122.6, 37.6, -122.35, 37.85],  # Part of San Francisco, CA\n",
"    \"datetime\": f\"{YEAR}-01-01T00:00:00Z/{YEAR+1}-01-01T00:00:00Z\",\n",
"}\n",
"\n",
"client = Client.open(STAC_API)\n",
"items = client.search(**search_query)\n",
"\n",
"# Parallel lists: stackstac_datasets[i] was opened from granule_names[i]\n",
"stackstac_datasets = []\n",
"granule_names = []\n",
"\n",
"# Iterate over the matched STAC items, open each item's \"image\" asset with\n",
"# rioxarray (keeping bands 1-4) and use the file name of the asset href as\n",
"# the granule name. item_collection() replaces the deprecated get_all_items().\n",
"for item in items.item_collection():\n",
"    assets = item.assets\n",
"    image_href = assets[\"image\"].href\n",
"    dataset = rioxarray.open_rasterio(image_href).sel(band=[1, 2, 3, 4])\n",
"    granule_name = image_href.split('/')[-1]\n",
"    stackstac_datasets.append(dataset)\n",
"    granule_names.append(granule_name)"
]
},
{
"cell_type": "markdown",
"id": "589611d4",
"metadata": {},
"source": [
"Below are some functions we will rely on to prepare the data cubes, generate embeddings and plot subsets of the tiled images for visualization purposes."
]
},
{
"cell_type": "code",
"execution_count": 3,
Expand Down Expand Up @@ -185,16 +232,8 @@
"\n",
" # The first embedding is the class token, which is the\n",
" # overall single embedding. \n",
" return unmsk_patch[:, 0, :].cpu().numpy()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad703bb7",
"metadata": {},
"outputs": [],
"source": [
" return unmsk_patch[:, 0, :].cpu().numpy()\n",
"\n",
"def tile_dataset(dataset, granule_name):\n",
" \"\"\"\n",
" Tile dataset into 256x256 image chips and drop any excess border regions.\n",
Expand Down Expand Up @@ -254,46 +293,7 @@
" tiles.append(tile)\n",
" tile_names.append(tile_name)\n",
" \n",
" return tiles, tile_names"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f950c34e-2c8a-4d02-9d72-c161321c826e",
"metadata": {},
"outputs": [],
"source": [
"# STAC API endpoint and platform details\n",
"STAC_API = \"https://planetarycomputer.microsoft.com/api/stac/v1\"\n",
"PLATFORM_NAME = \"naip\"\n",
"\n",
"# Directory to save the downloaded data\n",
"save_dir = \"./data_naip_test/ca/2020/sf/\"\n",
"os.makedirs(save_dir, exist_ok=True)\n",
"\n",
"YEAR = 2020\n",
"\n",
"# STAC API search query\n",
"search_query = {\n",
" \"collections\": [PLATFORM_NAME],\n",
" \"bbox\": [-122.6, 37.6, -122.35, 37.85], # Part of San Francisco, CA\n",
" \"datetime\": f\"{YEAR}-01-01T00:00:00Z/{YEAR+1}-01-01T00:00:00Z\",\n",
"}\n",
"\n",
"client = Client.open(STAC_API)\n",
"items = client.search(**search_query)\n",
"\n",
"stackstac_datasets = []\n",
"granule_names = []\n",
"\n",
"# Iterate over the granule names and fetch the corresponding StackSTAC data arrays\n",
"for item in items.get_all_items():\n",
" assets = item.assets\n",
" dataset = rioxarray.open_rasterio(item.assets[\"image\"].href).sel(band=[1, 2, 3, 4])\n",
" granule_name = item.assets[\"image\"].href.split('/')[-1]\n",
" stackstac_datasets.append(dataset)\n",
" granule_names.append(granule_name)"
" return tiles, tile_names\n"
]
},
{
Expand All @@ -315,6 +315,7 @@
" tiles, tile_names = tile_dataset(dataset, granule_name)\n",
" tiles_.append(tiles)\n",
" tile_names_.append(tile_names)\n",
" # Flatten the per-granule sublists into single flat lists. The outer loop\n",
" # must walk the list of sublists (tiles_ / tile_names_); iterating only the\n",
" # last granule's `tiles` would drop every other granule and duplicate entries.\n",
" tiles__ = [tile for granule_tiles in tiles_ for tile in granule_tiles]\n",
" tile_names__ = [name for granule_tile_names in tile_names_ for name in granule_tile_names]\n",
"else:\n",
Expand Down Expand Up @@ -360,6 +361,14 @@
"os.makedirs(outdir_embeddings, exist_ok=True)"
]
},
{
"cell_type": "markdown",
"id": "d8f01975",
"metadata": {},
"source": [
"### Load the trained Clay v1 model"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -376,6 +385,14 @@
")"
]
},
{
"cell_type": "markdown",
"id": "e5ae07f7",
"metadata": {},
"source": [
"### Generate embeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -472,6 +489,15 @@
"len(embeddings)"
]
},
{
"cell_type": "markdown",
"id": "a9cf665b",
"metadata": {},
"source": [
"### Run a similarity search to identify similar embeddings\n",
"We will select a random index to search with and plot the corresponding RGB images from the search results. "
]
},
{
"cell_type": "code",
"execution_count": 126,
Expand Down

0 comments on commit eab6b1f

Please sign in to comment.