In [None]:
# Example for downloading GLOBE Land Cover Images

In [10]:
# Import Libraries
import pandas as pd
import requests
import json
import datetime
import csv
import os
from pathlib import Path

In [84]:
# Load the GLOBE Land Cover observations (all years, all fields) into a DataFrame.
# By default the CSV is fetched straight from the GitHub repository.
inputFile = "https://github.com/IGES-Geospatial/2025_SEES_OpenScience/raw/main/input_data/Land_Cover_Data_AllYears_AllFields.csv"
# Working from a local clone of the repository instead? Comment out the GitHub URL above and uncomment the line below.
#inputFile = Path("../input_data/Land_Cover_Data_AllYears_AllFields.csv")
# Every column is read as text (dtype='str'), so no values are coerced to numbers on load.
df = pd.read_csv(
    inputFile,
    sep=',',
    dtype='str',
)

In [85]:
# Suggested naming convention: https://www.globe.gov/documents/10157/2592674/GLOBE+Data+User+Guide_v2_final.pdf#page=39

In [86]:
# Folder that receives the downloaded images; it is created (including any
# missing parent folders) when absent, and re-running this cell is harmless.
save_dir = '../input_data/downloaded_images'
Path(save_dir).mkdir(parents=True, exist_ok=True)

# Function to download, rename, and save photo thumbnails from a URL column
def download_thumbnails(row, direction, timeout=30):
    """Download and save the thumbnail of one directional photo for a single row.

    The photo URL is read from the ``f"{direction}PhotoUrl"`` entry of ``row``.
    The thumbnail URL is derived by replacing "original" with "thumb" in that
    URL, and the image is written into the module-level ``save_dir`` as
    ``{protocol}_{LandCoverId}_{direction}_Thumb{ext}``, where ``ext`` is the
    extension of the thumbnail URL.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series, as passed by ``DataFrame.apply(axis=1)``)
        Must provide the keys ``f"{direction}PhotoUrl"``, ``"protocol"`` and
        ``"LandCoverId"``.
    direction : str
        Prefix of the photo URL column, e.g. ``"Downward"``.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get`` so that a stalled server
        cannot block the whole run. Defaults to 30.

    Returns
    -------
    None
        Progress and failures are reported with ``print``. Download and write
        errors are caught (deliberate best effort) so that one bad row does
        not abort processing of the remaining rows.
    """
    column = f"{direction}PhotoUrl"
    url = row[column]
    # Check if the URL is valid
    if pd.isna(url):
        print(f"Invalid URL (pd.na) for LandCoverId: {row['LandCoverId']}")
        return

    # The thumbnail URL is derived from the "original" one; without that
    # marker there is nothing to derive it from.
    if 'original' not in url:
        print(f"Invalid URL (likely pending or rejected) for LandCoverId: {row['LandCoverId']}")
        return

    thumbnail_url = url.replace("original", "thumb")
    image_name = f"{row['protocol']}_{row['LandCoverId']}_{direction}_Thumb"
    file_extension = os.path.splitext(thumbnail_url)[1]
    save_path = os.path.join(save_dir, f"{image_name}{file_extension}")

    try:
        # With stream=True the connection stays checked out until the body is
        # fully read or the response is closed; the context manager closes it
        # on every path, including non-200 responses whose body is never read.
        with requests.get(thumbnail_url, stream=True, timeout=timeout) as response:
            if response.status_code == 200:
                with open(save_path, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Image downloaded: {save_path}")
            else:
                print(f"Failed to retrieve image from {thumbnail_url} (status code: {response.status_code})")
    except Exception as e:
        print(f"Error downloading {thumbnail_url}: {e}")



In [87]:
# For the sake of the demo, reduce the number of photos downloaded - let's just download thumbnails from SEES2025 observations
# n.b. if you really only wanted data and photos from the SEES2025 team, the API request could use the "GlobeTeams" endpoint
# e.g. https://api.globe.gov/search/v1/measurement/protocol/measureddate/globeteams/?protocols=land_covers&startdate=2018-01-01&enddate=2025-06-12&globeteam=SEES%202025&geojson=TRUE&sample=TRUE
# Keep only rows whose GlobeTeams text contains the literal 'SEES 2025' (rows with a missing GlobeTeams value are dropped).
df_filtered = df[df['GlobeTeams'].str.contains('SEES 2025', regex=False, na=False)].reset_index()
# Apply the download_thumbnails function to each row, fetching the 'Downward' photo.
# The function only prints progress and returns None, so the trailing ';' hides the all-None Series repr.
df_filtered.apply(download_thumbnails, axis=1, args=('Downward',));

Image downloaded: ../input_data/downloaded_images/land_covers_73873_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73870_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73865_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73880_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73879_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73864_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73878_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73871_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73866_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73794_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_images/land_covers_73795_Downward_Thumb.jpg
Image downloaded: ../input_data/downloaded_

0       None
1       None
2       None
3       None
4       None
        ... 
1121    None
1122    None
1123    None
1124    None
1125    None
Length: 1126, dtype: object