<a href="https://colab.research.google.com/github/JohnRuskinONLINE/epic-classification/blob/master/ColocationCode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Sentinel Satellite (2 and 3) Co-location and Unsupervised Machine Learning**

This section details the co-location part of the code. It is split into annotated chunks to make running it less RAM-intensive and to make it easier to pinpoint errors if and when they occur.

In [None]:
###########################
#      _   ____     ___   #
#     | | |  _ \   / _ \  #
#  _  | | | |_) | | | | | #
# | |_| | |  _ <  | |_| | #
#  \___/  |_| \_\  \___/  #
#                         #
###########################

########################################################################################################
# Mount your Google Drive. It is important that you use one account consistently; likewise the details #
# for Earth Engine, Colab, etcetera. You will also require an account for the Copernicus Space         #
# Database. For this I recommend a unique password that you use nowhere else, to reduce any            #
# possible security risks associated with sharing this code with colleagues, friends, etc.             #
########################################################################################################

from google.colab import drive
drive.mount('/content/drive')

# Import your packages, installing any that are missing first if necessary (!pip install package)

import ee
from datetime import datetime, timedelta
from shapely.geometry import Polygon, Point
import numpy as np
import subprocess
import requests
import pandas as pd
import os

########################################################################################################
# Here we connect Google Colab to Google Earth Engine, Earth Engine will be used to retrieve our       #
# satellite data. Make SURE that you enable the 'Earth Engine' API within the Google Earth Engine.     #
# Change 'week4fetchdatasdn' to a unique identifier of your choice. It does not particularly matter    #
# what; it is just a means for interfacing.                                                            #
########################################################################################################

# Authenticate this session with Earth Engine, then initialise the client against the
# project named below (replace the name with your own, as the banner above explains).
ee.Authenticate()
ee.Initialize(project='week4fetchdatasdn')
def get_matched_S2_image_ids(s3_image,boundary_geometry):
    """
    List the IDs of Sentinel-2 images acquired close in time to a Sentinel-3 image.

    Parameters:
    s3_image: Earth Engine image whose 'system:time_start' property sets the centre of the search.
    boundary_geometry: Earth Engine geometry the Sentinel-2 images must intersect.

    Returns:
    list: The 'system:index' values of the 'COPERNICUS/S2' images found within +/-3 hours
    of the Sentinel-3 acquisition time and inside boundary_geometry.
    """
    # 'system:time_start' is divided by 1000 before being read as a UTC timestamp in seconds
    acquisition_ms = s3_image.get('system:time_start').getInfo()
    s3_time = datetime.utcfromtimestamp(acquisition_ms / 1000)

    # Symmetric three-hour search window around the Sentinel-3 acquisition
    half_window = timedelta(hours=3)
    window_open = s3_time - half_window
    window_close = s3_time + half_window

    # Narrow the Sentinel-2 archive by time first, then by location
    candidates = ee.ImageCollection('COPERNICUS/S2')
    candidates = candidates.filterDate(window_open, window_close)
    candidates = candidates.filterBounds(boundary_geometry)

    # Pull the matching image IDs back to the client as a plain Python list
    return candidates.aggregate_array('system:index').getInfo()

########################################################################################################
# Below is a function to find matched Sentinel-2 image IDs for each Sentinel-3 image in the given date #
# and spatial extent defined by the previous function                                                  #
########################################################################################################

def find_matched_satellite_images(S3_date_range, S3_spatial_extent, boundary_geometry):
    """
    Pair every Sentinel-3 OLCI image in a date range with its time-matched Sentinel-2 images.

    Parameters:
    S3_date_range: Two-element sequence holding the start and end of the Sentinel-3 search.
    S3_spatial_extent: Accepted for interface compatibility; not used by this function.
    boundary_geometry: Earth Engine geometry used to filter both collections.

    Returns:
    list: (s3_image_id, s2_image_id) tuples, one per matched pair.
    """
    range_start, range_end = S3_date_range[0], S3_date_range[1]

    # Sentinel-3 OLCI images inside the requested dates and boundary
    olci_images = (
        ee.ImageCollection('COPERNICUS/S3/OLCI')
        .filterDate(range_start, range_end)
        .filterBounds(boundary_geometry)
    )
    olci_ids = olci_images.aggregate_array('system:index').getInfo()

    pairs = []
    for olci_id in olci_ids:
        # Re-select the single image carrying this ID so its properties can be read
        only_this_id = ee.Filter.eq('system:index', olci_id)
        olci_image = ee.Image(olci_images.filter(only_this_id).first())

        # One tuple per Sentinel-2 image matched to this Sentinel-3 image
        pairs.extend(
            (olci_id, s2_id)
            for s2_id in get_matched_S2_image_ids(olci_image, boundary_geometry)
        )

    return pairs

########################################################################################################
# Parse Google Earth Engine filenames (image IDs). Two parsers are defined below.                      #
# Parameters:                                                                                          #
# gee_filename (str): Filename obtained from Google Earth Engine.                                      #
#                                                                                                      #
# Returns:                                                                                             #
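# parse_gee_filename: tuple containing sensing date and tile number (Sentinel-2 filenames).            #
# parse_gee_filename_s3: tuple containing satellite name, sensing date, and start time (Sentinel-3).   #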
########################################################################################################

def parse_gee_filename(gee_filename):
    """
    Split an underscore-separated Earth Engine filename into its sensing date and tile number.

    Parameters:
    gee_filename (str): Filename such as '20190301T235611_20190301T235610_T01WCM'.

    Returns:
    tuple: (first field, third field) of the filename, i.e. sensing date and tile number.
    """
    fields = gee_filename.split('_')
    return fields[0], fields[2]

def parse_gee_filename_s3(gee_filename):
    """
    Split a Sentinel-3 Earth Engine filename into product prefix, sensing date and start time.

    Parameters:
    gee_filename (str): Filename such as 'S3A_20190301T222350_20190301T222650'.

    Returns:
    tuple: (first field + '_OL_1_EFR', first 8 characters of the start datetime,
    characters after the 9th of the start datetime).
    """
    fields = gee_filename.split('_')

    # The end datetime is read but not returned; a filename without it still raises IndexError
    platform, sensing_start, _sensing_end = fields[0], fields[1], fields[2]

    # A start datetime looks like '20180601T014926': the date, a 'T', then the time
    return platform + '_OL_1_EFR', sensing_start[:8], sensing_start[9:]

########################################################################################################
# Retrieves access token from Copernicus Dataspace using the provided credentials.                     #
#                                                                                                      #
# Parameters:                                                                                          #
# username (str): Username for Copernicus Dataspace.                                                   #
# password (str): Password for Copernicus Dataspace.                                                   #
#                                                                                                      #
# Returns:                                                                                             #
# str: Access token for authenticated sessions.                                                        #
#                                                                                                      #
# These elements do not need to be changed YET. Here functions are simply being defined.               #
########################################################################################################

def get_access_token(username, password, timeout=60):
    """
    Request an access token from the Copernicus Data Space identity service.

    Parameters:
    username (str): Username for Copernicus Dataspace.
    password (str): Password for Copernicus Dataspace.
    timeout (float or tuple, optional): Seconds to wait for the identity service before
    giving up. Defaults to 60. Without a timeout the request could block indefinitely.

    Returns:
    str: The 'access_token' field of the JSON response.

    Raises:
    requests.HTTPError: If the service answers with an error status (e.g. bad credentials).
    requests.Timeout: If the service does not answer within `timeout`.
    """
    url = 'https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token'
    data = {
        'grant_type': 'password',
        'username': username,
        'password': password,
        'client_id': 'cdse-public'
    }
    response = requests.post(url, data=data, timeout=timeout)
    response.raise_for_status()
    return response.json()['access_token']

########################################################################################################
# Queries the Sentinel-2 data from the Copernicus Data Space based on sensing start date, tile number, #
# and access token.                                                                                    #
#                                                                                                      #
# Parameters:                                                                                          #
# sensing_start_date(str): The start date and time for the data sensing in the format 'YYYYMMDDTHHMMSS'#
# tile_number (str): The specific tile number of the Sentinel-2 data to be queried.                    #
# token (str): The access token for authenticating requests to the Copernicus Data Space.              #
#                                                                                                      #
# Returns:                                                                                             #
# DataFrame: A DataFrame containing the query results with details about the Sentinel-2 data.          #
#                                                                                                      #
# The function constructs a query URL with specified parameters, sends a request to the Copernicus Data#
# Space, and returns the results as a DataFrame. It filters the data based on the tile number and the  #
#content start date within a certain time window.                                                      #
########################################################################################################

def query_sentinel2_data(sensing_start_date, tile_number, token):
    """
    Search the Copernicus Data Space catalogue for Sentinel-2 products of one tile.

    Parameters:
    sensing_start_date (str): Sensing start in the format 'YYYYMMDDTHHMMSS'.
    tile_number (str): Tile identifier that the product name must contain.
    token (str): Access token sent as a Bearer Authorization header.

    Returns:
    DataFrame: One row per catalogue entry whose content start lies strictly between the
    sensing start and two hours after it.

    Raises:
    requests.HTTPError: If the catalogue answers with an error status.
    """
    # Two-hour search window that opens at the sensing start
    window_open_dt = datetime.strptime(sensing_start_date, '%Y%m%dT%H%M%S')
    window_close_dt = window_open_dt + timedelta(hours=2)
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
    window_open = window_open_dt.strftime(timestamp_format)
    window_close = window_close_dt.strftime(timestamp_format)

    # OData filter: name contains the tile, collection is Sentinel-2, start inside the window
    url = (
        "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?"
        f"$filter=contains(Name,'{tile_number}') and "
        "Collection/Name eq 'SENTINEL-2' and "
        f"ContentDate/Start gt {window_open} and "
        f"ContentDate/Start lt {window_close}"
    )

    response = requests.get(url, headers={'Authorization': f'Bearer {token}'})
    response.raise_for_status()

    catalogue_entries = response.json()['value']
    return pd.DataFrame.from_dict(catalogue_entries)

########################################################################################################
# Extracts the correct product name and ID from a dataframe based on a specific start time and tile    #
# number.                                                                                              #
#                                                                                                      #
# Parameters:                                                                                          #
# df (DataFrame): The dataframe containing product information.                                        #
# start_time (str): The start time used to filter the products.                                        #
#  tile_number (str): The tile number used to filter the products.                                     #
#                                                                                                      #
# Returns:                                                                                             #
# tuple: A tuple containing the first matching product name and ID, or (None, None) if no match is     #
# found.                                                                                               #
########################################################################################################

def extract_correct_product_name(df, start_time, tile_number):
    """
    Pick the first MSIL1C product in a catalogue result that matches a start time and tile.

    Parameters:
    df (DataFrame): Catalogue results with 'Name' and 'Id' columns. May be completely empty
    (no rows and no columns), which is what an empty query result produces.
    start_time (str): Sensing start time expected inside the product name.
    tile_number (str): Tile number expected, surrounded by underscores, in the product name.

    Returns:
    tuple: (product name, product ID) of the first match, or (None, None) if nothing matches.
    """
    # An empty query result has no 'Name' column at all, so indexing it would raise KeyError
    if df.empty:
        return None, None

    # Product names look like '..._MSIL1C_<start_time>_..._<tile_number>_...'
    pattern = f'MSIL1C.*{start_time}.*_{tile_number}_'

    # na=False treats rows with a missing name as non-matching instead of breaking the mask
    matches = df[df['Name'].str.contains(pattern, regex=True, na=False)]
    if matches.empty:
        return None, None

    return matches['Name'].iloc[0], matches['Id'].iloc[0]

########################################################################################################
# Processes a pair of Sentinel-2 images by querying the Copernicus Data Space to find the corresponding#
#product name and ID.                                                                                  #
#                                                                                                      #
# Parameters:                                                                                          #
# s2_ee_image_id (str): The Sentinel-2 Earth Engine image ID.                                          #
# token (str): The access token for authenticating requests to the Copernicus Data Space.              #
#                                                                                                      #
# Returns:                                                                                             #
# tuple: A tuple containing the product name and ID for the corresponding Sentinel-2 image.            #
########################################################################################################

def process_image_pair(s2_ee_image_id, token):
    """
    Look up the Copernicus Data Space product that corresponds to a Sentinel-2 Earth Engine ID.

    Parameters:
    s2_ee_image_id (str): Underscore-separated Earth Engine image ID; the first field is used
    as the sensing start date and the third as the tile number.
    token (str): Access token for the Copernicus Data Space catalogue.

    Returns:
    tuple: Whatever extract_correct_product_name returns for the query result
    (product name and product ID).
    """
    id_fields = s2_ee_image_id.split('_')
    sensing_start_date, tile_number = id_fields[0], id_fields[2]

    # Search the catalogue, then narrow the hits down to the matching MSIL1C product
    catalogue_hits = query_sentinel2_data(sensing_start_date, tile_number, token)
    return extract_correct_product_name(catalogue_hits, sensing_start_date, tile_number)

########################################################################################################
# Download a single product from the Copernicus Data Space.                                            #
#                                                                                                      #
# Parameters:                                                                                          #
# product_id: The unique identifier for the product.                                                   #
# file_name: The name of the file to be downloaded.                                                    #
# access_token: The access token for authorization.                                                    #
# download_dir: The directory where the product will be saved.                                         #
########################################################################################################

def download_single_product(product_id, file_name, access_token, download_dir="downloaded_products"):
    """
    Download one product from the Copernicus Data Space and save it as a .zip file.

    Parameters:
    product_id: The unique identifier for the product.
    file_name: Name for the downloaded file; '.zip' is appended to it.
    access_token: Access token sent as a Bearer Authorization header.
    download_dir: Directory the file is written to; created if it does not exist.

    Returns:
    None. Success or failure is reported with a printed message.
    """
    # Ensure the download directory exists
    os.makedirs(download_dir, exist_ok=True)

    url = f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value"

    # Both context managers matter: a streamed response keeps its connection open until it
    # is fully read or closed, and on the failure path below the body is never read.
    with requests.Session() as session:
        session.headers.update({"Authorization": f"Bearer {access_token}"})

        with session.get(url, stream=True) as response:
            if response.status_code != 200:
                print(f"Failed to download product {product_id}. Status Code: {response.status_code}")
                return

            output_file_path = os.path.join(download_dir, file_name + ".zip")

            # Stream to disk in chunks so the whole product is never held in memory
            with open(output_file_path, "wb") as file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)

    print(f"Downloaded: {output_file_path}")

########################################################################################################
# Queries Sentinel-3 OLCI data from Copernicus Data Space based on satellite name, sensing date,       #
# and start time.                                                                                      #
#                                                                                                      #
# Parameters:                                                                                          #
# satellite (str): Name of the satellite.                                                              #
# sensing_date (str): Date of the data sensing.                                                        #
# start_time (str): Start time of the data sensing.                                                    #
# token (str): Access token for authentication.                                                        #
#                                                                                                      #
# Returns:                                                                                             #
# DataFrame: A DataFrame containing the query results with details about the Sentinel-3 OLCI data.     #
########################################################################################################

def query_sentinel3_olci_data(satellite, sensing_date, start_time, token):
    """
    Search the Copernicus Data Space catalogue for Sentinel-3 OLCI products.

    Parameters:
    satellite (str): Text the product name must contain (e.g. 'S3A_OL_1_EFR').
    sensing_date (str): Sensing date in the format 'YYYYMMDD'.
    start_time (str): Sensing start time in the format 'HHMMSS'.
    token (str): Access token sent as a Bearer Authorization header.

    Returns:
    DataFrame: Catalogue entries whose content start lies between one second before the
    given sensing start and one day after that, sorted by an added 'SensingStart' column.
    An empty DataFrame is returned when the request fails or nothing matches.
    """
    # Start the window one second early so the product starting exactly at the
    # requested time is included
    sensing_datetime = datetime.strptime(f'{sensing_date}T{start_time}', '%Y%m%dT%H%M%S')
    sensing_datetime = sensing_datetime - timedelta(seconds=1)

    # Construct the request URL using the filter structure provided
    url = (
        f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?"
        f"$filter=contains(Name,'{satellite}') and "
        f"ContentDate/Start ge {sensing_datetime.strftime('%Y-%m-%dT%H:%M:%S.000Z')} and "
        f"ContentDate/Start le {(sensing_datetime + timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%S.000Z')}&"
        f"$orderby=ContentDate/Start&$top=1000"
    )
    headers = {'Authorization': f'Bearer {token}'}

    # Print the URL for debugging
    print(url)

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        # Report the failure and hand back an empty DataFrame rather than raising
        print(f"Error: Unable to fetch data. Status Code: {response.status_code}. Response: {response.text}")
        return pd.DataFrame()

    search_results_df = pd.DataFrame.from_dict(response.json()['value'])

    # No matches gives a DataFrame without a 'ContentDate' column, so stop here
    # instead of failing with KeyError below
    if search_results_df.empty:
        return search_results_df

    # 'ContentDate' holds a dict per row; lift its 'Start' entry into a datetime column
    search_results_df['SensingStart'] = pd.to_datetime(search_results_df['ContentDate'].apply(lambda x: x['Start']))
    search_results_df.sort_values(by='SensingStart', inplace=True)

    return search_results_df

########################################################################################################
# Fetches Sentinel-3 OLCI images based on a specified date range and area of interest.                 #
#                                                                                                      #
# Parameters:                                                                                          #
# date_range: List containing the start and end dates (e.g., ['2018-06-01', '2018-06-02'])             #
# spatial_extent: List containing the spatial extent [min_lon, min_lat, max_lon, max_lat]              #
# area_of_interest: ee.Geometry object defining the specific area for which to fetch images            #
#                                                                                                      #
# Returns:                                                                                             #
# List of dictionaries, each containing details about a fetched image: its ID ('id') and its           #
# acquisition timestamp ('date', the image's system:time_start property).                              #
########################################################################################################

def fetch_S3_images_by_area_and_date(date_range, spatial_extent, area_of_interest):
    """
    List the Sentinel-3 OLCI images that fall in a date range and intersect an area.

    Parameters:
    date_range: Two-element sequence with the start and end dates
    (e.g. ['2018-06-01', '2018-06-02']).
    spatial_extent: Accepted for interface compatibility; not used by this function.
    area_of_interest: ee.Geometry object the images must intersect.

    Returns:
    list: One dictionary per image with the keys 'id' (the image ID) and 'date'
    (the image's 'system:time_start' property).
    """
    # Initialize the Earth Engine module
    ee.Initialize()

    # Sentinel-3 OLCI images within the requested dates and area
    S3_collection = (
        ee.ImageCollection('COPERNICUS/S3/OLCI')
        .filterDate(date_range[0], date_range[1])
        .filterBounds(area_of_interest)
    )

    # A single round-trip fetches the metadata of every image; the separate request
    # for the list of image IDs was never used and has been dropped.
    S3_images_info = S3_collection.getInfo()['features']

    return [
        {
            'id': img_info['id'],
            'date': img_info['properties']['system:time_start'],
        }
        for img_info in S3_images_info
    ]

########################################################################################################
# Retrieves Sentinel-2 images within the Arctic region for a specified date range and cloud coverage   #
# limit.                                                                                               #
#                                                                                                      #
# Parameters:                                                                                          #
# start_date (str): The starting date for the image collection in 'YYYY-MM-DD' format.                 #
# end_date (str): The ending date for the image collection in 'YYYY-MM-DD' format.                     #
# The search area is not a parameter: it is fixed to the box [-180, 60, 180, 90] in the function.      #
# max_cloud_percentage (float, optional): The maximum cloud coverage percentage for filtering images.  #
# Defaults to 10 percent.                                                                              #
#                                                                                                      #
# Returns:                                                                                             #
# ee.ImageCollection: A collection of Sentinel-2 images that fall within the specified date range,     #
# cloud coverage limit, and geographical area.                                                         #
########################################################################################################

def get_s2_images_in_arctic(start_date, end_date, max_cloud_percentage=10):
    """
    Select the Sentinel-2 images over the Arctic bounding box for a date range.

    Parameters:
    start_date (str): Start of the date range, in 'YYYY-MM-DD' format.
    end_date (str): End of the date range, in 'YYYY-MM-DD' format.
    max_cloud_percentage (float, optional): Images whose 'CLOUDY_PIXEL_PERCENTAGE' is not
    below this value are dropped. Defaults to 10.

    Returns:
    ee.ImageCollection: The filtered 'COPERNICUS/S2' collection.
    """
    # The rectangle [-180, 60, 180, 90] serves as the Arctic region
    arctic_box = ee.Geometry.Rectangle([-180, 60, 180, 90])
    low_cloud_only = ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', max_cloud_percentage)

    return (
        ee.ImageCollection('COPERNICUS/S2')
        .filterDate(start_date, end_date)
        .filterBounds(arctic_box)
        .filter(low_cloud_only)
    )

########################################################################################################
# Finds Sentinel-3 images that are temporally and spatially colocated with a given Sentinel-2 image.   #
#                                                                                                      #
# Parameters:                                                                                          #
# s2_image (ee.Image): The Sentinel-2 image to use as a reference for finding colocated Sentinel-3     #
# images.                                                                                              #
# buffer_distance (int, optional): The buffer distance in meters to apply to the footprint of the      #
# Sentinel-2 image. Defaults to 10,000 meters.                                                         #
# time_window_hours (int, optional): The time window in hours to search for Sentinel-3 images before   #
# and after the Sentinel-2 image's acquisition time. Defaults to 2 hours.                              #
#                                                                                                      #
# Returns:                                                                                             #
# ee.List: A list of Sentinel-3 images that are within the specified time window and overlapping       #
# the buffered footprint of the provided Sentinel-2 image.                                             #
########################################################################################################

def find_colocated_s3_for_s2(s2_image, buffer_distance=10000, time_window_hours=2):
    """
    Find the Sentinel-3 OLCI images near a Sentinel-2 image in both space and time.

    Parameters:
    s2_image (ee.Image): Reference Sentinel-2 image.
    buffer_distance (int, optional): Buffer applied to the reference image's geometry.
    Defaults to 10000.
    time_window_hours (int, optional): Hours searched before and after the reference
    image's 'system:time_start'. Defaults to 2.

    Returns:
    ee.List: Every 'COPERNICUS/S3/OLCI' image inside the time window that intersects the
    buffered geometry.
    """
    # Time window centred on the Sentinel-2 acquisition
    acquisition = ee.Date(s2_image.get('system:time_start'))
    window_open = acquisition.advance(-time_window_hours, 'hour')
    window_close = acquisition.advance(time_window_hours, 'hour')

    # Search area: the Sentinel-2 geometry grown by the buffer distance
    search_area = s2_image.geometry().buffer(buffer_distance)

    overlapping = (
        ee.ImageCollection('COPERNICUS/S3/OLCI')
        .filterDate(window_open, window_close)
        .filterBounds(search_area)
    )

    # Convert the whole collection (all `size()` elements) into a list
    return overlapping.toList(overlapping.size())


In [None]:
########################################################################################################
# Co-location Code:                                                                                    #
# Once you have set up your environment and are authenticated with Google Earth Engine, the next step  #
# is to extract the matched filenames that meet your specific criteria. This involves querying the     #
# Google Earth Engine datasets based on your area of interest, time frame, and any other relevant      #
# parameters. We will get a list of matched filenames but we only select one of them to download. The  #
# code snippet below demonstrates how to perform this task effectively:                                #
########################################################################################################

# Initialise Earth Engine again, so this cell can be run on its own
ee.Initialize()

# One-day search window for the Sentinel-2 images
start_date = '2019-03-01'
end_date = '2019-03-02'
s2_collection = get_s2_images_in_arctic(start_date, end_date)

# Iterate over S2 images and find colocated S3 images
# matched_pairs collects (S2 image ID, S3 image ID) tuples
matched_pairs = []

# Bring the metadata of every Sentinel-2 image to the client as a list of dictionaries
s2_list = s2_collection.toList(s2_collection.size()).getInfo()
for s2_info in s2_list:
    # Rebuild the server-side image from its ID, then fetch its colocated S3 images
    s2_image = ee.Image(s2_info['id'])
    colocated_s3 = find_colocated_s3_for_s2(s2_image)
    s3_info_list = colocated_s3.getInfo()

    # One entry per colocated Sentinel-3 image
    for s3_info in s3_info_list:
        matched_pairs.append((s2_info['id'], s3_info['id']))

# Print or process the matched pairs
for pair in matched_pairs:
    print("S2 Image:", pair[0], "has colocated S3 Image:", pair[1])


In [None]:
########################################################################################################
# Here you define the variables passed to the functions written at the beginning of this code.         #
# Supply the email and password of your own Copernicus Space Database account. Change the parameter    #
# gee_filename to the Sentinel-2 filename produced by the previous snippet of code. The parameter      #
# download_dir should be set to a folder within your Google Drive.                                     #
########################################################################################################

import getpass

# Credentials are never stored in the notebook: they are read from the CDSE_USERNAME and
# CDSE_PASSWORD environment variables, or typed in when those are not set.
# NOTE(review): an earlier revision of this cell contained a real username and password;
# that password should be treated as leaked and changed.
username = os.environ.get('CDSE_USERNAME') or input('Copernicus Data Space username (email): ')
password = os.environ.get('CDSE_PASSWORD') or getpass.getpass('Copernicus Data Space password: ')

# A single token is requested and shared by the catalogue query and the download
token = get_access_token(username, password)
access_token = token
download_dir = '/content/drive/MyDrive/GEOL0069/Week4'

# Sentinel-2 filename, taken from the matched pairs printed by the co-location cell
gee_filename = '20190301T235611_20190301T235610_T01WCM'
file_name, product_id = process_image_pair(gee_filename, token)

# process_image_pair yields (None, None) when the catalogue has no matching product
if product_id is None:
    print(f"No matching Sentinel-2 product found for {gee_filename}")
else:
    download_single_product(product_id, file_name, access_token, download_dir)
    print(file_name)

In [None]:
########################################################################################################
# Here you define the variables passed to the functions written at the beginning of this code.         #
# Supply the email and password of your own Copernicus Space Database account. Change the parameter    #
# gee_image_id to the Sentinel-3 filename produced by the previous snippet of code.                    #
########################################################################################################

# Example GEE image ID
import getpass

# Credentials are never stored in the notebook: they are read from the CDSE_USERNAME and
# CDSE_PASSWORD environment variables, or typed in when those are not set.
# NOTE(review): an earlier revision of this cell contained a real username and password;
# that password should be treated as leaked and changed.
username = os.environ.get('CDSE_USERNAME') or input('Copernicus Data Space username (email): ')
password = os.environ.get('CDSE_PASSWORD') or getpass.getpass('Copernicus Data Space password: ')

# Use the token requested here for the download too, rather than an `access_token`
# left over from an earlier cell, which may be missing or already expired
token = get_access_token(username, password)
access_token = token

# Example GEE image ID
gee_image_id = 'S3A_20190301T222350_20190301T222650'

# Parse the GEE filename to get the date and time
satellite, sensing_date, start_time = parse_gee_filename_s3(gee_image_id)

# Query the Copernicus Data Space for the corresponding Sentinel-3 OLCI data
s3_olci_data = query_sentinel3_olci_data(satellite, sensing_date, start_time, token)
download_dir = '/content/drive/MyDrive/GEOL0069/Week4' # Replace with your desired download directory

# The query hands back an empty DataFrame when the request fails, so check before indexing
if s3_olci_data.empty:
    print(f"No Sentinel-3 OLCI product found for {gee_image_id}")
else:
    product_id = s3_olci_data['Id'][0]
    file_name = s3_olci_data['Name'][0]
    print(file_name)

    # Download the single product
    download_single_product(product_id, file_name, access_token, download_dir)

In [None]:
########################################################################################################
# The Sentinel-3 satellite offers an exceptional capability in Earth observation: the simultaneous     #
# acquisition of optical data from its OLCI instrument and altimetry measurements. In this section, I  #
# will guide you through the process of downloading this colocated altimetry data alongside the        #
# Sentinel-3 OLCI optical data.                                                                        #
########################################################################################################

# Import your packages, installing any that are missing first if necessary (!pip install package)

import requests
import pandas as pd
import subprocess
import os
import time
import shutil
import json
from datetime import date
from joblib import Parallel, delayed
import zipfile
import sys
import glob
import numpy as np

# Numerous functions are defined here, using variables defined previously; the text inside each one (its docstring) describes exactly what it does!

#=============================================================================================================================================================#

def get_access_token(username, password):
    """
    Obtain an access token to the Copernicus Data Space Ecosystem.
    Necessary for the download of hosted products.

    The request is made with the `curl` command-line tool. Arguments are passed as a list,
    without a shell, so a username or password containing quotes or other shell
    metacharacters is sent verbatim instead of breaking (or being run as part of) a command.

    Returns a tuple of (access token, refresh token) taken from the JSON response.
    """
    p = subprocess.run(
        [
            "curl", "--location", "--request", "POST",
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            "--header", "Content-Type: application/x-www-form-urlencoded",
            "--data-urlencode", "grant_type=password",
            "--data-urlencode", f"username={username}",
            "--data-urlencode", f"password={password}",
            "--data-urlencode", "client_id=cdse-public",
        ],
        capture_output=True,
        text=True,
    )
    # curl's standard output carries the JSON body of the token response
    access_dict = json.loads(p.stdout)
    return access_dict['access_token'], access_dict['refresh_token']

#=============================================================================================================================================================#

def get_new_access_token(refresh_token):
    """
    Obtain a new access token to the Copernicus Data Space Ecosystem using a previously provided refresh token.

    Parameters:
    refresh_token (str): Refresh token returned by an earlier token request.

    Returns:
    tuple: (access_token, refresh_token) read from the JSON reply.

    Raises:
    FileNotFoundError: If the curl executable cannot be found.
    json.JSONDecodeError: If curl printed nothing that parses as JSON.
    KeyError: If the reply does not contain both tokens (e.g. a rejected refresh token).
    """
    # The command is an argument list run WITHOUT a shell, so no character in
    # the token can be interpreted by a shell.
    command = [
        "curl", "--location", "--request", "POST",
        "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
        "--header", "Content-Type: application/x-www-form-urlencoded",
        "--data-urlencode", "grant_type=refresh_token",
        "--data-urlencode", f"refresh_token={refresh_token}",
        "--data-urlencode", "client_id=cdse-public",
    ]
    p = subprocess.run(command, capture_output=True, text=True)
    access_dict = json.loads(p.stdout)
    try:
        return access_dict['access_token'], access_dict['refresh_token']
    except KeyError as missing:
        # NOTE(review): 'error_description' is the field OAuth 2.0 error replies
        # usually carry - confirm against the identity service.
        detail = access_dict.get('error_description', 'no error description in reply')
        raise KeyError(f"token refresh failed, reply lacks {missing}: {detail}") from missing

#=============================================================================================================================================================#

def get_S3_SI_search_results_df(date):
    """
    Obtain a pandas dataframe of Sentinel-3 sea ice thematic products for a given date.

    Parameters:
    date (pandas.Timestamp): Start of the day of interest. It is compared with
        timestamps parsed from the catalogue reply, so it should carry the
        same timezone-awareness as those (the caller in this file passes a
        UTC-localised Timestamp).

    Returns:
    pandas.DataFrame: One row per catalogue product whose sensing period
        touches [date, date + 1 day], sorted by sensing start. On top of the
        catalogue fields it has the columns 'Satellite', 'SensingStart' and
        'SensingEnd'. An empty frame with just those three columns is
        returned when the catalogue has no match.

    Raises:
    requests.HTTPError: If the catalogue answers with an HTTP error status.
    """
    # The catalogue is queried over a wider window (one day before to two days
    # after) and narrowed to the requested day further down.
    window_start = (date - pd.Timedelta(days=1)).strftime('%Y-%m-%dT%H:%M:%SZ')
    window_end = (date + pd.Timedelta(days=2)).strftime('%Y-%m-%dT%H:%M:%SZ')
    query_url = (
        "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="
        "Collection/Name eq 'SENTINEL-3'"
        " and Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType' and att/OData.CSC.StringAttribute/Value eq 'SR_2_LAN_SI')"
        " and Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'timeliness' and att/OData.CSC.StringAttribute/Value eq 'NT')"
        f" and ContentDate/Start gt {window_start}"
        f" and ContentDate/End lt {window_end}"
        "&$top=1000"
    )

    response = requests.get(query_url)
    # Fail here with the HTTP status instead of later with an opaque KeyError.
    response.raise_for_status()
    # Named 'payload' so the module-level 'json' import is not shadowed.
    payload = response.json()

    results_df = pd.DataFrame.from_dict(payload['value'])
    if results_df.empty:
        # No products: hand back an empty frame that still has the derived columns.
        return pd.DataFrame(columns=['Satellite', 'SensingStart', 'SensingEnd'])

    results_df['Satellite'] = results_df['Name'].str[:3]
    # Timestamps are parsed one by one so every entry is handled independently
    # of the exact format of its neighbours.
    results_df['SensingStart'] = [pd.to_datetime(content['Start']) for content in results_df['ContentDate']]
    results_df['SensingEnd'] = [pd.to_datetime(content['End']) for content in results_df['ContentDate']]

    # Keep only products whose sensing period touches the requested day.
    touches_day = (results_df['SensingEnd'] >= date) & (results_df['SensingStart'] <= date + pd.Timedelta(days=1))
    return results_df[touches_day].sort_values(by='SensingStart')


#=============================================================================================================================================================#

def filter_duplicate_products_versions(results_df, keep_latest=True):
    """
    Filter Sentinel-3 product dataframe to remove duplicate versions of files.
    By default, we keep the latest version of the file. E.g., where an operational version
    and a reprocessed version exists, we keep the reprocessed version.

    Two rows count as versions of the same file when the first 47 characters
    of their 'Name' are identical; "latest" is decided by 'ModificationDate'.

    Parameters:
    results_df (pandas.DataFrame): Product table with at least the columns
        'Name', 'ModificationDate' and 'SensingStart'. It is not modified.
    keep_latest (bool): Keep the most recently modified version when true,
        the earliest modified one otherwise.

    Returns:
    pandas.DataFrame: New frame with one row per file, sorted by 'SensingStart'.
    """
    keep = 'last' if keep_latest else 'first'

    # assign() works on a copy, so the temporary helper column never leaks
    # into the DataFrame owned by the caller.
    return (
        results_df
        .assign(name_snippet=results_df['Name'].str[:47])
        .sort_values(by='ModificationDate')
        .drop_duplicates(subset=['name_snippet'], keep=keep)
        .drop(columns=['name_snippet'])
        .sort_values(by='SensingStart')
    )

def find_overlapping_sar(olci_filename, search_results_df):
    """
    Placeholder selection of the SAR sea-ice products in a search result.

    No temporal comparison is made here: every row whose 'Name' contains both
    'S3' and 'SR_2_LAN_SI' is returned.
    NOTE: a second definition of find_overlapping_sar further down this file
    replaces this one once that later definition has run.

    Parameters:
    olci_filename (str): The OLCI filename; field 7 of its '_'-separated
        parts must be a YYYYMMDDTHHMMSS stamp.
    search_results_df (pandas.DataFrame): Search results with a 'Name' column.

    Returns:
    pandas.DataFrame: The rows of search_results_df that pass the name check.
    """
    # The stamp is parsed only for its side effect: a filename without a valid
    # stamp in field 7 still raises, exactly as before.
    datetime.strptime(olci_filename.split('_')[7], '%Y%m%dT%H%M%S')

    def _is_s3_sea_ice_product(product_name):
        return 'S3' in product_name and 'SR_2_LAN_SI' in product_name

    name_mask = search_results_df['Name'].apply(_is_s3_sea_ice_product)
    return search_results_df[name_mask]


def get_date_from_olci_filename(olci_filename):
    """
    Read the calendar date out of an OLCI filename.

    The filename is split on '_' and the first eight characters (YYYYMMDD)
    of field 7 are interpreted as the date.

    Parameters:
    olci_filename (str): The OLCI filename.

    Returns:
    datetime.date: The date found in the filename.
    """
    sensing_stamp = olci_filename.split('_')[7]
    day_digits = sensing_stamp[:8]  # YYYYMMDD; any time-of-day part is dropped
    parsed = pd.to_datetime(day_digits, format='%Y%m%d')
    return parsed.date()

def get_overlapping_sar_file(olci_filename, get_S3_SI_search_results_df, token):
    """
    Collect the SAR products whose sensing period overlaps an OLCI product.

    The catalogue is searched for the OLCI acquisition date and for the day
    before and after it. Each day's results are reduced to one version per
    file and then passed to find_overlapping_sar.

    Parameters:
    olci_filename (str): The OLCI filename.
    get_S3_SI_search_results_df (callable): Search function; it is called with
        a UTC-localised pandas Timestamp and must return a DataFrame.
    token: Not used by this function; kept so existing calls keep working.

    Returns:
    pandas.DataFrame: The matching rows with a fresh 0..n-1 index, or an empty
        DataFrame when nothing overlaps.
    """
    olci_date = get_date_from_olci_filename(olci_filename)
    one_day = pd.Timedelta(days=1)
    search_days = pd.date_range(olci_date - one_day, olci_date + one_day)

    matches = []  # one DataFrame per day that produced overlapping products

    for day in search_days:
        day = day.tz_localize('UTC')
        search_results_df = get_S3_SI_search_results_df(day)

        if search_results_df.empty:
            print(f"No SAR data found for date: {day}")
            continue

        filtered_df = filter_duplicate_products_versions(search_results_df)
        if filtered_df.empty:
            print(f"No SAR data after filtering for date: {day}")
            continue

        overlapping_sar = find_overlapping_sar(olci_filename, filtered_df)
        if not overlapping_sar.empty:
            matches.append(overlapping_sar)

    if not matches:
        return pd.DataFrame()

    all_overlapping_sar = pd.concat(matches, ignore_index=True)
    # Searches for neighbouring days can return the same product (e.g. one
    # sensed across midnight), so keep a single row per product identifier.
    if 'Id' in all_overlapping_sar.columns:
        all_overlapping_sar = all_overlapping_sar.drop_duplicates(subset='Id').reset_index(drop=True)
    return all_overlapping_sar

from datetime import datetime

def check_overlap(row, olci_filename, olci_start, olci_end):
    """
    Decide whether a SAR product matches an OLCI product in time and platform.

    Parameters:
    row (Series): A row from the SAR search results DataFrame; its
        'ContentDate' entry holds 'Start' and 'End' strings and its 'Name'
        entry holds the product name.
    olci_filename (str): The OLCI filename; the text before its first '_' is
        taken as the satellite identifier (e.g., S3A or S3B).
    olci_start (datetime): Start time of OLCI sensing period.
    olci_end (datetime): End time of OLCI sensing period.

    Returns:
    bool: True when the two sensing periods overlap (touching end points
        count) and the satellite identifier occurs in the SAR product name,
        False otherwise.
    """
    sar_stamp_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    mission_id = olci_filename.split('_')[0]

    content_date = row['ContentDate']
    sar_start = datetime.strptime(content_date['Start'], sar_stamp_format)
    sar_end = datetime.strptime(content_date['End'], sar_stamp_format)

    # The periods are disjoint when the SAR pass starts after the OLCI scene
    # ends or finishes before the OLCI scene starts.
    periods_disjoint = sar_start > olci_end or sar_end < olci_start
    name_has_mission = mission_id in row['Name']

    return (not periods_disjoint) and name_has_mission

# Adjust the find_overlapping_sar function to include the OLCI filename in the check_overlap call
def find_overlapping_sar(olci_filename, search_results_df):
    """
    Select the SAR products that check_overlap accepts for an OLCI product.

    Parameters:
    olci_filename (str): The OLCI filename; fields 7 and 8 of its
        '_'-separated parts are read as the sensing start and end
        (YYYYMMDDTHHMMSS).
    search_results_df (pandas.DataFrame): SAR search results, one product per row.

    Returns:
    pandas.DataFrame: The rows of search_results_df for which check_overlap
        returns True.
    """
    stamp_format = '%Y%m%dT%H%M%S'
    name_fields = olci_filename.split('_')
    window_start = datetime.strptime(name_fields[7], stamp_format)
    window_end = datetime.strptime(name_fields[8], stamp_format)

    def _matches_olci(product_row):
        return check_overlap(product_row, olci_filename, window_start, window_end)

    # Row-wise evaluation yields the boolean mask used for the selection.
    overlap_mask = search_results_df.apply(_matches_olci, axis=1)
    return search_results_df[overlap_mask]


In [None]:
# Example usage
#
# username, password, s3_olci_data and download_single_product are not defined
# in this section; they must already exist from earlier cells.

# Log in; the refresh token can later be handed to get_new_access_token.
token, refresh_token = get_access_token(username, password)
olci_filename = s3_olci_data['Name'][0] # This is an example, which you should replace with the one you are interested in.
# Search the catalogue for SAR products overlapping the chosen OLCI product.
overlapped_df = get_overlapping_sar_file(olci_filename, get_S3_SI_search_results_df, token)
# Take the first match.
# NOTE(review): get_overlapping_sar_file returns an empty DataFrame when nothing
# overlaps, in which case the two lookups below fail - consider checking
# overlapped_df.empty first.
product_id = overlapped_df['Id'].iloc[0]
file_name = overlapped_df['Name'].iloc[0]
# Destination folder passed to download_single_product.
download_dir = '/content/drive/MyDrive/GEOL0069/Week4'
download_single_product(product_id, file_name, token, download_dir)

# We've now gathered Sentinel-2 optical data, Sentinel-3 OLCI, and altimetry data,
# enabling us to advance into a comprehensive analysis leveraging their combined strengths.