In [7]:
import rasterio
import os
from datetime import datetime
import pandas as pd
import csv



def extract_geotiff_info(geotiff_path):
    """
    Extract geographic extent (min/max lat/lon) and date from a GeoTIFF file.

    Args:
        geotiff_path (str): Path to the GeoTIFF file.

    Returns:
        dict: A dictionary containing min/max lat/lon and the parsed date.
    """
    # Open the GeoTIFF file
    with rasterio.open(geotiff_path) as src:
        # Get the bounding box (left, bottom, right, top)
        bounds = src.bounds
        min_lon, min_lat, max_lon, max_lat = bounds.left, bounds.bottom, bounds.right, bounds.top

    # Extract the filename
    filename = os.path.basename(geotiff_path)

    # Parse the date from the filename (assuming yyyymmdd format)
    date_str = filename.split("_")[-4][:8]
    # for part in filename.split('_'):
    #     if part.isdigit() and len(part) == 8:
    #         date_str = part
    #         break

    # if not date_str:
    #     raise ValueError("No date in yyyymmdd format found in the filename.")

    # Convert the date string to year, month, and day
    date_obj = datetime.strptime(date_str, "%Y%m%d")
    year, month, day = date_obj.year, date_obj.month, date_obj.day

    # Return the information
    return {
        "filename": filename,
        "min_lat": min_lat,
        "max_lat": max_lat,
        "min_lon": min_lon,
        "max_lon": max_lon,
        "year": year,
        "month": month,
        "day": day
    }


def save_geotiff_info_to_csv(geotiff_paths, output_csv):
    """
    Extract geographic extent and date information from GeoTIFF files
    and save them to a CSV file.

    Args:
        geotiff_paths (list of str): List of GeoTIFF file paths.
        output_csv (str): Path to the output CSV file.

    Returns:
        None
    """
    # CSV header
    header = ["filename", "min_lat", "max_lat", "min_lon", "max_lon", "year", "month", "day"]

    # Collect data from each GeoTIFF
    data = []
    for geotiff_path in geotiff_paths:
        try:
            info = extract_geotiff_info(geotiff_path)
            data.append(info)
        except Exception as e:
            print(f"Error processing {geotiff_path}: {e}")

    # Write to CSV
    with open(output_csv, mode="w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()
        writer.writerows(data)

    print(f"GeoTIFF information saved to {output_csv}")

# Example usage
# The `geotiff_files` list is a placeholder for storing paths to GeoTIFF files. In the example provided, it is commented out and contains two sample file paths as strings.
# geotiff_files = [
#     "path/to/your_geotiff_20221231.tif",
#     "path/to/another_geotiff_20230101.tif"
# ]  # Replace with your file paths
output_csv_path = "/usr/workspace/lazin1/anaconda_dane/envs/RAPID/Codes/geotiff_info_for_SM.csv"  # Replace with your desired output file path




events_file = '/usr/workspace/lazin1/anaconda_dane/envs/RAPID/EVENTS/combined.csv'
combined_df = pd.read_csv(events_file, header=None) 
# for idx, raster_path in enumerate(combined_df[0]): #events = [raster_path.split("/")[-1][:-4] 
#     # print(raster_path)
#     # # Example usage
#     # info = extract_geotiff_info(raster_path)
#     # print(info)
save_geotiff_info_to_csv(combined_df[0], output_csv_path)



GeoTIFF information saved to /usr/workspace/lazin1/anaconda_dane/envs/RAPID/Codes/geotiff_info_for_SM.csv


# Sonoma

In [1]:
import rasterio
import os
from datetime import datetime, timedelta
import pandas as pd
import csv
from collections import defaultdict



def extract_geotiff_info(geotiff_path):
    
    """
    Extract geographic extent (min/max lat/lon) and date from a GeoTIFF file.

    Args:
        geotiff_path (str): Path to the GeoTIFF file.

    Returns:
        dict: A dictionary containing min/max lat/lon and the parsed date.
    """
    # Open the GeoTIFF file
    with rasterio.open(geotiff_path) as src:
        # Get the bounding box (left, bottom, right, top)
        bounds = src.bounds
        min_lon, min_lat, max_lon, max_lat = bounds.left, bounds.bottom, bounds.right, bounds.top
    # Extract the filename
    filename = geotiff_path

    # Parse the date from the filename (assuming yyyymmdd format)
    
    s = filename.split("/")[-2].split("_")[-2]
    e = filename.split("/")[-2].split("_")[-1]
    
    
    start_dt = datetime.strptime(s, "%Y-%m-%d")
    # start_date = start_dt.strftime( "%Y-%m-%d %H")
    end_dt = datetime.strptime(e, "%Y-%m-%d")
    # end_date = end_dt.strftime( "%Y-%m-%d %H")
    
    days = (end_dt - start_dt).days
    

    info_list = []
    for d in range(days+1):
        
        date_dt = datetime.fromtimestamp(start_dt.timestamp()) + timedelta(days=d)
        print(date_dt)

    
    
        date_str = date_dt.strftime( "%Y-%m-%d") #filename.split("/")[-2].split("_")[-2]

    # for part in filename.split('_'):
    #     if part.isdigit() and len(part) == 8:
    #         date_str = part
    #         break

    # if not date_str:
    #     raise ValueError("No date in yyyymmdd format found in the filename.")

        # Convert the date string to year, month, and day
        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
        year, month, day = date_obj.year, date_obj.month, date_obj.day
        # info = filename, min_lat, max_lat, min_lon,  max_lon,  year, month,  day

        info_list.append({
        "filename": f"{date_str}.tif",
        "min_lat": min_lat,
        "max_lat": max_lat,
        "min_lon": min_lon,
        "max_lon": max_lon,
        "year": year,
        "month": month,
        "day": day
        })

    return info_list


    # return {
    #         "filename": filename,
    #         "min_lat": min_lat,
    #         "max_lat": max_lat,
    #         "min_lon": min_lon,
    #         "max_lon": max_lon,
    #         "year": year,
    #         "month": month,
    #         "day": day
    #         }


def save_geotiff_info_to_csv(geotiff_paths, output_csv):
    """
    Extract geographic extent and date information from GeoTIFF files
    and save them to a CSV file.

    Args:
        geotiff_paths (list of str): List of GeoTIFF file paths.
        output_csv (str): Path to the output CSV file.

    Returns:
        None
    """
    # CSV header
    header = ["filename", "min_lat", "max_lat", "min_lon", "max_lon", "year", "month", "day"]

    # Collect data from each GeoTIFF
    all_rows = []
    for geotiff_path in geotiff_paths:
        try:
            info_rows = extract_geotiff_info(geotiff_path)
            # print(type(info), info)
            
            all_rows.extend(info_rows)
        except Exception as e:
            print(f"Error processing {geotiff_path}: {e}")

    # Write to CSV
    with open(output_csv, mode="w", newline="") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=header)
        writer.writeheader()
        writer.writerows(all_rows)

    print(f"GeoTIFF information saved to {output_csv}")

# Example usage
# The `geotiff_files` list is a placeholder for storing paths to GeoTIFF files. In the example provided, it is commented out and contains two sample file paths as strings.
# geotiff_files = [
#     "path/to/your_geotiff_20221231.tif",
#     "path/to/another_geotiff_20230101.tif"
# ]  # Replace with your file paths
output_csv_path = "/usr/workspace/lazin1/anaconda_dane/envs/RAPID/Codes/geotiff_info_for_SM_Sonoma_event3.csv"  # Replace with your desired output file path




events_file = '/usr/workspace/lazin1/anaconda_dane/envs/RAPID/EVENTS/combined_test.csv'
combined_df = pd.read_csv(events_file, header=None) 
# for idx, raster_path in enumerate(combined_df[0]): #events = [raster_path.split("/")[-1][:-4] 
#     # print(raster_path)
#     # # Example usage
#     # info = extract_geotiff_info(raster_path)
#     # print(info)
save_geotiff_info_to_csv(combined_df[0], output_csv_path)



2017-01-09 00:00:00
2017-01-10 00:00:00
2017-01-11 00:00:00
2017-01-12 00:00:00
2017-01-13 00:00:00
2017-01-14 00:00:00
2017-01-15 00:00:00
2017-01-16 00:00:00
2017-01-17 00:00:00
2017-01-18 00:00:00
2017-01-19 00:00:00
2017-01-20 00:00:00
2017-01-21 00:00:00
2017-01-22 00:00:00
2017-01-23 00:00:00
2017-01-24 00:00:00
2017-01-25 00:00:00
2017-01-26 00:00:00
2017-01-27 00:00:00
2017-01-28 00:00:00
2017-01-29 00:00:00
2017-01-30 00:00:00
2017-01-31 00:00:00
2017-02-01 00:00:00
2017-02-02 00:00:00
2017-02-03 00:00:00
2017-02-04 00:00:00
GeoTIFF information saved to /usr/workspace/lazin1/anaconda_dane/envs/RAPID/Codes/geotiff_info_for_SM_Sonoma_event3.csv


In [13]:
p = f"/p/lustre1/lazin1/flood/Sonoma/RussianRiver_2019-02-25_2019-03-04/depth-1-60.tif"
start_date = p.split("/")[-2].split("_")[-2]
end_date = p.split("/")[-2].split("_")[-1]
print(start_date, end_date)

start_dt = datetime.strptime(start_date, "%Y-%m-%d")
# start_date = start_dt.strftime( "%Y-%m-%d %H")
end_dt = datetime.strptime(end_date, "%Y-%m-%d")
# end_date = end_dt.strftime( "%Y-%m-%d %H")

delta = end_dt - start_dt
delta.days

date_obj = datetime.strptime(start_date, "%Y-%m-%d")
date_obj.year


with rasterio.open(p) as src:
    # Get the bounding box (left, bottom, right, top)
    bounds = src.bounds
    min_lon, min_lat, max_lon, max_lat = bounds.left, bounds.bottom, bounds.right, bounds.top
    
print(min_lon, min_lat, max_lon, max_lat)

2019-02-25 2019-03-04
-123.387438304 38.298644454 -122.5294855530917 39.39918691


In [3]:
combined_df[0]

0     /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
1     /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
2     /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
3     /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
4     /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
                            ...                        
59    /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
60    /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
61    /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
62    /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
63    /p/lustre1/lazin1/RAPID_Archive_Flood_Maps/201...
Name: 0, Length: 64, dtype: object

In [16]:
os.path.basename(raster_path).split("_")[-4][:8]

'20180826'