In [7]:
import os
import csv
from osgeo import gdal
import re

# Base directory to search for raster files; each immediate subfolder of it
# is scanned separately further down in this cell.
base_dir = '/Volumes/Expansion 1/DTM_UNZIP/'

# Output CSV file (relative path, so it is created in the working directory)
output_csv = "raster_metadata.csv"

def parse_projection(projection_str):
    """Extract key projection parameters from a WKT projection string.

    The parser is regex based and targets WKT1-style strings. Every node may
    optionally carry trailing children such as ``AUTHORITY[...]``; those are
    tolerated rather than required to be absent.

    Args:
        projection_str: WKT text describing a coordinate reference system.
            ``None`` or an empty string yields an empty result.

    Returns:
        A dict containing only the keys that could be extracted, all values
        being strings taken verbatim from the WKT:

        * ``'EPSG Code'`` - code of the AUTHORITY node that closes the
          outermost WKT node (the CRS itself, not a nested datum/spheroid).
        * ``'Projection'`` - name in the first PROJECTION node.
        * ``'Spheroid'``, ``'Semi-major Axis'``, ``'Inverse Flattening'`` -
          fields of the first SPHEROID node.
        * ``'Datum'`` - name in the first DATUM node.
        * ``'Unit'``, ``'Unit Conversion Factor'`` - fields of the first
          UNIT node in the string. For a projected CRS this is usually the
          angular unit of the embedded geographic CRS, because that node
          appears before the linear unit.
    """
    projection_dict = {}
    if not projection_str:
        return projection_dict

    # Plain or signed decimal with an optional exponent.
    number = r'(-?[\d.]+(?:[eE][-+]?\d+)?)'

    # The CRS's own authority is the last child of the root node, so it must
    # be followed only by the bracket that closes the root. Taking the first
    # AUTHORITY in the string would return a nested code (e.g. the spheroid).
    epsg_match = re.search(
        r'AUTHORITY\["EPSG",\s*"(\d+)"\]\s*\]\s*$', projection_str
    )
    if epsg_match:
        projection_dict['EPSG Code'] = epsg_match.group(1)

    # Capture only the quoted name so a trailing AUTHORITY child is ignored.
    proj_match = re.search(r'PROJECTION\["([^"]+)"', projection_str)
    if proj_match:
        projection_dict['Projection'] = proj_match.group(1)

    # The two numbers may be followed by "]" or by ",AUTHORITY[...]".
    spheroid_match = re.search(
        r'SPHEROID\["([^"]+)",\s*' + number + r',\s*' + number + r'\s*[,\]]',
        projection_str,
    )
    if spheroid_match:
        projection_dict['Spheroid'] = spheroid_match.group(1)
        projection_dict['Semi-major Axis'] = spheroid_match.group(2)
        projection_dict['Inverse Flattening'] = spheroid_match.group(3)

    # A DATUM node always has children after its name, so do not demand a
    # closing bracket. The look-behind keeps VERT_DATUM from matching.
    datum_match = re.search(r'(?<![A-Za-z_])DATUM\["([^"]+)"', projection_str)
    if datum_match:
        projection_dict['Datum'] = datum_match.group(1)

    # The conversion factor may be followed by "]" or by ",AUTHORITY[...]".
    unit_match = re.search(
        r'UNIT\["([^"]+)",\s*' + number + r'\s*[,\]]', projection_str
    )
    if unit_match:
        projection_dict['Unit'] = unit_match.group(1)
        projection_dict['Unit Conversion Factor'] = unit_match.group(2)

    return projection_dict

def save_raster_metadata_to_csv(metadata, csv_file):
    """Write raster metadata records to a CSV file, one row per record.

    Args:
        metadata: Iterable of dicts keyed by the column names listed below.
            Keys missing from a record are written as empty cells.
        csv_file: Path of the CSV file to create; an existing file is
            overwritten.
    """
    # Fixed column order: file path, parsed projection fields, geotransform.
    columns = (
        "File Path",
        "EPSG Code",
        "Projection",
        "Spheroid",
        "Semi-major Axis",
        "Inverse Flattening",
        "Datum",
        "Unit",
        "Unit Conversion Factor",
        "GeoTransform Top Left X",
        "GeoTransform Pixel Size X",
        "GeoTransform Rotation X",
        "GeoTransform Top Left Y",
        "GeoTransform Rotation Y",
        "GeoTransform Pixel Size Y",
    )

    # newline='' lets the csv module control line endings itself.
    with open(csv_file, mode='w', newline='') as handle:
        table = csv.DictWriter(handle, fieldnames=columns)
        table.writeheader()
        table.writerows(metadata)

def get_raster_metadata(file_path):
    """Collect projection and geotransform metadata for one raster file.

    Args:
        file_path: Path handed to ``gdal.Open``.

    Returns:
        A dict holding the file path, the six geotransform coefficients under
        their ``"GeoTransform ..."`` labels, and whatever fields
        ``parse_projection`` extracts from the dataset's projection string.
        ``None`` is returned (after printing a message) when ``gdal.Open``
        yields ``None``.
    """
    raster = gdal.Open(file_path)
    if raster is None:
        print(f"Failed to open {file_path}")
        return None

    wkt_text = raster.GetProjection()
    coefficients = raster.GetGeoTransform()
    crs_fields = parse_projection(wkt_text)

    # Labels in the same order as the geotransform coefficients (index 0..5).
    coefficient_labels = (
        "GeoTransform Top Left X",
        "GeoTransform Pixel Size X",
        "GeoTransform Rotation X",
        "GeoTransform Top Left Y",
        "GeoTransform Rotation Y",
        "GeoTransform Pixel Size Y",
    )

    record = {"File Path": file_path}
    for position, label in enumerate(coefficient_labels):
        record[label] = coefficients[position]

    # Projection fields are layered on top of the geotransform entries.
    record.update(crs_fields)
    return record

def process_first_raster_in_folder(folder, metadata_list):
    """Extract metadata from the first raster found under ``folder``.

    The folder tree is walked with ``os.walk``; the first file whose name
    ends in ``.img``, ``.tif`` or ``.tiff`` (compared case-insensitively, so
    ``.TIF`` also counts) is passed to ``get_raster_metadata``. The search
    stops at that file even if it could not be opened.

    Args:
        folder: Directory to search recursively.
        metadata_list: List that receives the metadata dict of the raster,
            when one is found and opened. It is mutated in place.

    Returns:
        None. Progress and the "not found" case are reported via ``print``.
    """
    raster_suffixes = (".img", ".tif", ".tiff")
    print(f"Checking folder: {folder}")

    for root, _dirs, files in os.walk(folder):
        for file in files:
            # Lower-case first so upper/mixed-case extensions are not missed.
            if not file.lower().endswith(raster_suffixes):
                continue

            file_path = os.path.join(root, file)
            print(f"Found file: {file_path}")
            metadata = get_raster_metadata(file_path)
            if metadata:
                metadata_list.append(metadata)
            return  # Only the first matching raster is sampled

    # Reaching this point means the walk finished without any match.
    print(f"No .img or .tif file found in {folder}\n")

# Automatically get all folders under the base directory
def get_all_subfolders(base_directory):
    """Return the paths of the directories located directly in ``base_directory``.

    Each returned path is ``base_directory`` joined with the entry name;
    entries that are not directories are left out. The order follows
    ``os.listdir``.
    """
    subfolders = []
    for entry_name in os.listdir(base_directory):
        candidate = os.path.join(base_directory, entry_name)
        if os.path.isdir(candidate):
            subfolders.append(candidate)
    return subfolders

# Accumulates one metadata dict per raster that could be opened
raster_metadata_list = []

# Each immediate subdirectory of base_dir is handled as its own folder
folders = get_all_subfolders(base_dir)

# Sample a single raster per folder; results are appended to the shared list
for folder in folders:
    process_first_raster_in_folder(folder, raster_metadata_list)

# Only create the CSV when at least one raster produced metadata
if not raster_metadata_list:
    print("No metadata to save.")
else:
    save_raster_metadata_to_csv(raster_metadata_list, output_csv)
    print(f"Metadata saved to {output_csv}")


Checking folder: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-A
Found file: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-A/1km182830488802022LBELLEVILLE_DTM.tif
Checking folder: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-B
Found file: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-B/1km183050486602022LBELLEVILLE_DTM.tif
Checking folder: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-C
Found file: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-C/1km183190485802022LBELLEVILLE_DTM.tif
Checking folder: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-D
Found file: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-D/1km183290485602022LBELLEVILLE_DTM.tif
Checking folder: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-E
Found file: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-E/1km182800489802022LBELLEVILLE_DTM.tif
Checking folder: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-P
Found file: /Volumes/Expansion 1/DTM_UNZIP/Belleville-DTM-P/1km182700496302022LBELLEVILLE_DTM.tif
Checking folder: /Volu

In [10]:
import os
from osgeo import gdal, osr

# Root directory whose immediate subfolders are inspected
input_dir = '/Volumes/Expansion 1/DTM_UNZIP'

# Report file; a relative path, so it lands in the current directory
output_file = 'spatial_reference_info.txt'

# The report is opened once in write mode, so every run starts from an empty file
with open(output_file, 'w') as txt_file:
    # Immediate subdirectories of the input directory
    folders = [
        os.path.join(input_dir, d) for d in os.listdir(input_dir)
        if os.path.isdir(os.path.join(input_dir, d))
    ]

    for folder in folders:
        # Only the folder's own entries are considered (no recursion)
        files = os.listdir(folder)
        # Candidate rasters, matched on extension regardless of case
        tif_img_files = [
            f for f in files if f.lower().endswith(('.tif', '.tiff', '.img'))
        ]
        if not tif_img_files:
            print(f"No .tif or .img files found in {folder}")
            continue

        # One representative raster per folder: the first match
        first_file = tif_img_files[0]
        filepath = os.path.join(folder, first_file)

        dataset = gdal.Open(filepath)
        if not dataset:
            print(f"Failed to open {filepath}")
            continue

        # Projection string and geotransform as reported by the dataset
        projection = dataset.GetProjection()
        geotransform = dataset.GetGeoTransform()

        # Round-trip through osr to obtain an indented, readable WKT
        srs = osr.SpatialReference(wkt=projection)
        wkt = srs.ExportToPrettyWkt()

        # One report entry: header lines, WKT, geotransform, separator
        content_lines = [
            f"Folder: {os.path.basename(folder)}",
            f"File: {first_file}",
            "\n--- Spatial Reference (WKT) ---\n",
            wkt,
            "\n--- GeoTransform ---\n",
            ', '.join(map(str, geotransform)),
            "\n" + "="*60 + "\n"  # Separator between entries
        ]
        content = '\n'.join(content_lines)

        txt_file.write(content)
        print(f"Appended spatial reference information for {first_file} to {output_file}")


Appended spatial reference information for 1km182830488802022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km183050486602022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km183190485802022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km183290485602022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km182800489802022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km182700496302022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km183010488902022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km183190489302022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
Appended spatial reference information for 1km182810492602022LBELLEVILLE_DTM.tif to spatial_reference_info.txt
A