# TIFF Resampling Tool with Parallel Processing

This notebook resamples TIFF files from a pixel size of 0.000277777777777778157° (≈30 m) to 0.000833333° (≈90 m), using parallel processing for better performance.

Features:
- Parallel processing for faster execution
- Preserves the original data format
- Maintains original CRS
- Names output files with the original name plus "_90m" suffix

In [11]:
# Import necessary libraries
import os
import time
import multiprocessing
import pandas as pd
from IPython.display import display

# Import our custom resampling module
import tiff_resampler

## Set Parameters

In [12]:
# --- User-editable inputs ---

# Directory that is searched for source TIFF files.
# Point this at your own data before running the notebook.
folder_path = "../../data/HZD/TUN/FLUVIAL_UNDEFENDED/2020"

# Pixel size requested for the resampled rasters
# (described as ≈90 m in the notebook introduction).
target_resolution = 0.000833333

# Text appended to each original file name to form the output name.
output_suffix = '_90m'

# Output location: a 'resampled' sub-folder of the input folder
# (this is the folder the verification cell inspects later on).
output_folder = os.path.join(folder_path, 'resampled')

# Remember whether the directory was already present purely so the right
# message can be printed; the creation itself no longer depends on this check.
output_folder_preexisting = os.path.isdir(output_folder)

# exist_ok=True makes the cell safe to re-run and removes the
# check-then-create race. If the path exists but is a regular file,
# makedirs raises FileExistsError here instead of the cell wrongly
# reporting "already exists" and failing later.
os.makedirs(output_folder, exist_ok=True)

if output_folder_preexisting:
    print(f"Output folder already exists: {output_folder}")
else:
    print(f"Created output folder: {output_folder}")

# Number of worker processes for the parallel run: one fewer than the number
# of CPU cores so one core stays free for the rest of the system.
# Clamp to at least 1: on a single-core host cpu_count() - 1 would be 0, and
# process pools (e.g. concurrent.futures.ProcessPoolExecutor) reject a
# worker count that is not positive.
# NOTE(review): the original comment said "None = use all available cores";
# confirm that against tiff_resampler.resample_tiffs_parallel before relying on it.
# You can adjust this based on your system capabilities.
num_workers = max(1, multiprocessing.cpu_count() - 1)
print(f"Using {num_workers} worker processes for parallel execution")

Created output folder: ../../data/HZD/TUN/FLUVIAL_UNDEFENDED/2020\resampled
Using 23 worker processes for parallel execution


## Find TIFF Files

In [13]:
# Ask the resampling module for every TIFF it finds under the input folder
tiff_files = tiff_resampler.find_tiff_files(folder_path)

# Report how many were found, then list them by bare file name
print(f"Found {len(tiff_files)} TIFF files:")
for file in tiff_files:
    file_name = os.path.basename(file)
    print(f"  - {file_name}")

Found 8 TIFF files:
  - 1in10.tif
  - 1in100.tif
  - 1in1000.tif
  - 1in20.tif
  - 1in200.tif
  - 1in5.tif
  - 1in50.tif
  - 1in500.tif


## Check Resolutions of Found Files

In [14]:
# Gather one record per file whose resolution could be read.
# A failure on one file is reported and skipped so the rest are still checked.
resolution_data = []
for file in tiff_files:
    file_name = os.path.basename(file)
    try:
        resolution = tiff_resampler.check_resolution(file)
        x_res, y_res = resolution[0], resolution[1]
        resolution_data.append(
            dict(file=file_name, x_resolution=x_res, y_resolution=y_res)
        )
        print(f"{file_name}: {resolution}")
    except Exception as e:
        print(f"Error reading {file_name}: {e}")

# Show the collected records as a table, but only when there is something to show
if len(resolution_data) > 0:
    resolution_table = pd.DataFrame(resolution_data)
    display(resolution_table)

1in10.tif: (0.00027777777777777816, 0.00027777777777778)
1in100.tif: (0.00027777777777777816, 0.00027777777777778)
1in1000.tif: (0.00027777777777777816, 0.00027777777777778)
1in20.tif: (0.00027777777777777816, 0.00027777777777778)
1in200.tif: (0.00027777777777777816, 0.00027777777777778)
1in5.tif: (0.00027777777777777816, 0.00027777777777778)
1in50.tif: (0.00027777777777777816, 0.00027777777777778)
1in500.tif: (0.00027777777777777816, 0.00027777777777778)


Unnamed: 0,file,x_resolution,y_resolution
0,1in10.tif,0.000278,0.000278
1,1in100.tif,0.000278,0.000278
2,1in1000.tif,0.000278,0.000278
3,1in20.tif,0.000278,0.000278
4,1in200.tif,0.000278,0.000278
5,1in5.tif,0.000278,0.000278
6,1in50.tif,0.000278,0.000278
7,1in500.tif,0.000278,0.000278


## Resample Files in Parallel

In [15]:
# Measure performance with a monotonic, high-resolution clock.
# time.perf_counter() is unaffected by system clock adjustments, so the
# elapsed value cannot come out skewed or negative the way a wall-clock
# difference (time.time()) can.
start_time = time.perf_counter()

# Process files in parallel
print(f"Starting parallel processing of {len(tiff_files)} files with {num_workers} workers...")
results = tiff_resampler.resample_tiffs_parallel(
    folder_path=folder_path,
    target_resolution=target_resolution,
    output_suffix=output_suffix,
    max_workers=num_workers
)

# Calculate elapsed time (seconds); reused by the performance analysis cell
elapsed_time = time.perf_counter() - start_time
print(f"Processing completed in {elapsed_time:.2f} seconds")

Starting parallel processing of 8 files with 23 workers...
Processing completed in 5.60 seconds


## Results Summary

In [None]:
# Tally outcomes: any entry whose status is not exactly 'Success' is a failure
successes = len([r for r in results if r['status'] == 'Success'])
failures = len(results) - successes

# Per-file report, with a horizontal rule after every entry
rule = "-" * 80
print("\nProcessing Results:")
print(rule)
for result in results:
    if result['status'] != 'Success':
        # For failures the status field carries the text that is shown
        print(f"✗ {result['input_file']}: {result['status']}")
    else:
        print(f"✓ {result['input_file']} -> {result['output_file']}")
        print(f"  Original resolution: {result['original_resolution']}")
        print(f"  New resolution: {result['new_resolution']}")
    print(rule)

print(f"\nSummary: {successes} files processed successfully, {failures} failed")

# Same information as a table; resolutions are only shown for successful files
if results:
    summary_rows = []
    for r in results:
        succeeded = r['status'] == 'Success'
        summary_rows.append({
            'File': r['input_file'],
            'Output': r.get('output_file', 'N/A'),
            'Status': r['status'],
            'Original Resolution': r.get('original_resolution', 'N/A') if succeeded else 'N/A',
            'New Resolution': r.get('new_resolution', 'N/A') if succeeded else 'N/A',
        })
    df_results = pd.DataFrame(summary_rows)

    display(df_results)

## Verify Output Files

In [None]:
# Have the resampling module check every file in the output folder against
# the requested target resolution
verification_results = tiff_resampler.verify_output_files(output_folder, target_resolution)

print(f"\nVerifying {len(verification_results)} output files:")
for result in verification_results:
    if result['status'] != 'Verified':
        # Could not be verified: show the status text instead of a resolution
        print(f"✗ {result['file']}: {result['status']}")
        continue
    match_status = "✓" if result['matches_target'] else "✗"
    print(f"{match_status} {result['file']}: Resolution = {result['resolution']}")

# Tabular view; resolution and match flag are only meaningful for verified files
if verification_results:
    verify_rows = []
    for r in verification_results:
        verified = r['status'] == 'Verified'
        verify_rows.append({
            'File': r['file'],
            'Resolution': r.get('resolution', 'N/A') if verified else 'N/A',
            'Matches Target': r.get('matches_target', False) if verified else False,
            'Status': r['status'],
        })
    df_verify = pd.DataFrame(verify_rows)

    display(df_verify)

## Performance Analysis

In [None]:
# Calculate some performance metrics from the wall-clock time of the parallel run
if len(tiff_files) > 0 and elapsed_time > 0:
    file_count = len(tiff_files)

    # Amortised wall-clock time per file (not the time one file takes on its own)
    avg_time_per_file = elapsed_time / file_count
    print(f"Average processing time per file: {avg_time_per_file:.2f} seconds")
    print(f"Files processed per second: {file_count / elapsed_time:.2f}")

    # The previous estimate, file_count * avg_time_per_file - elapsed_time, is
    # algebraically always 0. No sequential run is measured here, so report an
    # upper bound instead: at most min(files, workers) tasks run at once and
    # none of them can take longer than the whole run, so a sequential run
    # would have taken at most elapsed_time * min(files, workers).
    # NOTE(review): falls back to cpu_count() if num_workers is None/0 — confirm
    # that matches what tiff_resampler does in that case.
    worker_count = num_workers or multiprocessing.cpu_count()
    max_concurrency = max(1, min(file_count, worker_count))
    max_time_saved = elapsed_time * (max_concurrency - 1)
    print(f"Estimated time savings with parallel processing (upper bound): {max_time_saved:.2f} seconds")

## Test Single File Processing

Use this cell to test resampling on a single file. The result is written to the output folder as `<original name>_test.tif`.

In [None]:
def test_single_file(input_file, target_resolution=0.000833333):
    """Test resampling on a single file"""
    if not os.path.exists(input_file):
        print(f"File not found: {input_file}")
        return
    
    # Generate output filename
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    output_file = os.path.join(output_folder, f"{base_name}_test.tif")
    
    print(f"Processing: {os.path.basename(input_file)} -> {os.path.basename(output_file)}")
    
    start = time.time()
    try:
        # Resample the file
        resolutions = tiff_resampler.resample_tiff(input_file, output_file, target_resolution)
        elapsed = time.time() - start
        
        print(f"Processing completed in {elapsed:.2f} seconds")
        print(f"Original resolution: {resolutions[0]}")
        print(f"New resolution: {resolutions[1]}")
        
        return True
        
    except Exception as e:
        print(f"Error: {e}")
        return False

# Uncomment and modify this line to test a specific file
# test_single_file('./data/your_file.tif')