In [1]:
import os
import subprocess
import sys
import time
from datetime import datetime

import pandas as pd

# Read the product listing and keep the "product_url" column as a plain Python list
sentinel_urls_table = pd.read_csv('../data/sentinel_file_urls.csv')
sentinel_ASF_products_urls = sentinel_urls_table["product_url"].tolist()

# Sanity check: show one entry (the second one in the list)
print(sentinel_ASF_products_urls[1])

https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20231024T154105_20231024T154130_050909_0622FE_73C5.zip


In [2]:
# Define paths
aoi_path = "../utils/AOI_bboxes/aoi_shifa.geojson"
output_dir = "../data/preprocessed/sentinel"
# Path to the SNAP `gpt` executable. The SNAP_GPT_PATH environment variable
# overrides the default so the notebook is not tied to one machine's install.
snap_gpt_path = os.environ.get("SNAP_GPT_PATH", "/Applications/esa-snap/bin/gpt")
# Checkpoint file rewritten after every product so an interrupted run keeps its history
results_csv_path = '../data/processing_results.csv'


def _describe_product(filename):
    """Return ``(date, orbit)`` derived from a product filename.

    ``date`` is the first 8 characters (YYYYMMDD) of the sixth
    underscore-separated field of the name, or ``None`` when the name has
    fewer fields (so a malformed name cannot abort the whole run).
    ``orbit`` is ``'ASC'`` when the name contains ``'T15'`` and ``'DESC'``
    otherwise.
    """
    fields = filename.split('_')
    date = fields[5][:8] if len(fields) > 5 else None
    # NOTE(review): heuristic — presumably acquisitions around 15:00 are the
    # ascending passes over this AOI; confirm against product metadata.
    orbit = 'ASC' if 'T15' in filename else 'DESC'
    return date, orbit


# One record (dict) per processed product
results = []

# Process each URL
for i, url in enumerate(sentinel_ASF_products_urls):
    # Extract filename from URL
    filename = os.path.basename(url)

    # Print progress information
    print(f"\n{'='*80}")
    print(f"Processing file {i+1}/{len(sentinel_ASF_products_urls)}: {filename}")
    print(f"{'='*80}\n")

    # Track start time
    start_time = datetime.now()

    # Build command for download and processing.
    # sys.executable is the interpreter running this kernel, so the script runs
    # in the same environment as the notebook rather than whatever "python"
    # happens to be first on PATH.
    cmd = [
        sys.executable,
        "../scripts/download_process_single_sentinel_file.py",
        url,
        aoi_path,
        output_dir,
        "--snap_gpt_path",
        snap_gpt_path
    ]

    # Run the command. Only the subprocess call is guarded: bookkeeping below
    # runs exactly once per file, so a failure can never add a duplicate record.
    # KeyboardInterrupt is deliberately not caught, so interrupting the kernel
    # stops the whole batch.
    # NOTE(review): the child inherits this process's stdin; if the script
    # prompts for credentials it will wait for input — confirm credentials are
    # configured before launching a batch.
    error = None
    try:
        process = subprocess.run(cmd, capture_output=False, text=True)
        success = process.returncode == 0
        if not success:
            error = f"exit code {process.returncode}"
    except Exception as e:
        print(f"Error processing {filename}: {str(e)}")
        success = False
        error = str(e)

    processing_time = datetime.now() - start_time
    date, orbit = _describe_product(filename)

    # Record result (same keys for success and failure so the CSV schema is stable)
    results.append({
        'filename': filename,
        'url': url,
        'success': success,
        'processing_time': processing_time,
        'date': date,
        'orbit': orbit,
        'error': error
    })

    # Store intermediate results in case of interruption; a failed checkpoint
    # write is reported but must not stop the remaining products.
    try:
        pd.DataFrame(results).to_csv(results_csv_path, index=False)
    except OSError as e:
        print(f"Warning: could not save intermediate results to {results_csv_path}: {e}")

    print(f"\nProcessed {filename} in {processing_time}")
    print(f"Status: {'SUCCESS' if success else 'FAILED'}")

    # Optional: Add a short delay between processing files
    time.sleep(1)

# Create a results DataFrame (one row per processed product)
results_df = pd.DataFrame(results)

# Calculate summary statistics.
# An empty `results` list produces a DataFrame with no columns, so the column
# lookups below would raise KeyError; report zeros instead.
if results_df.empty:
    total_success = 0
    total_failed = 0
    total_time = pd.Timedelta(0)
    avg_time = pd.Timedelta(0)
else:
    total_success = int(results_df['success'].sum())
    total_failed = len(results_df) - total_success
    total_time = results_df['processing_time'].sum()
    avg_time = results_df['processing_time'].mean()

# Print summary
print("\n" + "="*80)
print("PROCESSING SUMMARY")
print("="*80)
print(f"Total files processed: {len(results_df)}")
print(f"Successfully processed: {total_success}")
print(f"Failed: {total_failed}")
print(f"Total processing time: {total_time}")
print(f"Average processing time: {avg_time}")
print("="*80)

# Save results to CSV. Skipped when nothing was processed so an empty run
# does not overwrite results saved by a previous run.
if results_df.empty:
    print("No results to save.")
else:
    results_df.to_csv('../data/processing_results.csv', index=False)
    print(f"Results saved to ../data/processing_results.csv")

# Render the per-file results as a Markdown table in the cell output
from IPython.display import display, Markdown

# Header and separator rows first, then one row per processed file
table_lines = [
    "| # | Filename | Orbit | Date | Success | Processing Time |",
    "|---|---------|-------|------|---------|----------------|",
]
for record in results_df.itertuples():
    status_icon = '✅' if record.success else '❌'
    table_lines.append(
        f"| {record.Index+1} | {record.filename} | {record.orbit} | {record.date} | {status_icon} | {record.processing_time} |"
    )

# Every row, including the last, ends with a newline
md_table = "".join(line + "\n" for line in table_lines)

display(Markdown(md_table))

# Plot success counts by orbit and by time period
if len(results_df) > 0:
    try:
        import matplotlib.pyplot as plt
        import seaborn as sns  # not used by the plots below

        def _period_for(date_str):
            """Classify a YYYYMMDD string as 'Reference' or 'Post-conflict'.

            Anything that is not an 8-digit string is labelled 'Unknown'
            instead of being compared, so one bad value cannot make the
            whole plotting section fail.
            """
            if not isinstance(date_str, str) or len(date_str) != 8 or not date_str.isdigit():
                return 'Unknown'
            # Lexicographic comparison is valid because both sides are YYYYMMDD
            return 'Reference' if date_str < '20231007' else 'Post-conflict'

        def _count_outcomes(group_col):
            """Count failed/succeeded files per value of `group_col`.

            The columns are forced to [False, True] so that both outcomes are
            always present and in a fixed order; without this, a run where
            every file succeeded has a single column that takes the first
            (red) colour.
            """
            return (
                results_df.groupby([group_col, 'success'])
                .size()
                .unstack(fill_value=0)
                .reindex(columns=[False, True], fill_value=0)
                .rename(columns={False: 'Failed', True: 'Succeeded'})
            )

        # Add reference/post classification based on date
        results_df['period'] = results_df['date'].apply(_period_for)

        # Colours in the same order as the columns: failed, then succeeded
        outcome_colors = ['#ff9999', '#66b3ff']

        # Create a figure with summary plots
        fig, axes = plt.subplots(1, 2, figsize=(15, 6))

        # Success by orbit
        orbit_success = _count_outcomes('orbit')
        orbit_success.plot(kind='bar', stacked=True, ax=axes[0],
                           color=outcome_colors)
        axes[0].set_title('Processing Success by Orbit')
        axes[0].set_ylabel('Number of Files')

        # Success by period
        period_success = _count_outcomes('period')
        period_success.plot(kind='bar', stacked=True, ax=axes[1],
                            color=outcome_colors)
        axes[1].set_title('Processing Success by Period')
        axes[1].set_ylabel('Number of Files')

        plt.tight_layout()
        plt.show()
    except Exception as e:
        # Plots are a convenience; report the problem without failing the cell
        print(f"Error creating plots: {str(e)}")


Processing file 1/54: S1A_IW_GRDH_1SDV_20231025T034436_20231025T034501_050916_062340_FA4F.zip

Downloading S1A_IW_GRDH_1SDV_20231025T034436_20231025T034501_050916_062340_FA4F.zip from https://datapool.asf.alaska.edu/GRD_HD/SA/S1A_IW_GRDH_1SDV_20231025T034436_20231025T034501_050916_062340_FA4F.zip
Output location: ../data/raw/sentinel/S1A_IW_GRDH_1SDV_20231025T034436_20231025T034501_050916_062340_FA4F.zip
 > Could not validate old cookie jar
No existing URS cookie found, please enter Earthdata username & password:
(Credentials will not be stored, saved or logged anywhere)
Username: 
 > Caught Signal. Exiting!



Traceback (most recent call last):
  File "/Users/matteolarrode/Documents/Oxford/Applied ML/Summative/ML-BuildingDamageViaSatellite/scripts/../scripts/download_process_single_sentinel_file.py", line 156, in <module>
    sys.exit(main())
  File "/Users/matteolarrode/Documents/Oxford/Applied ML/Summative/ML-BuildingDamageViaSatellite/scripts/../scripts/download_process_single_sentinel_file.py", line 128, in main
    zip_path = download_sentinel_data(args.url, args.raw_dir)
  File "/Users/matteolarrode/Documents/Oxford/Applied ML/Summative/ML-BuildingDamageViaSatellite/scripts/../scripts/download_process_single_sentinel_file.py", line 44, in download_sentinel_data
    result = subprocess.run(cmd)
  File "/opt/anaconda3/envs/ML-summative/lib/python3.10/subprocess.py", line 505, in run
    stdout, stderr = process.communicate(input, timeout=timeout)
  File "/opt/anaconda3/envs/ML-summative/lib/python3.10/subprocess.py", line 1146, in communicate
    self.wait()
  File "/opt/anaconda3/envs/M

KeyboardInterrupt: 