In [1]:
pip install requests tqdm pandas


Collecting pandas
  Downloading pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl (11.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.4/11.4 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading pytz-2024.2-py2.py3-none-any.whl (508 kB)
Downloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.3 pytz-2024.2 tzdata-2024.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import requests
import pandas as pd
from tqdm import tqdm
from urllib.parse import urlparse

# Function to download a file with resumption support
def download_file(url, folder, chunk_size=1024, timeout=60):
    """Download ``url`` into ``folder``, resuming a partial file when possible.

    The local file name is the last component of the URL path. When a
    non-empty file of that name already exists in ``folder``, only the bytes
    past its current size are requested (HTTP ``Range`` header) and appended.

    Args:
        url: Address of the file to download.
        folder: Directory the file is written into (must already exist).
        chunk_size: Number of bytes pulled from the response per iteration.
        timeout: Seconds passed to ``requests.get`` as its ``timeout`` so a
            stalled connection fails instead of hanging forever.

    Returns:
        The file name (not the full path) of the downloaded file.

    Raises:
        ValueError: If the URL path does not end in a file name.
        OSError: If the server reports a total size that contradicts the
            size of the existing local file.
        requests.HTTPError: If the server answers with an error status.
    """
    file_name = os.path.basename(urlparse(url).path)
    if not file_name:
        raise ValueError(f"Cannot derive a file name from URL: {url}")
    file_path = os.path.join(folder, file_name)

    # Bytes already on disk; 0 means "start from scratch".
    start_pos = os.path.getsize(file_path) if os.path.exists(file_path) else 0
    request_headers = {'Range': f'bytes={start_pos}-'} if start_pos else {}

    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, headers=request_headers, timeout=timeout) as response:
        if start_pos and response.status_code == 416:
            # "Range Not Satisfiable": the server has no bytes at or beyond
            # start_pos. If it reports the total size ("bytes */N"), make sure
            # that matches what is on disk before treating the file as done.
            remote_size = response.headers.get('content-range', '').rpartition('/')[2]
            if remote_size.isdigit() and int(remote_size) != start_pos:
                raise OSError(
                    f"{file_path} is {start_pos} bytes but the server reports "
                    f"{remote_size}; delete the local file and retry."
                )
            return file_name

        # Never write an HTTP error page to disk as if it were the payload.
        response.raise_for_status()

        if start_pos and response.status_code != 206:
            # The server ignored the Range header and is sending the whole
            # file, so appending would corrupt it: overwrite instead.
            start_pos = 0
        write_mode = 'ab' if start_pos else 'wb'

        # content-length covers only the bytes of this response; without it
        # the total is unknown and tqdm shows a plain counter.
        remaining = int(response.headers.get('content-length', 0))
        total_size = start_pos + remaining if remaining else None

        with open(file_path, write_mode) as file, tqdm(
            desc=file_name,
            total=total_size,
            unit='B',
            unit_scale=True,
            unit_divisor=1024,
            initial=start_pos,
            miniters=1
        ) as bar:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
                    bar.update(len(chunk))

    return file_name  # Return the file name after downloading

# Input list of packages, and the progress file this cell maintains.
source_csv = 'DTM_download.csv'
status_csv = 'DTM_download_updated.csv'

# Resume from the progress file when an earlier run left one behind, so rows
# already marked 'Completed' are skipped. Delete that file to start over.
df = pd.read_csv(status_csv if os.path.exists(status_csv) else source_csv)

# Add the tracking columns if they don't already exist
if 'Status' not in df.columns:
    df['Status'] = 'Incomplete'
if 'Downloaded File Name' not in df.columns:
    df['Downloaded File Name'] = None
# An all-empty column is read back from CSV as float; keep it able to hold text.
df['Downloaded File Name'] = df['Downloaded File Name'].astype(object)

# Set the download folder (override with the DTM_DOWNLOAD_DIR environment variable)
download_folder = os.environ.get('DTM_DOWNLOAD_DIR', '/Users/shuyang/Data/DTM_ZIP')

# Create the download folder if it doesn't exist
os.makedirs(download_folder, exist_ok=True)

stopped_on_error = False
try:
    # Loop through each row in the CSV and download the file
    for index, row in df.iterrows():
        project = row['Project']
        package = row['Package']
        size = row['Size']
        download_link = row['Download Link']

        # Check if the file is already marked as 'Completed'
        if df.loc[index, 'Status'] == 'Completed':
            print(f"{download_link} is already downloaded.")
            continue

        try:
            # Download the file and use the original name from the URL
            print(f"Downloading {package} from {project} ({size} GB)...")
            file_name = download_file(download_link, download_folder)

            # Mark as completed
            df.loc[index, 'Status'] = 'Completed'
            df.loc[index, 'Downloaded File Name'] = file_name
            print(f"{file_name} downloaded successfully.")

            # Persist progress right away so a later crash cannot lose it
            df.to_csv(status_csv, index=False)

        except Exception as e:
            print(f"Error downloading {download_link}: {e}")
            df.loc[index, 'Status'] = 'Error'
            stopped_on_error = True
            break  # Stop the loop if an error occurs
finally:
    # Save the updated CSV with download status and file names, even when the
    # cell is interrupted part-way through a download.
    df.to_csv(status_csv, index=False)

if stopped_on_error:
    print(f"Download process stopped after an error. Progress saved as '{status_csv}'.")
else:
    print("Download process completed. Updated CSV saved as 'DTM_download_updated.csv'.")


Downloading Cochrane A from OMAFRA Lidar 2016-18 (2.91 GB)...


LIDAR2016to18_DTM-Crne-A.zip:  56%|█████▌    | 1.55G/2.78G [11:20<09:57, 2.20MB/s] 