In [None]:
# Bulk downloader notebook: fetch every file listed in a CSV with aria2c, falling back to requests.

In [1]:
import os
import subprocess
import pandas as pd
from urllib.parse import urlparse
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import requests
from tqdm import tqdm

# Create a session with retry logic
def create_retry_session(retries=5, backoff_factor=0.3, status_forcelist=(500, 502, 504)):
    """Build a ``requests.Session`` whose HTTP and HTTPS requests are retried.

    Args:
        retries: Retry budget; used for the overall total as well as for
            read errors and connection errors.
        backoff_factor: Backoff factor handed to urllib3's ``Retry``.
        status_forcelist: HTTP status codes that trigger a retry.

    Returns:
        A session with one retry-enabled adapter mounted for both schemes.
    """
    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    session = requests.Session()
    # The same adapter instance serves both URL schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session

# Download with aria2 (parallel download)
def download_with_aria2(url, folder):
    """Download ``url`` into ``folder`` with the external ``aria2c`` tool.

    The file is saved under the last path component of the URL. aria2c is
    invoked with ``-x 16 -s 16`` (parallel connections/splits) and ``-c``
    (continue a partially downloaded file).

    Args:
        url: Address of the file to fetch.
        folder: Directory passed to aria2c as the download directory.

    Returns:
        The file name on success. ``None`` when aria2c exits with a non-zero
        status or cannot be launched at all (for example because it is not
        installed), so the caller can fall back to another download method.
    """
    file_name = os.path.basename(urlparse(url).path)
    command = ['aria2c', '-x', '16', '-s', '16', '-c', '-o', file_name, '-d', folder, url]
    try:
        result = subprocess.run(command)
    except OSError:
        # aria2c is missing or not executable. Report an ordinary failure
        # instead of raising; otherwise the requests-based fallback in the
        # caller is never reached.
        return None
    return file_name if result.returncode == 0 else None

# Download with requests (fallback)
def download_file(url, folder, chunk_size=1024, timeout=(10, 60)):
    """Stream ``url`` into ``folder`` with ``requests``, showing a tqdm progress bar.

    Args:
        url: Address of the file to fetch.
        folder: Existing directory the file is written to.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: ``(connect, read)`` timeout in seconds handed to ``requests``
            so that a stalled server cannot block the download forever.

    Returns:
        The name of the saved file (last path component of the URL).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the target file cannot be opened or written.
    """
    file_name = os.path.basename(urlparse(url).path)
    file_path = os.path.join(folder, file_name)

    # Context managers release the connection and the session's pool even
    # when the transfer fails part-way through.
    with create_retry_session() as session:
        with session.get(url, stream=True, timeout=timeout) as response:
            # Checked before the file is opened: otherwise an HTTP error page
            # would be written to disk and reported as a successful download.
            response.raise_for_status()

            # tqdm treats None as "size unknown"; a literal 0 would be shown
            # as a zero-byte total when the server sends no content-length.
            total_size = int(response.headers.get('content-length', 0)) or None

            with open(file_path, 'wb') as file, tqdm(
                desc=file_name,
                total=total_size,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
                miniters=1
            ) as bar:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if not chunk:
                        # Nothing to write for an empty chunk.
                        continue
                    file.write(chunk)
                    bar.update(len(chunk))
    return file_name

# Function to handle the download process from the CSV
def process_downloads(csv_file, download_folder):
    """Download every not-yet-completed entry listed in ``csv_file``.

    The CSV must provide the columns ``Download Link`` and ``Package``. A
    ``Status`` column is added (as ``Incomplete``) when missing. Rows whose
    status is ``Completed`` are skipped; every other row is attempted with
    aria2 first and with the requests-based downloader if aria2 reports
    failure. After each attempt the row's status (``Completed``,
    ``Incomplete`` or ``Error``) is written back to the same CSV, so an
    interrupted run can be resumed by calling the function again.

    Args:
        csv_file: Path of the CSV that is read and updated in place.
        download_folder: Target directory; created if it does not exist.
    """
    jobs = pd.read_csv(csv_file)
    if 'Status' not in jobs.columns:
        jobs['Status'] = 'Incomplete'

    os.makedirs(download_folder, exist_ok=True)

    for idx, record in jobs.iterrows():
        already_done = jobs.loc[idx, 'Status'] == 'Completed'
        if already_done:
            continue

        url = record['Download Link']
        label = record['Package']
        print(f"Downloading {label}...")

        try:
            # aria2 first; a falsy result hands over to the requests fallback.
            saved_name = (
                download_with_aria2(url, download_folder)
                or download_file(url, download_folder)
            )

            if not saved_name:
                jobs.loc[idx, 'Status'] = 'Incomplete'
                print(f"Download failed for {label}.")
            else:
                jobs.loc[idx, 'Status'] = 'Completed'
                jobs.loc[idx, 'Downloaded File Name'] = saved_name
                print(f"{saved_name} downloaded successfully.")

        except Exception as exc:
            print(f"Error downloading {label}: {exc}")
            jobs.loc[idx, 'Status'] = 'Error'

        # Persist progress after every attempted row.
        jobs.to_csv(csv_file, index=False)

    print("Download process completed. All updates saved.")


In [3]:

# Usage example: read the download list from a CSV in the working directory
# and save the files to the current user's Downloads folder.
csv_file = 'DTM_download_V2.csv'
# Resolved from the home directory instead of a hard-coded /Users/<name> path,
# so the notebook runs unchanged under other accounts and on other machines.
download_folder = os.path.join(os.path.expanduser('~'), 'Downloads')
process_downloads(csv_file, download_folder)


Downloading Sudbury-DTM-10...

09/30 03:39:15 [NOTICE] Downloading 1 item(s)

09/30 03:39:16 [NOTICE] Allocating disk space. Use --file-allocation=none to disable it. See --file-allocation option in man page for more details.
[#76776f 3.7MiB/3.1GiB(0%) CN:16 DL:4.0MiB ETA:13m25s]
[#76776f 8.5MiB/3.1GiB(0%) CN:16 DL:4.5MiB ETA:12m4s]
[#76776f 13MiB/3.1GiB(0%) CN:16 DL:4.5MiB ETA:11m56s]
[#76776f 17MiB/3.1GiB(0%) CN:16 DL:4.5MiB ETA:11m48s]
[#76776f 22MiB/3.1GiB(0%) CN:16 DL:4.6MiB ETA:11m43s]
[#76776f 27MiB/3.1GiB(0%) CN:16 DL:4.6MiB ETA:11m43s]
[#76776f 31MiB/3.1GiB(0%) CN:16 DL:4.6MiB ETA:11m37s]
[#76776f 36MiB/3.1GiB(1%) CN:16 DL:4.6MiB ETA:11m29s]
[#76776f 41MiB/3.1GiB(1%) CN:16 DL:4.7MiB ETA:11m26s]
[#76776f 46MiB/3.1GiB(1%) CN:16 DL:4.7MiB ETA:11m22s]
[#76776f 51MiB/3.1GiB(1%) CN:16 DL:4.8MiB ETA:11m10s]
[#76776f 56MiB/3.1GiB(1%) CN:16 DL:4.7MiB ETA:11m10s]
[#76776f 61MiB/3.1GiB(1%) CN:16 DL:4.8MiB ETA:11m3s]
[#76776f 65MiB/3.1GiB(2%) CN:16 DL:4.8MiB ETA:11m1