From b228e23065a193caafb711281c1c604c9d65e96c Mon Sep 17 00:00:00 2001 From: saurabhojha Date: Sat, 22 Mar 2025 17:14:09 +0530 Subject: [PATCH 1/2] Fix download_data script --- download_data.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/download_data.sh b/download_data.sh index b9d4622..8e59dc5 100755 --- a/download_data.sh +++ b/download_data.sh @@ -9,19 +9,19 @@ read -p "Enter the number corresponding to your choice: " choice case $choice in 2) - # Download 10m dataset - wget https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_{0001..0010}.json.gz -P ~/data/bluesky -N + # Download 10m dataset: files 0001 to 0010 + wget -N -P ~/data/bluesky -i <(seq -f "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 10) ;; 3) - # Download 100m dataset - wget https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_{0001..0100}.json.gz -P ~/data/bluesky -N + # Download 100m dataset: files 0001 to 0100 + wget -N -P ~/data/bluesky -i <(seq -f "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 100) ;; 4) - # Download 1000m dataset - wget https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_{0001..1000}.json.gz -P ~/data/bluesky -N + # Download 1000m dataset: files 0001 to 1000 + wget -N -P ~/data/bluesky -i <(seq -f "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 1000) ;; *) - # Download 1m dataset - wget https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_0001.json.gz -P ~/data/bluesky -N + # Download 1m dataset: single file + wget -N -P ~/data/bluesky "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_0001.json.gz" ;; esac \ No newline at end of file From 85bef4508b7ce2f85efc1b0a3be2962223057b59 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 23 Mar 2025 09:12:26 +0000 Subject: [PATCH 2/2] Use long-form parameters, so readers don't need to check the manpage first --- download_data.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/download_data.sh b/download_data.sh index 8e59dc5..5acb12f 100755 --- a/download_data.sh +++ b/download_data.sh @@ -10,18 +10,18 @@ read -p "Enter the number corresponding to your choice: " choice case $choice in 2) # Download 10m dataset: files 0001 to 0010 - wget -N -P ~/data/bluesky -i <(seq -f "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 10) + wget --timestamping --directory-prefix ~/data/bluesky --input-file <(seq --format "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 10) ;; 3) # Download 100m dataset: files 0001 to 0100 - wget -N -P ~/data/bluesky -i <(seq -f "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 100) + wget --timestamping --directory-prefix ~/data/bluesky --input-file <(seq --format "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 100) ;; 4) # Download 1000m dataset: files 0001 to 1000 - wget -N -P ~/data/bluesky -i <(seq -f "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 1000) + wget --timestamping --directory-prefix ~/data/bluesky --input-file <(seq --format "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_%04g.json.gz" 1 1000) ;; *) # Download 1m dataset: single file - wget -N -P ~/data/bluesky "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_0001.json.gz" + wget --timestamping --directory-prefix ~/data/bluesky "https://clickhouse-public-datasets.s3.amazonaws.com/bluesky/file_0001.json.gz" ;; -esac \ No newline at end of file +esac