Automatic downloading of multiple preset files and zips from different sources

In [None]:
import requests
from bs4 import BeautifulSoup as bs
import urllib.parse as ur
import os
import zipfile

In [14]:
def extract_from_zip(zip_path, extract_to):
    # Extract all .fxp files from the zip (all subdirectories)
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        i = 0
        for member in zip_ref.namelist():
            if member.endswith('.fxp'):
                zip_ref.extract(member, extract_to)
                i += 1
    return i

In [None]:
urls = [
    'https://demos.newloops.com/New_Loops-Surge_Presets.zip',
    'https://damon-armani.com/wp-content/uploads/Damon-Armani-Surge-Presets-Vol-2.zip',
    'https://rekkerd.org/bin/presets/inigo_kennedy_03.zip',
    'https://rekkerd.org/bin/presets/inigo_kennedy_02.zip',
    'https://rekkerd.org/bin/presets/inigo_kennedy_01.zip',
    'https://rekkerd.org/bin/presets/NICK_MORITZ_Surge_Bank_v.1.rar',
    'https://rekkerd.org/bin/presets/Bronto_Scorpio_Surge_2.zip',
    'https://rekkerd.org/bin/presets/Bronto_Scorpio_Surge.zip'
    ''
]
save_path = "../../data/presets/surge/"

reset = True # delete existing files
if reset:
    import shutil
    shutil.rmtree(save_path, ignore_errors=True)
os.makedirs(save_path, exist_ok=True)

In [16]:
agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
headers = {'User-Agent': agent}

for url in urls:
    local_filename = os.path.join(save_path, url.split('/')[-1])
    try:
        print(f'Downloading {url} to {local_filename}...')
        with requests.get(url, headers=headers, stream=True) as r:
            r.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        if local_filename.endswith('.zip'):
            num = extract_from_zip(local_filename, save_path)
            print(f'Extracted {num} .fxp files from {local_filename}.')
    except Exception as e:
        print(f'Failed to download {url}. Error: {e}')
        continue

Downloading https://demos.newloops.com/New_Loops-Surge_Presets.zip to ../../data/presets/surge/New_Loops-Surge_Presets.zip...
Failed to download https://demos.newloops.com/New_Loops-Surge_Presets.zip. Error: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
Downloading https://damon-armani.com/wp-content/uploads/Damon-Armani-Surge-Presets-Vol-2.zip to ../../data/presets/surge/Damon-Armani-Surge-Presets-Vol-2.zip...
Extracted 36 .fxp files from ../../data/presets/surge/Damon-Armani-Surge-Presets-Vol-2.zip.
Downloading https://rekkerd.org/bin/presets/inigo_kennedy_03.zip to ../../data/presets/surge/inigo_kennedy_03.zip...
Extracted 30 .fxp files from ../../data/presets/surge/inigo_kennedy_03.zip.
Downloading https://rekkerd.org/bin/presets/inigo_kennedy_02.zip to ../../data/presets/surge/inigo_kennedy_02.zip...
Extracted 30 .fxp files from ../../data/presets/surge/inigo_kennedy_02.zip.
Downloading https://rekkerd.org/bin/presets/inigo_kennedy_01

In [21]:
# Test downloading from kvraudio official surge 
# https://www.kvraudio.com/product/surge-xt-by-surge-synth-team/downloads

cookies = 'kvr_cookies.txt'
headers = {'User-Agent': agent}

import http.cookiejar as cookielib
session = requests.Session()
cj = cookielib.MozillaCookieJar(cookies)  # path to your exported cookies file
cj.load(ignore_discard=True, ignore_expires=True)
session.cookies = cj

# class kvronoffleft flexflexed
# href in the form /banks.php?s=dl&id=12345
base_url = "https://www.kvraudio.com"

response = session.get(ur.urljoin(base_url, "/product/surge-by-surge-synth-team/downloads"), headers=headers)
soup = bs(response.text, 'html.parser')
print('Full html:', response.text)
links = soup.find_all('a', class_='kvronoffright flex flexcenter')
print(links)

for link in links:
    href = link.get('href')
    if href and 'dl&id=' in href:
        download_url = ur.urljoin(base_url, href)
        local_filename = os.path.join(save_path, download_url.split('=')[-1] + '.zip')
        try:
            print(f'Downloading {download_url} to {local_filename}...')
            with session.get(download_url, headers=headers, stream=True) as r:
                r.raise_for_status()
                with open(local_filename, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            if local_filename.endswith('.zip'):
                num = extract_from_zip(local_filename, save_path)
                print(f'Extracted {num} .fxp files from {local_filename}.')
        except Exception as e:
            print(f'Failed to download {download_url}. Error: {e}')
            continue

Full html: <!DOCTYPE html>
<!--[if lt IE 7]> <html class="no-js ie6 oldie" lang="en-US"> <![endif]-->
<!--[if IE 7]>    <html class="no-js ie7 oldie" lang="en-US"> <![endif]-->
<!--[if IE 8]>    <html class="no-js ie8 oldie" lang="en-US"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en-US"> <!--<![endif]-->
<head>
<title>Attention Required! | Cloudflare</title>
<meta charset="UTF-8" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=Edge" />
<meta name="robots" content="noindex, nofollow" />
<meta name="viewport" content="width=device-width,initial-scale=1" />
<link rel="stylesheet" id="cf_styles-css" href="/cdn-cgi/styles/cf.errors.css" />
<!--[if lt IE 9]><link rel="stylesheet" id='cf_styles-ie-css' href="/cdn-cgi/styles/cf.errors.ie.css" /><![endif]-->
<style>body{margin:0;padding:0}</style>


<!--[if gte IE 10]><!-->
<script>
  if (!navigator.cookieEnabled) {
    window.addEventListener('DOMConten