In [1]:
# %pip (unlike !pip) installs into the environment of the running kernel.
# Versions are pinned to the ones recorded in this cell's saved output.
%pip install requests==2.31.0 beautifulsoup4==4.12.2


Collecting requests
  Using cached requests-2.31.0-py3-none-any.whl (62 kB)
Collecting beautifulsoup4
  Using cached beautifulsoup4-4.12.2-py3-none-any.whl (142 kB)
Collecting charset-normalizer<4,>=2
  Downloading charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl (99 kB)
Collecting urllib3<3,>=1.21.1
  Using cached urllib3-2.0.7-py3-none-any.whl (124 kB)
Collecting idna<4,>=2.5
  Using cached idna-3.4-py3-none-any.whl (61 kB)
Collecting certifi>=2017.4.17
  Using cached certifi-2023.7.22-py3-none-any.whl (158 kB)
Collecting soupsieve>1.2
  Using cached soupsieve-2.5-py3-none-any.whl (36 kB)
Installing collected packages: urllib3, soupsieve, idna, charset-normalizer, certifi, requests, beautifulsoup4
Successfully installed beautifulsoup4-4.12.2 certifi-2023.7.22 charset-normalizer-3.3.2 idna-3.4 requests-2.31.0 soupsieve-2.5 urllib3-2.0.7


You should consider upgrading via the 'f:\maven-repo\venv\scripts\python.exe -m pip install --upgrade pip' command.


# Download the whole repository from Maven Central

In [None]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin


# Root of the remote listing handed to download_artifacts() below.
# The trailing slash is part of the value.
maven_url = "https://repo1.maven.org/maven2/"

# Local folder (relative to the working directory) that receives the files.
destination_folder = 'local-maven-repo'


def download_file(url, destination):
    """Stream the resource at ``url`` into the local file ``destination``.

    Missing parent directories of ``destination`` are created on demand.

    Args:
        url: Address of the file to fetch with an HTTP GET.
        destination: Local path the response body is written to; a file
            already at that path is overwritten.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Nothing is written in that case, so an error page is never
            stored under the artifact's name.
    """
    # The context manager closes the streamed response even if writing fails
    # part-way. The timeout bounds connecting and each wait for data, so a
    # stalled server cannot hang the cell indefinitely.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()

        # A bare file name has an empty dirname and os.makedirs('') raises,
        # so only create the parent when there is one. exist_ok avoids the
        # race of a separate exists() check.
        parent_dir = os.path.dirname(destination)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(destination, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)


def download_artifacts(repository_url, destination_folder):
    """Recursively mirror an HTML directory listing into a local folder.

    Every ``<a>`` element on the page at ``repository_url`` is inspected.
    Links to direct children ending in ``/`` are treated as directories and
    recursed into; all other direct children are downloaded with
    ``download_file``. Links without an ``href``, links that point outside
    ``repository_url`` (such as the parent-directory entry), links to the
    page itself, and links carrying a query or fragment are ignored.

    Args:
        repository_url: URL of the directory listing. A trailing ``/`` is
            appended when missing.
        destination_folder: Local folder that mirrors ``repository_url``.

    Raises:
        requests.HTTPError: If the listing page answers with a 4xx/5xx status.
    """
    # Imported here because the file-level imports only bring in urljoin.
    from urllib.parse import unquote

    # urljoin() resolves relative hrefs against the last "/" of the base, so
    # without a trailing slash the final path segment would be replaced.
    if not repository_url.endswith('/'):
        repository_url += '/'

    response = requests.get(repository_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.find_all('a')
    print(f'Found {len(links)} links in {repository_url}')

    for link in links:
        href = link.get('href')
        if not href:
            # urljoin(base, None) returns base itself, which would recurse
            # into the same page forever.
            continue

        artifact_url = urljoin(repository_url, href)
        if not artifact_url.startswith(repository_url):
            # Parent-directory or external link: never walk upwards/outwards.
            continue

        relative = artifact_url[len(repository_url):]
        is_directory = relative.endswith('/')
        raw_name = relative.rstrip('/')
        if not raw_name or any(ch in raw_name for ch in '/?#'):
            # The page itself, a deeper path, or a query/fragment link.
            continue

        # Name the local entry after the URL rather than the link text, which
        # an index page is free to shorten or decorate.
        name = unquote(raw_name)
        if name in ('.', '..') or '/' in name or '\\' in name:
            # A decoded name must not be able to escape destination_folder.
            continue

        target = os.path.join(destination_folder, name)
        if is_directory:
            # It's a directory, recurse into it
            print(f'Found directory: {artifact_url}')
            download_artifacts(artifact_url, target)
        else:
            # It's a file, download it
            print(f'Downloading: {artifact_url}')
            # Content-Length is optional, so do not index the header directly.
            size = requests.head(artifact_url, timeout=30).headers.get(
                'Content-Length', 'unknown')
            print("File size: ", size)
            download_file(artifact_url, target)


# NOTE(review): this walks the listing from its root and downloads every file
# it finds; presumably far more data than fits on a workstation. Confirm
# before running, or use the selected-versions cell instead.
# exist_ok replaces the separate exists() check, which is racy.
os.makedirs(destination_folder, exist_ok=True)

download_artifacts(maven_url, destination_folder)

# Download selected versions from Maven

In [None]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse


def download_file(url, destination):
    """Stream the resource at ``url`` into the local file ``destination``.

    Missing parent directories of ``destination`` are created on demand.

    Args:
        url: Address of the file to fetch with an HTTP GET.
        destination: Local path the response body is written to; a file
            already at that path is overwritten.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Nothing is written in that case, so an error page is never
            stored under the artifact's name.
    """
    # The context manager closes the streamed response even if writing fails
    # part-way. The timeout bounds connecting and each wait for data, so a
    # stalled server cannot hang the cell indefinitely.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()

        # Create the parent folder here so the function does not depend on
        # the caller having prepared it. A bare file name has an empty
        # dirname, for which os.makedirs() would raise.
        parent_dir = os.path.dirname(destination)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(destination, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)


def download_artifacts(repository_url, base_destination_folder):
    """Recursively mirror an HTML directory listing into a local folder.

    Every ``<a>`` element on the page at ``repository_url`` is inspected.
    Links to direct children ending in ``/`` are treated as directories and
    mirrored into a sub-folder of the same name; all other direct children
    are downloaded with ``download_file``. Links without an ``href``, links
    that point outside ``repository_url`` (such as the parent-directory
    entry), links to the page itself, and links carrying a query or fragment
    are ignored.

    Args:
        repository_url: URL of the directory listing. A trailing ``/`` is
            appended when missing.
        base_destination_folder: Local folder that mirrors
            ``repository_url``; created if it does not exist.

    Raises:
        requests.HTTPError: If the listing page answers with a 4xx/5xx status.
    """
    # Imported here because the file-level imports do not bring in unquote.
    from urllib.parse import unquote

    # urljoin() resolves relative hrefs against the last "/" of the base, so
    # without a trailing slash the final path segment would be replaced.
    if not repository_url.endswith('/'):
        repository_url += '/'

    response = requests.get(repository_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Sub-folders reached through recursion are created here, so files can be
    # written into them regardless of what the caller prepared.
    os.makedirs(base_destination_folder, exist_ok=True)

    for link in soup.find_all('a'):
        href = link.get('href')
        if not href:
            # urljoin(base, None) returns base itself, which would recurse
            # into the same page forever.
            continue

        artifact_url = urljoin(repository_url, href)
        if not artifact_url.startswith(repository_url):
            # Parent-directory or external link: never walk upwards/outwards.
            continue

        relative = artifact_url[len(repository_url):]
        is_directory = relative.endswith('/')
        # Strip the trailing slash before taking the name: the basename of a
        # path that ends in "/" is empty, which would merge every
        # sub-directory into base_destination_folder.
        raw_name = relative.rstrip('/')
        if not raw_name or any(ch in raw_name for ch in '/?#'):
            # The page itself, a deeper path, or a query/fragment link.
            continue

        # Name the local entry after the URL rather than the link text, which
        # an index page is free to shorten or decorate.
        artifact_name = unquote(raw_name)
        if (artifact_name in ('.', '..')
                or '/' in artifact_name or '\\' in artifact_name):
            # A decoded name must not be able to escape the destination.
            continue

        target = os.path.join(base_destination_folder, artifact_name)
        if is_directory:
            # It's a directory, recurse into it
            download_artifacts(artifact_url, target)
        else:
            # It's a file, download it
            print(f'Downloading: {artifact_url}')
            download_file(artifact_url, target)


if __name__ == "__main__":
    # Listing URLs (one per artifact version) to mirror. Every entry is
    # currently commented out, so this cell downloads nothing until at least
    # one line is enabled. Each entry ends with a comma so that enabling
    # neighbouring lines cannot merge them through implicit string
    # concatenation.
    URLS = [
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-dependencies/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-json/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-log4j/1.3.8.RELEASE/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-oauth2-client/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-parent/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-test/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-tomcat/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-web/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/spring-web/6.0.9/',
        # 'https://repo1.maven.org/maven2/org/springframework/spring-hibernate/1.2.9/',
        # 'https://repo1.maven.org/maven2/org/springframework/data/spring-data-jpa/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/data/spring-data-jpa/3.1.4/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-data-jpa/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-data-jpa/3.1.4/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-data-jdbc/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-security/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-maven-plugin/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-validation/3.1.5/',
        # 'https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.9-rc/',
        # 'https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.8-dmr/',
        # 'https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.7-dmr/',
        # 'https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.33/',
        # 'https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.32/',
        # 'https://repo1.maven.org/maven2/mysql/mysql-connector-java/8.0.31/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-starter-thymeleaf/3.1.5/',
        # 'https://repo1.maven.org/maven2/org/springframework/boot/spring-boot-devtools/3.1.5/',
    ]

    for url in URLS:
        # Reuse the URL's path (without its leading slash) as the folder
        # layout below "maven-repo".
        relative_path = urlparse(url).path.lstrip("/")
        destination_folder = os.path.join("maven-repo", relative_path)

        # exist_ok replaces the separate exists() check, which is racy.
        os.makedirs(destination_folder, exist_ok=True)

        download_artifacts(url, destination_folder)