In [29]:
import requests
from urllib.request import urlretrieve
from tempfile import gettempdir
from pathlib import Path
import subprocess
import re
from contextlib import contextmanager
import ftplib

In [6]:
# Study identifier: interpolated into the BioStudies API URL and also used
# as the name of the local download folder further down.
array_express_id = "E-GEOD-61545"

In [7]:
def send_query(url: str, timeout: float = 30.0):
    """Send a GET request and return the decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook cell on a stalled connection.

    Returns:
        The response body parsed by ``response.json()``.

    Raises:
        ValueError: If the response status code is anything other than 200.
    """
    response = requests.get(url, timeout=timeout)
    # Only a plain 200 counts as success; every other status is reported
    # to the caller with its numeric code.
    if response.status_code != 200:
        raise ValueError(f"Error: {response.status_code}")
    return response.json()

In [15]:
# Ask the BioStudies API for the info record of the selected study.
url = f"https://www.ebi.ac.uk/biostudies/api/v1/studies/{array_express_id}/info"
data = send_query(url)
# The info record carries the study's FTP location under the 'ftpLink' key.
ftp_url = data['ftpLink']

In [22]:
# Show the FTP link obtained from the API response.
ftp_url

'ftp://ftp.ebi.ac.uk/biostudies/fire/E-GEOD-/545/E-GEOD-61545'

In [17]:
def get_conda_package_path(package_name):
    """Return the conda environment path reported by ``conda list``.

    Runs ``conda list <package_name>`` and extracts the path from the
    ``packages in environment at <path>:`` header line of its output. The
    value returned is therefore the environment location printed by conda,
    not a directory specific to the package.

    Args:
        package_name: Name passed to ``conda list``.

    Returns:
        str: The path captured from the header line.

    Raises:
        RuntimeError: If ``conda list`` exits with a non-zero status, or if
            the header line is missing from its output.
        OSError: Propagated unchanged from ``subprocess.run`` (for example
            when the ``conda`` executable cannot be found).
    """
    result = subprocess.run(
        ['conda', 'list', package_name],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    if result.returncode != 0:
        # Must be raised, not returned: a returned exception object would be
        # mistaken by callers for a valid path.
        raise RuntimeError(f"Error: {result.stderr}")

    # '.' does not cross newlines, so the greedy group stops at the last
    # colon on the header line (keeps drive-letter colons intact).
    match = re.search(r'packages in environment at (.*):', result.stdout)
    if match is None:
        raise RuntimeError("Environment path not found in output.")
    return match.group(1)

In [31]:
@contextmanager
def ftp_connection(host):
    """Context manager yielding an anonymously logged-in FTP connection.

    Args:
        host: FTP server host name to connect to.

    Yields:
        ftplib.FTP: The connected, logged-in client.

    The connection is always shut down on exit. A polite ``QUIT`` is tried
    first; if that fails (for instance because the connection is already
    broken) the socket is closed unilaterally instead, so that a cleanup
    failure never hides an exception raised inside the ``with`` body.
    """
    ftp = ftplib.FTP(host)
    try:
        ftp.login()  # Anonymous login
        yield ftp
    finally:
        try:
            ftp.quit()
        except ftplib.all_errors:
            # QUIT could not be completed; drop the connection without
            # talking to the server so the socket is still released.
            ftp.close()

def download_all_files_from_ftp(ftp_url: str, destination_folder: Path):
    """Download every entry of the ``Files`` sub-directory of an FTP location.

    Args:
        ftp_url: FTP location such as ``ftp://host/some/dir``. The host is
            taken from the URL; when the URL names no host, ``ftp.ebi.ac.uk``
            is used and the whole string is treated as the directory.
        destination_folder: Local folder that receives the files. It is
            created (including parents) if it does not exist.

    Raises:
        Whatever ``ftplib`` or the local file operations raise. A file whose
        transfer fails is removed again, so no truncated file is left behind.
    """
    # Imported locally so this function does not depend on the import cell.
    from urllib.parse import urlsplit

    default_host = "ftp.ebi.ac.uk"
    parsed = urlsplit(ftp_url)
    ftp_host = parsed.hostname or default_host
    # Keep the directory relative (no leading slash) and avoid a doubled
    # slash when the URL ends with one.
    files_ftp_directory = parsed.path.strip('/') + '/Files'

    with ftp_connection(ftp_host) as ftp:
        ftp.cwd(files_ftp_directory)

        # Names of all entries in the remote directory
        files = ftp.nlst()

        destination_folder.mkdir(parents=True, exist_ok=True)

        for file_name in files:
            local_file_path = destination_folder / file_name
            print(f"Downloading {file_name} to {local_file_path}...")
            try:
                with open(local_file_path, 'wb') as local_file:
                    ftp.retrbinary(f"RETR {file_name}", local_file.write)
            except BaseException:
                # Do not leave a partial download that looks like a
                # complete file; then let the original error propagate.
                local_file_path.unlink(missing_ok=True)
                raise

    print('All files downloaded.')

In [32]:
# Download into a folder named after the study inside the system temp directory.
destination_folder = Path(gettempdir()) / array_express_id
download_all_files_from_ftp(ftp_url, destination_folder)

Downloading E-GEOD-61545.idf.txt to /tmp/E-GEOD-61545/E-GEOD-61545.idf.txt...
Downloading E-GEOD-61545.sdrf.txt to /tmp/E-GEOD-61545/E-GEOD-61545.sdrf.txt...
All files downloaded.


In [21]:
# Path that `conda list aspera-cli` reports for the environment.
aspera_package_path = Path(get_conda_package_path('aspera-cli'))
# Presumably the SSH key shipped with aspera-cli; this only builds the path,
# it does not check that the file exists — TODO confirm the location.
private_ssh_key_file = aspera_package_path / 'etc/asperaweb_id_dsa.openssh'