In [31]:
import requests
import gzip
import os

def download_and_decompress(id, timeout=60):
    """Download one PubChem Compound_3D SDF archive and unpack it under ``sdf/``.

    The file ``{id}.sdf.gz`` is fetched from ``ftp.ncbi.nlm.nih.gov`` over
    HTTPS, stored as ``sdf/{id}.sdf.gz`` and then decompressed next to it as
    ``sdf/{id}.sdf``. Both the download and the decompression are streamed in
    chunks, so neither the archive nor the decompressed file has to fit in
    memory at once.

    Progress and failures are reported with ``print``; a failed download does
    not raise, so a loop over many archives keeps going.

    Parameters
    ----------
    id : str
        File stem of the archive, e.g. ``"00000001_00025000"``. The name
        shadows the builtin ``id`` inside this function; it is kept so that
        existing keyword callers continue to work.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. Without a timeout a stalled connection
        would block forever. Defaults to 60 seconds.

    Returns
    -------
    None
    """
    # Local import keeps this cell self-contained; shutil is standard library.
    import shutil

    url = f"https://ftp.ncbi.nlm.nih.gov/pubchem/Compound_3D/10_conf_per_cmpd/SDF/{id}.sdf.gz"
    directory = "sdf/"

    # Ensure the target directory exists
    os.makedirs(directory, exist_ok=True)

    # Name the local file after the last URL component
    filename = url.split("/")[-1]
    file_path = os.path.join(directory, filename)

    # Stream into a temporary ".part" file and rename only on success, so an
    # interrupted transfer never leaves a truncated archive under the final name.
    part_path = file_path + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Anything other than 200 is treated as a failed download
            if response.status_code != 200:
                print("Failed to download the file.")
                return
            with open(part_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
    except requests.RequestException as exc:
        if os.path.exists(part_path):
            os.remove(part_path)
        print(f"Failed to download the file: {exc}")
        return

    os.replace(part_path, file_path)
    print(f"File downloaded successfully to: {file_path}")

    # If the downloaded file is in gzip format, decompress it
    if filename.endswith('.gz'):
        # Dropping the final extension turns "x.sdf.gz" into "x.sdf"
        decompressed_file_path = os.path.splitext(file_path)[0]

        # copyfileobj moves the data in fixed-size chunks
        with gzip.open(file_path, 'rb') as f_in, open(decompressed_file_path, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)

        print(f"Decompressed file saved successfully: {decompressed_file_path}")

# Example usage: fetch a single archive by its file stem.
# The variable is not called `id` so the builtin id() stays usable in the kernel.
example_id = "00000001_00025000"
download_and_decompress(example_id)



File downloaded successfully to: sdf/00000001_00025000.sdf.gz
Decompressed file saved successfully: sdf/00000001_00025000.sdf


In [26]:
def generate_sequence(term, n, step=25000, width=8):
    """Return ``n`` zero-padded terms of an arithmetic sequence as strings.

    Parameters
    ----------
    term : int
        First value of the sequence.
    n : int
        Number of terms to produce; zero or a negative value gives ``[]``.
    step : int, optional
        Difference between consecutive terms. Defaults to 25000.
    width : int, optional
        Minimum string width; shorter values are left-padded with zeros.
        Defaults to 8.

    Returns
    -------
    list of str
        ``[term, term + step, ...]`` with each value formatted to ``width``.
    """
    return [f"{term + i * step:0{width}}" for i in range(n)]

# Zero-padded first (fs) and last (bs) IDs of 40 consecutive ranges.
fs = generate_sequence(term=1, n=40)
bs = generate_sequence(term=25000, n=40)

In [None]:
# Download every archive; each file stem is "<first>_<last>" for one ID range.
# zip pairs the two lists directly, and no variable shadows the builtin id().
for first, last in zip(fs, bs):
    download_and_decompress(f"{first}_{last}")

In [27]:
# Inspect the zero-padded first IDs of each range.
fs

['00000001',
 '00025001',
 '00050001',
 '00075001',
 '00100001',
 '00125001',
 '00150001',
 '00175001',
 '00200001',
 '00225001',
 '00250001',
 '00275001',
 '00300001',
 '00325001',
 '00350001',
 '00375001',
 '00400001',
 '00425001',
 '00450001',
 '00475001',
 '00500001',
 '00525001',
 '00550001',
 '00575001',
 '00600001',
 '00625001',
 '00650001',
 '00675001',
 '00700001',
 '00725001',
 '00750001',
 '00775001',
 '00800001',
 '00825001',
 '00850001',
 '00875001',
 '00900001',
 '00925001',
 '00950001',
 '00975001']

In [28]:
# Inspect the zero-padded last IDs of each range.
bs

['00025000',
 '00050000',
 '00075000',
 '00100000',
 '00125000',
 '00150000',
 '00175000',
 '00200000',
 '00225000',
 '00250000',
 '00275000',
 '00300000',
 '00325000',
 '00350000',
 '00375000',
 '00400000',
 '00425000',
 '00450000',
 '00475000',
 '00500000',
 '00525000',
 '00550000',
 '00575000',
 '00600000',
 '00625000',
 '00650000',
 '00675000',
 '00700000',
 '00725000',
 '00750000',
 '00775000',
 '00800000',
 '00825000',
 '00850000',
 '00875000',
 '00900000',
 '00925000',
 '00950000',
 '00975000',
 '01000000']