In [3]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def download_hitran_zip(url, folder='hitran_zip', timeout=30):
    """Download every ``.zip`` file linked from an HTML index page.

    The page at ``url`` is fetched and parsed, and each ``<a href=...>`` whose
    href ends with ``.zip`` is streamed to disk inside ``folder``.

    Args:
        url: Address of the HTML page that lists the archives.
        folder: Directory the archives are written to; created if missing.
        timeout: Seconds passed to every ``requests.get`` call so that a
            stalled server cannot block the notebook cell indefinitely.

    Raises:
        requests.HTTPError: If the index page or one of the archives is
            answered with a 4xx/5xx status. Nothing is written for a file
            whose request failed.
    """
    # Function-scope import: the notebook's import cell only brings in urljoin.
    from urllib.parse import unquote, urlparse

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)

    # Get the HTML content of the page; fail loudly instead of parsing an
    # error page that would simply contain no matching links.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all <a> tags and keep the ones whose href ends with .zip
    for link in soup.find_all('a', href=True):
        href = link['href']
        if not href.endswith('.zip'):
            continue

        # Build the full URL for the file
        file_url = urljoin(url, href)

        # Use only the decoded last path component as the local name, so an
        # href with directories, '..', a full URL or %-escapes cannot produce
        # a path outside (or missing inside) the target folder.
        file_name = os.path.basename(unquote(urlparse(file_url).path))
        if not file_name:
            # e.g. the '.zip' suffix lived in a query string, not the path
            file_name = os.path.basename(href)
        file_path = os.path.join(folder, file_name)

        # Download the file in chunks to keep memory use flat
        with requests.get(file_url, stream=True, timeout=timeout) as file_response:
            # Check the status before opening the file so an HTTP error body
            # is never saved under a .zip name.
            file_response.raise_for_status()
            with open(file_path, 'wb') as file:
                for chunk in file_response.iter_content(chunk_size=8192):
                    file.write(chunk)
        print(f'Downloaded {href}')

# Fetch every .zip linked from the HITRAN cross-section data index
download_hitran_zip(url='https://hitran.org/suppl/xsec/cross_section_data/')


Downloaded (Difluoromethoxy)trifluoromethane.zip
Downloaded (E)-1,2,3,3,3-Pentafluoroprop-1-ene.zip
Downloaded (E)-1,3,3,3-Tetrafluoroprop-1-ene.zip
Downloaded (E)-1-Chloro-3,3,3-trifluoroprop-1-ene.zip
Downloaded (Perfluoro-n-butyl)ethylene.zip
Downloaded (Perfluoro-n-octyl)ethylene.zip
Downloaded (Z)-1,2,3,3,3-Pentafluoroprop-1-ene.zip
Downloaded (Z)-1,3,3,3-Tetrafluoroprop-1-ene.zip
Downloaded 1,1,1,2,2,3,3,4,4-Nonafluoro-4-methoxybutane.zip
Downloaded 1,1,1,2,2,3,3,4,4-Nonafluorobutane.zip
Downloaded 1,1,1,2,2,3,3-Heptafluoro-3-methoxypropane.zip
Downloaded 1,1,1,2,2,3,3-Heptafluoropropane.zip
Downloaded 1,1,1,2,2,3,4,5,5,5-Decafluoro-3-methoxy-4-(trifluoromethyl)pentane.zip
Downloaded 1,1,1,2,2,3,4,5,5,5-Decafluoropentane.zip
Downloaded 1,1,1,2,2,3-Hexafluoropropane.zip
Downloaded 1,1,1,2,2,4,5,5,5-Nonafluoro-4-(trifluoromethyl)-3-pentanone.zip
Downloaded 1,1,1,2,2-Pentafluoro-3-methoxypropane.zip
Downloaded 1,1,1,2,2-Pentafluoropropane.zip
Downloaded 1,1,1,2,3,3,3-Heptafluoroprop

In [4]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def download_hitran_json(url, folder, timeout=30):
    """Download every ``.json`` file linked from an HTML index page.

    The page at ``url`` is fetched and parsed, and each ``<a href=...>`` whose
    href ends with ``.json`` is streamed to disk inside ``folder``.

    Args:
        url: Address of the HTML page that lists the JSON files.
        folder: Directory the files are written to; created if missing.
        timeout: Seconds passed to every ``requests.get`` call so that a
            stalled server cannot block the notebook cell indefinitely.

    Raises:
        requests.HTTPError: If the index page or one of the files is answered
            with a 4xx/5xx status. Nothing is written for a file whose
            request failed.
    """
    # Function-scope import: the notebook's import cell only brings in urljoin.
    from urllib.parse import unquote, urlparse

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)

    # Get the HTML content of the page; fail loudly instead of parsing an
    # error page that would simply contain no matching links.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all <a> tags and keep the ones whose href ends with .json
    for link in soup.find_all('a', href=True):
        href = link['href']
        if not href.endswith('.json'):
            continue

        # Build the full URL for the file
        file_url = urljoin(url, href)

        # Use only the decoded last path component as the local name, so an
        # href with directories, '..', a full URL or %-escapes cannot produce
        # a path outside (or missing inside) the target folder.
        file_name = os.path.basename(unquote(urlparse(file_url).path))
        if not file_name:
            # e.g. the '.json' suffix lived in a query string, not the path
            file_name = os.path.basename(href)
        file_path = os.path.join(folder, file_name)

        # Download the file in chunks to keep memory use flat
        with requests.get(file_url, stream=True, timeout=timeout) as file_response:
            # Check the status before opening the file so an HTTP error body
            # is never saved under a .json name.
            file_response.raise_for_status()
            with open(file_path, 'wb') as file:
                for chunk in file_response.iter_content(chunk_size=8192):
                    file.write(chunk)
        print(f'Downloaded {href}')

# Fetch the .json files from two HITRAN index pages, one target folder per page
download_hitran_json(
    url='https://hitran.org/suppl/xsec/cross_section_headers/',
    folder='hitran_cross_section_headers',
)
download_hitran_json(
    url='https://hitran.org/suppl/xsec/molecule_headers/',
    folder='molecule_headers',
)


Downloaded (CF3)2CHOCH2F_298.0K-0.0Torr_399.9-3999.7_1.00_147_28.json
Downloaded (CF3)2CHOCH2F_298.0K-700.0Torr_600.0-1999.3_0.25_air_147_5.json
Downloaded (CF3)2CHOCH3_298.0K-0.0Torr_449.8-3200.3_1.00_151_25.json
Downloaded (CF3)2CHOH_300.4K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CF3)2CHOH_303.5K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CF3)2CHOH_312.1K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CF3)2CHOH_326.8K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CF3)2CHOH_335.6K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CF3)2CHOH_343.1K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CF3)2CHOH_353.8K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CF3)2CHOH_362.4K-0.0Torr_530.0-3400.0_0.10_11_48.json
Downloaded (CH2Br)2_278.1K-760.0Torr_570.0-6500.0_0.11_N2_438_43.json
Downloaded (CH2Br)2_298.1K-760.0Torr_570.0-6500.0_0.11_N2_438_43.json
Downloaded (CH2Br)2_323.1K-760.0Torr_570.0-6500.0_0.11_N2_438_43.json
Downloaded BrONO2_204.0K-0.0Torr_765.0-850.0_0.0