### Import everything we need

In [1]:
import os
import sys 
import bz2
import json
import requests
from tqdm import tqdm_notebook as tqdm

## Get Download Links

Download files of <font color="skyblue"> Molecule $C_2H_2$ </font>.

In [2]:
# Molecule whose line lists are requested. The API URL is built from it so the
# two can never drift apart.
molecule = 'C2H2'
api_url = 'http://exomol.com/api/?molecule=' + molecule + '&datatype=linelist'

response = requests.get(api_url)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
content = response.text
json_dict = json.loads(content)    # Convert json into dictionary.

# The top-level keys are used as iso-slugs in the cells below.
print('Iso-slugs: ', json_dict.keys())


Iso-slugs:  dict_keys(['(12C)2(1H)2', '(12C)(13C)(1H)2', '(12C)2(1H)(2H)'])


In [3]:
# List the dataset names of every iso-slug returned by the API instead of
# hard-coding the three C2H2 formulas, so this cell keeps working when
# `molecule` changes.
print('Isotopologues for each IsoFormula:\n')

# Pad the formulas to a common width so the second column lines up.
formula_width = max((len(slug) for slug in json_dict), default=0) + 6
for slug, entry in json_dict.items():
    print('IsoFormula: ' + slug.ljust(formula_width) + 'Isotopologue dataset names: ',
          entry['linelist'].keys(), '\n')

Isotopologues for each IsoFormula:

IsoFormula: (12C)2(1H)(2H)       Isotopologue dataset names:  dict_keys(['data type']) 

IsoFormula: (12C)(13C)(1H)2      Isotopologue dataset names:  dict_keys(['data type']) 

IsoFormula: (12C)2(1H)2          Isotopologue dataset names:  dict_keys(['data type', 'aCeTY']) 



Here we test our code with <font color="green"> Iso-slug 12C2-1H2 </font> and <font color="orange"> Isotopologue dataset name aCeTY </font>.

http://exomol.com/db/C2H2/12C2-1H2/aCeTY/12C2-1H2__aCeTY.def

In [4]:
# Each entry is an (iso-slug, isotopologue dataset name) pair to download.
iso = [
    ('(12C)2(1H)2', 'aCeTY'),
]

Get the download links of the states.bz2 and trans.bz2 files from the ExoMol website API.

In [5]:
def get_target_url():
    """
    Collect the download links of the states.bz2 and trans.bz2 files.

    Queries the module-level ``api_url`` and, for every
    (iso-slug, dataset name) pair in the module-level ``iso`` list, gathers
    the URLs of the listed files whose names end in ``states.bz2`` or
    ``trans.bz2``. Every link found is printed.

    Returns
    -------
    list of str
        The links, each prefixed with ``http://www.``. The list is empty when
        the API does not answer with status 200, so the result can always be
        iterated by the caller.
    """

    response = requests.get(api_url)

    if response.status_code != 200:
        print('ExoMol API Error' + str(response.status_code))
        # Return an empty list (not None) so downstream loops simply do nothing.
        return []

    json_dict = json.loads(response.text)    # Convert json into dictionary.

    print("Download links:")

    file_url = []
    for entry in iso:
        iso_slug, isotopologue = entry[0], entry[1]
        try:
            json_list = json_dict[iso_slug]['linelist'][isotopologue]['files']
        except KeyError:
            # Unknown iso-slug or dataset name: report it and try the next pair.
            print('Keyerror, keep going!')
            continue

        for file_info in json_list:
            link = file_info.get('url')
            # Entries without a 'url' give None; skip them instead of calling
            # a string method on None.
            if link and link.endswith(('states.bz2', 'trans.bz2')):
                file_url.append("http://www." + link)

    for link in file_url:
        print(link)

    return file_url


In [6]:
# Query the API and keep the collected links for the download step below.
target_link = get_target_url()

Download links:
http://www.exomol.com/db/C2H2/12C2-1H2/aCeTY/12C2-1H2__aCeTY.states.bz2


## Download Files

Download the states.bz2 and trans.bz2 files from the download links. Each file is saved into a corresponding folder named after its download link, under its original file name.

In [7]:
def download_target_series(file_url):
    """
    Download every link in ``file_url`` into the local ``./data`` tree.

    Each file is stored at ``./data/<link without its scheme>``, i.e. the host
    and path of the link are mirrored as folders. A progress bar is written to
    stdout while a file is streamed. Links whose request fails or is answered
    with an HTTP error status are skipped; their file names are printed at the
    end.

    Parameters
    ----------
    file_url : list of str
        Download links, e.g. the list returned by ``get_target_url``.

    Returns
    -------
    None
    """

    failed_list = []
    for link in tqdm(file_url):

        file_name = link.split('/')[-1]
        print("Downloading file:%s" % file_name)
        print(link)

        # Derive the destination from the link being processed (not from the
        # first link of the list), so every file gets its own path.
        save_name = "./data/" + link.split('//')[-1]
        os.makedirs(os.path.dirname(save_name), exist_ok=True)

        try:
            # NOTE(review): verify=False turns off TLS certificate checks for
            # https links; confirm this is still required.
            r = requests.get(link, stream=True, verify=False)
            # Treat 4xx/5xx answers as failures instead of saving an error page.
            r.raise_for_status()
        except Exception:
            failed_list.append(file_name.split('\\')[-1])
            print(' download failed. Go to download next one\n')
            # Without this, the code below would use an undefined or stale response.
            continue

        # Used to compute the progress; 0 means the server did not report a size.
        total_size = int(r.headers.get('Content-Length', 0))
        temp_size = 0

        # Download started. The response is closed when the block exits.
        with r, open(save_name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    temp_size += len(chunk)
                    f.write(chunk)
                    f.flush()
                    if total_size > 0:
                        done = min(50, int(50 * temp_size / total_size))
                        sys.stdout.write("\r[%s%s] %d%%" % ('█' * done, ' ' * (50 - done), 100 * temp_size / total_size))
                    else:
                        # Unknown total size: show the byte count instead of a percentage.
                        sys.stdout.write("\r%d bytes" % temp_size)
                    sys.stdout.flush()

        print(" Downloaded!\n")

    if failed_list:
        print("Finished, but %d file(s) failed to download:" % len(failed_list))
    else:
        print("All files downloaded!")

    print(failed_list) # Record which file is failed to download.

    return


In [8]:
# Download every link held in target_link; progress is printed for each file.
download_target_series(target_link)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

Downloading file:12C2-1H2__aCeTY.states.bz2
http://www.exomol.com/db/C2H2/12C2-1H2/aCeTY/12C2-1H2__aCeTY.states.bz2
[██████████████████████████████████████████████████] 100% Downloaded!


All files downloaded!
[]


## Decompress bz2 File

In [None]:
path = "./data/www.exomol.com/db/C2H2"

# Folder for saving decompressed files.
de_path = "./data/decompress/C2H2"


def decompress_bz2_tree(src_root, dst_root, chunk_size=100 * 1024):
    """
    Decompress every ``*.bz2`` file found below ``src_root`` into ``dst_root``.

    The output name is built from the first two dot-separated parts of the
    archive name: ``<part0>_<part1>.inp`` (``a.states.bz2`` becomes
    ``a_states.inp``). Files that do not end in ``.bz2`` are ignored, so
    other files stored next to the archives cannot abort the run. A running
    count is printed after each decompressed file.

    Parameters
    ----------
    src_root : str
        Directory that is walked recursively for archives.
    dst_root : str
        Directory receiving the decompressed files; created if missing.
    chunk_size : int, optional
        Number of decompressed bytes read per step, which bounds memory use.

    Returns
    -------
    int
        Number of files decompressed.
    """
    os.makedirs(dst_root, exist_ok=True)

    # One counter for the whole tree, not one per directory.
    count = 0
    for dirpath, dirnames, files in os.walk(src_root):
        for filename in files:
            if not filename.endswith('.bz2'):
                continue
            parts = filename.split('.')
            filepath = os.path.join(dirpath, filename)
            de_filepath = os.path.join(dst_root, parts[0] + '_' + parts[1] + '.inp')
            # Open the archive first so an unreadable input leaves no empty output file.
            with bz2.BZ2File(filepath, 'rb') as file, open(de_filepath, 'wb') as de_file:
                for data in iter(lambda: file.read(chunk_size), b''):
                    de_file.write(data)
            count += 1
            print(count)
    return count


decompress_bz2_tree(path, de_path)

print("Finished decompression")
