In [None]:
import os
import gzip
import ijson
from pymatgen.io.lobster import Lobsterout
from pymatgen.core import Structure
from pymatgen.electronic_structure.cohp import Cohp
from pymatgen.electronic_structure.cohp import CompleteCohp
from pymatgen.electronic_structure.dos import LobsterCompleteDos
from atomate2.lobster.schemas import LobsterTaskDocument

<h1><center>Top level keys to access data from Computational data JSON files </center></h1>
<table>
<thead>
  <tr>
    <th>Root Keys</th>
    <th>Data type</th>
    <th>Description</th>
  </tr>
</thead>
<tbody>
  <tr>
    <td>structure</td>
    <td>dict</td>
    <td>Dict representation of the pymatgen structure object<br>used for the LOBSTER calculation</td>
  </tr>
  <tr>
    <td>charges</td>
    <td>dict</td>
    <td>Atomic charges dict from LOBSTER based on <br>Mulliken and Löwdin charge analysis</td>
  </tr>
  <tr>
    <td>lobsterin</td>
    <td>dict</td>
    <td>LOBSTER calculation inputs</td>
  </tr>
  <tr>
    <td>lobsterout</td>
    <td>dict</td>
    <td>Information of LOBSTER calculation output</td>
  </tr>
  <tr>
    <td>lobsterpy_data</td>
    <td>dict</td>
    <td>Summarized bonding analysis data from LobsterPy <br>(all bonds mode). It also includes Cohp objects <br>to plot the COHP curves from the automatic analysis</td>
  </tr>
  <tr>
    <td>lobsterpy_text</td>
    <td>dict</td>
    <td>LobsterPy automatic analysis summary text (all bonds mode)</td>
  </tr>
  <tr>
    <td>strongest_bonds_icohp</td>
    <td>dict</td>
    <td>Describes the strongest ICOHP bonds</td>
  </tr>
  <tr>
    <td>strongest_bonds_icoop</td>
    <td>dict</td>
    <td>Describes the strongest ICOOP bonds</td>
  </tr>
  <tr>
    <td>strongest_bonds_icobi</td>
    <td>dict</td>
    <td>Describes the strongest ICOBI bonds</td>
  </tr>
  <tr>
    <td>lobsterpy_data_cation_anion</td>
    <td>dict</td>
    <td>Summarized bonding analysis data from LobsterPy <br>(cation-anion bonds mode). It also includes Cohp objects <br>to plot the COHP curves from the automatic analysis</td>
  </tr>
  <tr>
    <td>lobsterpy_text_cation_anion</td>
    <td>dict</td>
    <td>LobsterPy automatic analysis summary text <br>(cation-anion bonds mode)</td>
  </tr>
  <tr>
    <td>strongest_bonds_icohp_cation_anion</td>
    <td>dict</td>
    <td>Describes the strongest cation-anion ICOHP bonds</td>
  </tr>
  <tr>
    <td>strongest_bonds_icoop_cation_anion</td>
    <td>dict</td>
    <td>Describes the strongest cation-anion ICOOP bonds</td>
  </tr>
  <tr>
    <td>strongest_bonds_icobi_cation_anion</td>
    <td>dict</td>
    <td>Describes the strongest cation-anion ICOBI bonds</td>
  </tr>
  <tr>
    <td>cohp_data</td>
    <td>dict</td>
    <td>Dict representation of pymatgen CompleteCohp object <br>including data to plot COHP curves</td>
  </tr>
  <tr>
    <td>coop_data</td>
    <td>dict</td>
    <td>Dict representation of pymatgen CompleteCohp object <br>including data to plot COOP curves</td>
  </tr>
  <tr>
    <td>cobi_data</td>
    <td>dict</td>
    <td>Dict representation of pymatgen CompleteCohp object <br>including data to plot COBI curves</td>
  </tr>
  <tr>
    <td>dos</td>
    <td>dict</td>
    <td>Dict representation of pymatgen LobsterCompleteDos object <br>including the DOSCAR.lobster data</td>
  </tr>
  <tr>
    <td>lso_dos</td>
    <td>dict</td>
    <td>Dict representation of pymatgen LobsterCompleteDos object <br>including the DOSCAR.LSO.lobster data</td>
  </tr>
  <tr>
    <td>madelung_energies</td>
    <td>dict</td>
    <td>Consists of the Madelung energies of the structure derived from <br>the Mulliken and Löwdin charges</td>
  </tr>
</tbody>
</table>

### The function below reads the data for one root key from the JSON file at a time. Because the files are large, this is the recommended approach to avoid running out of memory.

In [None]:
def _lobster_value_to_pymatgen(key: str, value):
    """Convert the raw JSON value stored under one root key to pymatgen objects.

    Values of keys that have no pymatgen counterpart are returned unchanged.
    """
    if key == 'structure':
        return Structure.from_dict(value)
    if key in ('lobsterpy_data', 'lobsterpy_data_cation_anion'):
        # Empty/None analysis data is passed through untouched. For populated
        # data only the "cohp_plot_data" entries are converted; a missing or
        # empty "cohp_plot_data" entry is tolerated instead of raising.
        if value:
            plot_data = value.get("cohp_plot_data")
            if plot_data:
                value["cohp_plot_data"] = {
                    plotlabel: Cohp.from_dict(cohp)
                    for plotlabel, cohp in plot_data.items()
                }
        return value
    if key in ('cohp_data', 'cobi_data', 'coop_data'):
        return CompleteCohp.from_dict(value)
    if key in ('lso_dos', 'dos'):
        return LobsterCompleteDos.from_dict(value)
    return value


def read_lobstertask_doc_json(filename: str, pymatgen_objs: bool = False, query: str = 'structure') -> dict:
    """
    Stream a gzipped LOBSTER task JSON file and return the data of a single root key.

    The file is parsed incrementally with ijson, one element of the top-level
    JSON array at a time, and parsing stops at the first element that contains
    ``query``. This avoids holding the whole (potentially very large) document
    in memory.

    Args:
        filename (str) : path to the ``.json.gz`` file
        pymatgen_objs (bool) : if True, convert structure, cohp/coop/cobi data, dos data and
            the "cohp_plot_data" entries of the LobsterPy analysis to pymatgen objects
        query (str) : root key to look for. Possible values correspond to the
            "Root Keys" column of the table above.
    Returns:
        A dictionary holding the key/value pairs of the first array element that
        contains ``query``; an empty dictionary if no element contains it.
    """
    lobster_data = {}
    with gzip.open(filename, 'rb') as f:
        # use_float=True makes ijson yield numbers as float instead of Decimal.
        for obj in ijson.items(f, 'item', use_float=True):
            if query not in obj:
                continue
            for k, v in obj.items():
                lobster_data[k] = _lobster_value_to_pymatgen(k, v) if pymatgen_objs else v
            # Only the first matching element is wanted; skip the rest of the file.
            break

    return lobster_data

In [None]:
# Example: load only the cation-anion LobsterPy analysis of one compound and
# request conversion of its COHP plot data to pymatgen objects.
lobtask = read_lobstertask_doc_json(
    filename='../Example_data/Computational_data_jsons/mp-1008223.json.gz',
    pymatgen_objs=True,
    query='lobsterpy_data_cation_anion',
)