In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from collections import Counter
import pandas as pd
import os
import glob
import copy
from pathlib import Path

from pprint import pprint
import itertools as it
from collections import Counter
import numpy as np

from aiida import load_profile
from aiida.orm.nodes.data.structure import StructureData

from aiida_quantumespresso.tools.pwinputparser import create_builder_from_file
from aiida_quantumespresso.calculations.pw import PwCalculation
from aiida_quantumespresso.calculations.pwimmigrant import PwimmigrantCalculation
from aiida.engine import submit
from aiida_quantumespresso.calculations.pw import PwCalculation
from aiida_quantumespresso.workflows.pw.relax import PwRelaxWorkChain
from aiida_quantumespresso.workflows.pw.base import PwBaseWorkChain
from aiida.orm import load_node, load_code

from qe_tools.exceptions import ParsingError

# from own_utils.calc_df import init_project_df

from pymatgen.io.ase import AseAtomsAdaptor

# import pybat
# from pybat import Cathode, LiRichCathode, Dimer, DimerNEBAnalysis

from ase.visualize import view
from ase.io.vasp import read_vasp, write_vasp
from ase.build.tools import sort
from ase.io.espresso import read_espresso_in, read_espresso_out

from project_settings import *

# from itables import init_notebook_mode
# init_notebook_mode(all_interactive=True)
pd.set_option('display.max_colwidth', None)

In [None]:
# Load an AiiDA profile; no profile name is passed, so AiiDA picks the one to use.
load_profile()

In [None]:
# iurii_olivine_dir = '/home/jgeiger/projects/bat_uv_ml/data/olivines_iurii/LixMnPO4/Li1.00/DFT_plus_U/1_vcrelax/'
# iurii_olivine_file = 'LiMnPO4.vcrelax.1.in'
# iurii_pseudo_dir = '/home/jgeiger/projects/bat_uv_ml/data/olivines_iurii/Pseudopotentials/SSSP'
# iurri_spinel_dir = '/home/jgeiger/projects/bat_uv_ml/data/spinel_iurii/LixMn2O4/DFT+U/LiMn2O4/1_vcrelax'
# iurii_spinel_file = 'LiMn2O4.vcrelax.1.in'

In [None]:
def init_project_df(calc_dir, file_glob):
    """Collect the files below ``calc_dir`` whose names end with ``file_glob``.

    Despite its name, ``file_glob`` is not a glob pattern: it is matched with
    ``str.endswith`` against the bare file name.

    Parameters
    ----------
    calc_dir : str or os.PathLike
        Root directory that is walked recursively with ``os.walk``.
    file_glob : str or iterable of str
        A file-name suffix such as ``'.in'``, or several suffixes such as
        ``('.in', '.out')``. Non-tuple iterables (e.g. a list) are converted
        to a tuple because ``str.endswith`` only accepts ``str`` or ``tuple``.

    Returns
    -------
    pandas.DataFrame
        One row per matching file, ordered by full path, with the columns
        ``abs_path`` (directory of the file), ``calc_in`` (file name) and
        ``abs_path_in`` (directory joined with the file name). The paths are
        derived from ``calc_dir`` as given, so they are absolute only if
        ``calc_dir`` is absolute.
    """
    suffixes = file_glob if isinstance(file_glob, (str, tuple)) else tuple(file_glob)

    # os.walk yields entries in a file-system dependent order; sorting makes the
    # row order reproducible across machines and kernel restarts.
    path_list = sorted(
        os.path.join(root, file_name)
        for root, _dirs, file_names in os.walk(calc_dir)
        for file_name in file_names
        if file_name.endswith(suffixes)
    )

    # NOTE: a 'rel_path' column (abs_path with project_dir replaced by '.') was
    # tried before and found to be buggy, so it is deliberately not created here.
    return pd.DataFrame(
        {
            'abs_path': [os.path.dirname(path) for path in path_list],
            'calc_in': [os.path.basename(path) for path in path_list],
            'abs_path_in': path_list,
        }
    )

def add_calcdata(df_in):
    """Return a copy of ``df_in`` extended with columns derived from ``calc_in``.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Must contain a string column ``calc_in`` holding input file names
        (e.g. ``'LFPO.scf.2.in'``). The frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        Deep copy of ``df_in`` with three additional columns:

        * ``calc_name`` -- file name without its last suffix (``Path.stem``),
        * ``calc_type`` -- the first of ``pdos``, ``dos``, ``nscf``, ``scf``,
          ``hp`` or ``vcrelax`` that appears enclosed in dots in the file
          name; NaN if none is found,
        * ``calc_out`` -- file name with a trailing ``.in`` replaced by ``.out``.
    """
    calctype_regex = r"\.(pdos|dos|nscf|scf|hp|vcrelax)\."
    df_out = df_in.copy(deep=True)
    df_out['calc_name'] = df_out['calc_in'].apply(lambda x: Path(x).stem)
    # expand=False returns a Series for the single capture group.
    df_out['calc_type'] = df_out['calc_in'].str.extract(calctype_regex, expand=False)
    # Anchor the replacement at the end of the name: a plain substring replace
    # would also rewrite '.in' inside the name, e.g. 'x.init.scf.in' -> 'x.outit.scf.out'.
    df_out['calc_out'] = df_out['calc_in'].str.replace(r'\.in$', '.out', regex=True)
    return df_out

# NOTE: the '.in' suffix also matches the parameters.in files, for which the
# calculation type ends up as NaN.
spinel_df_iurii, olivine_df_iurii = (
    init_project_df(calc_dir=structure_dir, file_glob='.in')  # or ('.in', '.out')
    for structure_dir in (spinel_dir_iurii, olivine_dir_iurii)
)
# olivine_df_iurii = olivine_df_iurii.dropna()


In [None]:
# Stack the spinel and olivine file listings and add the columns derived from
# the input file names.
iurii_full_data_df = add_calcdata(
    pd.concat([spinel_df_iurii, olivine_df_iurii], ignore_index=True)
)

iurii_full_data_df.shape
iurii_full_data_df.head()

# Write the combined table as CSV to <project_dir>/data
print(project_dir)
iurii_full_data_df.to_csv(Path(project_dir, 'data', 'iurii_full_data_df.csv'), index=False)


# Olivine directory tree
<!-- 
├── Bulk_Li
├── LixFePO4
│   ├── Li0.00
│   ├── Li0.25
│   ├── Li0.50
│   ├── Li0.75
│   └── Li1.00
├── LixMn0.5Fe0.5PO4
│   ├── Li0.00
│   ├── Li0.25
│   ├── Li0.50
│   ├── Li0.75
│   └── Li1.00
├── LixMnPO4
│   ├── Li0.00
│   ├── Li0.25
│   ├── Li0.50
│   ├── Li0.75
│   └── Li1.00
└── Pseudopotentials
    ├── PseudoDojo
    └── SSSP
-->

# Spinel directory tree

<!--
├── LixMn1.5Ni0.5O4
│   ├── DFT+U
│   │   ├── LiMn1.5Ni0.5O4
│   │   └── Mn1.5Ni0.5O4
│   └── DFT+U+V
│       ├── LiMn1.5Ni0.5O4
│       └── Mn1.5Ni0.5O4
└── LixMn2O4
    ├── DFT+U
    │   ├── LiMn2O4
    │   └── Mn2O4
    └── DFT+U+V
        ├── LiMn2O4
        └── Mn2O4
-->

# Read in fully lithiated phospho-olivine and spinel structures from pw inputs

In [None]:
# project_dir = '/home/jgeiger/projects/bat_uv_ml/'

# # Read in csv with pandas
# iurii_full_df = pd.read_csv(os.path.join(project_dir, 'data', 'iurii_full_data_df.csv'))

In [None]:
# Preview the first rows of the combined file listing.
iurii_full_data_df.head()

***
# Fully lithiated structures

In [None]:
# Keep the rows whose input path contains one of the fragments below
# (literal substring match, no regex).
fully_lithiated_df = iurii_full_data_df.loc[
    np.logical_or.reduce([
        iurii_full_data_df['abs_path_in'].str.contains(path_fragment, regex=False)
        for path_fragment in (
            # olivine
            'LixFePO4/Li1.00/DFT_plus_UV/8_PDOS/LFPO.scf.2.in',
            'LixMnPO4/Li1.00/DFT_plus_UV/9_PDOS/LMPO.scf.2.in',
            'LixMn0.5Fe0.5PO4/Li1.00/DFT_plus_UV/8_PDOS/LFPO.scf.2.in',
            # spinel
            'LixMn1.5Ni0.5O4/DFT+U+V/LiMn1.5Ni0.5O4/8_pdos/LiMn1.5Ni0.5O4.scf.2.in',
            'LixMn2O4/DFT+U+V/LiMn2O4/5_pdos/LiMn2O4.scf.2.in',
        )
    ])
]
fully_lithiated_df.shape
fully_lithiated_df.head()

In [None]:
fully_lithiated_df = fully_lithiated_df.sort_values(by='abs_path_in')
fully_lithiated_df = fully_lithiated_df.reset_index(drop=True)

# ! Replace special species labels at the start of a line (M<digit>u / M<digit>d -> Mn,
# ! O<digit> -> O) before the QE input structure is read in with ASE.
# Raw strings keep the regex escape \d valid Python; the character class is [ud]
# (the former [u,d] additionally matched a literal comma).
fully_lithiated_df['clean_input'] = fully_lithiated_df['abs_path_in'].apply(lambda x: Path(x).read_text())
fully_lithiated_df['clean_input'] = fully_lithiated_df['clean_input'].str.replace(r'\nM\d[ud] ', '\nMn ', regex=True)
fully_lithiated_df['clean_input'] = fully_lithiated_df['clean_input'].str.replace(r'\nO\d ', '\nO ', regex=True)

# Swap only the last suffix of the file name ('x.scf.2.in' -> 'x.scf.2.clean.in');
# a substring replace on the whole path would also rewrite '.in' inside directory names.
fully_lithiated_df['clean_input_file'] = fully_lithiated_df['abs_path_in'].apply(
    lambda x: str(Path(x).with_suffix('.clean.in'))
)
# NOTE: the cleaned files are written next to the original inputs and also end in '.in',
# so a later directory scan for the '.in' suffix will list them as well.
_ = fully_lithiated_df.apply(lambda x: Path(x['clean_input_file']).write_text(x['clean_input']), axis=1)

# ! Actually read in the input files with ASE
fully_lithiated_df['ase_in'] = fully_lithiated_df['clean_input_file'].apply(lambda x: read_espresso_in(x))
# Reorder the atoms of every structure by atomic number
fully_lithiated_df['ase_in'] = fully_lithiated_df['ase_in'].apply(lambda x: sort(x, tags=x.numbers))
# devel_df['ase_out'] = devel_df.apply(lambda x: read_espresso_out(os.path.join(x['abs_path'], x['calc_out'])), axis=1)

# ! Get chemical formula and symbols
fully_lithiated_df['chem_formula'] = fully_lithiated_df['ase_in'].apply(lambda x: x.get_chemical_formula())
fully_lithiated_df['chem_symbols'] = fully_lithiated_df['ase_in'].apply(lambda x: x.get_chemical_symbols())

# type(devel_df['ase_out'].values[0])
# ! Convert ase structures to pymatgen
fully_lithiated_df['pmg_in'] = fully_lithiated_df['ase_in'].apply(lambda x: AseAtomsAdaptor.get_structure(x))
# devel_df['pmg_out'] = devel_df['ase_out'].apply(lambda x: AseAtomsAdaptor.get_structure(x))

fully_lithiated_df.shape
show_columns_structures = ['abs_path', 'calc_in', 'calc_out', 'calc_type', 'chem_formula']
fully_lithiated_df[show_columns_structures].head()

In [None]:
# view(fully_lithiated_df['ase_in'].values)

# short_names = ['spinel_LiMnNiO', 'spinel_LiMnO', 'olivine_LiFePO', 'olivine_LiMnFePO', 'olivine_LiMnPO']
# fully_lithiated_df['short_name'] = short_names
# fully_lithiated_df[show_columns_structures].head()
# fully_lithiated_df['calc_in'].values
# fully_lithiated_df

In [None]:
# Store the frame, including its ASE and pymatgen object columns, as a pickle
# below <project_dir>/data.
fully_lithiated_df.to_pickle(Path(project_dir) / 'data' / 'fully_lithiated_df.pkl')