# 670raman

In [None]:
# Free-text summary of this notebook: author names followed by a one-line purpose.
# NOTE(review): presumably read by the simtool framework as the tool description -- confirm.
DESCRIPTION = """
Panos Manganaris, Kat Nykiel
Simtool for the automated generation of Raman and IR Spectra from QE vc-relax>scf>ph>dynmat pipeline
"""

In [None]:
def copyAndSaveFileAsOutput(outputVariableName,inputPath):
    """Copy a file into the working directory and save it as an output.

    Args:
        outputVariableName: name of the output variable handed to ``db.save``.
        inputPath: path of the file to save, optionally prefixed with
            ``file://``. A falsy value (``None`` or ``""``) makes the call a
            no-op.

    The file is copied next to the notebook under its base name (unless the
    path already is a bare file name) and that base name is passed to
    ``db.save`` as ``file=``.
    """
    # Imported locally: the notebook never imports ``os`` and only imports
    # ``shutil`` in a later cell, so this keeps the helper self-contained.
    import os
    import shutil

    if not inputPath:
        return

    # Drop the 7-character "file://" scheme prefix when present.
    sourcePath = inputPath[7:] if inputPath.startswith('file://') else inputPath
    resultFile = os.path.basename(sourcePath)
    if resultFile != sourcePath:
        try:
            shutil.copy2(sourcePath,resultFile)
        except shutil.SameFileError:
            # e.g. "./name": source and destination are the same file,
            # so there is nothing to copy.
            pass
    db.save(outputVariableName,file=resultFile)

In [None]:
def copyAndSaveFileAsBogusOutput(outputVariableName,inputPath):
    """Copy a file locally, then try to save a ``_bogus``-suffixed name as output.

    Args:
        outputVariableName: name of the output variable handed to ``db.save``.
        inputPath: path of the file, optionally prefixed with ``file://``.
            A falsy value (``None`` or ``""``) makes the call a no-op.

    The file is copied into the working directory under its base name, but
    ``db.save`` is called with ``<base name>_bogus``. If that raises
    ``FileNotFoundError`` the message is printed and the error is swallowed.
    NOTE(review): this looks like a deliberate failure-path helper -- confirm.
    """
    # Imported locally: the notebook never imports ``os`` and only imports
    # ``shutil`` in a later cell, so this keeps the helper self-contained.
    import os
    import shutil

    if not inputPath:
        return

    # Drop the 7-character "file://" scheme prefix when present.
    sourcePath = inputPath[7:] if inputPath.startswith('file://') else inputPath
    resultFile = os.path.basename(sourcePath)
    if resultFile != sourcePath:
        try:
            shutil.copy2(sourcePath,resultFile)
        except shutil.SameFileError:
            # e.g. "./name": source and destination are the same file,
            # so there is nothing to copy.
            pass
    try:
        db.save(outputVariableName,file=resultFile + "_bogus")
    except FileNotFoundError as e:
        print("%s" % (e.args[0]))

In [None]:
%load_ext yamlmagic

In [None]:
%%yaml INPUTS
# Notebook Monitoring
loglevel:
    type: Choice
    description: python logging module level setting
    options: ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    value: "WARNING"

# submit resource requests and queue preferences
numnodes:
    type: Number
    description: Number of processors for mpi execution
    value: 1
    min: 1
    max: 8
        
walltime:
    type: Text
    description: Maximum time to wait for reduced job load on cluster in HH:MM:SS time format
    value: "01:00:00"

pps:
    type: List
    description: List of pseudopotentials in UPF format to be used by simulator. Potentials must either exist in ./simtool/pseudo or be accessible at http://www.quantum-espresso.org/wp-content/uploads/upf_files/
    value: ["O.pbe-hgh.UPF", "Zn.pbe-d-hgh.UPF"]
  
# Simulator settings
smearing:
    type: Choice
    description: Setting the extent to which there is metal-like sharing of electrons
    options: ['smearing', 'fixed']
    value: 'fixed'

ecutwfc:
    type: Number
    description: kinetic energy cutoff for wavefunctions
    value: 120
    min: 25
    max: 400
    units: Ry

ecutrho:
    type: Number
    description: kinetic energy cutoff for charge density and potential. Recommended (but not required) to be 4 times the cutoff for wavefunctions.
    value: 480
    min: 100
    max: 1600
                
kpoints:
    type: Number
    description: magnitude of kpoint density, which will be scaled according to the lattice vectors
    value: 6
    min: 1
    max: 20

epsil:
    type: Boolean
    description: true for solid crystals, false for molecules
    value: True
        
lraman:
    type: Boolean
    description: compute raman tensor with gga functionals when true, LDA functionals when false
    value: True


# POSCAR string taken from frontend
struct_dict:
    type: Dict
    description: User selected structure as dictionary

In [None]:
%%yaml OUTPUTS

spectra:
    type: Dict
    description: Dictionary of pandas Series of dynmat.x output containing mode numbers and frequencies of IR and Raman Spectra
        
logreport:
    type: Text
    description: contents of the run logfile populated according to loglevel

In [None]:
# Extra paths declared alongside the notebook; currently only the local pseudopotential directory.
# NOTE(review): presumably consumed by the simtool framework when staging a run -- confirm.
EXTRA_FILES = ["./pseudo"]
# add the pp repository files to the simtool directory so they can be found automatically
# TODO: find a way to copy new pseudopotentials to the simtool home so network access is not always needed.

In [None]:
#parameters: define default values which can be overwritten at runtime -- parameters should NOT be interdependent
from simtool import getValidatedInputs

# Validate the INPUTS declaration and, when anything comes back, publish every
# entry as a module-level variable (loglevel, numnodes, pps, ...).
defaultInputs = getValidatedInputs(INPUTS)
if defaultInputs:
    globals().update(defaultInputs)
#papermill adds the injected-parameters cell after this cell; it contains the run settings
#Don't intake passwords/keys/private info -- can't seem to delete from RUNS

## Translate User Choices to Backend Variables as Necessary

In [None]:
import logging

# Publish the logging level constants under their bare names so that the
# user's ``loglevel`` choice (a string such as "WARNING") can be resolved
# by a plain name lookup below.
DEBUG = logging.DEBUG
INFO = logging.INFO
WARNING = logging.WARNING
ERROR = logging.ERROR
CRITICAL = logging.CRITICAL

logfmt = '[%(levelname)s] %(asctime)s - %(message)s'
# Everything is written to run.log, which is read back later as the log report.
logging.basicConfig(
    filename='run.log',
    level=globals()[loglevel],
    datefmt="%Y-%m-%d %H:%M:%S",
    format=logfmt,
)

## define simtool dependencies

In [None]:
#cli utilities
import io
import shutil
import subprocess

#nanohub utilities
import hublib.use
import fileinput
from simtool import DB, parse

#automate retrieval of crystal structures and pseudopotentials from Materials Project
from pymatgen.core import Structure, Lattice, Element, Composition
import openbabel # automates translation of pymatgen structs to useful file formats
#process and categorize crystal structures

#misc tools
import math
import pandas as pd
import numpy as np
np.set_printoptions(formatter={'float': '{: 0.16f}'.format}, suppress=False)

#%use espresso-6.2.1
#Apparently not functioning? sufficient to declare espresso-6.8 in submit call

## Validating/Correcting User Defined Chemistry Inputs

In [None]:
# convert poscar back to structure object
# TODO: test the resulting PeriodicSite objects for significant digits; log and WARN the user about
#       insufficient precision and advise setting the nosym parameter to TRUE in that case.
# Rebuild the pymatgen Structure from the dictionary supplied through the struct_dict input.
struct = Structure.from_dict(struct_dict)
logging.debug(f"the struct is made! {struct.sites}")

In [None]:
# Derive the formula string from the structure and resolve it to a Composition.
compound = struct.formula
logging.info(f"User Entered: {compound}")

vcomp = Composition.ranked_compositions_from_indeterminate_formula(compound, lock_if_strict=True)
logging.info(f"User likely refering to one of these compositions: {vcomp}")

# Guard clause: anything other than a non-empty list of candidates is rejected.
if not (isinstance(vcomp, list) and vcomp):
    raise ValueError(f"The compound: {compound} is not recognizable as a valid chemical formula")

# Candidates are ordered by likelihood, so keep the first one.
vcomp = vcomp[0]
logging.debug(f"vcomp is type {type(vcomp)}")
if not vcomp.valid:
    logging.info("the input composition contains dummy species")

In [None]:
#Obtain Specified pps
# We don't seem to have network access permissions in slurm, so this doesn't work 
#for pp in pps:
#    COMMANDwgetpp = f"wget -O ./pseudo/{pp} http://www.quantum-espresso.org/wp-content/uploads/upf_files/{pp}"
#    subprocess.run(COMMANDwgetpp.split())
# Attempt to generate pseudopotentials on the spot?
# pmg potcar --symbols Li_sv O --functional PBE #Can potcar be converted to UPF format?

In [None]:
# The element symbol is the text before the first '.' of each pseudopotential file name.
pppart_list = []
for pp in pps:
    symbol = pp.split('.')[0]
    pppart_list.append(symbol)
    logging.debug(f"from {pp} got {symbol}")

# Order the formula's elements (and their masses) from heaviest to lightest.
formula_elements = list(vcomp.to_reduced_dict.keys())
amass_ar = np.array([Element(el).atomic_mass for el in formula_elements])
massive_first = np.argsort(amass_ar)[::-1].tolist()
amass_falling = amass_ar[massive_first]
el_falling = np.array(formula_elements)[massive_first]
logging.debug(f"Elements in order of decending mass: {el_falling}. Decending mass: {amass_falling}")

# Order the pseudopotential files the same way so both sequences line up by rank.
pps_amass_ar = np.array([Element(el).atomic_mass for el in pppart_list])
pps_massive_first = np.argsort(pps_amass_ar)[::-1].tolist()
pps_falling = np.array(pps)[pps_massive_first]
logging.debug(f"Pseudopotentials in order of decending mass: {pps_falling}")

if len(pps_falling) != len(el_falling):
    logging.warning("There are not as many Pseudopotentials as Species. This may not work")

## Obtain Crystal Structure(s) of Compound Unit Cell from Database
#### Obtain standard and conventional sim cell from Structure(s) 
### or
## Generate from Space Group, Lattice, Elements, and Fractional Coords
### or
## Take the User's input as gospel

In [None]:
#Create cell parameters block
# Render the 3x3 lattice matrix as text through an in-memory byte buffer,
# then decode the buffer into the string inserted under CELL_PARAMETERS.
bio = io.BytesIO()
np.savetxt(bio, struct.lattice.matrix, fmt="%0.8f")# too much precision in the input files causes convergence issues. too little causes symmetry issues.
cell_parameters_block = bio.getvalue().decode('latin1')
logging.info(f"""The standardized conventional lattice parameters are obtained:
            {cell_parameters_block}""")

In [None]:
#pw's atomic Species block and dm's amass_block
# One "<symbol> <mass> <pseudopotential file>" line per species, heaviest first.
atomic_species_block = "".join(
    f"{species} {amass} {pp}\n"
    for pp, amass, species in zip(pps_falling, amass_falling, el_falling)
)
logging.info(f"""The atomic_species_block:
{atomic_species_block}""")

# "amass(i)=<mass>," entries, numbered from 1 in the same order as the species
# lines above; the trailing two spaces indent whatever follows in the template.
amass_block = "".join(
    f"amass({ind})={amass},\n  "
    for ind, amass in enumerate(amass_falling, start=1)
)

# Number of distinct species: keep only the letters of each site's species
# string and count the unique results. (The previous loop-index based value
# was immediately overwritten and failed outright when there were no species.)
ntyp_num = len(np.unique([
    ''.join(ch for ch in str(site.species) if ch.isalpha())
    for site in struct.sites
]))
logging.debug(f"ntyp = {ntyp_num}")
logging.info(f"""The amass_block:
{amass_block}""")

In [None]:
def get_fractional_coords_as_qe_array(periodicsitesobj):
    """Return one site as the array ``[symbol, a, b, c]``.

    The symbol comes from the keys of ``periodicsitesobj.species.to_reduced_dict``
    and ``a``, ``b``, ``c`` are read straight from the site. Because the list
    mixes a string with numbers, NumPy stores every entry as a string.
    """
    symbols = periodicsitesobj.species.to_reduced_dict.keys()
    # Unpacking into str() is only meaningful when the site holds one species.
    # str might break if the species has whitespaces in it... I think. Or maybe that's just for unpacked lists
    label = str(*symbols)
    row = [label, periodicsitesobj.a, periodicsitesobj.b, periodicsitesobj.c]
    at_site = np.array(row)
    logging.debug(f"atomic sites array: {at_site}")
    return at_site

# Number of atoms in the cell, and one [symbol, a, b, c] row per site.
nat_num = len(struct.sites)
site_rows = [get_fractional_coords_as_qe_array(site) for site in struct.sites]
atomic_positions_block = np.array(site_rows)

# Render the rows as whitespace-separated text via an in-memory byte buffer.
bio = io.BytesIO()
np.savetxt(bio, atomic_positions_block, fmt="%s", encoding="latin1")
atomic_positions_block = bio.getvalue().decode('latin1')
logging.info(f"""The atomic sites are obtained:
            {atomic_positions_block}""")

In [None]:
# generate a k-point grid from the provided kpoint magnitude and lattice vectors
# Inverse lattice lengths, normalised so the largest is 1: the shortest cell
# edge gets the full `kpoints` divisions and longer edges get fewer.
b = np.array([1/a for a in struct.lattice.abc])
b = b*(1/max(b))
# The automatic K_POINTS card takes integer grid dimensions, so cast the
# rounded-up values to int; otherwise they render as "6.0 6.0 4.0".
k = np.ceil(kpoints*b).astype(int)
kpoints_block = f"""{k[0]} {k[1]} {k[2]} 0 0 0"""

## Prepare input files for the simulation pipeline in Quantum ESPRESSO .in format

#### insert validated inputs into the predetermined format strings:

In [None]:
# pw.x input for the variable-cell relaxation step.
# The structure blocks (cell, species, positions, k-points) are the strings
# assembled in the preceding cells. The occupations value is quoted because it
# is a character value in the &system namelist; when the user selects
# 'smearing', the smearing type and width are appended on the same line.
vcrelax_input = f"""
 &control
    calculation='vc-relax',
    restart_mode='from_scratch',
    prefix='{vcomp.reduced_formula}',
    outdir='./',
    pseudo_dir = './',
    etot_conv_thr=1.0d-6,
    forc_conv_thr=1.0d-6,
    nstep=100,
/
 &system    
    ibrav= 0, celldm(1)=1 ,nat= {nat_num}, ntyp= {ntyp_num},
    ecutwfc = {ecutwfc}, ecutrho = {ecutrho},
    occupations='{smearing}', {"smearing='mp', degauss=0.06," if smearing == 'smearing' else ""}
    nosym = TRUE,
/
 &electrons
    mixing_beta =0.7,
    conv_thr =1.0d-8,
/
 &ions
    ion_dynamics='bfgs'
/
&cell
    cell_dynamics='bfgs',
    press=0.0,
    cell_factor=4.0,
    press_conv_thr=0.5,
/
CELL_PARAMETERS (alat= 1.00000000)
{cell_parameters_block}
ATOMIC_SPECIES
{atomic_species_block}
ATOMIC_POSITIONS (crystal)
{atomic_positions_block}
K_POINTS (automatic)
{kpoints_block}
"""

# ph.x input: shares the prefix/outdir used by the pw.x inputs and names the
# dynamical-matrix file dmat.<formula>. epsil and lraman are the Boolean
# inputs, so they render as ".True." / ".False." inside the namelist.
# amass_block already ends with a newline plus two spaces, which is why
# outdir follows it on the same template line.
ph_input = f"""
Normal modes for {vcomp.reduced_formula}
 &inputph
  tr2_ph=1.0d-14,
  prefix='{vcomp.reduced_formula}',
  {amass_block}outdir='./'
  epsil=.{epsil}.,
  lraman=.{lraman}.,
  trans=.true.,
  asr=.true.,
  fildyn='dmat.{vcomp.reduced_formula}'
  ! ldisp=.true.
 /
 0.0 0.0 0.0
"""

# dynmat.x input: reads the dmat.<formula> file named in the ph.x input above.
# NOTE(review): asr='zero-dim' is hard-coded regardless of the epsil input -- confirm this is intended for crystals.
dm_input = f"""
&input fildyn='dmat.{vcomp.reduced_formula}', asr='zero-dim' /
"""
logging.debug(f"{ph_input}")

## Write Input Files. Name Output Files. Prepare to Assign Validated Outputs

In [None]:
# fixed starting point input file generation
# Each file is written inside a context manager so the handle is closed even if
# the write fails. The names stay bound afterwards because later cells read
# `.name` from these (closed) file objects when building the submit commands.
with open(f"{vcomp.reduced_formula}.vc-relax.in", "w") as vcr_input_file:
    vcr_input_file.write(vcrelax_input)

with open(f"{vcomp.reduced_formula}.ph.in", "w") as ph_input_file:
    ph_input_file.write(ph_input)

with open(f"{vcomp.reduced_formula}.dm.in", "w") as dm_input_file:
    dm_input_file.write(dm_input)

In [None]:
# output file generation
# Opening in "w" mode creates (or truncates) each output file up front. Nothing
# is written through these handles in this notebook: later cells use `.name` to
# build the submit commands and call `.close()` after the matching run.
vcr_output_file = open(f"{vcomp.reduced_formula}.vc-relax.out", "w")
scf_output_file = open(f"{vcomp.reduced_formula}.scf.out", "w")
ph_output_file = open(f"{vcomp.reduced_formula}.ph.out", "w")
#dmat_file = open(f"dmat.{vcomp.reduced_formula}", "w")
dm_output_file = open(f"{vcomp.reduced_formula}.dm.out", "w")

In [None]:
# Output store built from the OUTPUTS declaration; results are recorded through db.save(...).
db = DB(OUTPUTS)

## Prepare Pseudopotential args for Run

In [None]:
# One "-i pseudo/<file> " argument per pseudopotential, each with a trailing space.
pp_args = "".join(f"-i pseudo/{pp} " for pp in pps)

## Assemble Job Commands and Sequentially Submit to Cluster
#### First perform variable cell relaxation and receive optimized structure

In [None]:
# Variable-cell relaxation: build the submit command line, run it and log what it printed.
COMMANDvcr = f"espresso-6.8_pw > {vcr_output_file.name}"
SUBMITvcr = (
    f"submit -n {numnodes} -w {walltime} -e QE_DISABLE_GGA_PBE=0 "
    f"--runName {vcomp.reduced_formula}vcr {COMMANDvcr} {pp_args} -i {vcr_input_file.name} "
)
logging.info("reached cell relaxation...")

# The command string is split on whitespace into an argument list (no shell is involved).
spvcr = subprocess.run(SUBMITvcr.split(), capture_output=True, text=True)
spvcr_out = spvcr.stdout
spvcr_err = spvcr.stderr

logging.debug(" ".join(spvcr.args))
logging.info("\nprocess output:\n%s\n", spvcr_out)
logging.debug("\nprocess err out:\n%s\n", spvcr_err)
vcr_output_file.close()
#db.save('vcrstdout', spvcr) #cannot save arbitrary objects as outputs.

# Extract the new structure from the vc-relax calculation

In [None]:
# extract structure data from vc-relax.out and save to scf input
matrix_start = "CELL_PARAMETERS"
atpos_start = "ATOMIC_POSITIONS"
atpos_end = "End final coordinates"
matrix_inds = []
atpos_inds = []
# Stays None when the end-of-coordinates marker never appears, which is what
# selects the "did not converge" fallback below. (Left unset, that fallback
# could never run: testing the name raised NameError first.)
end = None
with open(f"{vcr_output_file.name}", "rt") as vcrout:
    vcr_lines = vcrout.readlines()

#raise exception for bad file
if not vcr_lines:
    raise IndexError("vcrelax calculation did not output anything to file. It probably failed to run")

preamble = "".join(vcr_lines[14:40])
logging.debug(f"""The first meaningful lines of vc relaxation output are: {preamble}\n""")

# Getting structure block line locations in the file
for ind, line in enumerate(vcr_lines):
    if matrix_start in line:
        logging.debug(f"adding line {ind} to cell matrix reference locations")
        matrix_inds.append(ind)
    if atpos_start in line:
        logging.debug(f"adding line {ind} to site reference locations")
        atpos_inds.append(ind)
    if atpos_end in line:
        logging.debug(f"line {ind} is the end of structure info")
        end = ind

# Without both headers there is no structure to carry forward.
if not matrix_inds or not atpos_inds:
    raise ValueError(f"{vcr_output_file.name} contains lines, but no reference to a relaxed structure. pw.x failed for some reason")

# get the new cell parameters: the lines after the last CELL_PARAMETERS header,
# stopping one line short of the last ATOMIC_POSITIONS header.
relaxed_cell_parameters_block = ""
for line in vcr_lines[matrix_inds[-1]+1:atpos_inds[-1]-1]:
    logging.debug("looping though final cell matrix lines")
    relaxed_cell_parameters_block += line

# get the new atomic positions
relaxed_atomic_positions_block = ""
if end is not None:
    for line in vcr_lines[atpos_inds[-1]+1:end]:
        logging.debug("looping though final atomic sites lines")
        relaxed_atomic_positions_block += line
else:
    logging.error("Variable Cell Relaxation Failed to Converge! Try increasing ecutwfc to at least 200. The calculation is proceeding with most relaxed structure found.")
    # Take everything after the last ATOMIC_POSITIONS header except the final
    # line of the file, dropping blank lines.
    end_lines_list = vcr_lines[atpos_inds[-1]+1:-1]
    for line in end_lines_list:
        if line != "\n":
            logging.debug("looping though final atomic sites lines")
            relaxed_atomic_positions_block += line

logging.info(f"""The Lattice Parameters after relaxation are:
    {relaxed_cell_parameters_block}""")
logging.info(f"""The atomic sites after relaxation are:
    {relaxed_atomic_positions_block}""")

# pw.x input for the scf step on the relaxed structure.
# The atom count carries its `nat=` keyword: without it the bare number is read
# as a second celldm entry and nat is never set. The occupations value is
# quoted because it is a character value in the &SYSTEM namelist.
# NOTE(review): ecutrho is not passed here although the vc-relax input sets it -- confirm that is intended.
scf_input = f"""
&CONTROL
  calculation  = "scf",
  prefix       = "{vcomp.reduced_formula}",
  pseudo_dir   = "./",
  outdir       = "./",
/
&SYSTEM
  ibrav=0, celldm(1) =1, nat= {nat_num}, ntyp= {ntyp_num},
  occupations='{smearing}', {"smearing='marzari-vanderbilt', degauss=0.02," if smearing == 'smearing' else ""}
  ecutwfc ={ecutwfc},
/
&ELECTRONS
  mixing_mode='plain'
  mixing_beta = 0.5,
  startingwfc='random',
  conv_thr =  1.0d-8
/
CELL_PARAMETERS (alat= 1.00000000)
{relaxed_cell_parameters_block}
ATOMIC_SPECIES
{atomic_species_block}
ATOMIC_POSITIONS (crystal)
{relaxed_atomic_positions_block}
K_POINTS (automatic)
{kpoints_block}
"""

# write to the file
# The context manager closes the handle even if the write fails; the name stays
# bound because the submit command in the next cell reads scf_input_file.name.
with open(f"{vcomp.reduced_formula}.scf.in", "w") as scf_input_file:
    scf_input_file.write(scf_input)

#### Next perform self consistent field calculation with new structure to optimize wavefunction

In [None]:
# Self-consistent field step: build the submit command line, run it and log what it printed.
COMMANDscf = f"espresso-6.8_pw > {scf_output_file.name}"
SUBMITscf = (
    f"submit -n {numnodes} -w {walltime} -e QE_DISABLE_GGA_PBE=0 "
    f"--runName {vcomp.reduced_formula}scf {COMMANDscf} {pp_args} -i {scf_input_file.name}"
)
logging.info("reached self consistent field calculation...")

# The command string is split on whitespace into an argument list (no shell is involved).
spscf = subprocess.run(SUBMITscf.split(), capture_output=True, text=True)
spscf_out = spscf.stdout
spscf_err = spscf.stderr

logging.debug(" ".join(spscf.args))
logging.info("\nprocess output:\n%s\n", spscf_out)
logging.debug("\nprocess err out:\n%s\n", spscf_err)
scf_output_file.close()
#db.save('scfstdout', spscf)

#### Next compute vibrational frequencies
ph.x takes as inputs:
1. compound.ph.in file
2. compound.scf.out file

produces outputs:
1. compound.ph.out file
2. dmat.compound file

In [None]:
# Phonon step: build the submit command line, run it and log what it printed.
COMMANDph = f"espresso-6.8_ph > {ph_output_file.name}"
# Files produced by the scf step that are passed along as extra inputs.
extra_inargs = f"-i {vcomp.reduced_formula}.xml -i {vcomp.reduced_formula}.save"
SUBMITph = (
    f"submit -n {numnodes} -w {walltime} -e QE_DISABLE_GGA_PBE=0 "
    f"--runName {vcomp.reduced_formula}ph {extra_inargs} {COMMANDph} -in {ph_input_file.name} {pp_args}"
)
logging.info("reached phonon calculation...")

# The command string is split on whitespace into an argument list (no shell is involved).
spph = subprocess.run(SUBMITph.split(), capture_output=True, text=True)
spph_out = spph.stdout
spph_err = spph.stderr

logging.debug(" ".join(spph.args))
logging.info("\nprocess output:\n%s\n", spph_out)
logging.debug("\nprocess err out:\n%s\n", spph_err)
ph_output_file.close()
#db.save('phstdout', spph)

#### Next extract phonon spectra
dynmat.x takes as inputs:
1. compound.dm.in
2. dmat.compound

dmat.compound may be malformed. Why this happens is unknown to me: it occurs even among compositions in the same space group whose unit cells are merely defined differently.

produces output:
1. compound.dm.out

This output contains the spectrum tensor and can be used to plot the spectrum

In [None]:
# Read back the dynamical-matrix file written by ph.x and log it. This is
# best-effort: problems are logged as CRITICAL but do not stop the notebook.
try:
    with open(f"dmat.{vcomp.reduced_formula}", 'r') as dynmat:
        dmtext = dynmat.read()
except (OSError, UnicodeDecodeError):
    # Missing or unreadable file.
    logging.critical(f"dmat.{vcomp.reduced_formula} could not be read. ph.x likely crashed")
else:
    if dmtext.strip():
        logging.info(dmtext)
    else:
        # The file exists but holds nothing usable.
        logging.critical(f"dmat.{vcomp.reduced_formula} is empty. ph.x likely crashed")

In [None]:
# dynmat step: build the submit command line, run it and log what it printed.
COMMANDdm = f"espresso-6.8_dynmat > {dm_output_file.name}"
# The dynamical-matrix file from the phonon step is passed along as an extra input.
extra_inargs = f"-i dmat.{vcomp.reduced_formula}"
SUBMITdm = (
    f"submit -n {numnodes} -w {walltime} -e QE_DISABLE_GGA_PBE=0 "
    f"--runName {vcomp.reduced_formula}dm {extra_inargs} {COMMANDdm} -in {dm_input_file.name}"
)
logging.info("reached dynamical matrix calculation...")

# The command string is split on whitespace into an argument list (no shell is involved).
spdm = subprocess.run(SUBMITdm.split(), capture_output=True, text=True)
spdm_out = spdm.stdout
spdm_err = spdm.stderr

logging.debug(" ".join(spdm.args))
logging.info("\nprocess output:\n%s\n", spdm_out)
logging.debug("\nprocess err out:\n%s\n", spdm_err)
#dmat_file.close() #might be overwriting qe's attempt to output this file automatically?
dm_output_file.close()
#db.save('dmstdout', spdm)

## Parse Output Files for Declared Results

In [None]:
# Pull the modes/frequencies table out of the dynmat.x output.
with open(f"{dm_output_file.name}", "rt") as resultfile:
    results = resultfile.readlines()

results_start = "mode"
results_end = "DYNMAT"
# Reset both markers so values left over from an earlier cell cannot be reused
# when a marker is missing from this file.
start = None
end = None
for ind, line in enumerate(results):
    # The last matching line wins for each marker.
    if results_start in line:
        start = ind
    if results_end in line:
        end = ind

if start is None or end is None:
    # spectra_data is deliberately left undefined; the following cells treat
    # its absence as "no spectra produced".
    logging.error(f"{dm_output_file.name} may not contain a modes and frequencies card. dynmat.x likely failed to produce it. dmat.{vcomp.reduced_formula} may be malformed")
else:
    # '#' marks the header row; blank it out so the header parses like the data rows.
    spectra_data = "".join(results[start:end]).replace("#"," ")
    # The message no longer names the crystal system: it relied on an object
    # (`sa`) that is never created, which made a successful parse log an error.
    logging.info(f"""The Predicted Spectrographs for {vcomp.reduced_formula} are: {spectra_data}""")

### Assign results to output variables/hardcopy plot files

In [None]:
# Parse the extracted whitespace-separated table into a DataFrame. This is
# best-effort: on any failure spectradf stays undefined (the final cell reports
# that), but the reason is now written to the log instead of being discarded.
try:
    result_stream = io.StringIO(spectra_data)
except NameError:
    logging.error("No spectra data was extracted from the dynmat.x output, so there is nothing to parse")
else:
    try:
        try:
            # pandas >= 1.3 spelling of "skip malformed rows".
            spectradf = pd.read_csv(result_stream, on_bad_lines="skip", sep=r"\s+", engine="python")
        except TypeError:
            # Older pandas does not know on_bad_lines; use the legacy keyword.
            result_stream.seek(0)
            spectradf = pd.read_csv(result_stream, error_bad_lines=False, sep=r"\s+", engine="python")
    except Exception:
        logging.exception("Could not parse the spectra table extracted from the dynmat.x output")

In [None]:
# Read back the log file configured in the logging setup and save its full
# text as the 'logreport' output.
with open("run.log", 'r') as logfile:
    logtext = logfile.read()
db.save('logreport', logtext)

In [None]:
# Save the parsed spectra as the 'spectra' output. Any failure (most commonly
# spectradf never having been created) is reported as a ValueError with the
# underlying error chained as its cause; KeyboardInterrupt and SystemExit are
# no longer intercepted.
try:
    db.save('spectra', spectradf.to_dict())
except Exception as err:
    raise ValueError(f"No spectra dataframe was produced from processing {dm_output_file.name}") from err