In [None]:
import os
import sys
import numpy as np
# import psycopg2
import sqlalchemy
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
from collections import defaultdict
from collections import Counter

import matplotlib.pyplot as plt
import seaborn as sns
# Global plot styling: 'paper' context, tick-style axes, frameless legends.
# NOTE(review): font_scale=8/8.8 presumably rescales the 'paper' fonts to
# 8 pt text -- confirm. Alternative context kept from the original: "talk".
sns.set(
    context='paper',
    style='ticks',
    font_scale=8 / 8.8,
    color_codes=True,
    rc={'legend.frameon': False},
)

# SVG font handling: 'none' (see matplotlib's svg.fonttype rcParam).
plt.rcParams.update({'svg.fonttype': 'none'})

%matplotlib inline

In [None]:
# Record the numpy and pandas versions used for this run.
for _lib in (np, pd):
    print(_lib.__version__)

In [None]:
from pymatgen.core import Composition
from pymatgen.analysis.phase_diagram import PhaseDiagram, PDEntry

In [None]:
# Work from the parent of the directory the notebook was started in; the
# relative paths used further down are resolved from there.
# The starting directory is remembered the first time this cell runs, so
# re-executing the cell does not climb one more level each time.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [None]:
# Show the current working directory; the relative path below is resolved
# against it.
print(os.getcwd())
# Make the directory two levels up importable so the `rlmolecule` imports
# that follow can be resolved (presumably the repository root -- confirm).
# The path is made absolute so later directory changes cannot alter what it
# points to, and it is appended only once so re-running the cell does not
# pile up duplicate sys.path entries.
_repo_root = os.path.abspath('../../')
if _repo_root not in sys.path:
    sys.path.append(_repo_root)
import rlmolecule
from rlmolecule.sql.run_config import RunConfig
from rlmolecule.sql import Base, Session
from rlmolecule.sql.tables import GameStore, RewardStore, StateStore
from rlmolecule.crystal.crystal_state import CrystalState
from rlmolecule.crystal import ehull
from rlmolecule.crystal.ehull import fere_entries

In [None]:
# Load the decomposition-energy table, report its row count and preview it.
rewards_file = "/projects/rlmolecule/jlaw/crystal_outputs/2022-07-05/self_consistent_decomp_energies.csv"
df = pd.read_csv(rewards_file)
print(df.shape[0])
df.head(2)

In [None]:
# Number of rows whose decomp_energy is below -0.1.
int((df.decomp_energy < -0.1).sum())

In [None]:
# Histogram of decomp_energy restricted to the open interval (-0.5, 0.5),
# with an x tick every 0.1.
in_window = (df.decomp_energy > -.5) & (df.decomp_energy < .5)
ax = df.loc[in_window, 'decomp_energy'].hist(bins=50)
ax.set_xticks(np.arange(-.5, .6, .1))
plt.show()

In [None]:
# Histogram of decomp_energy over all rows (no window applied).
df['decomp_energy'].hist(bins=30)

## Double check the decomposition for some structures

In [None]:
def setup_competing_phases(competing_phases_files):
    """Collect the entries of all competing phases into a single Series.

    Every file is read with ``load_competing_phases``; the results are
    placed after the entries in ``fere_entries`` (the individual elements).

    Args:
        competing_phases_files: one CSV path, or a list/tuple of CSV paths.

    Returns:
        pandas.Series holding the ``fere_entries`` items followed by the
        entries of each file in the order given, re-indexed from 0.
    """
    # Anything that is not a list or tuple is treated as a single file.
    # Tuples are accepted explicitly: wrapping a tuple of paths into a
    # one-element list would hand the whole tuple to read_csv.
    if not isinstance(competing_phases_files, (list, tuple)):
        competing_phases_files = [competing_phases_files]
    all_competing_phases = [load_competing_phases(f) for f in competing_phases_files]

    # also add the individual elements
    combined = pd.concat([pd.Series(fere_entries)] + all_competing_phases)
    # reset_index() yields a frame whose value column is labelled 0; taking
    # that column gives the entries back with a fresh 0..n-1 index.
    competing_phases = combined.reset_index()[0]
    return competing_phases


def load_competing_phases(competing_phases_file):
    """Read a CSV of competing phases and turn each row into a PDEntry.

    The file must provide a formula column (``sortedformula`` or ``comp``)
    and a per-atom energy column (``energyperatom`` or
    ``predicted_energy``). The energy given to each ``PDEntry`` is the
    per-atom energy multiplied by ``Composition(formula).num_atoms``.

    Args:
        competing_phases_file: path (or anything ``pd.read_csv`` accepts).

    Returns:
        pandas.Series of ``PDEntry`` objects, one per row, sharing the
        index of the frame that was read. An empty file yields an empty
        Series.

    Raises:
        AssertionError: if a required column is missing.
    """
    print(f"Reading {competing_phases_file}")
    df = pd.read_csv(competing_phases_file)
    print(f"\t{len(df)} lines")
    print(df.head(2))

    has_formula = 'sortedformula' in df.columns or 'comp' in df.columns
    has_energy = 'energyperatom' in df.columns or 'predicted_energy' in df.columns
    assert has_formula and has_energy, (
        f"{competing_phases_file} needs a 'sortedformula' or 'comp' column and "
        f"an 'energyperatom' or 'predicted_energy' column; found {list(df.columns)}"
    )

    # Normalise the alternative column names without mutating in place.
    rename_map = {}
    if 'sortedformula' not in df.columns:
        rename_map['comp'] = 'sortedformula'
    if 'energyperatom' not in df.columns:
        rename_map['predicted_energy'] = 'energyperatom'
    df = df.rename(columns=rename_map)
    print("columns after renaming:", df.columns)

    # convert the dataframe to a list of PDEntries used to create the convex hull
    # Each formula is parsed once and reused for both the atom count and the
    # entry itself.
    entries = []
    for formula, energy_per_atom in zip(df.sortedformula, df.energyperatom):
        composition = Composition(formula)
        entries.append(PDEntry(composition, energy_per_atom * composition.num_atoms))
    # Build the Series explicitly: DataFrame.apply(axis=1) on an empty frame
    # can hand back an empty DataFrame rather than a Series.
    pd_entries = pd.Series(entries, index=df.index, dtype=object)
    print(f"\t{len(pd_entries)} entries")
    return pd_entries

In [None]:
# CSV inputs handed to setup_competing_phases. Each file must provide a
# formula column ('sortedformula' or 'comp') and a per-atom energy column
# ('energyperatom' or 'predicted_energy'), as checked by load_competing_phases.
competing_phases_files = [
    "/projects/rlmolecule/jlaw/rlmolecule/examples/crystal_energy/inputs/competing_phases.csv",
    "/projects/rlmolecule/jlaw/crystal_outputs/2022-07-05/best_decor_rewards.csv.gz",
]

In [None]:
# Read every listed file (a short summary of each is printed) and combine
# the resulting entries with the fere_entries items into one Series.
competing_phases = setup_competing_phases(competing_phases_files)

In [None]:
# Show the stored row(s) for one specific decoration id.
df.loc[df.decor_id == "Li1Ge1O1N1|_1_1_1_1|orthorhombic|icsd_183047|2"]

In [None]:
# Build a phase-diagram entry by hand for the Li1Ge1O1N1 composition.
# NOTE(review): -24.524 is hard-coded; presumably the total (not per-atom)
# energy of this structure, matching how load_competing_phases builds its
# PDEntry objects -- confirm the value and where it comes from.
comp = Composition("Li1Ge1O1N1")
entry = PDEntry(comp, -24.524)
entry

In [None]:
# Keep only the competing phases built exclusively from this composition's
# elements, leaving out anything equal to the entry under test.
elements = set(comp.elements)
curr_entries = [
    phase
    for phase in competing_phases
    if set(phase.composition.elements).issubset(elements) and phase != entry
]
curr_entries

In [None]:
# Phase diagram built from the filtered competing phases only; anything
# equal to `entry` was excluded from curr_entries.
phase_diagram = PhaseDiagram(curr_entries, elements=elements)

In [None]:
# Query the decomposition of `entry` and its energy relative to the hull
# built from the other phases.
decomp, decomp_energy = phase_diagram.get_decomp_and_e_above_hull(
    entry,
    # allow_negative=True: do not reject an energy below the hull (see the
    # pymatgen documentation for this flag).
    allow_negative=True,
    # docs say: "if you have a huge proportion of unstable entries,
    # then this check can slow things down."
    check_stable=False)

In [None]:
# Print the composition with the recomputed decomposition energy, then
# show `decomp` as the cell output.
print(comp, decomp_energy)
decomp