In [None]:
import matplotlib
# %matplotlib inline
# print("BACKEND: ", matplotlib.get_backend())
# if matplotlib.get_backend() != "macosx":
#   print("Changing backend to macosx")
#   matplotlib.use('macosx')


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# matplotlib.use("Agg")

from ase.visualize import view

from ase import Atoms
from ase import build
from ase.optimize import FIRE
from ase.io import read, write
from agox.databases import Database
from agox.environments import Environment
from agox.utils.graph_sorting import Analysis

import numpy as np

In [None]:
# matplotlib.rcParams.update(matplotlib.rcParamsDefault)
# Global figure styling: render text through LaTeX, use the cmr10 font family
# at size 12, and let the axis tick formatters use mathtext.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "cmr10",
        "font.size": 12,
        "axes.formatter.use_mathtext": True,
    }
)

In [None]:
from agox.models.descriptors import SOAP, Voronoi
# Per-atom SOAP descriptor for the Si/Ge system (r_cut=5.0, presumably in
# Angstrom -- confirm against the agox documentation). Its per-structure mean
# is what the PCA further down is fitted on.
local_descriptor = SOAP.from_species(["Si", "Ge"], r_cut=5.0)

# Voronoi graph descriptor; not referenced by any other cell shown in this
# notebook excerpt.
graph_descriptor = Voronoi(
    covalent_bond_scale_factor=1.3, n_points=8, angle_from_central_atom=20, environment=None
)


In [None]:
from chgnet.model import CHGNetCalculator
# Single CHGNet calculator instance, constructed with default arguments. The
# same object is attached as `.calc` to the structures built and loaded in the
# cells below.
calc = CHGNetCalculator()

In [None]:
# Supercell transformation matrices shared by both elements.
to_cubic = [[-1, 1, 1], [1, -1, 1], [1, 1, -1]]  # bulk cell -> "cubic" cell
in_plane_2x2 = [[2, 0, 0], [0, 2, 0], [0, 0, 1]]  # double a and b, keep c

# --- Silicon: bulk reference energy, then (001) surface and slab ---
Si_bulk = build.bulk("Si", crystalstructure="diamond", a=5.43)
Si_bulk.calc = calc
Si_reference_energy = Si_bulk.get_potential_energy() / len(Si_bulk)  # per atom
Si_cubic = build.make_supercell(Si_bulk, to_cubic)

# --- Germanium: bulk reference energy ---
Ge_bulk = build.bulk("Ge", crystalstructure="diamond", a=5.65)
Ge_bulk.calc = calc
Ge_reference_energy = Ge_bulk.get_potential_energy() / len(Ge_bulk)  # per atom
Ge_cubic = build.make_supercell(Ge_bulk, to_cubic)

# In-plane 2x2 repetitions of the cubic cells.
Si_supercell = build.make_supercell(Si_cubic, in_plane_2x2)
Ge_supercell = build.make_supercell(Ge_cubic, in_plane_2x2)

# Two-layer (001) surfaces without vacuum; these are the pieces stacked into
# the interface cells further down.
Si_surface = build.surface(Si_supercell, indices=(0, 0, 1), layers=2)
Ge_surface = build.surface(Ge_supercell, indices=(0, 0, 1), layers=2)

# The same two-layer surfaces with vacuum=12 and periodic=True, each given
# the shared calculator so their energies can be evaluated.
Si_slab = build.surface(
    Si_supercell, indices=(0, 0, 1), layers=2, vacuum=12, periodic=True
)
Si_slab.calc = calc
Ge_slab = build.surface(
    Ge_supercell, indices=(0, 0, 1), layers=2, vacuum=12, periodic=True
)
Ge_slab.calc = calc

In [None]:
# "host": Si and Ge surfaces stacked along axis 2 with a separation equal to
# half the Si lattice constant plus half the Ge lattice constant used above.
host = build.stack(
    Si_surface,
    Ge_surface,
    axis=2,
    distance= 5.43/2 + 5.65/2,
)
host.calc = calc

# "perfect_match": the same stack with a separation of 2.35.
# Original author's note: "2.35 somehow = 1.295".
perfect_match = build.stack(
    Si_surface,
    Ge_surface,
    axis=2,
    distance= 2.35,
)
perfect_match.calc = calc
# host is actually 2.5 layers, so can't be made using ASE

In [None]:
# Three-layer counterparts of the surfaces, slabs and stacked interface.

# Vacuum-free (001) surfaces used for stacking.
Si_surface_3 = build.surface(Si_supercell, indices=(0, 0, 1), layers=3)
Ge_surface_3 = build.surface(Ge_supercell, indices=(0, 0, 1), layers=3)

# Slabs with vacuum=12 and periodic=True, each given the shared calculator.
Si_slab_3 = build.surface(
    Si_supercell, indices=(0, 0, 1), layers=3, vacuum=12, periodic=True
)
Si_slab_3.calc = calc
Ge_slab_3 = build.surface(
    Ge_supercell, indices=(0, 0, 1), layers=3, vacuum=12, periodic=True
)
Ge_slab_3.calc = calc

# Stacked three-layer interface with a separation of 2.35.
# Original author's note: "2.35 somehow = 1.295".
perfect_match_3 = build.stack(
    Si_surface_3,
    Ge_surface_3,
    axis=2,
    distance= 2.35,
)
perfect_match_3.calc = calc

In [None]:
# Show the third cell vector (the stacking axis) of both stacked cells.
for label, stacked in (("Perfect match height: ", perfect_match), ("Host height: ", host)):
  print(label, stacked.get_cell()[2])

In [None]:
# In-plane area of the host cell: |a x b| of its first two cell vectors.
host_cell = host.get_cell()
area = np.linalg.norm(np.cross(host_cell[0], host_cell[1]))
print("Interface area: ", area)

In [None]:
# Bulk per-atom reference energies computed earlier.
print("Si energy per atom: ", Si_reference_energy)
print("Ge energy per atom: ", Ge_reference_energy)
# Total energies of the two-layer slabs (evaluated here by the calculator).
for label, slab in (("Si slab energy: ", Si_slab), ("Ge slab energy: ", Ge_slab)):
  print(label, slab.get_potential_energy())

In [None]:
# Load the Si and Ge slab structures from disk, attach the shared calculator
# and relax each one with FIRE (fmax=0.05, at most 100 steps). These relaxed
# slabs are the references passed to get_interface_energy further down.
Si_slab_vac = read("Si_slab.vasp")
Ge_slab_vac = read("Ge_slab.vasp")
Si_slab_vac.calc = calc
Ge_slab_vac.calc = calc
for slab_name, slab in (("Si_slab_vac", Si_slab_vac), ("Ge_slab_vac", Ge_slab_vac)):
  optimizer = FIRE(slab)
  # run() reports whether the force criterion was met; an unconverged slab
  # would otherwise be used as a reference without any indication.
  converged = optimizer.run(fmax=0.05, steps=100)
  if converged is not None and not converged:
    print("WARNING: relaxation of " + slab_name + " did not reach fmax=0.05 within 100 steps")

In [None]:
def get_interface_energy(struc, Si_slab, Ge_slab, extra=False, n_extra=16):
    """Return the interface energy of ``struc`` per unit in-plane area.

    The total energies of the two reference slabs are subtracted from the
    total energy of ``struc`` and the remainder is divided by twice the
    in-plane cell area ``|a x b|`` of ``struc``. (The factor 2 presumably
    accounts for two interfaces in the periodic cell -- confirm.)

    Parameters
    ----------
    struc : object with ``get_potential_energy()`` and ``get_cell()``
        The combined interface structure.
    Si_slab, Ge_slab : objects with ``get_potential_energy()``
        The isolated reference slabs.
    extra : bool, optional
        If True, additionally subtract ``n_extra`` times the sum of the
        module-level ``Si_reference_energy`` and ``Ge_reference_energy``
        (bulk energies per atom).
    n_extra : int, optional
        Number of extra Si/Ge atom pairs to subtract when ``extra`` is True.
        Defaults to 16, the value that was previously hard-coded.

    Returns
    -------
    float
        Energy difference divided by ``2 * area``.
    """
    energy = struc.get_potential_energy()
    cell = struc.get_cell()
    area = np.linalg.norm(np.cross(cell[0], cell[1]))
    excess = energy - Si_slab.get_potential_energy() - Ge_slab.get_potential_energy()
    if extra:
        # Remove the bulk contribution of atoms not covered by the slabs.
        excess = excess - n_extra * (Si_reference_energy + Ge_reference_energy)
    return excess / (2.0 * area)

In [None]:
# Interface energies of the ASE-built stacks, each measured against slabs with
# the matching number of layers. (Label typo "layersenergy" fixed.)
print("perfect match 2 layers energy: ", get_interface_energy(perfect_match, Si_slab, Ge_slab))
print("perfect match 3 layers energy: ", get_interface_energy(perfect_match_3, Si_slab_3, Ge_slab_3))

In [None]:
# Run selectors. Both values are spliced into the trajectory paths read below
# ("DTMP" + identifier + "/..._seed" + str(seed) + ".traj") and into the name
# of the saved figure; `seed` is also used in the saved PCA model file name.
seed = 0
identifier = "2"
# min_energy = -3.6635127# -3.7717605425

In [None]:
# Read every frame (index=":") of the unrelaxed-structure trajectory for the
# selected run and give each frame the shared calculator.
unrlxd_structures = read(
    f"DTMP{identifier}/unrlxd_structures_seed{seed}.traj", index=":"
)
for atoms in unrlxd_structures:
  atoms.calc = calc

In [None]:
# Read every frame (index=":") of the relaxed-structure trajectory for the
# selected run and give each frame the shared calculator.
rlxd_structures = read(
    f"DTMP{identifier}/rlxd_structures_seed{seed}.traj", index=":"
)
for atoms in rlxd_structures:
  atoms.calc = calc
# min_energy = np.min([structure.get_potential_energy()/len(structure) for structure in rlxd_structures])

In [None]:
# Load the reference interface structure, fit it to the cell of the first
# relaxed structure (scale_atoms=True rescales the atomic positions with the
# cell) and relax it with FIRE (fmax=0.05, at most 100 steps).
SiGe_perfect_match = read("SiGe_perfect_match.vasp")
SiGe_perfect_match.calc = calc
SiGe_perfect_match.set_cell(rlxd_structures[0].get_cell(), scale_atoms=True)
optimizer = FIRE(SiGe_perfect_match)
# run() reports whether the force criterion was met; flag an unconverged
# reference instead of using it silently.
converged = optimizer.run(fmax=0.05, steps=100)
if converged is not None and not converged:
  print("WARNING: relaxation of SiGe_perfect_match did not reach fmax=0.05 within 100 steps")

In [None]:
# Interface energy per area of the relaxed reference interface, measured
# against the relaxed Si and Ge slabs.
perfect_en_per_area = get_interface_energy(
    SiGe_perfect_match, Si_slab_vac, Ge_slab_vac, extra=False
)
print("Perfect min energy: ", perfect_en_per_area)

In [None]:
# Interface energy per area of every unrelaxed structure, measured against
# the relaxed Si and Ge slabs.
unrlxd_en_per_area = []
for atoms in unrlxd_structures:
  unrlxd_en_per_area.append(
      get_interface_energy(atoms, Si_slab_vac, Ge_slab_vac, extra=False)
  )
print("Unrelaxed min energy: ", np.min(unrlxd_en_per_area))

In [None]:
# Interface energy per area of every relaxed structure, measured against the
# relaxed Si and Ge slabs.
rlxd_en_per_area = []
for atoms in rlxd_structures:
  rlxd_en_per_area.append(
      get_interface_energy(atoms, Si_slab_vac, Ge_slab_vac, extra=False)
  )
print("Relaxed min energy: ", np.min(rlxd_en_per_area))

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Two-component PCA; the two components become the x and y coordinates of the
# scatter plots at the end of the notebook.
pca = PCA(n_components=2)

In [None]:
# One "super atom" per unrelaxed structure: the local descriptor features
# averaged over axis 0.
unrlxd_super_atoms = [
    np.mean(local_descriptor.get_features(atoms), axis=0)
    for atoms in unrlxd_structures
]

In [None]:
# One "super atom" per relaxed structure: the local descriptor features
# averaged over axis 0.
rlxd_super_atoms = [
    np.mean(local_descriptor.get_features(atoms), axis=0)
    for atoms in rlxd_structures
]

In [None]:
# Tag naming the dataset the PCA model belongs to. It is part of the PCA model
# file name that is loaded, of the saved figure's file name, and it selects
# the axis limits used in the final plot.
rlxd_string = "rlxd" # which dataset to fit the PCA model to

In [None]:
# Fit the PCA model on the dataset named by `rlxd_string`, save it, and load
# it back. The same path is used for saving and loading; previously the save
# path hard-coded "rlxd" and the load path hard-coded seed 0, so the two only
# matched for rlxd_string == "rlxd" and seed == 0.
import pickle

# NOTE(review): "unrlxd" is assumed to be the other valid value of
# rlxd_string -- confirm.
pca_training_sets = {"rlxd": rlxd_super_atoms, "unrlxd": unrlxd_super_atoms}
if rlxd_string not in pca_training_sets:
  raise ValueError("rlxd_string must be one of " + str(sorted(pca_training_sets)) + ", got " + repr(rlxd_string))

pca_model_path = "pca_model_all_" + rlxd_string + "_" + str(seed) + ".pkl"

refit_pca = True  # set to False to reuse a previously saved model file
if refit_pca:
  pca.fit(np.squeeze(pca_training_sets[rlxd_string]))
  with open(pca_model_path, "wb") as f:
    pickle.dump(pca, f)

# load pca model
# NOTE: unpickling can execute arbitrary code; only load model files written
# by this notebook.
with open(pca_model_path, "rb") as f:
  pca = pickle.load(f)

In [None]:
# Mean local descriptor of the reference interface, stored twice.
# NOTE(review): the duplicate row is presumably there so that np.squeeze in
# the transform step still yields a 2-D array -- confirm before removing it.
perf_super_atom = [
    np.mean(local_descriptor.get_features(SiGe_perfect_match), axis=0)
    for _ in range(2)
]

In [None]:
# Project the mean descriptors onto the two principal components.
# np.atleast_2d restores the sample axis when a dataset holds a single
# structure, which np.squeeze alone would collapse to a 1-D array.
unrlxd_X_reduced = pca.transform(np.atleast_2d(np.squeeze(unrlxd_super_atoms)))
rlxd_X_reduced = pca.transform(np.atleast_2d(np.squeeze(rlxd_super_atoms)))
perf_X_reduced = pca.transform(np.atleast_2d(np.squeeze(perf_super_atom)))

In [None]:
# Position within rlxd_en_per_area of the lowest interface energy.
min_energy_index = np.asarray(rlxd_en_per_area).argmin()
print(min_energy_index)

In [None]:
# Two-panel PCA map (left: unrelaxed, right: relaxed), coloured by interface
# energy on a shared colour scale, with the reference interface marked.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 6))

plt.subplots_adjust(wspace=0.05, hspace=0)

# Energies are multiplied by 1000 to match the meV unit in the colour-bar label.
scaled_unrlxd_ens = [x * 1000 for x in unrlxd_en_per_area]
scaled_rlxd_ens = [x * 1000 for x in rlxd_en_per_area]
scaled_perf_en = perfect_en_per_area * 1000

# Shared colour limits: the lowest energy over both datasets (floored at -80)
# up to the highest energy over both datasets (capped at the reference value).
# The lower limit previously used np.max of the unrelaxed energies, unlike the
# symmetric upper-limit expression.
min_en = max(-0.08*1000, min(np.min(scaled_unrlxd_ens), np.min(scaled_rlxd_ens)))
max_en = min(max(np.max(scaled_unrlxd_ens), np.max(scaled_rlxd_ens)), scaled_perf_en)

color_kwargs = dict(cmap="viridis", vmin=min_en, vmax=max_en)

# These must be the first collection on each axis: the colour bar below is
# built from axes[1].collections[0].
axes[0].scatter(unrlxd_X_reduced[:, 0], unrlxd_X_reduced[:, 1], c=scaled_unrlxd_ens, **color_kwargs)
axes[1].scatter(rlxd_X_reduced[:, 0], rlxd_X_reduced[:, 1], c=scaled_rlxd_ens, **color_kwargs)

# Mark the reference interface once per panel: a red ring plus a dot coloured
# with the scaled energy so it is on the same scale as vmin/vmax.
# (To highlight the lowest-energy relaxed structure instead, use
# rlxd_X_reduced[min_energy_index] for the ring position.)
for ax in axes:
  ax.scatter(perf_X_reduced[0, 0], perf_X_reduced[0, 1], s=200, edgecolor='red', facecolor='none', linewidth=2)
  ax.scatter(perf_X_reduced[0, 0], perf_X_reduced[0, 1], c=[scaled_perf_en], **color_kwargs)

fig.text(0.5, 0.04, 'Principal Component 1', ha='center', fontsize=15)
axes[0].set_ylabel('Principal Component 2', fontsize=15)
axes[0].set_title('Unrelaxed')
axes[1].set_title('Relaxed')

# Axis limits depend on which dataset the PCA model was fitted to.
if rlxd_string == "rlxd":
  xlims = [-2, 5.8]
  ylims = [-1, 2]
else:
  xlims = [-42, 55]
  ylims = [-12, 30]

for ax in axes:
  ax.tick_params(axis='both', direction='in')
  ax.set_xlim(xlims)
  ax.set_ylim(ylims)

# unify tick labels
xticks = axes[0].get_xticks()
xticks = xticks[(xticks >= xlims[0]) & (xticks <= xlims[1])]

axes[1].set_xticks(xticks)
axes[1].set_yticklabels([])  # the y tick labels are shown on the left panel only
for ax in axes:
  ax.tick_params(axis='x', labelbottom=True, top=True)
  ax.tick_params(axis='y', right=True)

# Make axes[0] and axes[1] the same width
for ax in axes:
  ax.set_box_aspect(1.7)

# Add colorbar next to the axes
cbar = fig.colorbar(axes[1].collections[0], ax=axes, orientation='vertical', fraction=0.085, pad=0.02)
# \AA is the LaTeX text-mode Angstrom symbol (text.usetex is enabled earlier in
# this notebook); it replaces a mis-encoded character in the label.
cbar.set_label(r'Formation energy (meV/\AA$^2$)', fontsize=15)

plt.savefig('Si-Ge_RAFFLE'+identifier+'_pca_'+rlxd_string+'_fit_seed'+str(seed)+'.pdf', bbox_inches='tight', pad_inches=0, facecolor=fig.get_facecolor(), edgecolor='none')