In [1]:
import numpy as np
import torch

from scipy.optimize       import minimize
from libraries.graph      import graph_POSCAR_encoding, find_closest_key, composition_concentration_from_keys
from torch_geometric.data import Data
from pymatgen.core        import Structure

# Graph embedding

In [2]:
distance_threshold = 5
y = 1

# Whether to care about lattice vectors or not
is_molecule = False

structure = Structure.from_file('POSCAR')

nodes, edges, attributes = graph_POSCAR_encoding(structure,
                                                 distance_threshold=distance_threshold,
                                                 encoding_type='sphere-images')
temp = Data(x=nodes,
            edge_index=edges.t().contiguous(),
            edge_attr=attributes.flatten(),
            y=torch.tensor([y], dtype=torch.float)
           )
temp

Data(x=[72, 4], edge_index=[2, 601], edge_attr=[601], y=[1])

In [3]:
edges = temp.edge_index.detach().cpu().numpy().T
weights = temp.edge_attr.detach().cpu().numpy()

In [4]:
coordinates = []
for s in structure:
    coordinates.append(s.coords)
coordinates = np.array(coordinates)

In [14]:
lattice_vectors = structure.lattice.matrix
lattice_vectors

array([[ 1.26111574e+01,  1.12000000e-06,  4.49000000e-05],
       [ 1.73000000e-06,  1.71582865e+01, -2.53000000e-06],
       [ 3.68000000e-05, -1.57000000e-06,  1.02820259e+01]])

In [ ]:
mw = np.max(weights)
weights /= mw

## Molecules

In [36]:
if is_molecule:
    # Initial guess for the positions
    #initial_positions = np.random.rand(len(edges) * 3)  # Initialize all points at origin, 1D array
    initial_positions = coordinates.reshape(-1, 1).ravel()
    
    # Function to calculate the squared difference between distances and weights
    def objective(solution_attempt, edges, weights):
        positions = solution_attempt.reshape(-1, 3)  # Reshape to 2D array
        errors = 0
        for edge, weight in zip(edges, weights):
            p1 = positions[edge[0]]
            p2 = positions[edge[1]]
            distance = np.linalg.norm(p2 - p1)
            errors += np.power(distance - weight, 2)
        print(errors)
        return errors

## Crystals

In [20]:
if not is_molecule:
    # Initial guess for the lattice parameters
    #lattice_vectors = np.array([[10, 0,   0],
    #                            [0,   10, 0],
    #                            [0,   0,   10]])
    
    # Initial guess for the positions
    #initial_positions = np.random.rand(len(edges) * 3)  # Initialize all points at origin, 1D array
    initial_positions = coordinates.reshape(-1, 1).ravel()
    initial_positions = np.concatenate([lattice_vectors.ravel(), initial_positions])
    
    # Function to calculate the squared difference between distances and weights
    def objective(solution_attempt, edges, weights):
        solution_attempt = solution_attempt.reshape(-1, 3)  # Reshape to 2D array
        
        lattice_vectors = solution_attempt[:3]
        positions       = solution_attempt[3:]
        
        errors = 0
        for edge, weight in zip(edges, weights):
            p1 = positions[edge[0]]
            p2 = positions[edge[1]]
            
            trial_errors = [] 
            for i in [-1, 0, 1]:
                for j in [-1, 0, 1]:
                    for k in [-1, 0, 1]:
                        distance = np.linalg.norm(p2 - p1 + i*lattice_vectors[0] + j*lattice_vectors[1] + k*lattice_vectors[2])
                        trial_errors.append(np.power(distance - weight, 2))
            errors += np.min(trial_errors)
        print(errors)
        return errors

In [21]:
solution = minimize(objective, initial_positions, args=(edges, weights),
                    method='BFGS')

# Check convergence status
if solution.success:
    print("Converged to a solution.")
else:
    print("Failed to converge:", solution.message)

6.604177703993856e-12
6.56233627515979e-12
6.5948222113814624e-12
6.602724742752044e-12
6.5932061521974975e-12
6.631913271403357e-12
6.580616309904989e-12
6.584455972164725e-12
6.6066484848124674e-12
6.600467884021327e-12
6.60115417761194e-12
6.613532875846068e-12
6.606092094869148e-12
6.616706697756342e-12
6.611462576261172e-12
6.612212634042032e-12
6.608056616493552e-12
6.600515307479737e-12
6.601788817892398e-12
6.6034179924662375e-12
6.619113336698921e-12
6.605169394477525e-12
6.598668946206259e-12
6.616671961563445e-12
6.602828742408581e-12
6.609609898637243e-12
6.590733218734596e-12
6.603199488348382e-12
6.616641675812487e-12
6.603498885774718e-12
6.6128776926936525e-12
6.589759810335421e-12
6.594300272670184e-12
6.606228015525815e-12
6.593190543566458e-12
6.6067064411653425e-12
6.607090111909992e-12
6.606641691353699e-12
6.604984522447734e-12
6.600715779893707e-12
6.6080317695819766e-12
6.60726547355155e-12
6.617950072229961e-12
6.613821481925049e-12
6.599147178451467e-12
6.6111

In [27]:
solution_attempt = solution.x.reshape(-1, 3)  # Reshape to 2D array

## Molecules

In [ ]:
if is_molecule:
    # Get the position of each atom in direct coordinates
    #direct_positions = graph_to_cartesian_positions(graph)
    #cartesian_positions = solution.x.reshape(-1, 3)*mw
    #cartesian_positions = solution.x.reshape(-1, 3)
    
    lattice_vectors     = np.array([[10, 0,   0],
                                    [0,   10, 0],
                                    [0,   0,   10]])
    cartesian_positions = solution_attempt

## Crystals

In [28]:
if not is_molecule:
    # Get the position of each atom in direct coordinates
    #direct_positions = graph_to_cartesian_positions(graph)
    #cartesian_positions = solution.x.reshape(-1, 3)*mw
    
    lattice_vectors     = solution_attempt[:3]
    cartesian_positions = solution_attempt[3:]

In [31]:
POSCAR_name = None

# Get name for the first line of the POSCAR
POSCAR_name = POSCAR_name or 'POSCAR from GenerativeModels'

# Clone the input graph to preserve the original structure
new_graph = temp.clone()

# Load and detach embeddings for the graph nodes
data_embeddings = new_graph.x.detach().cpu().numpy()

# Loading dictionary of available embeddings for atoms
available_embeddings = {}
with open('../VASP/atomic_masses.dat', 'r') as atomic_masses_file:
    for line in atomic_masses_file:
        key, mass, charge, electronegativity, ionization_energy = line.split()

        # Check if all information is present
        if all(val != 'None' for val in (mass, charge, electronegativity, ionization_energy)):
            available_embeddings[key] = np.array([mass, charge, electronegativity, ionization_energy], dtype=float)

# Get most similar atoms for each graph node and create a list of keys
keys = [find_closest_key(available_embeddings, emb) for emb in data_embeddings]

# Get elements' composition, concentration, and positions
POSCAR_composition, POSCAR_concentration, POSCAR_positions = composition_concentration_from_keys(keys, cartesian_positions)

In [32]:
# Write file
with open('CONTCAR', 'w') as POSCAR_file:
    # Delete previous data in the file
    POSCAR_file.truncate()
    
    # Write POSCAR's name
    POSCAR_file.write(f'{POSCAR_name}\n')

    # Write scaling factor (assumed to be 1.0)
    POSCAR_file.write('1.0\n')

    # Write lattice parameters (assumed to be orthogonal)
    np.savetxt(POSCAR_file, lattice_vectors, delimiter=' ')

    # Write composition (each different species, previously sorted)
    np.savetxt(POSCAR_file, [POSCAR_composition], fmt='%s', delimiter=' ')

    # Write concentration (number of each of the previous elements)
    np.savetxt(POSCAR_file, [POSCAR_concentration], fmt='%d', delimiter=' ')

    # Write position in cartesian form
    POSCAR_file.write('Cartesian\n')
    np.savetxt(POSCAR_file, POSCAR_positions, delimiter=' ')