In [1]:
import numpy as np
import torch

from scipy.optimize       import minimize
from libraries.graph      import graph_POSCAR_encoding, find_closest_key, composition_concentration_from_keys
from torch_geometric.data import Data
from pymatgen.core        import Structure

# Graph embedding

In [2]:
distance_threshold = 5
y = 1

# Whether to care about lattice vectors or not
is_molecule = False

structure = Structure.from_file('POSCAR')

nodes, edges, attributes = graph_POSCAR_encoding(structure,
                                                 distance_threshold=distance_threshold,
                                                 encoding_type='sphere-images')
temp = Data(x=nodes,
            edge_index=edges.t().contiguous(),
            edge_attr=attributes.flatten(),
            y=torch.tensor([y], dtype=torch.float)
           )
temp

Data(x=[12, 4], edge_index=[2, 116], edge_attr=[116], y=[1])

In [3]:
edges   = temp.edge_index.detach().cpu().numpy().T
weights = temp.edge_attr.detach().cpu().numpy()

In [4]:
coordinates = []
for s in structure:
    coordinates.append(s.coords)
coordinates = np.array(coordinates)

In [5]:
lattice_vectors = structure.lattice.matrix
lattice_vectors

array([[4.09533151e+00, 0.00000000e+00, 3.00000000e-16],
       [1.00000000e-15, 8.61134159e+00, 5.00000000e-16],
       [0.00000000e+00, 0.00000000e+00, 1.04710589e+01]])

In [10]:
def worst_identification(edges, attributes):
    n_particles = np.unique(edges)

    particle_errors = []
    for particle in range(n_particles):
        # Get those edge indexes where particle has a connection
        particle_connections = np.where((edges[:, 0] == idx) | (edges[:, 1] == idx))

        particle_error = 0
        for idx in particle_connections:
            # Load indexes in edge
            edge = edges[idx]

            # Load reference attribute
            attr = attributes[idx]

            # Load expected attribute
            p1 = positions[edge[0]]
            p2 = positions[edge[1]]

            # Compute distance
            distance = np.linalg.norm(p2 - p1)

            # Add error
            particle_errors += np.linalg.norm(attr - distance)

        # Average over the conection of the node
        particle_error /= len(particle_connections)

        # Append particle error
        particle_errors.append(particle_errors)
    
    return np.argmax(particle_errors), np.max(particle_errors)

## Molecules

In [11]:
if is_molecule:
    # Initial guess for the positions
    #initial_positions = np.random.rand(len(edges) * 3)  # Initialize all points at origin, 1D array
    initial_positions = coordinates.reshape(-1, 1).ravel()
    
    # Function to calculate the squared difference between distances and weights
    def objective(solution_attempt, edges, weights):
        positions = solution_attempt.reshape(-1, 3)  # Reshape to 2D array
        errors = 0
        for edge, weight in zip(edges, weights):
            p1 = positions[edge[0]]
            p2 = positions[edge[1]]
            distance = np.linalg.norm(p2 - p1)
            errors += np.power(distance - weight, 2)
        print(errors)
        return errors

## Crystals

In [12]:
if not is_molecule:
    # Initial guess for the lattice parameters
    lattice_vectors = np.array([[10, 0,   0],
                                [0,   10, 0],
                                [0,   0,   10]])
    
    # Initial guess for the positions
    solution = np.random.rand(len(edges) * 3)*10  # Initialize all points at origin, 1D array
    #initial_positions = coordinates.reshape(-1, 1).ravel()
    #initial_positions = np.concatenate([lattice_vectors.ravel(), initial_positions])
    
    # Function to calculate the squared difference between distances and weights
    def objective(solution_attempt, edges, weights):
        solution_attempt = solution_attempt.reshape(-1, 3)  # Reshape to 2D array
        
        lattice_vectors = solution_attempt[:3]
        positions       = solution_attempt[3:]
        
        errors = 0
        for edge, weight in zip(edges, weights):
            p1 = positions[edge[0]]
            p2 = positions[edge[1]]
            
            trial_errors = [] 
            for i in [-1, 0, 1]:
                for j in [-1, 0, 1]:
                    for k in [-1, 0, 1]:
                        # i*lattice_vectors[0] + j*lattice_vectors[1] + k*lattice_vectors[2]
                        ijk_lattice_vectors = np.sum([i, j, k] * lattice_vectors.T, axis=1)
                        
                        # Compute error
                        distance = np.linalg.norm(p2 - p1 + ijk_lattice_vectors)
                        
                        # Apend to trial errors for differente atom images
                        trial_errors.append(np.power(distance - weight, 2))
            errors += np.min(trial_errors)
        print(errors)
        return errors

In [None]:
local_error_threshold = 1e-5

for attempt in range(10):
    print()
    print(f'Attempt {attempt}')
    solution = minimize(objective, solution,
                        args=(edges, weights),
                        method='Powell')
    
    worst_particle, worst_error = worst_identification(edges, attributes)
    
    if worst_error > local_error_threshold:
        break
    
    # Re-initialize that position
    idx_0 = 9 + worst_particle
    idx_f = 9 + worst_particle + 3
    solution[idx_0:idx_f] = np.random.rand(3)

# Check convergence status
if solution.success:
    print("Converged to a solution.")
else:
    print("Failed to converge:", solution.message)


Attempt 0
246.4940874542983
246.4940874542983
349.6323293098044
206.56090385786496
206.98188365747183
302.74496301526364
224.84763526208803
206.24259278264893
207.7147835598592
214.0828058872382
207.8132644760347
206.1373351327325
206.9785891883671
206.35530889770428
206.064304872908
206.06879814224038
206.06897196160224
206.064304872908
197.38482664795475
217.71641326436423
202.10109493231968
198.20671234767613
197.19779377293938
197.1742910736859
197.16423545997213
197.26658831812676
197.15727390891183
197.15318458426168
197.15467804671695
197.15318458426168
226.3253130679575
209.58050380039757
200.94183744946451
203.41993522471907
196.56395778966655
196.55747515784373
196.55363285090274
196.55341908189797
196.55349238246373
196.55341908189797
220.08445625205502
264.86940222241947
208.19811583249486
200.33279584356052
196.5251585426423
196.51999523705445
196.51984700142978
196.51985026029854
196.51985044885936
196.51984700142978
209.118602114146
211.66315808404474
196.58771586777686

In [None]:
solution_attempt = solution.x.reshape(-1, 3)  # Reshape to 2D array

## Molecules

In [None]:
if is_molecule:
    # Get the position of each atom in direct coordinates
    #direct_positions = graph_to_cartesian_positions(graph)
    #cartesian_positions = solution.x.reshape(-1, 3)*mw
    #cartesian_positions = solution.x.reshape(-1, 3)
    
    lattice_vectors     = np.array([[10, 0,   0],
                                    [0,   10, 0],
                                    [0,   0,   10]])
    cartesian_positions = solution_attempt

## Crystals

In [28]:
if not is_molecule:
    # Get the position of each atom in direct coordinates
    #direct_positions = graph_to_cartesian_positions(graph)
    #cartesian_positions = solution.x.reshape(-1, 3)*mw
    
    lattice_vectors     = solution_attempt[:3]
    cartesian_positions = solution_attempt[3:]

In [31]:
POSCAR_name = None

# Get name for the first line of the POSCAR
POSCAR_name = POSCAR_name or 'POSCAR from GenerativeModels'

# Clone the input graph to preserve the original structure
new_graph = temp.clone()

# Load and detach embeddings for the graph nodes
data_embeddings = new_graph.x.detach().cpu().numpy()

# Loading dictionary of available embeddings for atoms
available_embeddings = {}
with open('../VASP/atomic_masses.dat', 'r') as atomic_masses_file:
    for line in atomic_masses_file:
        key, mass, charge, electronegativity, ionization_energy = line.split()

        # Check if all information is present
        if all(val != 'None' for val in (mass, charge, electronegativity, ionization_energy)):
            available_embeddings[key] = np.array([mass, charge, electronegativity, ionization_energy], dtype=float)

# Get most similar atoms for each graph node and create a list of keys
keys = [find_closest_key(available_embeddings, emb) for emb in data_embeddings]

# Get elements' composition, concentration, and positions
POSCAR_composition, POSCAR_concentration, POSCAR_positions = composition_concentration_from_keys(keys, cartesian_positions)

In [32]:
# Write file
with open('CONTCAR', 'w') as POSCAR_file:
    # Delete previous data in the file
    POSCAR_file.truncate()
    
    # Write POSCAR's name
    POSCAR_file.write(f'{POSCAR_name}\n')

    # Write scaling factor (assumed to be 1.0)
    POSCAR_file.write('1.0\n')

    # Write lattice parameters (assumed to be orthogonal)
    np.savetxt(POSCAR_file, lattice_vectors, delimiter=' ')

    # Write composition (each different species, previously sorted)
    np.savetxt(POSCAR_file, [POSCAR_composition], fmt='%s', delimiter=' ')

    # Write concentration (number of each of the previous elements)
    np.savetxt(POSCAR_file, [POSCAR_concentration], fmt='%d', delimiter=' ')

    # Write position in cartesian form
    POSCAR_file.write('Cartesian\n')
    np.savetxt(POSCAR_file, POSCAR_positions, delimiter=' ')