In [10]:
import os
from ase.io import read
from ase.db import connect

# Path to the folder containing .txt files
txt_folder = "/root/GeoLdm/analyzed_molecules"

# Path to the output ASE database
output_db = "/root/GeoLdm/GeoLDM.db"

# Initialize the ASE database
with connect(output_db, use_lock_file=False) as db:
    # Iterate through all files in the directory
    for file_name in os.listdir(txt_folder):
        if file_name.endswith(".txt"):
            txt_file = os.path.join(txt_folder, file_name)
            print(f"Processing: {txt_file}")
            try:
                # Read the .txt file into an ASE Atoms object
                atoms = read(txt_file, format="xyz")  # Use "xyz" format for XYZ-like data in .txt files

                # Check if valid Atoms object
                if not atoms or len(atoms) == 0:
                    print(f"Skipped {file_name}: No atoms found.")
                    continue

                # Get the chemical formula
                formula = atoms.get_chemical_formula()

                # Add to ASE database with metadata
                db.write(atoms, data={"chemical_formula": formula})
                print(f"Added {formula} from {file_name}.")
            except Exception as e:
                print(f"Failed to process {file_name}: {e}")

# Verify database
with connect(output_db) as db:
    count = len(list(db.select()))
    print(f"Database contains {count} entries.")



Processing: /root/GeoLdm/analyzed_molecules/molecule_18748.txt
Added C8H16O from molecule_18748.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_11965.txt
Added C6H10O3 from molecule_11965.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_19588.txt
Added C7H10O2 from molecule_19588.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_14807.txt
Added C6H8O3 from molecule_14807.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_11344.txt
Added C8H7N from molecule_11344.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_4933.txt
Added C8H10O2 from molecule_4933.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_11291.txt
Added C6H13NO from molecule_11291.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_373.txt
Added C7H16O3 from molecule_373.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_1001.txt
Added C8H13NO from molecule_1001.txt.
Processing: /root/GeoLdm/analyzed_molecules/molecule_10371.txt
Added C8H8O from molecule_10371