In [1]:
import os
from ase.io import read

# 数据集根路径
root_dir = '/data/home/sczc579/run/wy/2d_work_func/dataset/data/2d_POSCAR'

# 用于保存结构的列表
structures_list = []

# 遍历根目录下的所有子文件夹
for folder_name in os.listdir(root_dir):
    folder_path = os.path.join(root_dir, folder_name)
    
    # 确保是文件夹
    if os.path.isdir(folder_path):
        poscar_path = os.path.join(folder_path, 'POSCAR')
        
        # 如果存在 POSCAR 文件，则读取
        if os.path.isfile(poscar_path):
            try:
                atoms = read(poscar_path)
                structures_list.append({
                    'id': folder_name,
                    'structure': atoms
                })
            except Exception as e:
                print(f"Error reading POSCAR in {folder_name}: {e}")
        else:
            print(f"POSCAR not found in {folder_name}")

In [3]:
structures_list[0]

{'id': 'MIP2D-842-Mo2N2',
 'structure': Atoms(symbols='Mo2N2', pbc=True, cell=[[3.377458499999782, 0.0, 0.0], [-1.688729249999891, 2.924964861227495, 0.0], [0.0, 0.0, 30.0]])}

In [8]:
import os
import numpy as np

# 假设 structures_list 是你之前构建好的 list
# 另一个路径，用于搜索 potential.dat
search_root = '/data/home/sczc579/run/wy/2d_work_func/dataset/data/2d'  # 替换为你要查找 potential.dat 的根路径

# 新的结构列表，只保留有 potential.dat 的项
filtered_structures_list = []

for item in structures_list:
    id_name = item['id']
    target_folder = os.path.join(search_root, id_name)
    
    if os.path.isdir(target_folder):
        potential_file = os.path.join(target_folder, 'potential.dat')
        if os.path.isfile(potential_file):
            with open(potential_file, 'r') as f:
                potential_data = np.loadtxt(f)
            item['work_func'] = potential_data
            filtered_structures_list.append(item)
        else:
            print(f"potential.dat not found in {id_name}")
    else:
        print(f"Folder {id_name} not found in {search_root}")

# 现在 filtered_structures_list 中只保留了包含 potential.dat 的条目


Folder MIP2D-1658-Os2Se6 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-133-Bi4I2Te2 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-1729-Ba2S4Zr1 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-55-As2Hg1K4 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-1440-Co2O6 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-1741-Br2F8Rb2 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-342-C2Fe1O6 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-102-Bi14S8Te13 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-546-Co2O2 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-1582-I2Se2Ti2 not found in /data/home/sczc579/run/wy/2d_work_func/dataset/data/2d
Folder MIP2D-1647-O6Re2 not found in /data/home/sczc579/run/wy/

In [12]:
from ase.db import connect

# 连接到数据库（如果不存在则自动创建）
db = connect('/data/home/sczc579/run/wy/2d_work_func/dataset/data/structures.db')

# 遍历 filtered_structures_list，并写入数据库
for item in filtered_structures_list:
    id_name = item['id']
    atoms = item['structure']
    work_func = item['work_func']

    db.write(
        atoms,
        data={'work_func': work_func,
              'name': id_name}
    )

print("数据已成功写入 structures.db")


数据已成功写入 structures.db
