In [1]:
"""
Copyright (c) Facebook, Inc. and its affiliates.

This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
"""

import os
import glob
import errno
import signal
import torch
import argparse
import warnings
import functools
import itertools
import numpy as np
import pandas as pd
from p_tqdm import p_map
import cloudpickle as pickle

from scipy.spatial.distance import pdist
from scipy.spatial.distance import cdist
from scipy.stats import wasserstein_distance

from collections import Counter
from pymatgen.core.structure import Structure
from pymatgen.core.composition import Composition
from pymatgen.core.lattice import Lattice
from matminer.featurizers.site.fingerprint import CrystalNNFingerprint
from matminer.featurizers.composition.composite import ElementProperty
import smact
from smact.screening import pauling_test

from eval_util import (
    chemical_symbols, 
    StandardScaler, 
    CompScalerMeans, 
    CompScalerStds
)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:

# Much of the below code is taken without modification from the original
# CDVAE repo (https://github.com/txie-93/cdvae).
# In some cases, the code has been modified to work with the structure of
# our codebase, but the logic is the same.

COV_Cutoffs = {
    'mp20': {'struc': 0.4, 'comp': 10.},
    'carbon': {'struc': 0.2, 'comp': 4.},
    'perovskite': {'struc': 0.2, 'comp': 4},
}

NOVELTY_Cutoffs = {
    'mp20': {'struc': 0.1, 'comp': 2.},
}

CompScaler = StandardScaler(
    means=np.array(CompScalerMeans),
    stds=np.array(CompScalerStds),
    replace_nan_token=0.)

# CrystalNNFP = CrystalNNFingerprint.from_preset("ops")
CompFP = ElementProperty.from_preset('magpie')

class TimeoutError(Exception):
    pass

def timeout(seconds=10, error_message=os.strerror(errno.ETIME)):
    def decorator(func):
        def _handle_timeout(signum, frame):
            raise TimeoutError(error_message)

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            signal.signal(signal.SIGALRM, _handle_timeout)
            signal.alarm(seconds)
            try:
                result = func(*args, **kwargs)
            finally:
                signal.alarm(0)
            return result

        return wrapper

    return decorator

@timeout(5)
def timeout_featurize(structure, i):
    return CrystalNNFingerprint.from_preset("ops").featurize(structure, i)

def load_data(file_path):
    if file_path[-3:] == 'npy':
        data = np.load(file_path, allow_pickle=True).item()
        for k, v in data.items():
            if k == 'input_data_batch':
                for k1, v1 in data[k].items():
                    data[k][k1] = torch.from_numpy(v1)
            else:
                data[k] = torch.from_numpy(v).unsqueeze(0)
    else:
        data = torch.load(file_path)
    return data

def get_fp_pdist(fp_array):
    if isinstance(fp_array, list):
        fp_array = np.array(fp_array)
    fp_pdists = pdist(fp_array)
    return fp_pdists.mean()

def filter_fps(struc_fps, comp_fps):
    assert len(struc_fps) == len(comp_fps)

    filtered_struc_fps, filtered_comp_fps = [], []

    for struc_fp, comp_fp in zip(struc_fps, comp_fps):
        if struc_fp is not None and comp_fp is not None:
            filtered_struc_fps.append(struc_fp)
            filtered_comp_fps.append(comp_fp)
    return filtered_struc_fps, filtered_comp_fps

def compute_cov(
    crys, 
    gt_crys,
    struc_cutoff, 
    comp_cutoff, 
    num_gen_crystals=None
):
    struc_fps = [c.struct_fp for c in crys]
    comp_fps = [c.comp_fp for c in crys]
    gt_struc_fps = [c.struct_fp for c in gt_crys]
    gt_comp_fps = [c.comp_fp for c in gt_crys]

    assert len(struc_fps) == len(comp_fps)
    assert len(gt_struc_fps) == len(gt_comp_fps)

    # Use number of crystal before filtering to compute COV
    if num_gen_crystals is None:
        num_gen_crystals = len(struc_fps)

    struc_fps, comp_fps = filter_fps(struc_fps, comp_fps)
    gt_struc_fps, gt_comp_fps = filter_fps(gt_struc_fps, gt_comp_fps)

    comp_fps = CompScaler.transform(comp_fps)
    gt_comp_fps = CompScaler.transform(gt_comp_fps)

    struc_fps = np.array(struc_fps)
    gt_struc_fps = np.array(gt_struc_fps)
    comp_fps = np.array(comp_fps)
    gt_comp_fps = np.array(gt_comp_fps)

    struc_pdist = cdist(struc_fps, gt_struc_fps)
    comp_pdist = cdist(comp_fps, gt_comp_fps)

    struc_recall_dist = struc_pdist.min(axis=0)
    struc_precision_dist = struc_pdist.min(axis=1)
    comp_recall_dist = comp_pdist.min(axis=0)
    comp_precision_dist = comp_pdist.min(axis=1)

    cov_recall = np.mean(np.logical_and(
        struc_recall_dist <= struc_cutoff,
        comp_recall_dist <= comp_cutoff))
    cov_precision = np.mean(np.logical_and(
        struc_precision_dist <= struc_cutoff,
        comp_precision_dist <= comp_cutoff))# / num_gen_crystals

    metrics_dict = {
        'cov_recall': cov_recall,
        'cov_precision': cov_precision,
        'amsd_recall': np.mean(struc_recall_dist),
        'amsd_precision': np.mean(struc_precision_dist),
        'amcd_recall': np.mean(comp_recall_dist),
        'amcd_precision': np.mean(comp_precision_dist),
    }

    combined_dist_dict = {
        'struc_recall_dist': struc_recall_dist.tolist(),
        'struc_precision_dist': struc_precision_dist.tolist(),
        'comp_recall_dist': comp_recall_dist.tolist(),
        'comp_precision_dist': comp_precision_dist.tolist(),
    }

    return metrics_dict, combined_dist_dict

def compute_novelty(
    crys, 
    gt_crys,
    struc_cutoff, 
    comp_cutoff,
    num_gen_crystals=None
):
    struc_fps = [c.struct_fp for c in crys]
    comp_fps = [c.comp_fp for c in crys]
    gt_struc_fps = [c.struct_fp for c in gt_crys]
    gt_comp_fps = [c.comp_fp for c in gt_crys]

    assert len(struc_fps) == len(comp_fps)
    assert len(gt_struc_fps) == len(gt_comp_fps)

    # Use number of crystal before filtering to compute COV
    if num_gen_crystals is None:
        num_gen_crystals = len(struc_fps)

    struc_fps, comp_fps = filter_fps(struc_fps, comp_fps)
    gt_struc_fps, gt_comp_fps = filter_fps(gt_struc_fps, gt_comp_fps)

    comp_fps = CompScaler.transform(comp_fps)
    gt_comp_fps = CompScaler.transform(gt_comp_fps)

    struc_fps = np.array(struc_fps)
    gt_struc_fps = np.array(gt_struc_fps)
    comp_fps = np.array(comp_fps)
    gt_comp_fps = np.array(gt_comp_fps)

    struc_pdist = cdist(struc_fps, gt_struc_fps)
    comp_pdist = cdist(comp_fps, gt_comp_fps)

    struc_precision_dist = struc_pdist.min(axis=1)
    comp_precision_dist = comp_pdist.min(axis=1)

    struc_novelty = np.mean(struc_precision_dist > struc_cutoff)
    comp_novelty = np.mean(comp_precision_dist > comp_cutoff)

    novelty = np.mean(np.logical_or(
        struc_precision_dist > struc_cutoff,
        comp_precision_dist > comp_cutoff))

    metrics_dict = {
        'struc_novelty': struc_novelty,
        'comp_novelty': comp_novelty,
        'novelty': novelty,
    }

    return metrics_dict

class CDVAEGenEval(object):

    def __init__(
        self, 
        pred_crys, 
        gt_cov_crys, 
        gt_novelty_crys,
        n_samples=1000, 
        eval_model_name=None
    ):
        self.crys = pred_crys
        self.gt_cov_crys = gt_cov_crys
        self.gt_novelty_crys = gt_novelty_crys
        self.n_samples = n_samples
        self.eval_model_name = eval_model_name

        valid_crys = [c for c in pred_crys if c.valid]
        if len(valid_crys) >= n_samples:
            sampled_indices = np.random.choice(
                len(valid_crys), n_samples, replace=False)
            self.valid_samples = [valid_crys[i] for i in sampled_indices]
        else:
            raise Exception(
                f'not enough valid crystals in the predicted set: {len(valid_crys)}/{n_samples}')

    def get_validity(self):
        comp_valid = np.array([c.comp_valid for c in self.crys]).mean()
        struct_valid = np.array([c.struct_valid for c in self.crys]).mean()
        valid = np.array([c.valid for c in self.crys]).mean()
        return {'comp_valid': comp_valid,
                'struct_valid': struct_valid,
                'valid': valid}

    def get_comp_diversity(self):
        comp_fps = [c.comp_fp for c in self.valid_samples]
        comp_fps = CompScaler.transform(comp_fps)
        comp_div = get_fp_pdist(comp_fps)
        return {'comp_div': comp_div}

    def get_struct_diversity(self):
        return {'struct_div': get_fp_pdist([c.struct_fp for c in self.valid_samples])}

    def get_density_wdist(self):
        pred_densities = [c.structure.density for c in self.valid_samples]
        gt_densities = [c.structure.density for c in self.gt_cov_crys]
        wdist_density = wasserstein_distance(pred_densities, gt_densities)
        return {'wdist_density': wdist_density}

    def get_num_elem_wdist(self):
        pred_nelems = [len(set(c.structure.species))
                       for c in self.valid_samples]
        gt_nelems = [len(set(c.structure.species)) for c in self.gt_cov_crys]
        wdist_num_elems = wasserstein_distance(pred_nelems, gt_nelems)
        return {'wdist_num_elems': wdist_num_elems}

    def get_coverage(self):
        cutoff_dict = COV_Cutoffs[self.eval_model_name]
        (cov_metrics_dict, combined_dist_dict) = compute_cov(
            self.crys, self.gt_cov_crys,
            struc_cutoff=cutoff_dict['struc'],
            comp_cutoff=cutoff_dict['comp'])
        return cov_metrics_dict

    def get_novelty(self):
        cutoff_dict = NOVELTY_Cutoffs[self.eval_model_name]
        novelty_metrics_dict = compute_novelty(
            self.crys, self.gt_novelty_crys,
            struc_cutoff=cutoff_dict['struc'],
            comp_cutoff=cutoff_dict['comp'])
        return novelty_metrics_dict

    def get_metrics(self):
        metrics = {}
        metrics.update(self.get_validity())
        metrics.update(self.get_comp_diversity())
        metrics.update(self.get_struct_diversity())
        metrics.update(self.get_density_wdist())
        metrics.update(self.get_num_elem_wdist())
        metrics.update(self.get_coverage())
        metrics.update(self.get_novelty())
        return metrics


def smact_validity(comp, count,
                   use_pauling_test=True,
                   include_alloys=True):
    elem_symbols = tuple([chemical_symbols[elem] for elem in comp])
    space = smact.element_dictionary(elem_symbols)
    smact_elems = [e[1] for e in space.items()]
    electronegs = [e.pauling_eneg for e in smact_elems]
    ox_combos = [e.oxidation_states for e in smact_elems]
    if len(set(elem_symbols)) == 1:
        return True
    if include_alloys:
        is_metal_list = [elem_s in smact.metals for elem_s in elem_symbols]
        if all(is_metal_list):
            return True

    threshold = np.max(count)
    compositions = []
    for ox_states in itertools.product(*ox_combos):
        stoichs = [(c,) for c in count]
        # Test for charge balance
        cn_e, cn_r = smact.neutral_ratios(
            ox_states, stoichs=stoichs, threshold=threshold)
        # Electronegativity test
        if cn_e:
            if use_pauling_test:
                try:
                    electroneg_OK = pauling_test(ox_states, electronegs)
                except TypeError:
                    # if no electronegativity data, assume it is okay
                    electroneg_OK = True
            else:
                electroneg_OK = True
            if electroneg_OK:
                for ratio in cn_r:
                    compositions.append(
                        tuple([elem_symbols, ox_states, ratio]))
    compositions = [(i[0], i[2]) for i in compositions]
    compositions = list(set(compositions))
    if len(compositions) > 0:
        return True
    else:
        return False

def structure_validity(crystal, cutoff=0.5):
    dist_mat = crystal.distance_matrix
    # Pad diagonal with a large number
    dist_mat = dist_mat + np.diag(
        np.ones(dist_mat.shape[0]) * (cutoff + 10.))
    if dist_mat.min() < cutoff or crystal.volume < 0.1:
        return False
    else:
        return True

class Crystal(object):

    def __init__(self, crys_array_dict):
        self.frac_coords = crys_array_dict['frac_coords']
        self.atom_types = crys_array_dict['atom_types']
        self.lengths = crys_array_dict['lengths']
        self.angles = crys_array_dict['angles']
        self.dict = crys_array_dict

        self.get_structure()
        self.get_composition()
        self.get_validity()

        if self.valid:
            self.get_fingerprints()
        else:
            self.comp_fp = None
            self.struct_fp = None

    def get_structure(self):
        if min(self.lengths.tolist()) < 0:
            self.constructed = False
            self.invalid_reason = 'non_positive_lattice'
        else:
            try:
                self.structure = Structure(
                    lattice=Lattice.from_parameters(
                        *(self.lengths.tolist() + self.angles.tolist())),
                    species=self.atom_types, coords=self.frac_coords, coords_are_cartesian=False)
                self.constructed = True
            except Exception:
                self.constructed = False
                self.invalid_reason = 'construction_raises_exception'
            if self.structure.volume < 0.1:
                self.constructed = False
                self.invalid_reason = 'unrealistically_small_lattice'

    def get_composition(self):
        elem_counter = Counter(self.atom_types)
        composition = [(elem, elem_counter[elem])
                       for elem in sorted(elem_counter.keys())]
        elems, counts = list(zip(*composition))
        counts = np.array(counts)
        counts = counts / np.gcd.reduce(counts)
        self.elems = elems
        self.comps = tuple(counts.astype('int').tolist())

    def get_validity(self):
        self.comp_valid = smact_validity(self.elems, self.comps)
        if self.constructed:
            self.struct_valid = structure_validity(self.structure)
        else:
            self.struct_valid = False
        self.valid = self.comp_valid and self.struct_valid

    def get_fingerprints(self):
        elem_counter = Counter(self.atom_types)
        comp = Composition(elem_counter)
        self.comp_fp = CompFP.featurize(comp)
        try:
            site_fps = [timeout_featurize(
                self.structure, i) for i in range(len(self.structure))]
        except Exception as e:
            print(e)
            # counts crystal as invalid if fingerprint cannot be constructed.
            self.valid = False
            self.comp_fp = None
            self.struct_fp = None
            return
        self.struct_fp = np.array(site_fps).mean(axis=0)


def cif_str_to_crystal(cif_str):
    try:
        structure = Structure.from_str(cif_str, fmt="cif")
        crystal = Crystal({
            "frac_coords": structure.frac_coords,
            "atom_types": [chemical_symbols.index(str(x)) for x in structure.species],
            "lengths": np.array(structure.lattice.parameters[:3]),
            "angles": np.array(structure.lattice.parameters[3:])
        })
    except Exception as e:
        print(e)
        # print(cif_str)
        return None
    
    return crystal




In [4]:
baseline_numbers = pd.DataFrame([
    {'method': 'Train', 'struct_valid': 1.0, 'comp_valid': 0.9113, 'cov_recall': 1.0, 'cov_precision': 1.0, 'wdist_density': 0.051, 'wdist_num_elems': 0.016},
    {'method': 'FTCP', 'struct_valid': 0.0155, 'comp_valid': 0.4837, 'cov_recall': 0.047, 'cov_precision': 0.0009, 'wdist_density': 23.71, 'wdist_num_elems': 0.736},
    {'method': 'GSchNet', 'struct_valid': 0.9965, 'comp_valid': 0.7596, 'cov_recall': 0.3833, 'cov_precision': 0.9957, 'wdist_density': 3.034, 'wdist_num_elems': 0.641},
    {'method': 'PGSchNet', 'struct_valid': 0.7751, 'comp_valid': 0.7640, 'cov_recall': 0.4193, 'cov_precision': 0.9974, 'wdist_density': 4.04, 'wdist_num_elems': 0.623},
    {'method': 'CDVAE', 'struct_valid': 1.0, 'comp_valid': 0.867, 'cov_recall': 0.9915, 'cov_precision': 0.9949, 'wdist_density': 0.688, 'wdist_num_elems': 1.432},
    {'method': 'LM-CH', 'struct_valid': 0.8481, 'comp_valid': 0.8355, 'cov_recall': 0.9925, 'cov_precision': 0.9789, 'wdist_density': 0.864, 'wdist_num_elems': 0.132},
    {'method': 'LM-AC', 'struct_valid': 0.9581, 'comp_valid': 0.8887, 'cov_recall': 0.996, 'cov_precision': 0.9855, 'wdist_density': 0.696, 'wdist_num_elems': 0.092},
])

# results_df_fn = "generative_model_results_17k_14k.csv"
results_df_fn = "generative_model_results_17k_14k_congen_9046.csv"



In [5]:
# model_name = 'llamat-7b'
# model_name = 'llamat-8b-cif-congen'
model_name = 'llamat-2-7b-cif-congen-9046'

In [None]:
def parse_fn(gen_str):
    lines = [x for x in gen_str.split("\n") if len(x) > 0]
    lengths = [float(x) for x in lines[0].split(" ")]
    angles = [float(x) for x in lines[1].split(" ")]
    species = [x for x in lines[2::2]]
    coords = [[float(y) for y in x.split(" ")] for x in lines[3::2]]
    
    structure = Structure(
        lattice=Lattice.from_parameters(
            *(lengths + angles)),
        species=species,
        coords=coords, 
        coords_are_cartesian=False,
    )
    
    return structure.to(fmt="cif")

In [7]:
import pandas as pd
import re

def parse_fn(gen_str):
    """
    Parse generated structure string to extract lengths, angles, species, and coordinates.
    
    The function handles cases where:
    1. The structure may not start from the first line
    2. There may be text before the actual structure data
    3. The first line should contain three numbers (lengths)
    4. The second line should contain three numbers (angles)a
    5. Then pairs of lines: element followed by three fractional coordinates
    """
    # Remove quotes and clean the string
    gen_str = gen_str.strip().strip('"')
    lines = [x.strip() for x in gen_str.split("\n") if len(x.strip()) > 0]
    
    # Find the start of the actual structure data
    # Look for a line that contains three numbers (lengths)
    start_idx = -1
    for i, line in enumerate(lines):
        # Split by whitespace and check if we have at least 3 numeric values
        parts = line.split()
        if len(parts) >= 3:
            # Try to find 3 consecutive numeric values in the line
            numeric_parts = []
            for part in parts:
                try:
                    val = float(part)
                    numeric_parts.append(val)
                    if len(numeric_parts) == 3:
                        break
                except ValueError:
                    continue
            
            # Check if we found 3 positive numbers (likely lengths)
            if len(numeric_parts) == 3 and all(x > 0 for x in numeric_parts):
                start_idx = i
                break
    
    if start_idx == -1 or start_idx >= len(lines) - 1:
        # If we can't find a valid start, return empty structure
        return [], [], [], []
    
    try:
        # Extract lengths (first line after start)
        lengths_line = lines[start_idx]
        # Find the first 3 numeric values in the line
        lengths = []
        for part in lengths_line.split():
            try:
                val = float(part)
                lengths.append(val)
                if len(lengths) == 3:
                    break
            except ValueError:
                continue
        
        # Extract angles (second line after start)
        if start_idx + 1 < len(lines):
            angles_line = lines[start_idx + 1]
            # Find the first 3 numeric values in the line
            angles = []
            for part in angles_line.split():
                try:
                    val = float(part)
                    angles.append(val)
                    if len(angles) == 3:
                        break
                except ValueError:
                    continue
        else:
            angles = []
        
        # Extract species and coordinates (pairs starting from third line)
        species = []
        coords = []
        
        # Start from the third line after the start index
        data_start = start_idx + 2
        
        for i in range(data_start, len(lines), 2):
            if i < len(lines):
                # Element line
                element_line = lines[i].strip()
                # Clean the element (remove any non-alphabetic characters at the end)
                element = re.sub(r'[^A-Za-z]', '', element_line)
                if element:  # Only add if we have a valid element
                    species.append(element)
                else:
                    # If no valid element, skip this pair
                    continue
                
                # Coordinates line (next line)
                if i + 1 < len(lines):
                    coords_line = lines[i + 1].strip()
                    try:
                        coord_parts = coords_line.split()
                        if len(coord_parts) >= 3:
                            coord_values = [float(x) for x in coord_parts[:3]]
                            coords.append(coord_values)
                        else:
                            # If coordinates are malformed, add empty coordinates
                            coords.append([0.0, 0.0, 0.0])
                    except ValueError:
                        # If coordinates can't be parsed, add empty coordinates
                        coords.append([0.0, 0.0, 0.0])
                else:
                    # If no coordinates line, add empty coordinates
                    coords.append([0.0, 0.0, 0.0])

        structure = Structure(
        lattice=Lattice.from_parameters(
            *(lengths + angles)),
        species=species,
        coords=coords, 
        coords_are_cartesian=False,)
        return structure.to(fmt="cif")

    except:
        return ''
    
    # return structure.to(fmt="cif")
        
    #     return lengths, angles, species, coords
        
    # except (ValueError, IndexError) as e:
    #     # If parsing fails, return empty structure
    #     return [], [], [], []

# def process_csv_file(input_file, output_file):
#     """Process a CSV file and save parsed results."""
#     print(f"Processing {input_file}...")
#     df = pd.read_csv(input_file)
    
#     outputs = []
#     failed_parses = []
#     successful_parses = 0
    
#     for idx, gen_str in enumerate(df['gen_str']):
#         if pd.isna(gen_str):
#             outputs.append({
#                 'lengths': [],
#                 'angles': [],
#                 'species': [],
#                 'coords': []
#             })
#             failed_parses.append({
#                 'index': idx,
#                 'original_gen_str': gen_str,
#                 'reason': 'NaN input'
#             })
#             continue
            
#         lengths, angles, species, coords = parse_fn(str(gen_str))
        
#         # Check if we got valid data
#         if len(lengths) == 3 and len(angles) == 3 and len(species) > 0:
#             successful_parses += 1
#         else:
#             # Record failed parse
#             failed_parses.append({
#                 'index': idx,
#                 'original_gen_str': str(gen_str),
#                 'reason': f'Invalid structure: lengths={len(lengths)}, angles={len(angles)}, species={len(species)}'
#             })
        
#         outputs.append({
#             'lengths': lengths,
#             'angles': angles,
#             'species': species,
#             'coords': coords
#         })
        
#         if (idx + 1) % 1000 == 0:
#             print(f"Processed {idx + 1} structures...")
    
#     result_df = pd.DataFrame(outputs)
#     result_df.to_csv(output_file, index=False)
#     print(f"Saved {len(outputs)} parsed structures to {output_file}")
#     print(f"Successfully parsed {successful_parses} out of {len(outputs)} structures")
    
#     # Save failed parses
#     if failed_parses:
#         failed_df = pd.DataFrame(failed_parses)
#         failed_output_file = output_file.replace('.csv', '_failed.csv')
#         failed_df.to_csv(failed_output_file, index=False)
#         print(f"Saved {len(failed_parses)} failed structures to {failed_output_file}")
    
#     return result_df

# # Process both datasets
# if __name__ == "__main__":
#     # Process llamat2 dataset
#     try:
#         process_csv_file("llamat2_9046.csv", "llamat2_9046_parsed.csv")
#     except FileNotFoundError:
#         print("llamat2_9046.csv not found, skipping...")
    
#     # Process llamat3 dataset
#     try:
#         process_csv_file("llamat3_9046.csv", "llamat3_9046_parsed.csv")
#     except FileNotFoundError:
#         print("llamat3_9046.csv not found, skipping...")

In [42]:
# samples_path = "llamat3_14000_cif.csv"
# samples_path = 'llm_samples_17k_cif.csv'
# samples_path = 'llamat2_cif.csv'
# samples_path = 'llamat2_cif_adapter_cleaned.csv'
# samples_path = 'llamat3_cif_adapter_cleaned.csv'
samples_path = 'llamat2_9064_llamat2_1758654262.csv'
# llamat3_cif_adapter_cleaned

In [43]:
# pd.read_csv('llamat3_9064_llamat3_1758654262.csv').head(2)

In [9]:
dfres = pd.read_csv(samples_path)

In [11]:
from tqdm import tqdm

In [12]:
parsed = []
for gen_str in tqdm(dfres.gen_str):
    parsed.append(parse_fn(gen_str))

  val = (cos_alpha * cos_beta - cos_gamma) / (sin_alpha * sin_beta)
100%|██████████████████████████████████████████████████████████████████████████████| 9046/9046 [00:09<00:00, 991.96it/s]


In [14]:
parsed[0]

"# generated using pymatgen\ndata_GaTe\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   4.20000000\n_cell_length_b   4.20000000\n_cell_length_c   6.70000000\n_cell_angle_alpha   90.00000000\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   120.00000000\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   GaTe\n_chemical_formula_sum   'Ga2 Te2'\n_cell_volume   102.35381042\n_cell_formula_units_Z   2\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Ga  Ga0  1  0.37000000  0.85000000  0.89000000  1\n  Ga  Ga1  1  0.03000000  0.19000000  0.39000000  1\n  Te  Te2  1  0.37000000  0.85000000  0.51000000  1\n  Te  Te3  1  0.03000000  0.19000000  0.01000000  1\n"

In [17]:
if os.path.exists(results_df_fn):
    results_df = pd.read_csv(results_df_fn)
else:
    baseline_numbers.to_csv(results_df_fn, index=False)
    results_df = baseline_numbers

if model_name in results_df["method"].values:
    print(f"Skipping {args.model_name} because it already exists in {results_df_fn}")
    # return

csv_fns = [
    x for x in glob.glob(samples_path) 
        if len(open(x).readlines()) > 1 and 'm3gnet_relaxed_energy' not in x
]
if len(csv_fns) == 0:
    # return
    print(len(csv_fns))


In [18]:

csv_fns

['llamat2_9064_llamat2_1758654262.csv']

In [19]:
pred_cifs = []
for x in csv_fns:
    try:
        df = pd.read_csv(x)
        df['cif'] = df.gen_str.apply(lambda x: parse_fn(x))
        pred_cifs += list(df["cif"].dropna())
    except:
        pass

  val = (cos_alpha * cos_beta - cos_gamma) / (sin_alpha * sin_beta)


In [41]:
pred_cifs = pred_cifs[::-1]
print(len(pred_cifs))

502


In [42]:
samples_path

'llamat3_cif_adapter_cleaned.csv'

In [20]:
# fname = f"pred_{samples_path[:-4]}.pkl";
fname = f"all_congen_pred_{samples_path[:-4]}.pkl";
# if 'pred_crys_17k_14k_cif.pkl' not in os.listdir():
if fname not in os.listdir():
    pred_crys = [x for x in p_map(cif_str_to_crystal, pred_cifs) if x is not None]
    with open(fname,'wb') as f:
        pickle.dump(pred_crys, f)
    f.close()
else:
    print(f'loading {fname}')
    with open(fname,'rb') as f:
        pred_crys = pickle.load(f)
    f.close()

  0%|                                                                                          | 0/9046 [00:00<?, ?it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!
Invalid CIF file with no structures!



  0%|                                                                                | 1/9046 [00:02<5:40:53,  2.26s/it]

Invalid CIF file with no structures!


  0%|                                                                                | 2/9046 [00:03<3:28:48,  1.39s/it]

Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


  0%|                                                                                | 4/9046 [00:07<4:53:45,  1.95s/it]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


  0%|▎                                                                                | 30/9046 [00:11<27:34,  5.45it/s]

Invalid CIF file with no structures!


  1%|▍                                                                                | 52/9046 [00:13<18:03,  8.30it/s]

Invalid CIF file with no structures!


  1%|▋                                                                                | 74/9046 [00:17<25:49,  5.79it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


  1%|█                                                                               | 116/9046 [00:20<14:23, 10.34it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


  2%|█▌                                                                              | 175/9046 [00:25<12:07, 12.19it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


  2%|█▊                                                                              | 199/9046 [00:27<12:19, 11.97it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


  3%|██                                                                              | 240/9046 [00:32<13:27, 10.91it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


  3%|██▋                                                                             | 308/9046 [00:39<15:30,  9.39it/s]

Invalid CIF file with no structures!


  4%|██▉                                                                             | 327/9046 [00:40<12:17, 11.82it/s]

Invalid CIF file with no structures!


  4%|███▏                                                                            | 357/9046 [00:46<19:06,  7.58it/s]

Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


  5%|████▏                                                                           | 471/9046 [00:56<11:15, 12.69it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!




'Eb0+' is not in list


  6%|████▍                                                                           | 502/9046 [01:01<15:09,  9.40it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


  6%|████▍                                                                           | 508/9046 [01:04<25:59,  5.47it/s]

Invalid CIF file with no structures!


  7%|█████▏                                                                          | 591/9046 [01:10<13:14, 10.64it/s]

Invalid CIF file with no structures!


  7%|█████▎                                                                          | 601/9046 [01:13<16:46,  8.39it/s]

Invalid CIF file with no structures!


  7%|█████▌                                                                          | 625/9046 [01:14<13:37, 10.30it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


  7%|█████▋                                                                          | 640/9046 [01:16<14:41,  9.54it/s]

Invalid CIF file with no structures!


  7%|█████▉                                                                          | 665/9046 [01:19<12:33, 11.12it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


  8%|██████                                                                          | 688/9046 [01:20<09:55, 14.04it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


  8%|██████▍                                                                         | 734/9046 [01:27<16:40,  8.31it/s]

Invalid CIF file with no structures!


  8%|██████▌                                                                         | 738/9046 [01:28<19:03,  7.26it/s]

Invalid CIF file with no structures!Invalid CIF file with no structures!



Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


  9%|███████                                                                         | 796/9046 [01:33<12:54, 10.65it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


  9%|███████                                                                         | 803/9046 [01:37<21:49,  6.29it/s]

Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 10%|███████▊                                                                        | 881/9046 [01:42<10:41, 12.73it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 10%|████████▎                                                                       | 938/9046 [01:49<12:13, 11.05it/s]

Invalid CIF file with no structures!


 11%|████████▌                                                                       | 970/9046 [01:51<10:19, 13.04it/s]

Invalid CIF file with no structures!


 11%|████████▊                                                                       | 999/9046 [01:54<12:19, 10.88it/s]

Invalid CIF file with no structures!


 11%|████████▉                                                                      | 1025/9046 [01:56<10:52, 12.29it/s]

Invalid CIF file with no structures!


 12%|█████████▌                                                                     | 1088/9046 [02:05<15:32,  8.54it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!

 12%|█████████▌                                                                     | 1097/9046 [02:06<15:52,  8.34it/s]


Invalid CIF file with no structures!


 13%|█████████▉                                                                     | 1142/9046 [02:08<09:44, 13.52it/s]

Invalid CIF file with no structures!


 13%|█████████▉                                                                     | 1145/9046 [02:10<13:12,  9.97it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 13%|██████████                                                                     | 1150/9046 [02:12<21:09,  6.22it/s]

Invalid CIF file with no structures!


 13%|██████████▏                                                                    | 1167/9046 [02:15<19:21,  6.78it/s]

Invalid CIF file with no structures!


 13%|██████████▍                                                                    | 1194/9046 [02:15<11:54, 10.98it/s]

Invalid CIF file with no structures!


 13%|██████████▌                                                                    | 1216/9046 [02:18<13:15,  9.84it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 13%|██████████▋                                                                    | 1221/9046 [02:19<14:59,  8.70it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


 14%|██████████▊                                                                    | 1234/9046 [02:22<20:06,  6.48it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 14%|███████████▏                                                                   | 1277/9046 [02:24<09:55, 13.04it/s]

Invalid CIF file with no structures!


 14%|███████████▏                                                                   | 1288/9046 [02:28<16:31,  7.82it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 15%|███████████▍                                                                   | 1315/9046 [02:31<14:53,  8.65it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 15%|████████████                                                                   | 1377/9046 [02:32<06:46, 18.89it/s]

Invalid CIF file with no structures!


 15%|████████████▏                                                                  | 1395/9046 [02:35<10:27, 12.20it/s]

Invalid CIF file with no structures!


 16%|████████████▎                                                                  | 1404/9046 [02:36<11:06, 11.47it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 16%|████████████▎                                                                  | 1416/9046 [02:39<15:59,  7.95it/s]

Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 16%|████████████▍                                                                  | 1428/9046 [02:42<20:24,  6.22it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 17%|█████████████                                                                  | 1494/9046 [02:45<08:57, 14.04it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


 17%|█████████████▏                                                                 | 1511/9046 [02:50<15:19,  8.19it/s]

Invalid CIF file with no structures!


 17%|█████████████▌                                                                 | 1554/9046 [02:50<08:46, 14.24it/s]

Invalid CIF file with no structures!


 17%|█████████████▋                                                                 | 1569/9046 [02:51<08:16, 15.04it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 18%|█████████████▊                                                                 | 1586/9046 [02:55<13:44,  9.05it/s]

Invalid CIF file with no structures!


 18%|██████████████                                                                 | 1614/9046 [02:58<12:52,  9.62it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 18%|██████████████▏                                                                | 1618/9046 [03:02<22:17,  5.55it/s]

Invalid CIF file with no structures!


 19%|██████████████▌                                                                | 1674/9046 [03:05<14:43,  8.35it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 19%|██████████████▊                                                                | 1697/9046 [03:07<12:57,  9.45it/s]

Invalid CIF file with no structures!


 19%|███████████████                                                                | 1726/9046 [03:09<10:17, 11.86it/s]

Invalid CIF file with no structures!


 19%|███████████████▎                                                               | 1755/9046 [03:12<11:40, 10.40it/s]

Invalid CIF file with no structures!


 20%|███████████████▍                                                               | 1769/9046 [03:12<09:29, 12.79it/s]

Invalid CIF file with no structures!


 20%|███████████████▍                                                               | 1771/9046 [03:16<18:54,  6.41it/s]

Invalid CIF file with no structures!


 20%|███████████████▌                                                               | 1781/9046 [03:17<17:32,  6.90it/s]

Invalid CIF file with no structures!


 20%|███████████████▊                                                               | 1806/9046 [03:20<16:57,  7.12it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!

 20%|████████████████                                                               | 1834/9046 [03:25<18:16,  6.57it/s]




 21%|████████████████▌                                                              | 1895/9046 [03:28<10:18, 11.55it/s]

Invalid CIF file with no structures!

 21%|████████████████▋                                                              | 1912/9046 [03:32<13:58,  8.51it/s]




 22%|█████████████████                                                              | 1951/9046 [03:33<09:03, 13.06it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 22%|█████████████████▏                                                             | 1961/9046 [03:34<09:54, 11.92it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!

 22%|█████████████████▏                                                             | 1963/9046 [03:38<19:27,  6.07it/s]




 22%|█████████████████▍                                                             | 1999/9046 [03:40<12:17,  9.55it/s]

Invalid CIF file with no structures!


 22%|█████████████████▌                                                             | 2004/9046 [03:42<17:24,  6.74it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


 23%|██████████████████                                                             | 2070/9046 [03:49<12:11,  9.54it/s]

Invalid CIF file with no structures!


 23%|██████████████████▎                                                            | 2104/9046 [03:53<12:47,  9.04it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 24%|██████████████████▊                                                            | 2150/9046 [03:56<09:42, 11.85it/s]

Invalid CIF file with no structures!


 24%|██████████████████▊                                                            | 2154/9046 [03:57<11:29,  9.99it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 24%|██████████████████▊                                                            | 2159/9046 [04:00<18:46,  6.12it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 24%|██████████████████▉                                                            | 2172/9046 [04:01<15:13,  7.53it/s]

Invalid CIF file with no structures!


 24%|██████████████████▉                                                            | 2175/9046 [04:05<30:13,  3.79it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 25%|███████████████████▌                                                           | 2234/9046 [04:07<10:20, 10.99it/s]

Invalid CIF file with no structures!


 25%|███████████████████▌                                                           | 2241/9046 [04:08<11:42,  9.69it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 25%|███████████████████▋                                                           | 2256/9046 [04:13<18:01,  6.28it/s]

Invalid CIF file with no structures!


 26%|████████████████████▏                                                          | 2312/9046 [04:14<08:47, 12.76it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 26%|████████████████████▏                                                          | 2316/9046 [04:15<09:49, 11.41it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 26%|████████████████████▋                                                          | 2371/9046 [04:20<09:07, 12.18it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 26%|████████████████████▋                                                          | 2374/9046 [04:26<17:45,  6.26it/s]

Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 27%|█████████████████████                                                          | 2417/9046 [04:28<10:06, 10.93it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 27%|█████████████████████▏                                                         | 2424/9046 [04:33<19:14,  5.73it/s]

Invalid CIF file with no structures!


 27%|█████████████████████▌                                                         | 2466/9046 [04:34<10:02, 10.92it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 28%|█████████████████████▉                                                         | 2507/9046 [04:37<10:27, 10.42it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 28%|██████████████████████▏                                                        | 2540/9046 [04:41<11:19,  9.58it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 29%|██████████████████████▋                                                        | 2601/9046 [04:47<08:05, 13.27it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 29%|██████████████████████▊                                                        | 2617/9046 [04:51<12:47,  8.38it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 29%|██████████████████████▉                                                        | 2622/9046 [04:53<14:16,  7.50it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 29%|███████████████████████▎                                                       | 2668/9046 [04:53<07:31, 14.14it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 30%|███████████████████████▌                                                       | 2698/9046 [04:56<08:16, 12.80it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 30%|███████████████████████▊                                                       | 2728/9046 [05:02<13:10,  7.99it/s]

Invalid CIF file with no structures!


 30%|████████████████████████                                                       | 2755/9046 [05:03<08:54, 11.76it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


 31%|████████████████████████▏                                                      | 2769/9046 [05:13<26:23,  3.96it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 31%|████████████████████████▊                                                      | 2835/9046 [05:15<11:35,  8.93it/s]

Invalid CIF file with no structures!

 32%|█████████████████████████                                                      | 2865/9046 [05:15<08:21, 12.32it/s]


Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 32%|█████████████████████████▏                                                     | 2878/9046 [05:20<12:15,  8.38it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 33%|█████████████████████████▋                                                     | 2947/9046 [05:24<08:42, 11.68it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 33%|██████████████████████████                                                     | 2983/9046 [05:28<09:23, 10.76it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 33%|██████████████████████████▏                                                    | 2993/9046 [05:35<16:28,  6.12it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!



No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 34%|██████████████████████████▋                                                    | 3063/9046 [05:40<10:07,  9.85it/s]

Invalid CIF file with no structures!


 35%|███████████████████████████▎                                                   | 3134/9046 [05:41<05:23, 18.29it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 35%|███████████████████████████▌                                                   | 3150/9046 [05:45<08:42, 11.28it/s]

Invalid CIF file with no structures!


 35%|███████████████████████████▉                                                   | 3196/9046 [05:49<08:40, 11.24it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 36%|████████████████████████████▏                                                  | 3224/9046 [05:52<08:36, 11.28it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!Invalid CIF file with no structures!



 36%|████████████████████████████▎                                                  | 3245/9046 [05:54<08:05, 11.96it/s]

Invalid CIF file with no structures!


 36%|████████████████████████████▎                                                  | 3247/9046 [05:55<11:05,  8.71it/s]

Invalid CIF file with no structures!


 36%|████████████████████████████▌                                                  | 3274/9046 [05:58<10:59,  8.75it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 37%|████████████████████████████▊                                                  | 3306/9046 [06:00<07:00, 13.64it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 37%|████████████████████████████▉                                                  | 3310/9046 [06:00<08:08, 11.74it/s]

Invalid CIF file with no structures!


 37%|█████████████████████████████                                                  | 3333/9046 [06:04<11:33,  8.24it/s]

Invalid CIF file with no structures!


 37%|█████████████████████████████▏                                                 | 3340/9046 [06:06<17:43,  5.37it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 37%|█████████████████████████████▌                                                 | 3390/9046 [06:12<09:38,  9.78it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 37%|█████████████████████████████▌                                                 | 3392/9046 [06:17<22:35,  4.17it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 38%|██████████████████████████████▎                                                | 3465/9046 [06:22<08:55, 10.42it/s]

Invalid CIF file with no structures!




Invalid CIF file with no structures!
Invalid CIF file with no structures!


 39%|██████████████████████████████▉                                                | 3544/9046 [06:31<08:12, 11.18it/s]

Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


 40%|███████████████████████████████▎                                               | 3590/9046 [06:35<08:27, 10.75it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 40%|███████████████████████████████▍                                               | 3606/9046 [06:38<11:51,  7.65it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 40%|███████████████████████████████▌                                               | 3608/9046 [06:42<20:42,  4.38it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 40%|███████████████████████████████▉                                               | 3654/9046 [06:45<11:48,  7.61it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 41%|████████████████████████████████▍                                              | 3717/9046 [06:48<07:38, 11.61it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 41%|████████████████████████████████▌                                              | 3724/9046 [06:51<09:31,  9.32it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!

 41%|████████████████████████████████▋                                              | 3740/9046 [06:54<11:23,  7.76it/s]




 42%|████████████████████████████████▉                                              | 3775/9046 [06:55<07:03, 12.43it/s]

Invalid CIF file with no structures!


 42%|█████████████████████████████████▎                                             | 3810/9046 [06:57<06:36, 13.19it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 42%|█████████████████████████████████▍                                             | 3825/9046 [07:01<12:15,  7.10it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 42%|█████████████████████████████████▍                                             | 3834/9046 [07:07<25:08,  3.45it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!



 43%|██████████████████████████████████▎                                            | 3925/9046 [07:09<05:16, 16.19it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 44%|██████████████████████████████████▌                                            | 3960/9046 [07:16<09:57,  8.51it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 44%|███████████████████████████████████                                            | 4018/9046 [07:18<05:28, 15.30it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 45%|███████████████████████████████████▎                                           | 4039/9046 [07:22<08:05, 10.31it/s]

Invalid CIF file with no structures!


 45%|███████████████████████████████████▍                                           | 4051/9046 [07:22<06:57, 11.97it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 45%|███████████████████████████████████▋                                           | 4088/9046 [07:27<08:15, 10.01it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 45%|███████████████████████████████████▊                                           | 4102/9046 [07:28<07:09, 11.52it/s]

Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


 46%|███████████████████████████████████▉                                           | 4117/9046 [07:31<12:03,  6.81it/s]

Invalid CIF file with no structures!


 46%|████████████████████████████████████▎                                          | 4152/9046 [07:33<08:11,  9.96it/s]

Invalid CIF file with no structures!


 46%|████████████████████████████████████▎                                          | 4158/9046 [07:35<09:16,  8.78it/s]

Invalid CIF file with no structures!


 46%|████████████████████████████████████▋                                          | 4198/9046 [07:38<06:46, 11.92it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 47%|████████████████████████████████████▉                                          | 4233/9046 [07:44<08:39,  9.26it/s]

Invalid CIF file with no structures!Invalid CIF file with no structures!



 47%|█████████████████████████████████████▏                                         | 4252/9046 [07:45<07:28, 10.68it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 47%|█████████████████████████████████████▏                                         | 4254/9046 [07:50<17:28,  4.57it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 47%|█████████████████████████████████████▍                                         | 4286/9046 [07:52<10:09,  7.81it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 48%|█████████████████████████████████████▋                                         | 4315/9046 [07:53<05:57, 13.25it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 48%|██████████████████████████████████████                                         | 4363/9046 [07:57<06:27, 12.08it/s]

Invalid CIF file with no structures!


 48%|██████████████████████████████████████▏                                        | 4372/9046 [07:57<05:50, 13.34it/s]

Invalid CIF file with no structures!


 48%|██████████████████████████████████████▏                                        | 4378/9046 [07:58<05:49, 13.35it/s]

Invalid CIF file with no structures!


 48%|██████████████████████████████████████▎                                        | 4381/9046 [08:01<13:03,  5.95it/s]

Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!



 49%|██████████████████████████████████████▌                                        | 4417/9046 [08:07<11:58,  6.45it/s]

Invalid CIF file with no structures!


 49%|██████████████████████████████████████▊                                        | 4448/9046 [08:08<08:44,  8.76it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 50%|███████████████████████████████████████▌                                       | 4523/9046 [08:14<06:10, 12.20it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!

 50%|███████████████████████████████████████▌                                       | 4530/9046 [08:15<06:42, 11.23it/s]


Invalid CIF file with no structures!


 50%|███████████████████████████████████████▊                                       | 4557/9046 [08:18<07:25, 10.08it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 50%|███████████████████████████████████████▉                                       | 4568/9046 [08:20<08:42,  8.57it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 51%|████████████████████████████████████████                                       | 4594/9046 [08:27<12:29,  5.94it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 51%|████████████████████████████████████████▌                                      | 4643/9046 [08:31<09:29,  7.73it/s]

Invalid CIF file with no structures!


 53%|█████████████████████████████████████████▌                                     | 4761/9046 [08:40<05:42, 12.52it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 53%|█████████████████████████████████████████▋                                     | 4780/9046 [08:44<09:08,  7.78it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 53%|██████████████████████████████████████████                                     | 4822/9046 [08:47<06:49, 10.30it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 54%|██████████████████████████████████████████▎                                    | 4842/9046 [08:49<06:54, 10.15it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 54%|██████████████████████████████████████████▍                                    | 4856/9046 [08:53<08:58,  7.78it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 55%|███████████████████████████████████████████                                    | 4935/9046 [08:55<04:25, 15.47it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 55%|███████████████████████████████████████████▏                                   | 4943/9046 [08:59<07:19,  9.33it/s]

Invalid CIF file with no structures!


 55%|███████████████████████████████████████████▎                                   | 4956/9046 [09:05<12:18,  5.54it/s]

Invalid CIF file with no structures!


 55%|███████████████████████████████████████████▋                                   | 5009/9046 [09:06<05:55, 11.34it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 56%|████████████████████████████████████████████                                   | 5039/9046 [09:09<06:02, 11.06it/s]

Invalid CIF file with no structures!


 56%|████████████████████████████████████████████▏                                  | 5055/9046 [09:11<06:37, 10.04it/s]

Invalid CIF file with no structures!


 56%|████████████████████████████████████████████▍                                  | 5091/9046 [09:14<05:45, 11.44it/s]

Invalid CIF file with no structures!


 56%|████████████████████████████████████████████▌                                  | 5109/9046 [09:16<06:36,  9.94it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 57%|████████████████████████████████████████████▋                                  | 5114/9046 [09:18<08:26,  7.76it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 57%|████████████████████████████████████████████▉                                  | 5148/9046 [09:20<05:34, 11.64it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 57%|█████████████████████████████████████████████                                  | 5156/9046 [09:26<13:20,  4.86it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!


 58%|█████████████████████████████████████████████▍                                 | 5203/9046 [09:29<07:23,  8.66it/s]

Invalid CIF file with no structures!


 58%|█████████████████████████████████████████████▌                                 | 5223/9046 [09:30<05:52, 10.83it/s]

Invalid CIF file with no structures!


 58%|█████████████████████████████████████████████▊                                 | 5249/9046 [09:34<07:17,  8.68it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 58%|█████████████████████████████████████████████▉                                 | 5265/9046 [09:42<15:13,  4.14it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 59%|██████████████████████████████████████████████▌                                | 5330/9046 [09:45<07:27,  8.31it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 59%|██████████████████████████████████████████████▋                                | 5350/9046 [09:48<07:26,  8.28it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 60%|███████████████████████████████████████████████▏                               | 5410/9046 [09:58<09:38,  6.28it/s]

Invalid CIF file with no structures!


 60%|███████████████████████████████████████████████▋                               | 5458/9046 [09:59<05:29, 10.89it/s]

Invalid CIF file with no structures!


 61%|████████████████████████████████████████████████                               | 5503/9046 [10:04<05:43, 10.33it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 61%|████████████████████████████████████████████████▏                              | 5518/9046 [10:06<06:25,  9.16it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 61%|████████████████████████████████████████████████▎                              | 5528/9046 [10:13<12:27,  4.71it/s]

Invalid CIF file with no structures!


 62%|████████████████████████████████████████████████▉                              | 5603/9046 [10:14<04:32, 12.64it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 62%|█████████████████████████████████████████████████                              | 5616/9046 [10:15<04:37, 12.34it/s]

Invalid CIF file with no structures!


 63%|█████████████████████████████████████████████████▍                             | 5657/9046 [10:20<05:14, 10.76it/s]

Invalid CIF file with no structures!


 63%|█████████████████████████████████████████████████▊                             | 5700/9046 [10:22<03:41, 15.10it/s]

Invalid CIF file with no structures!


 63%|██████████████████████████████████████████████████                             | 5738/9046 [10:27<04:56, 11.16it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 64%|██████████████████████████████████████████████████▎                            | 5755/9046 [10:29<05:06, 10.72it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 64%|██████████████████████████████████████████████████▌                            | 5795/9046 [10:35<06:13,  8.71it/s]

Invalid CIF file with no structures!


 64%|██████████████████████████████████████████████████▋                            | 5805/9046 [10:36<06:16,  8.62it/s]

Invalid CIF file with no structures!


 64%|██████████████████████████████████████████████████▊                            | 5820/9046 [10:37<05:21, 10.04it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 64%|██████████████████████████████████████████████████▉                            | 5828/9046 [10:38<05:28,  9.79it/s]

Invalid CIF file with no structures!


 65%|███████████████████████████████████████████████████                            | 5846/9046 [10:41<07:09,  7.45it/s]

Invalid CIF file with no structures!


 65%|███████████████████████████████████████████████████▍                           | 5896/9046 [10:43<03:14, 16.21it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
initial_value must be str or None, not tuple


 65%|███████████████████████████████████████████████████▋                           | 5918/9046 [10:45<04:04, 12.81it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 66%|████████████████████████████████████████████████████▌                          | 6013/9046 [10:54<02:04, 24.32it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 67%|████████████████████████████████████████████████████▋                          | 6027/9046 [10:58<04:59, 10.07it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 67%|████████████████████████████████████████████████████▋                          | 6038/9046 [11:06<11:32,  4.35it/s]

Invalid CIF file with no structures!


 68%|█████████████████████████████████████████████████████▋                         | 6143/9046 [11:10<03:29, 13.85it/s]

Invalid CIF file with no structures!


 68%|█████████████████████████████████████████████████████▋                         | 6153/9046 [11:12<04:41, 10.29it/s]

Invalid CIF file with no structures!Invalid CIF file with no structures!



 68%|█████████████████████████████████████████████████████▊                         | 6162/9046 [11:17<09:27,  5.08it/s]

Invalid CIF file with no structures!

 68%|█████████████████████████████████████████████████████▉                         | 6178/9046 [11:17<05:58,  8.01it/s]


Invalid CIF file with no structures!


 69%|██████████████████████████████████████████████████████▏                        | 6209/9046 [11:19<03:54, 12.10it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 69%|██████████████████████████████████████████████████████▎                        | 6225/9046 [11:23<06:32,  7.18it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!

 69%|██████████████████████████████████████████████████████▌                        | 6244/9046 [11:26<06:46,  6.90it/s]




Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!


 70%|███████████████████████████████████████████████████████▏                       | 6324/9046 [11:32<05:01,  9.03it/s]

Invalid CIF file with no structures!


 70%|███████████████████████████████████████████████████████▍                       | 6349/9046 [11:33<03:58, 11.33it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 70%|███████████████████████████████████████████████████████▌                       | 6359/9046 [11:35<04:38,  9.64it/s]

Invalid CIF file with no structures!


 71%|███████████████████████████████████████████████████████▉                       | 6407/9046 [11:38<03:23, 12.98it/s]

Invalid CIF file with no structures!


 71%|████████████████████████████████████████████████████████                       | 6423/9046 [11:40<04:07, 10.60it/s]

Invalid CIF file with no structures!


 71%|████████████████████████████████████████████████████████▏                      | 6430/9046 [11:41<04:09, 10.50it/s]

Invalid CIF file with no structures!


 71%|████████████████████████████████████████████████████████▎                      | 6454/9046 [11:43<03:52, 11.16it/s]

Invalid CIF file with no structures!


 72%|████████████████████████████████████████████████████████▌                      | 6471/9046 [11:45<04:01, 10.66it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 72%|████████████████████████████████████████████████████████▌                      | 6474/9046 [11:48<06:23,  6.71it/s]

Invalid CIF file with no structures!


 72%|████████████████████████████████████████████████████████▋                      | 6497/9046 [11:51<06:00,  7.07it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 73%|█████████████████████████████████████████████████████████▎                     | 6559/9046 [11:54<03:45, 11.01it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!

 73%|█████████████████████████████████████████████████████████▎                     | 6561/9046 [11:55<04:23,  9.44it/s]




 73%|█████████████████████████████████████████████████████████▌                     | 6591/9046 [11:57<03:47, 10.79it/s]

Invalid CIF file with no structures!


 73%|█████████████████████████████████████████████████████████▌                     | 6593/9046 [11:58<04:14,  9.65it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 73%|█████████████████████████████████████████████████████████▋                     | 6600/9046 [12:01<07:09,  5.70it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 73%|█████████████████████████████████████████████████████████▉                     | 6638/9046 [12:02<03:24, 11.80it/s]

Invalid CIF file with no structures!


 74%|██████████████████████████████████████████████████████████▏                    | 6661/9046 [12:04<03:16, 12.16it/s]

Invalid CIF file with no structures!


 74%|██████████████████████████████████████████████████████████▏                    | 6663/9046 [12:05<04:02,  9.81it/s]

Invalid CIF file with no structures!


 74%|██████████████████████████████████████████████████████████▎                    | 6680/9046 [12:07<03:51, 10.21it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 74%|██████████████████████████████████████████████████████████▍                    | 6697/9046 [12:07<02:34, 15.25it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Timer expired
Invalid CIF file with no structures!


 74%|██████████████████████████████████████████████████████████▌                    | 6701/9046 [12:12<07:42,  5.07it/s]

Invalid CIF file with no structures!


 75%|██████████████████████████████████████████████████████████▉                    | 6755/9046 [12:15<03:42, 10.27it/s]

Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 75%|███████████████████████████████████████████████████████████                    | 6764/9046 [12:20<06:33,  5.80it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 76%|███████████████████████████████████████████████████████████▊                   | 6843/9046 [12:25<03:17, 11.14it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 76%|███████████████████████████████████████████████████████████▊                   | 6849/9046 [12:27<04:13,  8.67it/s]

Invalid CIF file with no structures!


 76%|████████████████████████████████████████████████████████████                   | 6877/9046 [12:27<02:47, 12.93it/s]

Invalid CIF file with no structures!


 76%|████████████████████████████████████████████████████████████▏                  | 6886/9046 [12:28<02:39, 13.53it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 76%|████████████████████████████████████████████████████████████▎                  | 6905/9046 [12:30<03:16, 10.91it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 76%|████████████████████████████████████████████████████████████▎                  | 6908/9046 [12:32<04:27,  7.99it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 77%|████████████████████████████████████████████████████████████▊                  | 6957/9046 [12:37<03:37,  9.60it/s]

Invalid CIF file with no structures!


 77%|████████████████████████████████████████████████████████████▊                  | 6962/9046 [12:37<03:28, 10.01it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 77%|████████████████████████████████████████████████████████████▉                  | 6977/9046 [12:41<05:25,  6.36it/s]

Invalid CIF file with no structures!


 77%|█████████████████████████████████████████████████████████████▏                 | 7005/9046 [12:43<03:24,  9.98it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 78%|█████████████████████████████████████████████████████████████▎                 | 7027/9046 [12:48<05:10,  6.50it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 78%|█████████████████████████████████████████████████████████████▊                 | 7071/9046 [12:53<04:08,  7.94it/s]

Invalid CIF file with no structures!


 78%|█████████████████████████████████████████████████████████████▉                 | 7099/9046 [12:55<03:39,  8.86it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 79%|██████████████████████████████████████████████████████████████▎                | 7141/9046 [12:56<02:03, 15.38it/s]

Invalid CIF file with no structures!


 79%|██████████████████████████████████████████████████████████████▍                | 7146/9046 [12:58<02:43, 11.59it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 79%|██████████████████████████████████████████████████████████████▌                | 7164/9046 [13:00<02:57, 10.61it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 80%|██████████████████████████████████████████████████████████████▊                | 7198/9046 [13:06<03:37,  8.49it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 80%|███████████████████████████████████████████████████████████████                | 7215/9046 [13:08<03:34,  8.52it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 80%|███████████████████████████████████████████████████████████████▏               | 7234/9046 [13:08<02:44, 11.05it/s]

Invalid CIF file with no structures!


 80%|███████████████████████████████████████████████████████████████▍               | 7268/9046 [13:12<03:12,  9.25it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 80%|███████████████████████████████████████████████████████████████▍               | 7271/9046 [13:12<03:17,  8.99it/s]

Invalid CIF file with no structures!


 81%|███████████████████████████████████████████████████████████████▋               | 7291/9046 [13:14<02:52, 10.19it/s]

Invalid CIF file with no structures!


 81%|███████████████████████████████████████████████████████████████▊               | 7305/9046 [13:16<03:14,  8.94it/s]

Invalid CIF file with no structures!


 81%|████████████████████████████████████████████████████████████████▏              | 7343/9046 [13:21<03:21,  8.45it/s]

Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 81%|████████████████████████████████████████████████████████████████▎              | 7360/9046 [13:24<04:09,  6.75it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 82%|████████████████████████████████████████████████████████████████▊              | 7418/9046 [13:28<02:11, 12.41it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 82%|████████████████████████████████████████████████████████████████▊              | 7421/9046 [13:31<04:00,  6.75it/s]

Invalid CIF file with no structures!


 83%|█████████████████████████████████████████████████████████████████▎             | 7475/9046 [13:35<02:44,  9.54it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 83%|█████████████████████████████████████████████████████████████████▎             | 7481/9046 [13:38<03:55,  6.65it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 83%|█████████████████████████████████████████████████████████████████▊             | 7537/9046 [13:42<02:16, 11.02it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 84%|█████████████████████████████████████████████████████████████████▉             | 7555/9046 [13:46<03:13,  7.72it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 84%|██████████████████████████████████████████████████████████████████▎            | 7596/9046 [13:51<02:50,  8.51it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 84%|██████████████████████████████████████████████████████████████████▍            | 7604/9046 [13:55<04:06,  5.85it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!

 85%|██████████████████████████████████████████████████████████████████▉            | 7668/9046 [13:58<02:20,  9.81it/s]


Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 85%|███████████████████████████████████████████████████████████████████            | 7676/9046 [14:04<03:25,  6.65it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 86%|███████████████████████████████████████████████████████████████████▊           | 7765/9046 [14:08<01:49, 11.70it/s]

Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 86%|███████████████████████████████████████████████████████████████████▉           | 7773/9046 [14:11<02:29,  8.51it/s]

Invalid CIF file with no structures!


 86%|████████████████████████████████████████████████████████████████████▏          | 7805/9046 [14:12<01:41, 12.22it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 86%|████████████████████████████████████████████████████████████████████▏          | 7808/9046 [14:13<01:56, 10.63it/s]

Invalid CIF file with no structures!


 87%|████████████████████████████████████████████████████████████████████▍          | 7830/9046 [14:14<01:32, 13.15it/s]

Invalid CIF file with no structures!


 87%|████████████████████████████████████████████████████████████████████▊          | 7886/9046 [14:21<02:37,  7.38it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 87%|█████████████████████████████████████████████████████████████████████          | 7908/9046 [14:23<02:29,  7.64it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 88%|█████████████████████████████████████████████████████████████████████▎         | 7932/9046 [14:25<02:04,  8.93it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 88%|█████████████████████████████████████████████████████████████████████▍         | 7948/9046 [14:30<03:04,  5.95it/s]

Invalid CIF file with no structures!
Timer expired
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 88%|█████████████████████████████████████████████████████████████████████▊         | 7989/9046 [14:33<02:03,  8.53it/s]

Invalid CIF file with no structures!


 89%|██████████████████████████████████████████████████████████████████████         | 8023/9046 [14:34<01:22, 12.37it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 89%|██████████████████████████████████████████████████████████████████████▌        | 8087/9046 [14:42<01:35, 10.08it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


Occupancy 2.0 exceeded tolerance.
No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!


 90%|███████████████████████████████████████████████████████████████████████▎       | 8164/9046 [14:50<01:33,  9.46it/s]

Invalid CIF file with no structures!


 90%|███████████████████████████████████████████████████████████████████████▍       | 8174/9046 [14:51<01:33,  9.37it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 90%|███████████████████████████████████████████████████████████████████████▍       | 8182/9046 [14:53<02:06,  6.83it/s]

Invalid CIF file with no structures!


 91%|███████████████████████████████████████████████████████████████████████▋       | 8204/9046 [14:56<01:47,  7.84it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 91%|████████████████████████████████████████████████████████████████████████▏      | 8261/9046 [15:01<01:20,  9.71it/s]

Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!


 92%|████████████████████████████████████████████████████████████████████████▌      | 8305/9046 [15:05<01:04, 11.42it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 92%|████████████████████████████████████████████████████████████████████████▋      | 8317/9046 [15:07<01:22,  8.83it/s]

Invalid CIF file with no structures!


 93%|█████████████████████████████████████████████████████████████████████████      | 8369/9046 [15:08<00:37, 18.25it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 93%|█████████████████████████████████████████████████████████████████████████▏     | 8375/9046 [15:10<00:55, 12.00it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


No structure parsed for section 1 in CIF.
Occupancy 2.0 exceeded tolerance.


Invalid CIF file with no structures!

 93%|█████████████████████████████████████████████████████████████████████████▎     | 8399/9046 [15:12<01:00, 10.61it/s]

Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!



 93%|█████████████████████████████████████████████████████████████████████████▍     | 8403/9046 [15:15<01:36,  6.69it/s]

Invalid CIF file with no structures!


 93%|█████████████████████████████████████████████████████████████████████████▋     | 8440/9046 [15:18<01:02,  9.66it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 94%|██████████████████████████████████████████████████████████████████████████▏    | 8489/9046 [15:25<01:09,  8.06it/s]

Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


  val = (cos_alpha * cos_beta - cos_gamma) / (sin_alpha * sin_beta)


cannot convert float NaN to integer


 94%|██████████████████████████████████████████████████████████████████████████▍    | 8519/9046 [15:27<00:54,  9.69it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 94%|██████████████████████████████████████████████████████████████████████████▍    | 8525/9046 [15:33<02:19,  3.72it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 95%|██████████████████████████████████████████████████████████████████████████▊    | 8572/9046 [15:38<01:17,  6.13it/s]

Invalid CIF file with no structures!


 96%|███████████████████████████████████████████████████████████████████████████▌   | 8647/9046 [15:43<00:38, 10.50it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!Invalid CIF file with no structures!

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 96%|████████████████████████████████████████████████████████████████████████████   | 8713/9046 [15:50<00:28, 11.75it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 96%|████████████████████████████████████████████████████████████████████████████▏  | 8718/9046 [15:54<00:43,  7.62it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 97%|████████████████████████████████████████████████████████████████████████████▌  | 8771/9046 [15:56<00:23, 11.57it/s]

Invalid CIF file with no structures!


 97%|████████████████████████████████████████████████████████████████████████████▋  | 8778/9046 [15:57<00:24, 11.15it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 97%|████████████████████████████████████████████████████████████████████████████▊  | 8790/9046 [15:59<00:28,  9.14it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 97%|████████████████████████████████████████████████████████████████████████████▊  | 8797/9046 [16:01<00:29,  8.44it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!


 98%|█████████████████████████████████████████████████████████████████████████████▍ | 8869/9046 [16:05<00:12, 14.64it/s]

Invalid CIF file with no structures!


 98%|█████████████████████████████████████████████████████████████████████████████▋ | 8900/9046 [16:08<00:11, 12.50it/s]

Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!
Invalid CIF file with no structures!


 98%|█████████████████████████████████████████████████████████████████████████████▊ | 8907/9046 [16:11<00:17,  7.99it/s]

Invalid CIF file with no structures!


100%|███████████████████████████████████████████████████████████████████████████████| 9046/9046 [16:17<00:00,  9.25it/s]


In [21]:
import cloudpickle as pickle
# with open(fname,'wb') as f:
#    pickle.dump(pred_crys, f)
# f.close()
from copy import copy
pred_crystal = copy(pred_crys)
# with open(fname, 'rb') as f:
#     pred_crystal = pickle.load(f)

In [22]:
# nsamps = 10000
nsamps = 9046
if len(pred_crystal) > nsamps:
    # random_idx = np.random.choice(len(pred_crystal), 10000)
    # pred_crys = [pred_crystal[x] for x in random_idx]
    pred_crys = pred_crystal[:nsamps]

In [23]:
len(pred_crys)

8274

In [24]:
# if len(pred_crys) > 10000:
#     random_idx = np.random.choice(len(pred_crys), 10000)
#     pred_crys = [pred_crys[x] for x in random_idx]

In [25]:
samples_path

'llamat2_9064_llamat2_1758654262.csv'

In [26]:
os.makedirs(samples_path[:-4], exist_ok=True)

In [27]:
from tqdm import tqdm

In [28]:
pwd

'/home/scai/phd/aiz218326/github/crystal-text-llm'

In [29]:
for i, cc in tqdm(enumerate(pred_crys)):
    cc.structure.to_file(f'./{samples_path[:-4]}/{i}.cif')

8274it [01:04, 128.50it/s]


In [53]:
validss = []
os.makedirs(f"{samples_path[:-4]}-valids", exist_ok=True)
for idx, cc in enumerate(pred_crys):
    if len(validss)<10:
        if cc.struct_valid & cc.comp_valid:
            
            
            cc.structure.to_file(f'./{samples_path[:-4]}-valids/{idx}.cif')
            validss.append(cc)
            print(idx, f'./{samples_path[:-4]}-valids/{idx}.cif')


2 ./llamat3_cif_adapter_cleaned-valids/2.cif
10 ./llamat3_cif_adapter_cleaned-valids/10.cif
13 ./llamat3_cif_adapter_cleaned-valids/13.cif
17 ./llamat3_cif_adapter_cleaned-valids/17.cif
18 ./llamat3_cif_adapter_cleaned-valids/18.cif
21 ./llamat3_cif_adapter_cleaned-valids/21.cif
22 ./llamat3_cif_adapter_cleaned-valids/22.cif
23 ./llamat3_cif_adapter_cleaned-valids/23.cif
24 ./llamat3_cif_adapter_cleaned-valids/24.cif
25 ./llamat3_cif_adapter_cleaned-valids/25.cif


In [30]:
samples_path

'llamat2_9064_llamat2_1758654262.csv'

In [31]:
import ase

In [32]:
ase.io.read

<function ase.io.formats.read(filename: Union[str, pathlib.PurePath, IO], index: Any = None, format: Optional[str] = None, parallel: bool = True, do_not_split_by_at_sign: bool = False, **kwargs) -> Union[ase.atoms.Atoms, List[ase.atoms.Atoms]]>

In [34]:
# ase.io.read('checkme.cif')

In [35]:
test_cov_path = 'data/basic/test.csv'

In [36]:
gt_cov_cifs = pd.read_csv(test_cov_path)["cif"]

gt_cov_crys_fn = test_cov_path.replace(".csv", "_cached.pkl")

In [37]:
if not os.path.exists(gt_cov_crys_fn):
    gt_cov_crys = p_map(cif_str_to_crystal, gt_cov_cifs)
    pickle.dump(gt_cov_crys, open(gt_cov_crys_fn, "wb"))
    print('saved')
else:
    print('loading ',gt_cov_crys_fn)
    gt_cov_crys = pickle.load(open(gt_cov_crys_fn, "rb"))

 28%|██████████████████████▎                                                        | 2561/9046 [04:37<11:11,  9.65it/s]

Timer expired




Timer expired


 33%|█████████████████████████▋                                                     | 2946/9046 [05:15<07:53, 12.89it/s]

Timer expired


 33%|█████████████████████████▉                                                     | 2966/9046 [05:18<09:54, 10.23it/s]

Timer expired


 37%|█████████████████████████████▏                                                 | 3345/9046 [05:56<09:04, 10.48it/s]

Timer expired




Timer expired


 37%|█████████████████████████████▍                                                 | 3367/9046 [06:03<16:41,  5.67it/s]

Timer expired


 38%|█████████████████████████████▉                                                 | 3433/9046 [06:04<06:26, 14.53it/s]

Timer expired




Timer expired




Timer expired




Timer expired


 39%|██████████████████████████████▉                                                | 3544/9046 [06:21<11:44,  7.81it/s]

Timer expired
Timer expired


 39%|██████████████████████████████▉                                                | 3546/9046 [06:24<19:42,  4.65it/s]

Timer expired




Timer expired


 41%|████████████████████████████████▏                                              | 3687/9046 [06:40<14:23,  6.21it/s]

Timer expired


 42%|█████████████████████████████████▎                                             | 3820/9046 [06:51<13:47,  6.31it/s]

Timer expired


 42%|█████████████████████████████████▍                                             | 3824/9046 [06:51<13:46,  6.32it/s]

Timer expired


 42%|█████████████████████████████████▍                                             | 3835/9046 [06:55<20:20,  4.27it/s]

Timer expired




Timer expired


 43%|██████████████████████████████████▎                                            | 3931/9046 [07:06<11:37,  7.34it/s]

Timer expired


 44%|██████████████████████████████████▌                                            | 3955/9046 [07:07<08:45,  9.69it/s]

Timer expired


 44%|██████████████████████████████████▊                                            | 3985/9046 [07:09<07:05, 11.89it/s]

Timer expired


 44%|██████████████████████████████████▉                                            | 3997/9046 [07:11<08:30,  9.90it/s]

Timer expired


 44%|██████████████████████████████████▉                                            | 4006/9046 [07:13<10:02,  8.37it/s]

Timer expired


 45%|███████████████████████████████████▏                                           | 4029/9046 [07:14<08:01, 10.41it/s]

Timer expired
Timer expired

 45%|███████████████████████████████████▍                                           | 4054/9046 [07:15<06:40, 12.46it/s]




 45%|███████████████████████████████████▋                                           | 4093/9046 [07:18<05:41, 14.52it/s]

Timer expired


 46%|███████████████████████████████████▉                                           | 4116/9046 [07:23<11:41,  7.03it/s]

Timer expired




Timer expired


 47%|████████████████████████████████████▉                                          | 4227/9046 [07:37<09:54,  8.11it/s]

Timer expired




Timer expired




Timer expired




Timer expired


 87%|█████████████████████████████████████████████████████████████████████          | 7908/9046 [14:05<02:09,  8.76it/s]

Timer expired




Timer expired


 87%|█████████████████████████████████████████████████████████████████████          | 7913/9046 [14:08<03:51,  4.90it/s]

Timer expired
Timer expired


 88%|█████████████████████████████████████████████████████████████████████▊         | 7987/9046 [14:11<01:10, 15.08it/s]

Timer expired




Timer expired
Timer expired


 90%|██████████████████████████████████████████████████████████████████████▋        | 8098/9046 [14:24<01:24, 11.21it/s]

Timer expired




Timer expired




Timer expired


 91%|███████████████████████████████████████████████████████████████████████▋       | 8204/9046 [14:36<01:29,  9.41it/s]

Timer expired




Timer expired


 94%|██████████████████████████████████████████████████████████████████████████▏    | 8502/9046 [15:06<01:13,  7.41it/s]

Timer expired




Timer expired


 94%|██████████████████████████████████████████████████████████████████████████▌    | 8533/9046 [15:11<01:12,  7.10it/s]

Timer expired
Timer expired
Timer expired
Timer expired
Timer expired


 95%|██████████████████████████████████████████████████████████████████████████▉    | 8587/9046 [15:15<00:43, 10.55it/s]

Timer expired


 96%|███████████████████████████████████████████████████████████████████████████▊   | 8688/9046 [15:28<00:34, 10.41it/s]

Timer expired


 96%|████████████████████████████████████████████████████████████████████████████▏  | 8727/9046 [15:32<00:31,  9.99it/s]

Timer expired
Timer expired


 97%|████████████████████████████████████████████████████████████████████████████▎  | 8732/9046 [15:35<01:01,  5.12it/s]

Timer expired


 98%|█████████████████████████████████████████████████████████████████████████████▋ | 8892/9046 [15:51<00:19,  7.91it/s]

Timer expired


 99%|█████████████████████████████████████████████████████████████████████████████▉ | 8928/9046 [15:54<00:12,  9.70it/s]

Timer expired


 99%|██████████████████████████████████████████████████████████████████████████████▍| 8983/9046 [15:58<00:05, 11.24it/s]

Timer expired


100%|███████████████████████████████████████████████████████████████████████████████| 9046/9046 [16:01<00:00,  9.41it/s]


saved


In [38]:
test_novelty_path = 'data/basic/train.csv'
gt_novelty_cifs = pd.read_csv(test_novelty_path)["cif"]

In [44]:
gt_novelty_cifs = pd.read_csv(test_novelty_path)["cif"]

gt_novelty_crys_fn = test_novelty_path.replace(".csv", "_cached.pkl")

In [48]:
if not os.path.exists(gt_novelty_crys_fn):
    gt_novelty_crys = p_map(cif_str_to_crystal, gt_novelty_cifs)
    pickle.dump(gt_novelty_crys, open(gt_novelty_crys_fn, "wb"))
    print('saved')
else:
    print('loading')
    gt_novelty_crys = pickle.load(open(gt_novelty_crys_fn, "rb"))
    print('loaded from',gt_novelty_crys_fn)

loading
loaded from data/basic/train_cached.pkl


In [49]:
type(gt_novelty_crys)

list

In [50]:
valid_crys = [x for x in pred_crys if x.valid]

print("Number of pred crystals: ", len(pred_crys))
print("Number of valid crystals: ", len(valid_crys))
print("ratio", len(valid_crys)/len(pred_crys))

Number of pred crystals:  8274
Number of valid crystals:  7252
ratio 0.8764805414551607


In [65]:
994/1000, 868/994 #llamat-2-cif-congen

(0.994, 0.8732394366197183)

In [66]:
491/1000, 209/491 #llamat-3-cif-congen

(0.491, 0.4256619144602851)

In [25]:
9958/10000, 8650/10000, 8650/9958

(0.9958, 0.865, 0.8686483229564169)

In [51]:
metrics = CDVAEGenEval(
        pred_crys, 
        gt_cov_crys,
        gt_novelty_crys,
        n_samples=len(valid_crys), 
        eval_model_name='mp20'
    )#.get_metrics()

In [52]:
metrics.get_metrics()

{'comp_valid': 0.8954556441866086,
 'struct_valid': 0.9772782209330433,
 'valid': 0.8764805414551607,
 'comp_div': 15.520819779408347,
 'struct_div': 0.9002762982343132,
 'wdist_density': 0.17426589733296383,
 'wdist_num_elems': 0.01680809819371447,
 'cov_recall': 0.9938837920489296,
 'cov_precision': 0.9951737451737451,
 'amsd_recall': 0.11372955840352154,
 'amsd_precision': 0.133831247137884,
 'amcd_recall': 0.5117096469684149,
 'amcd_precision': 0.20458088840637634,
 'struc_novelty': 0.5124103695532267,
 'comp_novelty': 0.34624931053502483,
 'novelty': 0.6840871483728627}

In [60]:
metrics = {'comp_valid': 0.6505,
 'struct_valid': 0.7281,
 'valid': 0.5199,
 'comp_div': 15.049131375700867,
 'struct_div': 0.646087535174593,
 'wdist_density': 10.404570939360589,
 'wdist_num_elems': 0.12935201105231345,
 'cov_recall': 0.9593515903229768,
 'cov_precision': 0.9824966339680707,
 'amsd_recall': 0.19431147943564403,
 'amsd_precision': 0.21623487953435078,
 'amcd_recall': 4.174398269649543,
 'amcd_precision': 3.7243309433717013,
 'struc_novelty': 0.8726678207347567,
 'comp_novelty': 0.7357184073860358,
 'novelty': 0.9224850932871707}

In [61]:
metrics_llamat3 = {'comp_valid': 0.6028574617702869,
 'struct_valid': 0.6742940060274584,
 'valid': 0.45563120884027236,
 'comp_div': 14.813907952342776,
 'struct_div': 0.5931430120303295,
 'wdist_density': 12.35512616311076,
 'wdist_num_elems': 0.2607242443028679,
 'cov_recall': 0.9247206189365099,
 'cov_precision': 0.9936305732484076,
 'amsd_recall': 0.21367498510494704,
 'amsd_precision': 0.22137167569311592,
 'amcd_recall': 4.792127000566393,
 'amcd_precision': 3.9909140918141657,
 'struc_novelty': 0.9152376286134248,
 'comp_novelty': 0.7893189612934836,
 'novelty': 0.9468397844194023}

In [62]:
metrics_llamat2 = {'comp_valid': 0.8783,
 'struct_valid': 0.9951,
 'valid': 0.8755,
 'comp_div': 15.537416884939825,
 'struct_div': 0.9754777500902315,
 'wdist_density': 0.6225818111254449,
 'wdist_num_elems': 0.022798721630026456,
 'cov_recall': 0.9863686601989439,
 'cov_precision': 0.99565962307253,
 'amsd_recall': 0.13205219925068698,
 'amsd_precision': 0.11404735865757508,
 'amcd_recall': 3.237658132946266,
 'amcd_precision': 2.5953085459418874,
 'struc_novelty': 0.39771559109080523,
 'comp_novelty': 0.36070816676185036,
 'novelty': 0.5888063963449457}

In [75]:
metrics_old = {'comp_valid': 0.8726651938140189,
 'struct_valid': 0.9945772243422374,
 'valid': 0.8686483229564169,
 'comp_div': 15.435847558071819,
 'struct_div': 0.997068275992884,
 'wdist_density': 0.6633393952088423,
 'wdist_num_elems': 0.07941215035802868,
 'cov_recall': 0.9878423185558148,
 'cov_precision': 0.9922543352601156,
 'amsd_recall': 0.12996201483321324,
 'amsd_precision': 0.11859335672718942,
 'amcd_recall': 3.002275575248583,
 'amcd_precision': 2.6840815605625474,
 'struc_novelty': 0.41063583815028903,
 'comp_novelty': 0.41132947976878614,
 'novelty': 0.6338728323699422}
for key in metrics.keys():
    metrics[key] = round(metrics[key], 3);
    metrics_llamat2[key] = round(metrics_llamat2[key], 3);
    metrics_llamat3[key] = round(metrics_llamat3[key], 3);


In [63]:
metrics_llamat2_congen = {'comp_valid': 0.65,
 'struct_valid': 0.728,
 'valid': 0.52,
 'comp_div': 15.049,
 'struct_div': 0.646,
 'wdist_density': 10.405,
 'wdist_num_elems': 0.129,
 'cov_recall': 0.959,
 'cov_precision': 0.982,
 'amsd_recall': 0.194,
 'amsd_precision': 0.216,
 'amcd_recall': 4.174,
 'amcd_precision': 3.724,
 'struc_novelty': 0.873,
 'comp_novelty': 0.736,
 'novelty': 0.922,
 'method': 'llamat2-conditional'}

# {'comp_valid': 0.8782696177062375,
#  'struct_valid': 0.9929577464788732,
#  'valid': 0.8732394366197183,
#  'comp_div': 15.571199340292052,
#  'struct_div': 0.9869229138181784,
#  'wdist_density': 0.6994115001718545,
#  'wdist_num_elems': 0.07296959421940691,
#  'cov_recall': 0.9292643988701953,
#  'cov_precision': 0.9976958525345622,
#  'amsd_recall': 0.1893616040611489,
#  'amsd_precision': 0.11334040556446105,
#  'amcd_recall': 5.15971819523058,
#  'amcd_precision': 2.630082032588238,
#  'struc_novelty': 0.402073732718894,
#  'comp_novelty': 0.3444700460829493,
#  'novelty': 0.5841013824884793}

In [64]:
os.listdir('./relaxed/relaxed_structures_llamat2_m3gnet_str/')[:5]

['relaxed_2250.cif',
 'relaxed_569.cif',
 'relaxed_6132.cif',
 'relaxed_3167.cif',
 'relaxed_3702.cif']

In [65]:
ffff = ase.io.read('./relaxed/relaxed_structures_llamat2_m3gnet_str/relaxed_2250.cif')

In [66]:
ffff.get_chemical_formula()

'Be4GaRu'

In [67]:
metrics_llamat3_congen = {'comp_valid': 0.6162117452440034,
 'struct_valid': 0.8112489660876757,
 'valid': 0.5062034739454094,
 'comp_div': 15.28758984347853,
 'struct_div': 0.4821768826321802,
 'wdist_density': 4.394579753143331,
 'wdist_num_elems': 0.2793940628797764,
 'cov_recall': 0.9360244648318042,
 'cov_precision': 0.996078431372549,
 'amsd_recall': 0.1957761116921352,
 'amsd_precision': 0.20102959815274782,
 'amcd_recall': 3.761770727043405,
 'amcd_precision': 2.6059243799555727,
 'struc_novelty': 0.9274509803921569,
 'comp_novelty': 0.6186274509803922,
 'novelty': 0.953921568627451,
  'method': 'llamat3-conditional'}


# {'comp_valid': 0.5621181262729125,
#  'struct_valid': 0.6659877800407332,
#  'valid': 0.4256619144602851,
#  'comp_div': 15.04533186738125,
#  'struct_div': 0.5125635516680817,
#  'wdist_density': 11.791204907329718,
#  'wdist_num_elems': 0.31365524639085507,
#  'cov_recall': 0.5012894510622621,
#  'cov_precision': 1.0,
#  'amsd_recall': 0.39510580924426775,
#  'amsd_precision': 0.2147817139006082,
#  'amcd_recall': 7.651830772483437,
#  'amcd_precision': 4.144427428076488,
#  'struc_novelty': 0.9521531100478469,
#  'comp_novelty': 0.8277511961722488,
#  'novelty': 0.9665071770334929}

In [68]:
# metriccs = metrics = {k: v for k,v in metrics.items()}
# metrics['method'] = '17k_14k_cif'
metrics['method'] = 'llamat3_1400_cif'
metrics_llamat3['method'] = 'llamat3_cif'
metrics_llamat2['method'] = 'llamat2_cif';
metrics_llamat3_congen['method'] = 'llamat3_cif_congen'
metrics_llamat2_congen['method'] = 'llamat2_cif_congen'

In [70]:
# metrics

In [72]:
results_df_fn = 'generative_model_results_17k_14k.csv'
results_df = pd.read_csv(results_df_fn)

results_df = pd.concat([
    results_df,
    pd.DataFrame([metrics_llamat2]),
    pd.DataFrame([metrics_llamat3]),
    pd.DataFrame([metrics]),
    pd.DataFrame([metrics_llamat2_congen]),
    pd.DataFrame([metrics_llamat3_congen])
], ignore_index = True)



In [73]:
results_df

Unnamed: 0,method,struct_valid,comp_valid,cov_recall,cov_precision,wdist_density,wdist_num_elems,valid,comp_div,struct_div,amsd_recall,amsd_precision,amcd_recall,amcd_precision,struc_novelty,comp_novelty,novelty
0,Train,1.0,0.9113,1.0,1.0,0.051,0.016,,,,,,,,,,
1,FTCP,0.0155,0.4837,0.047,0.0009,23.71,0.736,,,,,,,,,,
2,GSchNet,0.9965,0.7596,0.3833,0.9957,3.034,0.641,,,,,,,,,,
3,PGSchNet,0.7751,0.764,0.4193,0.9974,4.04,0.623,,,,,,,,,,
4,CDVAE,1.0,0.867,0.9915,0.9949,0.688,1.432,,,,,,,,,,
5,LM-CH,0.8481,0.8355,0.9925,0.9789,0.864,0.132,,,,,,,,,,
6,LM-AC,0.9581,0.8887,0.996,0.9855,0.696,0.092,,,,,,,,,,
7,llamat2_cif,0.9951,0.8783,0.986369,0.99566,0.622582,0.022799,0.8755,15.537417,0.975478,0.132052,0.114047,3.237658,2.595309,0.397716,0.360708,0.588806
8,llamat3_cif,0.674294,0.602857,0.924721,0.993631,12.355126,0.260724,0.455631,14.813908,0.593143,0.213675,0.221372,4.792127,3.990914,0.915238,0.789319,0.94684
9,llamat3_1400_cif,0.7281,0.6505,0.959352,0.982497,10.404571,0.129352,0.5199,15.049131,0.646088,0.194311,0.216235,4.174398,3.724331,0.872668,0.735718,0.922485


In [36]:
results_df

Unnamed: 0,method,struct_valid,comp_valid,cov_recall,cov_precision,wdist_density,wdist_num_elems,valid,comp_div,struct_div,amsd_recall,amsd_precision,amcd_recall,amcd_precision,struc_novelty,comp_novelty,novelty
0,Train,1.0,0.9113,1.0,1.0,0.051,0.016,,,,,,,,,,
1,FTCP,0.0155,0.4837,0.047,0.0009,23.71,0.736,,,,,,,,,,
2,GSchNet,0.9965,0.7596,0.3833,0.9957,3.034,0.641,,,,,,,,,,
3,PGSchNet,0.7751,0.764,0.4193,0.9974,4.04,0.623,,,,,,,,,,
4,CDVAE,1.0,0.867,0.9915,0.9949,0.688,1.432,,,,,,,,,,
5,LM-CH,0.8481,0.8355,0.9925,0.9789,0.864,0.132,,,,,,,,,,
6,LM-AC,0.9581,0.8887,0.996,0.9855,0.696,0.092,,,,,,,,,,
7,llamat2_cif,0.995,0.878,0.986,0.996,0.623,0.023,0.875,15.537,0.975,0.132,0.114,3.238,2.595,0.398,0.361,0.589
8,llamat3_cif,0.674,0.603,0.925,0.994,12.355,0.261,0.456,14.814,0.593,0.214,0.221,4.792,3.991,0.915,0.789,0.947
9,llamat3_1400_cif,0.728,0.65,0.959,0.982,10.405,0.129,0.52,15.049,0.646,0.194,0.216,4.174,3.724,0.873,0.736,0.922


In [85]:
results_df_fn = 'final_results_with_congen.csv'
results_df.to_csv(results_df_fn, index=False)
# results_df_fn = 'llamat2_cif_results.csv'

In [38]:
results_df

Unnamed: 0,method,struct_valid,comp_valid,cov_recall,cov_precision,wdist_density,wdist_num_elems,valid,comp_div,struct_div,amsd_recall,amsd_precision,amcd_recall,amcd_precision,struc_novelty,comp_novelty,novelty
0,Train,1.0,0.9113,1.0,1.0,0.051,0.016,,,,,,,,,,
1,FTCP,0.0155,0.4837,0.047,0.0009,23.71,0.736,,,,,,,,,,
2,GSchNet,0.9965,0.7596,0.3833,0.9957,3.034,0.641,,,,,,,,,,
3,PGSchNet,0.7751,0.764,0.4193,0.9974,4.04,0.623,,,,,,,,,,
4,CDVAE,1.0,0.867,0.9915,0.9949,0.688,1.432,,,,,,,,,,
5,LM-CH,0.8481,0.8355,0.9925,0.9789,0.864,0.132,,,,,,,,,,
6,LM-AC,0.9581,0.8887,0.996,0.9855,0.696,0.092,,,,,,,,,,
7,llamat2_cif,0.995,0.878,0.986,0.996,0.623,0.023,0.875,15.537,0.975,0.132,0.114,3.238,2.595,0.398,0.361,0.589
8,llamat3_cif,0.674,0.603,0.925,0.994,12.355,0.261,0.456,14.814,0.593,0.214,0.221,4.792,3.991,0.915,0.789,0.947
9,llamat3_1400_cif,0.728,0.65,0.959,0.982,10.405,0.129,0.52,15.049,0.646,0.194,0.216,4.174,3.724,0.873,0.736,0.922


In [86]:
dfcgenl3 = pd.read_csv('llamat3_cif_adapter_cleaned.csv')
dfcgenl2 = pd.read_csv('llamat2_cif_adapter_cleaned.csv')

In [88]:
dfgentest = pd.read_csv("data/with_tags/test.csv").drop_duplicates().rename(columns={"spacegroup.number": "spacegroup_number"})

In [89]:
dfgentest.head()

Unnamed: 0.1,Unnamed: 0,material_id,formation_energy_per_atom,band_gap,pretty_formula,e_above_hull,elements,cif,spacegroup_number
0,6000,mp-10009,-0.575092,0.898,GaTe,0.0,"['Ga', 'Te']",# generated using pymatgen\ndata_GaTe\n_symmet...,194
1,37702,mp-1218989,-0.942488,0.0,SmThCN,0.044109,"['C', 'N', 'Sm', 'Th']",# generated using pymatgen\ndata_SmThCN\n_symm...,160
2,42245,mp-1225695,0.064863,0.0,CuNi,0.064863,"['Cu', 'Ni']",# generated using pymatgen\ndata_CuNi\n_symmet...,65
3,780,mp-1220884,-1.456116,0.0,NaTiVS4,0.0,"['Na', 'S', 'Ti', 'V']",# generated using pymatgen\ndata_NaTiVS4\n_sym...,8
4,35749,mp-1224266,0.024139,0.0,Ho3TmMn8,0.036496,"['Ho', 'Mn', 'Tm']",# generated using pymatgen\ndata_Ho3TmMn8\n_sy...,8


In [93]:
ase.io.read('./llamat2_cif_adapter_cleaned/1.cif')

Atoms(symbols='Pr2Ti2O6', pbc=True, cell=[[8.1, 0.0, 0.0], [6.113147599804454, 5.314078134823108, 0.0], [-7.014805770653953, -2.622728407209111, 3.0860647598549114]], spacegroup_kinds=...)

In [94]:
dfgentest[dfgentest.pretty_formula=='Pr2Ti2O6']

Unnamed: 0.1,Unnamed: 0,material_id,formation_energy_per_atom,band_gap,pretty_formula,e_above_hull,elements,cif,spacegroup_number


In [None]:

# def main(args):
    
    

    

    
    
    
    
   
    

    

    metrics = CDVAEGenEval(
        pred_crys, 
        gt_cov_crys,
        gt_novelty_crys,
        n_samples=len(valid_crys), 
        eval_model_name='mp20'
    ).get_metrics()

    metrics = {k: v for k,v in metrics.items()}
    metrics['method'] = args.model_name

    results_df = pd.read_csv(results_df_fn)

    results_df = pd.concat([
        results_df,
        pd.DataFrame([metrics])
    ])

    results_df.to_csv(results_df_fn, index=False)

    print(results_df)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_name", type=str, required=True)
    parser.add_argument("--test_cov_path", type=str, default='data/basic/test.csv')
    parser.add_argument("--test_novelty_path", type=str, default='data/basic/train.csv')
    parser.add_argument("--samples_path", type=str, required=True)
    args = parser.parse_args()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        main(args)

In [18]:
import pandas as pd

filename="/home/cse/btech/cs1200389/MatLlama/MatLLaMA/src/infer_logs/full_ex_results.csv"
df = pd.read_csv(filename)

for i in range(len(df)): 
    epochs = '1' if '1epochs' in df.iloc[i,0] else '2'
    name = 'LLaMa3' if 'llama3' in df.iloc[i,0] else 'LLama2'# if 'llama2' in df.iloc[i,0]
    name = 'LLaMaT3' if 'llamat3' in df.iloc[i,0] else 'LLaMaT2' if 'llamat2' in df.iloc[i,0] else name
    name += 'chat' if 'chat' in df.iloc[i,0] and 'nochat' not in df.iloc[i,0] else ""
    name = name + "_" + epochs
    df.iloc[i,0] = name

df.to_csv(filename.split('.')[0] + '_namefix.csv', index = False)
print(df)


        file_name  mat_sci_micro_f1  mat_sci_macro_f1  english_micro_f1  \
0    LLaMa3chat_1            77.344            69.767            86.118   
1        LLaMa3_1            85.895            79.106            86.233   
2       LLaMaT3_1            84.584            77.141            84.260   
3    LLama2chat_1            86.601            80.700            83.900   
4        LLama2_1            87.103            81.217            83.830   
5   LLaMaT2chat_1            88.976            83.565            84.542   
6       LLaMaT2_1            88.152            82.324            82.360   
7   LLaMaT3chat_1            89.059            83.183            87.738   
8    LLaMa3chat_2            84.715            78.406            87.145   
9       LLaMaT2_2            91.614            86.414            85.245   
10       LLaMa3_2            88.810            82.742            87.602   
11      LLaMaT3_2            89.611            83.883            86.227   
12       LLama2_2        