# Prepare the data for running the inferences in NOMAD

In [1]:
import pandas as pd
from tf_chpvk_pv.config import PROCESSED_DATA_DIR

# Load the stable compositions and attach the extra columns written to the
# CrystaLLM input file: a fixed number of formula units per cell (4) and an
# empty space-group field.
df = (
    pd.read_csv(PROCESSED_DATA_DIR / 'stable_compositions.csv')
    # The unnamed first CSV column is used as the composition column.
    .rename(columns={'Unnamed: 0': 'composition'})
    .assign(num_formula_units_per_cell=4, space_group='')
)

# Keep only the three columns that go into the upload file, in this order.
crystallm_columns = ['composition', 'num_formula_units_per_cell', 'space_group']
final_df = df[crystallm_columns]

# Write without the pandas index so the file contains just those columns.
final_df.to_csv(
    PROCESSED_DATA_DIR / 'stable_compositions_for_CrystaLLM.csv',
    index=False,
)

[32m2025-12-19 21:18:07.960[0m | [1mINFO    [0m | [36mtf_chpvk_pv.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /home/dagar/TF-ChPVK-PV[0m


The file `stable_compositions_for_CrystaLLM.csv` can then be uploaded to [NOMAD](https://nomad-lab.eu/prod/v1/oasis/gui/user/uploads/upload/id/rAGhkvDaTgyQPb_k3NcFbg).

# Analyze the inferences made in NOMAD

In [4]:
import json
import os
from tf_chpvk_pv.config import CRYSTALLM_DATA_DIR

# Directory containing the JSON files to analyse
# (replace with the path to your own directory if needed).
directory_path = CRYSTALLM_DATA_DIR / 'json_files'

# Parsed JSON documents, one entry per successfully decoded file.
json_data = {}

# os.listdir returns entries in arbitrary order; sorting makes the order of
# json_data (and of anything built by iterating over it) reproducible.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.json'):
        continue

    filepath = os.path.join(directory_path, filename)
    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # default, which differs between operating systems.
    with open(filepath, 'r', encoding='utf-8') as f:
        try:
            data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Report the unreadable file and keep going with the others.
            print(f"Error decoding JSON from file: {filename}")
            continue

    # NOTE(review): the key repeats the file name on both sides of the
    # underscore; kept unchanged because other cells may rely on this format.
    json_data[filename + '_' + filename] = data

In [5]:
import pandas as pd
from tf_chpvk_pv.config import CRYSTALLM_DATA_DIR

import math

# Unit conversions applied to the cell quantities before they are tabulated.
# Target units: a/b/c in Angstrom, volume in Angstrom^3, atomic_density in
# 1/Angstrom^3, angles in degrees, mass_density in g/cm^3.
# NOTE(review): the factors imply the inputs are in SI units (m, m^3, 1/m^3,
# rad, kg/m^3) -- confirm against the NOMAD archive schema.
cell_converters = {
    'a': lambda value: value * 10**10,
    'b': lambda value: value * 10**10,
    'c': lambda value: value * 10**10,
    'volume': lambda value: value * 10**30,
    'atomic_density': lambda value: value * 10**-30,
    'alpha': lambda value: value * 180 / math.pi,
    'beta': lambda value: value * 180 / math.pi,
    'gamma': lambda value: value * 180 / math.pi,
    'mass_density': lambda value: value / 1000,
}

# One pre-allocated row per loaded JSON document. Rows of documents that are
# skipped below are left empty (NaN).
results = pd.DataFrame(index=range(len(json_data)),
                       columns=['material', 'atoms',
                                'a', 'b', 'c', 'alpha', 'beta', 'gamma',
                                'volume', 'atomic_density', 'mass_density'])

for idx, (key, entry) in enumerate(json_data.items()):
    try:
        # Only the first topology entry of each document is used.
        topology = entry['archive']['results']['material']['topology'][0]
        chemical_formula_iupac = topology['chemical_formula_iupac']
    except (KeyError, IndexError, TypeError):
        # Only lookup failures are handled here, so KeyboardInterrupt and
        # unrelated errors still propagate. The 'archive' payload is fetched
        # defensively because a missing 'archive' key is one of the ways the
        # lookup above can fail.
        print(key)
        print(entry.get('archive') if isinstance(entry, dict) else entry)
        continue

    results.loc[idx, 'material'] = chemical_formula_iupac
    # Store the 'atoms' dictionary as its string representation.
    results.loc[idx, 'atoms'] = str(topology['atoms'])

    for col, value in topology['cell'].items():
        convert = cell_converters.get(col)
        # Quantities without a registered conversion are stored unchanged.
        results.loc[idx, col] = value if convert is None else convert(value)


results.to_csv(CRYSTALLM_DATA_DIR / 'results CrystaLLM.csv')

In [6]:
# Preview the first rows of the results table as a quick sanity check.
results.head()

Unnamed: 0,material,atoms,a,b,c,alpha,beta,gamma,volume,atomic_density,mass_density
0,TbZnS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.1784,8.7559,6.3013,90.0,90.0,90.0,396.05783,0.050498,5.375006
1,SrUSe3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,8.0312,10.4242,7.0155,90.0,90.0,90.0,587.329487,0.034052,6.361685
2,UCeSe3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,8.3754,10.4925,6.2499,90.0,90.0,90.0,549.23424,0.036414,7.437795
3,AlSnSe3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,6.9309,10.3087,7.2282,90.0,90.0,90.0,516.444545,0.038726,4.920373
4,TbEuS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.6155,9.2152,6.4995,90.0,90.0,90.0,456.124222,0.043848,5.92803
