# Prepare the data for running the inferences in NOMAD

In [7]:
import pandas as pd
from tf_chpvk_pv.config import PROCESSED_DATA_DIR

# Load the stable compositions, name the unnamed first column 'composition',
# and attach the two constant columns written to the CrystaLLM input file.
df = (
    pd.read_csv(PROCESSED_DATA_DIR / 'stable_compositions.csv')
    .rename(columns={'Unnamed: 0': 'composition'})
    .assign(num_formula_units_per_cell=4, space_group='')
)

# Keep only the three columns that go into the exported file.
export_columns = ['composition', 'num_formula_units_per_cell', 'space_group']
final_df = df.loc[:, export_columns]

# Write the table without the pandas index.
final_df.to_csv(PROCESSED_DATA_DIR / 'stable_compositions_for_CrystaLLM.csv', index=False)

The file `stable_compositions_for_CrystaLLM.csv` can then be uploaded to [NOMAD](https://nomad-lab.eu/prod/v1/oasis/gui/user/uploads/upload/id/rAGhkvDaTgyQPb_k3NcFbg).

# Analyze the inferences made in NOMAD

In [9]:
import json
import os
from tf_chpvk_pv.config import CRYSTALLM_DATA_DIR

# Directory containing the JSON files to analyze; replace with your own path if needed.
directory_path = CRYSTALLM_DATA_DIR / 'json_files'

# Parsed content of every readable JSON file. Each key is the file name
# repeated twice and joined by '_' (kept as-is so later lookups stay valid).
json_data = {}

# os.listdir returns entries in arbitrary order; sorting makes the order of
# json_data (and of any table built from it) identical on every run.
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.json'):
        continue
    filepath = os.path.join(directory_path, filename)
    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        try:
            data = json.load(f)
        except json.JSONDecodeError:
            # Report the unreadable file and move on to the next one.
            print(f"Error decoding JSON from file: {filename}")
            continue
    json_data[filename + '_' + filename] = data

In [13]:
import pandas as pd
from tf_chpvk_pv.config import CRYSTALLM_DATA_DIR

# Build one row per loaded JSON entry with the material formula, the atoms
# section (as a string) and the cell parameters, then save the table as CSV.
# Entries without the expected structure are reported and their row stays empty.
results = pd.DataFrame(index=range(len(json_data)),
                       columns=['material', 'atoms',
                                'a', 'b', 'c', 'alpha', 'beta', 'gamma',
                                'volume', 'atomic_density', 'mass_density'])
for idx, key in enumerate(json_data):
  entry = json_data[key]
  try:
    # Only the first topology item of each entry is used.
    topology = entry['archive']['results']['material']['topology'][0]
    chemical_formula_iupac = topology['chemical_formula_iupac']
  except (KeyError, IndexError, TypeError):
    # Catch only lookup failures (missing key, empty list, wrong container type)
    # so that real errors and keyboard interrupts are not silently swallowed.
    print(key)
    # Safe lookup: 'archive' itself may be the missing piece.
    print(entry.get('archive') if isinstance(entry, dict) else entry)
    continue
  atoms_data = topology['atoms']
  cell = topology['cell']
  results.loc[idx, 'material'] = chemical_formula_iupac
  # Store the 'atoms' section as its string representation
  results.loc[idx, 'atoms'] = str(atoms_data)
  for col in cell:
    value = cell[col]
    if col in ['a', 'b', 'c']:
      results.loc[idx, col] = value * 10**10  # angstroms
    elif col in ['volume']:
      results.loc[idx, col] = value * 10**30  # angstroms cubed
    elif col in ['atomic_density']:
      results.loc[idx, col] = value * 10**-30  # 1 / angstroms cubed
    elif col in ['alpha', 'beta', 'gamma']:
      results.loc[idx, col] = value * 180 / 3.141592653589793  # degrees
    elif col in ['mass_density']:
      results.loc[idx, col] = value / 1000  # g/cm3
    else:
      # Any other cell field is copied unchanged.
      results.loc[idx, col] = value


results.to_csv(CRYSTALLM_DATA_DIR / 'results CrystaLLM.csv')

In [None]:
# Show the first 10 rows; without the call parentheses the cell would only
# display the bound method instead of a preview of the table.
results.head(10)

Unnamed: 0,material,atoms,a,b,c,alpha,beta,gamma,volume,atomic_density,mass_density
0,LaScS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.3204,9.622,6.564,90.0,90.0,90.0,462.347738,0.043257,4.023332
1,BaSnS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.4207,11.0208,6.8407,90.0,90.0,90.0,559.446473,0.03575,4.181955
2,ZrCdS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.4207,9.602,6.6649,90.0,90.0,90.0,474.897861,0.042114,4.193571
3,TmEuS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.3207,9.822,6.5149,90.0,90.0,90.0,468.446818,0.042694,5.914008
4,UInS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.3204,9.722,6.3949,90.0,90.0,90.0,455.118183,0.043945,6.553477
5,TbSmSe3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.4206,10.0808,7.1412,90.0,90.0,90.0,534.20164,0.037439,6.790911
6,UEuSe3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.8206,10.0218,7.1417,90.0,90.0,90.0,559.741372,0.035731,7.438771
7,SmScS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.1204,9.502,6.464,90.0,90.0,90.0,437.341576,0.045731,4.427343
8,LuEuS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.3207,9.822,6.5149,90.0,90.0,90.0,468.446818,0.042694,5.999545
9,CeInS3,{'m_def': 'nomad.datamodel.metainfo.system.Ato...,7.4202,9.902,6.6641,90.0,90.0,90.0,489.643551,0.040846,4.763166
