## Convert old IMAS at NERSC to new IMAS 
June 21, 2024 

Venkitesh Ayyar

In [1]:
import numpy as np
import datetime
import os

import pymongo
import pprint
from bson.objectid import ObjectId

import yaml
import json
import xmltodict

In [2]:
# ## Convert json to yaml for easy viewing
# with open('gyrokinetics_new.json', 'r') as j:
#     GK_dict = json.loads(j.read())
    
# with open('gyrokinetics_new.yaml','w') as f:
#     yaml.dump(GK_dict,f,default_flow_style=False)

## Load both yaml files

In [4]:
fname='gyro_imas_old.yaml'
with open(fname,'r') as f:
    gk_dict = yaml.safe_load(f)
    
fname='gyro_imas_new.yaml'
with open(fname,'r') as f:
    GK_dict = yaml.safe_load(f)

In [5]:
print(gk_dict.keys(),'\n',GK_dict.keys())

dict_keys(['code', 'collisions', 'flux_surface', 'fluxes_integrated_norm', 'ids_properties', 'model', 'species', 'species_all', 'wavevector']) 
 dict_keys(['code', 'collisions', 'flux_surface', 'ids_properties', 'linear', 'max_repr_length', 'model', 'non_linear', 'normalizing_quantities', 'species', 'species_all', 'time', 'version'])


## Print dictionary hierarchy

In [7]:
def f_print_dict_struct(dict1,tab_count=0):
    '''
    Module to print dictionary hierarchy
    '''
    
    if isinstance(dict1,dict):
        for i,j in dict1.items():
            if not isinstance(j,dict):
                print('\t'*tab_count, i,'\t',type(j))
                if (isinstance(j,list) and len(j)>0):
                    if isinstance(j[0],dict):
                        print('\t'*tab_count,i,'[0]:')                    
                        f_print_dict_struct(j[0],tab_count+1)
            else: 
                # print('\t'*tab_count,"------------")
                print('\t'*tab_count,i,':')
                f_print_dict_struct(j, tab_count+1)

                
# f_print_dict_struct(gk_dict,0)

# f_print_dict_struct(GK_dict,0)
    

In [None]:
gk_dict['code']['parameters']

In [None]:
GK_dict['code']['parameters']

In [None]:
xmltodict.parse(GK_dict['code']['parameters'])

In [None]:
keys='code'
for key in gk_dict[keys]:
    print(key)
print('\n\n')
for key in GK_dict[keys]:
    print(key)

In [None]:
GK_dict[keys].keys(),gk_dict[keys].keys()

In [None]:
gk_dict['species'][0].keys()

In [None]:
GK_dict['species'][0].keys()

In [None]:
gk_dict.keys()


In [32]:
# ## modification for species 
key1='species'

common_keys   = [i for i in gk_dict[key1][0].keys() if i in GK_dict[key1][0].keys()]
old_only_keys = [i for i in gk_dict[key1][0].keys() if i not in GK_dict[key1][0].keys()]
new_only_keys = [i for i in GK_dict[key1][0].keys() if i not in gk_dict[key1][0].keys()]

print(common_keys,'\n\n',old_only_keys,'\n\n',new_only_keys)


['charge_norm', 'density_log_gradient_norm', 'density_norm', 'mass_norm', 'temperature_log_gradient_norm', 'temperature_norm', 'velocity_tor_gradient_norm'] 

 ['name'] 

 ['max_repr_length', 'potential_energy_gradient_norm', 'potential_energy_norm', 'version']


In [33]:
key1='species_all'

common_keys   = [i for i in gk_dict[key1].keys() if i in GK_dict[key1].keys()]
old_only_keys = [i for i in gk_dict[key1].keys() if i not in GK_dict[key1].keys()]
new_only_keys = [i for i in GK_dict[key1].keys() if i not in gk_dict[key1].keys()]

print(common_keys,'\n\n',old_only_keys,'\n\n',new_only_keys)

# for i in common_keys: 
#     print(i,'\t',gk_dict[key1][i],'\t',GK_dict[key1][i],'\n')

# for i in old_only_keys:
#     print(i, gk_dict[key1][i])
    
# for i in new_only_keys:
#     print(i, GK_dict[key1][i])

['beta_reference', 'shearing_rate_norm', 'velocity_tor_norm'] 

 ['debye_length_reference', 'zeff'] 

 ['angle_pol', 'debye_length_norm', 'max_repr_length', 'version']


In [44]:
def f_collisions(old_gk_dict,new_gk_dict):
    ''' conversion for key: collisions '''

    main_key='collisions'
    old_gk,new_gk=old_gk_dict[main_key],new_gk_dict[main_key]
    
    dict1={}
    for key in ['max_repr_length','version']: 
        dict1[key] = new_gk[key]
    
    dict1['collisionality_norm']=old_gk['collisionality_norm']
    
    return dict1

def f_flux_surface(old_gk_dict,new_gk_dict):
    ''' conversion for key: flux_surface '''

    main_key='flux_surface'
    old_gk,new_gk=old_gk_dict[main_key],new_gk_dict[main_key]
    
    dict1={}
    ## Old keys not present 
    keys = ['triangularity_lower', 'triangularity_upper']
    
    ## New keys not present in old 
    keys = ['delongation_dr_minor_norm', 'dgeometric_axis_r_dr_minor', 'dgeometric_axis_z_dr_minor', 'max_repr_length', 'version']
    for key in keys:
        dict1[key] = new_gk[key]
    
    ## Common keys 
    common_keys = ['b_field_tor_sign', 'dc_dr_minor_norm', 'ds_dr_minor_norm', 'elongation', 'ip_sign', 'magnetic_shear_r_minor', 'pressure_gradient_norm', 'q', 'r_minor_norm', 'shape_coefficients_c', 'shape_coefficients_s']
    
    for key in common_keys:
        dict1[key] = old_gk[key]
        
    return dict1

def f_species_all(old_gk_dict,new_gk_dict):
    ''' conversion for key: species_all '''
    
    main_key='species_all'
    old_gk,new_gk=old_gk_dict[main_key],new_gk_dict[main_key]
    
    dict1={}
    ## Old keys not present 
    keys =  ['debye_length_reference', 'zeff'] 
    
    ## New keys not present in old 
    keys = ['angle_pol', 'debye_length_norm', 'max_repr_length', 'version']
    for key in keys:
        dict1[key] = new_gk[key]
    
    ## Common keys 
    common_keys = ['beta_reference', 'shearing_rate_norm', 'velocity_tor_norm'] 

    for key in common_keys:
        dict1[key] = old_gk[key]
        
    return dict1

def f_species(old_gk_dict, new_gk_dict):
    ''' conversion for key: species '''

    main_key='species'
    old_gk,new_gk=old_gk_dict[main_key][0],new_gk_dict[main_key][0]

    dict1={}
    ## Old keys not present 
    keys =  ['name'] 

    ## New keys not present in old 
    keys = ['max_repr_length', 'potential_energy_gradient_norm', 'potential_energy_norm', 'version']
    for key in keys:
        dict1[key] = new_gk[key]

    ## Common keys 
    common_keys = ['charge_norm', 'density_log_gradient_norm', 'density_norm', 'mass_norm', 'temperature_log_gradient_norm', 'temperature_norm', 'velocity_tor_gradient_norm'] 

    for key in common_keys:
        dict1[key] = old_gk[key]
    
    return [dict1]

def f_ids_properties(old_gk_dict,new_gk_dict):
    ''' conversion for key: ids_properties '''

    main_key='ids_properties'
    old_gk,new_gk=old_gk_dict[main_key],new_gk_dict[main_key]
    
    dict1={}
    ## Old keys not present 
    keys = ['creator', 'date'] 
    
    ## New keys not present in old 
    keys = ['creation_date', 'homogeneous_time', 'max_repr_length', 'name', 'occurrence_type', 'provenance', 'provider', 'version']
    for key in keys:
        dict1[key] = new_gk[key]
    
    ## Common keys 
    common_keys = ['comment'] 

    for key in common_keys[:]:
        dict1[key] = old_gk[key]
        
    ## Special fixes
    dict1['creation_data']=old_gk['date']
    dict1['provider'] = 'manual'
    
    ## Note: 'creator' not used. Should it be stored in 'name' ? 
    # dict1['name'] = old_gk['creator']

    return dict1


# f_collisions(gk_dict,GK_dict)
# f_flux_surface(gk_dict,GK_dict)
# f_species_all(gk_dict,GK_dict)
# f_species(gk_dict,GK_dict)
f_ids_properties(gk_dict,GK_dict)


{'creation_date': '2024-06-17 14:59:44.808105',
 'homogeneous_time': 1,
 'max_repr_length': 64,
 'name': '',
 'occurrence_type': None,
 'provenance': None,
 'provider': 'manual',
 'version': {'idspy_internal_version': '2.0',
  'idspy_version': '034000.2.0',
  'imas_dd_git_commit': '845f1b30816f86a3cd4d53714dc56cdd307fdca1',
  'imas_dd_version': '03.40.00'},
 'comment': 'json entry for testing only - note that mode structure moments and flux-surface averaged fluxes are given in particle space (i.e. incl. the full pullback operator)\n EM flux contributions are for now *not* separated and all attributed to the Apar term',
 'creation_data': '2020-08-12'}

### Wavevector

In [50]:
gk_dict['wavevector'][0].keys(), GK_dict['linear']['wavevector'][0].keys()


(dict_keys(['binormal_component_norm', 'eigenmode', 'poloidal_turns', 'radial_component_norm']),
 dict_keys(['binormal_wavevector_norm', 'eigenmode', 'max_repr_length', 'radial_wavevector_norm', 'version']))

In [51]:
gk_dict['wavevector'][0]['eigenmode'][0].keys()

dict_keys(['a_field_parallel_perturbed_norm_imaginary', 'a_field_parallel_perturbed_norm_real', 'fluxes_norm', 'frequency_norm', 'growth_rate_norm', 'growth_rate_tolerance', 'moments_norm_rotating_frame', 'phi_potential_perturbed_norm_imaginary', 'phi_potential_perturbed_norm_real', 'poloidal_angle'])

In [52]:
GK_dict['linear']['wavevector'][0]['eigenmode'][0].keys()

dict_keys(['angle_pol', 'code', 'fields', 'frequency_norm', 'growth_rate_norm', 'growth_rate_tolerance', 'initial_value_run', 'linear_weights', 'linear_weights_rotating_frame', 'max_repr_length', 'moments_norm_gyrocenter', 'moments_norm_gyrocenter_bessel_0', 'moments_norm_gyrocenter_bessel_1', 'moments_norm_particle', 'poloidal_turns', 'time_norm', 'version'])

### Code

In [54]:
gk_dict['code'].keys()

dict_keys(['name', 'parameters', 'version'])

In [59]:
gk_dict['code']['parameters']

{'Bref': 2.577853980941141,
 'ENDIANNESS': 'LITTLE',
 'ExBrate': 0.0,
 'GIT_BRANCH': 'b982ac2bfe5cfcbffff1f6a7033d1b9cafbc1071                    ',
 'GIT_MASTER': 'b982ac2bfe5cfcbffff1f6a7033d1b9cafbc1071                    ',
 'Lref': 0.6565730869283082,
 'OMP_NUM_THREADS': 1,
 'Omega0_tor': 0.0,
 'PRECISION': 'DOUBLE',
 'RELEASE': '1.8 - alpha 0',
 'Tref': 1.82937,
 'adapt_lx': True,
 'beta': 0.000242274,
 'calc_dt': True,
 'charge1': 1,
 'charge2': -1,
 'charge3': 6,
 'coll': 1.7076475e-05,
 'coll_cons_model': "'self_adj'",
 'coll_order': "'second'",
 'collision_op': "'landau'",
 'comp_type': "'IV'",
 'debye2': 0.0,
 'dens1': 0.93,
 'dens2': 1.0,
 'dens3': 0.011666667,
 'diag_Blev(0)': 1.329,
 'diag_trap_levels': 1,
 'diagdir': "'/global/cscratch1/sd/michoski/0.7/scanfiles0000/'",
 'dpdx_pm': 0.0016907972,
 'dpdx_term': "'gradB_eq_curv'",
 'dt_max': 0.00305,
 'dt_vlasov': 0.00305,
 'ev_coll': 4.3589,
 'ev_coll_est': 3.9556449,
 'geomfile': "'AUG_34626_3.71s.eqd'",
 'hyp_z': -1.0,
 

In [61]:
GK_dict['code'].keys()

dict_keys(['commit', 'description', 'library', 'max_repr_length', 'name', 'output_flag', 'parameters', 'repository', 'version'])

In [62]:
GK_dict['code']['library']

[{'commit': 'dfff6913005c78579aa532be6cbb3da1f9478dcc',
  'description': '',
  'max_repr_length': 64,
  'name': 'pyrokinetics',
  'parameters': '',
  'repository': 'https://github.com/pyro-kinetics/pyrokinetics',
  'version': '0.7.0'}]

In [63]:
par1 = gk_dict['code']['parameters']
par1.keys()

dict_keys(['Bref', 'ENDIANNESS', 'ExBrate', 'GIT_BRANCH', 'GIT_MASTER', 'Lref', 'OMP_NUM_THREADS', 'Omega0_tor', 'PRECISION', 'RELEASE', 'Tref', 'adapt_lx', 'beta', 'calc_dt', 'charge1', 'charge2', 'charge3', 'coll', 'coll_cons_model', 'coll_order', 'collision_op', 'comp_type', 'debye2', 'dens1', 'dens2', 'dens3', 'diag_Blev(0)', 'diag_trap_levels', 'diagdir', 'dpdx_pm', 'dpdx_term', 'dt_max', 'dt_vlasov', 'ev_coll', 'ev_coll_est', 'geomfile', 'hyp_z', 'hypz_compensation', 'init_cond', 'init_time', 'istep_energy', 'istep_field', 'istep_mom', 'istep_nrg', 'istep_omega', 'istep_schpt', 'istep_vsp', 'kx_center', 'ky0_ind', 'kymin', 'lv', 'lw', 'lx', 'ly', 'magn_geometry', 'major_R', 'mass1', 'mass2', 'mass3', 'minor_r', 'mref', 'n0_global', 'n_fields', 'n_moms', 'n_parallel_sims', 'n_pol', 'n_procs_s', 'n_procs_sim', 'n_procs_v', 'n_procs_w', 'n_procs_x', 'n_procs_y', 'n_procs_z', 'n_spec', 'name1', 'name2', 'name3', 'nblocks', 'nky0', 'nltdt_off', 'nonlinear', 'norm_flux_projection', 'nr

In [78]:
key2 = 'charge'
key2 in par1.keys()

False

In [64]:
# GK_dict['ids_properties']
par2 = xmltodict.parse(GK_dict['code']['parameters'])

In [70]:
par2['root'].keys()

dict_keys(['parallelization', 'box', 'in_out', 'general', 'geometry', 'species', 'info', 'units', 'bsgrid'])

In [74]:
[ky2 for ky1 in par2['root'].keys() for ky2 in par2['root'][ky1]]

['n_procs_s',
 'n_procs_v',
 'n_procs_w',
 'n_procs_x',
 'n_procs_y',
 'n_procs_z',
 'n_procs_sim',
 'n_parallel_sims',
 'n_spec',
 'nx0',
 'nky0',
 'nz0',
 'nv0',
 'nw0',
 'kymin',
 'lv',
 'lw',
 'adapt_lx',
 'x0',
 'n0_global',
 'ky0_ind',
 'diagdir',
 'read_checkpoint',
 'write_checkpoint',
 'istep_field',
 'istep_mom',
 'istep_nrg',
 'istep_omega',
 'istep_vsp',
 'istep_schpt',
 'istep_energy',
 'write_std',
 'iterdb_file',
 'nonlinear',
 'comp_type',
 'perf_vec',
 'nblocks',
 'arakawa_zv',
 'timescheme',
 'dt_max',
 'dt_vlasov',
 'ev_coll',
 'timelim',
 'ntimesteps',
 'beta',
 'debye2',
 'collision_op',
 'coll',
 'coll_cons_model',
 'init_cond',
 'hyp_z_with_dz_prefactor',
 'hyp_z',
 'hyp_v_with_dv_prefactor',
 'hyp_v',
 'perf_tsteps',
 'magn_geometry',
 'q0',
 'shat',
 'geomdir',
 'geomfile',
 'edge_opt',
 'minor_r',
 'major_r',
 'rhostar',
 'dpdx_term',
 'dpdx_pm',
 'norm_flux_projection',
 {'name': 'i',
  'omn': '5.9963121',
  'omt': '2.8994479',
  'mass': '1.0',
  'temp': '1.4

In [97]:
def f_get_dict_all_keys(dict1):
    '''
    Module to get lowest keys in dict hierarchy
    '''
    keys_lst =[] 
    
    if isinstance(dict1,dict):
        for i,j in dict1.items():
            if not isinstance(j,dict):
                keys_lst.append(i)
                if (isinstance(j,list) and len(j)>0):
                    if isinstance(j[0],dict):
                        keys_lst.extend( f_get_dict_all_keys(j[0]) )
            else: 
                keys_lst.extend( f_get_dict_all_keys(j) ) 

    return keys_lst

keys_par2 = f_get_dict_all_keys(par2)

In [98]:
len(par1.keys()),len(keys_par2)

(125, 101)

In [100]:
common = [i for i in par1.keys() if i in keys_par2]
not_in_new = [i for i in par1.keys() if i not in keys_par2]
not_in_old = [i for i in keys_par2 if i not in par1.keys()]

In [101]:
len(common),len(not_in_new),len(not_in_old)

(73, 52, 28)

In [93]:
# f_print_dict_struct(par2,0)