# 2024 May 27, updated
for CrySPY 1.3.0

# Import

In [None]:
#---------- import
import gzip
import matplotlib.pyplot as plt
import numpy as np
import pickle

# Load function
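`load_data` reads plain pickle files (`*.pkl`) and also gzip-compressed ones (`*.pkl.gz`); the `.gz` suffix is detected automatically.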

In [None]:
def load_data(filename):
    """Load a pickled object from a plain or gzip-compressed file.

    Parameters
    ----------
    filename : str or os.PathLike
        Path of the pickle file. A name ending in '.gz' is opened with
        gzip; any other name is opened as a regular binary file.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    Unpickling can execute arbitrary code. Only load files that you
    (or CrySPY running under your control) created.
    """
    import os

    # Accept pathlib.Path (and other path-like objects) as well as str.
    path = os.fspath(filename)
    opener = gzip.open if path.endswith('.gz') else open
    with opener(path, 'rb') as f:
        # SECURITY: pickle.load on an untrusted file is unsafe.
        return pickle.load(f)

# Common data
## input_data.pkl

In [None]:
rin = load_data('input_data.pkl')

Instance of ReadInput class

In [None]:
rin

In [None]:
print(rin.algo)
print(rin.atype)

## init_struc_data.pkl

In [None]:
init_struc_data = load_data('init_struc_data.pkl')

### init_struc_data
dict  
key: structure ID  
value: pymatgen Structure object  
{0: struc0, 1: struc1, ...}  

Example: initial structure data of ID 7
``` python
init_struc_data[7]
```

In [None]:
init_struc_data[7]

## opt_struc_data.pkl

In [None]:
opt_struc_data = load_data('opt_struc_data.pkl')

### opt_struc_data
dict  
key: structure ID  
value: pymatgen Structure object  
{0: struc0, 1: struc1, ...}  


Example: optimized structure data of ID 7
``` python
opt_struc_data[7]
```

In [None]:
opt_struc_data[7]

## rslt_data.pkl

In [None]:
rslt_data = load_data('rslt_data.pkl')

### rslt_data
DataFrame  

```
   Spg_num  Spg_sym  Spg_num_opt Spg_sym_opt  E_eV_atom  Magmom   Opt
0      168       P6          191      P6/mmm  -3.826916     NaN  done
1       95   P4_322           95      P4_322  -3.978478     NaN  done
2      191   P6/mmm          191      P6/mmm  -2.289173     NaN  done
3      113  P-42_1m          113     P-42_1m  -3.773191     NaN  done
4      128   P4/mnc          123      P4/mmm  -3.296904     NaN  done
```

In [None]:
# ---------- sort by Energy
rslt_data.sort_values(by=['E_eV_atom']).head(10)

## kpt_data.pkl

In [None]:
kpt_data = load_data('kpt_data.pkl')

### kpt_data
**kpt_data**: k-point data  
- Type: dict
  + The keys are structure IDs  
  + The values are lists of k-meshes, one per stage  
- String form: {0: [[2, 2, 2], [4, 4, 4], [6, 6, 6], ...], 1: ...}  

Example: k-mesh of ID 7 stage 2
``` python
# kpt_data[ID][stage]
# kpt_data[ID][0] <-- stage 1
# kpt_data[ID][1] <-- stage 2
stage = 2
kpt_data[7][stage-1]
```

In [None]:
kpt_data

## id_queueing.pkl

In [None]:
id_queueing = load_data('id_queueing.pkl')

### id_queueing
List  
queueing IDs  

e.g. [5, 6, 7, 8, 9]  

In [None]:
id_queueing

## id_running.pkl

In [None]:
id_running = load_data('id_running.pkl')

### id_running
List  
running IDs  

e.g. [0, 1, 2, 3, 4]  

In [None]:
id_running

# Bayesian Optimization

## n_selection.pkl

In [None]:
n_selection = load_data('n_selection.pkl')

### n_selection
Int  
number of selection

In [None]:
n_selection

## id_select_hist.pkl

In [None]:
id_select_hist = load_data('id_select_hist.pkl')

### id_select_hist
List  
selection history  
e.g. [[3, 9], [6, 4], ...]
  + IDs 3 and 9 were selected at first
  + 6 and 4 were 2nd.

In [None]:
id_select_hist

## init_dscrpt_data.pkl

In [None]:
init_dscrpt_data = load_data('init_dscrpt_data.pkl')

### init_dscrpt_data
Dict  
descriptor data of initial structures  
key: structure ID  
value: descriptor data in numpy.ndarray  

Example: init_dscrpt_data of ID 3
``` python 
init_dscrpt_data[3]
```

In [None]:
init_dscrpt_data[3]

In [None]:
plt.plot(init_dscrpt_data[3])

## opt_dscrpt_data.pkl

In [None]:
opt_dscrpt_data = load_data('opt_dscrpt_data.pkl')

### opt_dscrpt_data
Dict  
descriptor data of optimized structures  
key: structure ID  
value: descriptor data in numpy.ndarray  

 
Example: opt_dscrpt_data of ID 3
``` python 
opt_dscrpt_data[3]
```

In [None]:
opt_dscrpt_data[3]

In [None]:
plt.plot(opt_dscrpt_data[3])

## bo_mean.pkl

In [None]:
bo_mean = load_data('bo_mean.pkl')

### bo_mean
mean data for each selection  
Dict  
key: selection No.  
value: dict  <-- key: structure ID, value: predictive mean  
e.g.  {2: {0: 3.93, 1: 3.92, 2: 3.94, ...}, 3: {...}, ...}

Example: mean data for each candidate at 2nd selection (no 1st selection data. 1st selection is random.)
``` python 
bo_mean[2]
```

In [None]:
bo_mean[2]

## bo_var.pkl

In [None]:
bo_var = load_data('bo_var.pkl')

### bo_var
variance data for each selection  
Dict  
key: selection No.  
value: dict  <-- key: structure ID, value: predictive variance  
e.g.   {2: {0: 0.014, 1: 0.013, 2: 0.018, ...}, 3: {...}, ...}

Example: variance data for each candidate at 2nd selection (1st selection is random)
``` python 
bo_var[2]
```

In [None]:
bo_var[2]

## bo_score.pkl

In [None]:
bo_score = load_data('bo_score.pkl')

### bo_score
score data for each selection  
Dict  
key: selection No.  
value: dict  <-- key: structure ID, value: score  
e.g.   {2: {0: 4.076, 1: 3.995, 2: 4.010, ...}, 3: {...}, ...}

Example: score data for each candidate at 2nd selection (1st selection is random)
``` python 
bo_score[2]
```

In [None]:
bo_score[2]

# LAQA

## id_select_hist.pkl

In [None]:
id_select_hist = load_data('id_select_hist.pkl')

### id_select_hist
ID selection history  
List  
e.g. [[3, 9], [6, 4], ...]

In [None]:
id_select_hist

## tot_step_select.pkl

In [None]:
tot_step_select = load_data('tot_step_select.pkl')

### tot_step_select
total number of optimization steps in each selection  
List  
  + len(``tot_step_select``) = len(``id_select_hist``) + 1
  + ``tot_step_select`` includes 0th selection

e.g. [2000, 200, 200, ...]  
  + [0th selection, 1st selection, 2nd ...]


In [None]:
# ---------- total number of optimization steps (all steps)
print(f'Total steps: {sum(tot_step_select)}')

In [None]:
tot_step_select

## laqa_step.pkl

In [None]:
laqa_step = load_data('laqa_step.pkl')

### laqa_step
number of optimization steps in each ID  
Dict  
key: structure ID  
value:  list of number of optimization steps  
e.g.  {0: [20, 7], 1:[20, 20, 20, 5], ...}  
  

Example: list of optimization steps in ID 7
``` python
laqa_step[7]
```

In [None]:
# ---------- total number of optimization steps of ID 7 (summed over all jobs)
print(f'Total number of optimization steps in ID 7: {sum(laqa_step[7])}')

In [None]:
laqa_step[7]

## laqa_struc.pkl

In [None]:
laqa_struc = load_data('laqa_struc.pkl')

### laqa_struc
list of structure data in each ID  
Dict  
key: structure ID  
value: list of structure data in pymatgen Structure  
e.g. {0: [list of structures], 1:[list of structures], ...}  

``` python
len(laqa_struc[7]) == len(laqa_step[7])  
```

The latest structure of each job is saved in laqa_struc.  
If the optimization has finished, laqa_struc[7][-1] is equal to opt_struc_data[7].  

For example,  
number of iteration for optimization = 5 (NSW = 5 in VASP input)  
- 5 opt. step --> save latest struc. --> 5 opt. step --> save latest struc. --> ...

laqa_step[ID] = [5, 5, 5, ...]  
laqa_struc[ID] = [a struc_data, a struc_data, ...]  

So, 4 structure data are discarded in each job.  
If you want to save full structure data step by step,  
use `struc_step_flag = True` in cryspy.in.

Example: list of structure data in ID 7
``` python
laqa_struc[7]
```

In [None]:
# ---------- latest structure of ID 7
print(laqa_struc[7][-1])

## laqa_energy.pkl

In [None]:
laqa_energy = load_data('laqa_energy.pkl')

### laqa_energy
list of energy data in each ID  
Dict  
key: structure ID  
value: list of energy data
e.g. {0: [-3.287, -3.330], 1:[-3.105, -3.194, -3.233, -3.347], ...}  

``` python
len(laqa_energy[7]) == len(laqa_step[7])  
```

The latest energy of each job is saved in laqa_energy.  

For example,  
number of iteration for optimization = 5 (NSW = 5 in VASP input)  
- 5 opt. step --> save latest energy. --> 5 opt. step --> save latest energy. --> ...

laqa_step[ID] = [5, 5, 5, ...]  
laqa_energy[ID] = [an energy_data, an energy_data, ...]  

So, 4 energy data are discarded in each job.  
If you want to save full energy data step by step,  
use `energy_step_flag = True` in cryspy.in.


Example: list of energy data in ID 7
``` python
laqa_energy[7]
```

In [None]:
# ---------- energy list of ID 7
print(laqa_energy[7])
# ---------- latest energy of ID 7
print(laqa_energy[7][-1])

## laqa_bias.pkl

In [None]:
laqa_bias = load_data('laqa_bias.pkl')

### laqa_bias
list of bias data in each ID  
Dict  
key: structure ID  
value: list of bias data  
e.g. {0: [0.059, 0.003], 1:[0.501, 0.210, 0.984, 0.758], ...}  

``` python
len(laqa_bias[7]) == len(laqa_step[7])
```

Example: list of bias data in ID 7
``` python
laqa_bias[7]
```

In [None]:
# ---------- bias list of ID 7
print(laqa_bias[7])
# ---------- latest bias of ID 7
print(laqa_bias[7][-1])

## laqa_score.pkl

In [None]:
laqa_score = load_data('laqa_score.pkl')

### laqa_score
list of score data in each ID  
Dict  
key: structure ID  
value: list of score data  
e.g. {0: [inf, 3.346, -inf], 1:[3.606, 3.404, 4.217, -inf], ...}  

``` python
len(laqa_score[7]) == len(laqa_step[7]) + 1  
```

``laqa_score`` includes 0th score (= plus infinity)  
When the optimization finishes, -inf is appended to the score list  

Example: list of score data in ID 7
``` python
laqa_score[7]
```

In [None]:
# ---------- score list of ID 7
print(laqa_score[7])
# ---------- latest score of ID 7
print(laqa_score[7][-1])

# Evolutionary algorithm

## gen.pkl

In [None]:
gen = load_data('gen.pkl')

### gen
current generation  
Int

In [None]:
gen

## ea_info.pkl

In [None]:
ea_info = load_data('ea_info.pkl')

### ea_info
information on generation change  
DataFrame  
e.g.   
```
 Gen  Population  Crossover  Permutation  Strain  Random  Elite crs_func crs_lat slct_func
   1          10          0            0       0      10      0       OP   equal       TNM
   2          10          5            0       3       2      2       OP   equal       TNM
```

In [None]:
ea_info

## ea_origin.pkl

In [None]:
ea_origin = load_data('ea_origin.pkl')

### ea_origin
information on origins (parents)  
DataFrame  
e.g.
```
 Gen  Struc_ID  Operation  Parent
   1         0     random    None
   1         1     random    None
   1         2     random    None
   1         3     random    None
   1         4     random    None
   1         5     random    None
   1         6     random    None
   1         7     random    None
   1         8     random    None
   1         9     random    None
   2        10  crossover  (9, 5)
   2        11  crossover  (9, 4)
   2        12  crossover  (7, 4)
   2        13  crossover  (4, 5)
   2        14  crossover  (9, 7)
   2        15     strain    (0,)
   2        16     strain    (4,)
   2        17     strain    (9,)
   2        18     random    None
   2        19     random    None
   2         4      elite   elite
   2         0      elite   elite
```

In [None]:
ea_origin

## elite_struc.pkl

In [None]:
elite_struc = load_data('elite_struc.pkl')

### elite_struc
elite structure data  
Dict  
key: structure ID  
value: structure data in pymatgen Structure  
e.g.  {0: struc0, 4: struc4, ...}

In [None]:
elite_struc.keys()

## elite_fitness.pkl

In [None]:
elite_fitness = load_data('elite_fitness.pkl')

### elite_fitness
fitness of elite structures  
Dict  
key: structure ID  
value: fitness  
e.g. {4: -4.101055417556523, 0: -4.061872594010355}

In [None]:
elite_fitness

# EA-vc
## nat_data.pkl

In [None]:
nat_data = load_data('nat_data.pkl')

### nat_data
data of the number of atoms  
Dict  
key: structure ID  
value: nat  
e.g. {  
    0: (4, 2),  
     1: (2, 4),  
      2: (2, 1),  
       ...  
    }

In [None]:
nat_data

## ratio_data.pkl

In [None]:
ratio_data = load_data('ratio_data.pkl')

### ratio_data
data of composition ratio  
Dict  
key: structure ID  
value: composition ratio  
e.g.  
 {  
0: (0.75, 0.25),  
1: (0.42857142857142855, 0.5714285714285714),  
2: (0.5, 0.5), ...  
}

In [None]:
ratio_data

## hdist_data.pkl

In [None]:
hdist_data = load_data('hdist_data.pkl')

### hdist_data
data of the hull distance  
Dict  
key: generation  
value: {ID: hull distance, ...}  
e.g. {  
  1: {0: 0.0, 1: 0.14048749296990964, 2: 0.7806598225267387, ...},  
  2: {0: 0.0, 1: 0.14048749296990964, 2: 0.7806598225267387, ...},  
   ...  
   }

In [None]:
# hdist_data[gen][ID]
hdist_data

# Option

## energy_step_data.pkl

In [None]:
energy_step_data = load_data('energy_step_data.pkl')
# If the file is gzip-compressed, use the line below instead;
# load_data picks the gzip reader from the '.gz' suffix.
#energy_step_data = load_data('energy_step_data.pkl.gz')

### energy_step_data
Dict  
key: structure ID  
value: energy-step numpy.ndarray  
e.g.  
 { 0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  

Example: energy of ID 7, stage 2, step 5
``` python
# energy_step_data[ID][stage][step]
# energy_step_data[ID][0] <-- stage 1
# energy_step_data[ID][1] <-- stage 2
#
# in LAQA
# energy_step_data[ID][0] <-- 1st selection
# energy_step_data[ID][1] <-- 2nd selection
energy_step_data[7][2-1][5-1]
```

In [None]:
# ---------- energy-step data of ID 7, stage 2
energy_step_data[7][2-1]

## struc_step_data.pkl

In [None]:
# Plain (uncompressed) alternative:
#struc_step_data = load_data('struc_step_data.pkl')
# load_data picks the gzip reader from the '.gz' suffix, so no separate
# gzip loader is needed (load_data_gz is not defined in this notebook).
struc_step_data = load_data('struc_step_data.pkl.gz')

### struc_step_data
Dict  
key: structure ID  
value: structure-step list  
e.g. {0:  [ [ (stage1, step1), (stage1, step2), ...], [(stage2, step1), (stage2, step2), ...], ...]}  

Example: structure of ID 7, stage 2, step 5
``` python
# struc_step_data[ID][stage][step]
# struc_step_data[ID][0] <-- stage 1
# struc_step_data[ID][1] <-- stage 2
#
# in LAQA
# struc_step_data[ID][0] <-- 1st selection
# struc_step_data[ID][1] <-- 2nd selection
struc_step_data[7][2-1][5-1]
```

In [None]:
# ---------- structure-step data of ID 7, stage 2
struc_step_data[7][2-1]

## force_step_data.pkl

In [None]:
force_step_data = load_data('force_step_data.pkl')
# If the file is gzip-compressed, use the line below instead;
# load_data picks the gzip reader from the '.gz' suffix.
#force_step_data = load_data('force_step_data.pkl.gz')

### force_step_data
Dict  
key: structure ID  
value: force-step numpy.ndarray
e.g. {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  

Example: force of ID 7, stage 2, step 5
``` python
# force_step_data[ID][stage][step]
# force_step_data[ID][0] <-- stage 1
# force_step_data[ID][1] <-- stage 2
#
# in LAQA
# force_step_data[ID][0] <-- 1st selection
# force_step_data[ID][1] <-- 2nd selection
force_step_data[7][2-1][5-1]
```

In [None]:
force_step_data[7][2-1][5-1]

## stress_step_data.pkl

In [None]:
stress_step_data = load_data('stress_step_data.pkl')
# If the file is gzip-compressed, use the line below instead;
# load_data picks the gzip reader from the '.gz' suffix.
#stress_step_data = load_data('stress_step_data.pkl.gz')

### stress_step_data
Dict  
key: structure ID  
value: stress-step numpy.ndarray  
e.g. {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  

Example: stress of ID 7, stage 2, step 5
``` python
# stress_step_data[ID][stage][step]
# stress_step_data[ID][0] <-- stage 1
# stress_step_data[ID][1] <-- stage 2
#
# in LAQA
# stress_step_data[ID][0] <-- 1st selection
# stress_step_data[ID][1] <-- 2nd selection
stress_step_data[7][2-1][5-1]
```

In [None]:
stress_step_data[7][2-1][5-1]