# Import

In [None]:
#---------- import
import matplotlib.pyplot as plt
import numpy as np
import pickle

In [None]:
# ------ for gzip
#import gzip

# Load function

In [None]:
def load_data(filename):
    """Deserialize and return the object pickled in ``filename``.

    The file is opened in binary mode and is closed again before the
    loaded object is handed back to the caller.
    """
    with open(filename, mode='rb') as pkl_file:
        loaded = pickle.load(pkl_file)
    return loaded

In [None]:
#  ---------- for gzip
#def load_data_gz(filename):
#    with gzip.open(filename, 'rb') as f:
#        return pickle.load(f)

# Structure data

## init_struc_data.pkl

In [None]:
init_struc_data = load_data('init_struc_data.pkl')
#init_struc_data = load_data_gz('init_struc_data.pkl.gz')

### init_struc_data
**init_struc_data**: initial structure data  
- Type: dict  
  + The keys are structure IDs
  + The values are structure data in pymatgen format
- String form: {0: struc0, 1: struc1, ...}  
  

Example: initial structure data of ID 7
``` python
init_struc_data[7]
```

In [None]:
init_struc_data[7]

## opt_struc_data.pkl

In [None]:
opt_struc_data = load_data('opt_struc_data.pkl')
#opt_struc_data = load_data_gz('opt_struc_data.pkl.gz')

### opt_struc_data
**opt_struc_data**: optimized structure data  
- Type: dict  
  + The keys are structure IDs
  + The values are structure data in pymatgen format
- String form: {0: struc0, 1: struc1, ...}  

Example: optimized structure data of ID 7
``` python
opt_struc_data[7]
```

In [None]:
opt_struc_data[7]

# rslt_data.pkl

In [None]:
rslt_data = load_data('rslt_data.pkl')

## rslt_data
**rslt_data**: result data  
- Type: DataFrame  
  + Row labels are structure IDs  
- String form:
```
   Spg_num  Spg_sym  Spg_num_opt Spg_sym_opt  E_eV_atom  Magmom   Opt
0      168       P6          191      P6/mmm  -3.826916     NaN  done
1       95   P4_322           95      P4_322  -3.978478     NaN  done
2      191   P6/mmm          191      P6/mmm  -2.289173     NaN  done
3      113  P-42_1m          113     P-42_1m  -3.773191     NaN  done
4      128   P4/mnc          123      P4/mmm  -3.296904     NaN  done
```

In [None]:
# ---------- first ten rows after sorting by E_eV_atom (default sort order)
rslt_data.sort_values(by='E_eV_atom').iloc[:10]

# kpt_data.pkl

In [None]:
kpt_data = load_data('kpt_data.pkl')

## kpt_data
**kpt_data**: k-point data  
- Type: dict
  + The keys are structure IDs  
  + The values are lists of k-meshes, one per stage  
- String form: {0: [[2, 2, 2], [4, 4, 4], [6, 6, 6], ...], 1: ...}  

Example: k-mesh of ID 7 stage 2
``` python
# kpt_data[ID][stage]
# kpt_data[ID][0] <-- stage 1
# kpt_data[ID][1] <-- stage 2
stage = 2
kpt_data[7][stage-1]
```

In [None]:
kpt_data

# Random Search

## RS_id_data.pkl

In [None]:
id_queueing, id_running = load_data('RS_id_data.pkl')

### id_queueing
**id_queueing**: queueing IDs  
- Type: list  
- String form: [5, 6, 7, 8, 9]  

In [None]:
id_queueing

### id_running
**id_running**: running IDs  
- Type: list  
- String form: [0, 1, 2, 3, 4]  

In [None]:
id_running

# Bayesian Optimization

## BO_id_data.pkl

In [None]:
n_selection, id_queueing, id_running, id_select_hist = load_data('BO_id_data.pkl')

### n_selection
**n_selection**: number of selection  
- Type: int
- String form: 1

In [None]:
n_selection

### id_queueing
**id_queueing**: queueing IDs  
- Type: list  
- String form: [2, 5, 8] 

In [None]:
id_queueing

### id_running
**id_running**: running IDs  
- Type: list  
- String form: [1, 7]  

In [None]:
id_running

### id_select_hist
**id_select_hist**: history of ID selection  
- Type: list
- String form: [[3, 9], [6, 4], ...]
  + [[list of first selection], [list of second selection], ...]

In [None]:
id_select_hist

## BO_data.pkl

In [None]:
init_dscrpt_data, opt_dscrpt_data, bo_mean, bo_var, bo_score = load_data('BO_data.pkl')

### init_dscrpt_data
**init_dscrpt_data**: descriptor data of initial structures  
- Type: dict
  + The keys are structure IDs
  + The values are descriptor data in numpy.ndarray
- String form: {0: array([descriptor array of ID 0]), 1: array([descriptor array of ID 1]), ...}


Example: init_dscrpt_data of ID 3
``` python 
init_dscrpt_data[3]
```

In [None]:
init_dscrpt_data[3]  

### opt_dscrpt_data
**opt_dscrpt_data**: descriptor data of optimized structures  
- Type: dict
  + The keys are structure IDs
  + The values are descriptor data in numpy.ndarray
- String form: {0: array([descriptor array of ID 0]), 1: array([descriptor array of ID 1]), ...}

 
Example: opt_dscrpt_data of ID 3
``` python 
opt_dscrpt_data[3]
```

In [None]:
opt_dscrpt_data[3]

### bo_mean
**bo_mean**: mean data for each selection  
- Type: dict
  + The keys are selection No. 
  + The values are dict of predictive mean
- String form: {2: {0: 3.93, 1: 3.92, 2: 3.94, ...}, 3: {...}, ...}

Example: mean data for each candidate at 2nd selection (1st selection is random)
``` python 
bo_mean[2]
```

In [None]:
bo_mean[2]

### bo_var
**bo_var**: variance data for each selection  
- Type: dict
  + The keys are selection No. 
  + The values are dict of predictive variance
- String form: {2: {0: 0.014, 1: 0.013, 2: 0.018, ...}, 3: {...}, ...}

Example: variance data for each candidate at 2nd selection (1st selection is random)
``` python 
bo_var[2]
```

In [None]:
bo_var[2]

### bo_score
**bo_score**: score (acquisition function) data for each selection  
- Type: dict
  + The keys are selection No. 
  + The values are dict of score
- String form: {2: {0: 4.076, 1: 3.995, 2: 4.010, ...}, 3: {...}, ...}

Example: score data for each candidate at 2nd selection (1st selection is random)
``` python 
bo_score[2]
```

In [None]:
bo_score[2]

# LAQA

## LAQA_id_data.pkl

In [None]:
id_queueing, id_running, id_select_hist = load_data('LAQA_id_data.pkl')

### id_queueing
**id_queueing**: queueing IDs  
- Type: list  
- String form: [2, 3, 4, 5, 6, 7, 8, 9]  

In [None]:
id_queueing

### id_running
**id_running**: running IDs  
- Type: list  
- String form: [0, 1]  

In [None]:
id_running

### id_select_hist
**id_select_hist**: history of ID selection  
- Type: list
- String form: [[3, 9], [6, 4], ...]
  + [[list of first selection], [list of second selection], ...]

In [None]:
id_select_hist

## LAQA_data.pkl

In [None]:
tot_step_select, laqa_step, laqa_struc, laqa_energy, laqa_bias, laqa_score = load_data('LAQA_data.pkl')
#tot_step_select, laqa_step, laqa_struc, laqa_energy, laqa_bias, laqa_score = load_data_gz('LAQA_data.pkl.gz')

### tot_step_select
**tot_step_select**: total number of optimization steps in each selection  
- Type: list  
  + len(``tot_step_select``) = len(``id_select_hist``) + 1
  + ``tot_step_select`` includes 0th selection
- String form: [2000, 200, 200, ...]  
  + [0th, 1st, 2nd, ...]  

In [None]:
# ---------- total number of optimization steps (all steps)
# sum() adds up every entry of tot_step_select (described above as one entry per selection, 0th included)
print('Total steps: {}'.format(sum(tot_step_select)))
# bare expression on the last line: shown as the cell output when run in a notebook
tot_step_select

### laqa_step
**laqa_step**: number of optimization steps in each ID  
- Type: dict
  + The keys are structure ID  
  + The values are list of number of optimization steps
- String form: {0: [20, 7], 1:[20, 20, 20, 5], ...}  
  

Example: list of optimization steps in ID 7
``` python
laqa_step[7]
```

In [None]:
# ---------- list of optimization step counts stored for ID 7
print(laqa_step[7])
# ---------- total steps
# sum() adds up all entries of that list
print('Total number of optimization steps in ID 7: {}'.format(sum(laqa_step[7])))

### laqa_struc
**laqa_struc**: list of structure data in each ID  
- Type: dict  
  + The keys are structure ID
  + The values are list of structure data in pymatgen format
- String form: {0: [list of structures], 1:[list of structures], ...}  

``` python
len(laqa_struc[7]) == len(laqa_step[7])  
```

The latest structure data of each job are saved in laqa_struc.  
If the optimization has finished, laqa_struc[7][-1] is equal to opt_struc_data[7]  

For example,  
number of iteration for optimization = 5 (NSW = 5 in VASP input)  
- 5 opt. step --> save latest struc. --> 5 opt. step --> save latest struc. --> ...

laqa_step[ID] = [5, 5, 5, ...]  
laqa_struc[ID] = [a struc_data, a struc_data, ...]  

So, 4 structure data are discarded in each job.  
If you want to save full structure data step by step,  
use `struc_step_flag = True` in cryspy.in.

Example: list of structure data in ID 7
``` python
laqa_struc[7]
```

In [None]:
# ---------- latest structure of ID 7
print(laqa_struc[7][-1])

### laqa_energy
**laqa_energy**: list of energy data in each ID  
- Type: dict  
  + The keys are structure ID
  + The values are list of energy data
- String form: {0: [-3.287, -3.330], 1:[-3.105, -3.194, -3.233, -3.347], ...}  

``` python
len(laqa_energy[7]) == len(laqa_step[7])  
```

The latest energy data of each job are saved in laqa_energy.  

For example,  
number of iteration for optimization = 5 (NSW = 5 in VASP input)  
- 5 opt. step --> save latest energy. --> 5 opt. step --> save latest energy. --> ...

laqa_step[ID] = [5, 5, 5, ...]  
laqa_energy[ID] = [an energy_data, an energy_data, ...]  

So, 4 energy data are discarded in each job.  
If you want to save full energy data step by step,  
use `energy_step_flag = True` in cryspy.in.


Example: list of energy data in ID 7
``` python
laqa_energy[7]
```

In [None]:
# ---------- energy list of ID 7
print(laqa_energy[7])
# ---------- latest energy of ID 7
# index -1 selects the last entry of the list
print(laqa_energy[7][-1])

### laqa_bias
**laqa_bias**: list of bias data in each ID  
- Type: dict  
  + The keys are structure ID
  + The values are list of bias data
- String form: {0: [0.059, 0.003], 1:[0.501, 0.210, 0.984, 0.758], ...}  

``` python
len(laqa_bias[7]) == len(laqa_step[7])
```

Example: list of bias data in ID 7
``` python
laqa_bias[7]
```

In [None]:
# ---------- bias list of ID 7
print(laqa_bias[7])
# ---------- latest bias of ID 7
# index -1 selects the last entry of the list
print(laqa_bias[7][-1])

### laqa_score
**laqa_score**: list of score data in each ID  
- Type: dict  
  + The keys are structure ID  
  + The values are list of score data  
- String form: {0: [inf, 3.346, -inf], 1:[3.606, 3.404, 4.217, -inf], ...}  

``` python
len(laqa_score[7]) == len(laqa_step[7]) + 1  
```

``laqa_score`` includes 0th score (= plus infinity)  
If the optimization finished, -inf is appended to the score list  

Example: list of score data in ID 7
``` python
laqa_score[7]
```

In [None]:
# ---------- score list of ID 7
print(laqa_score[7])
# ---------- latest score of ID 7
# index -1 selects the last entry; per the notes above this is -inf once the optimization has finished
print(laqa_score[7][-1])

# Evolutionary algorithm

## EA_id_data.pkl

In [None]:
gen, id_queueing, id_running = load_data('EA_id_data.pkl')

### gen
**gen**: current generation  
- Type: int
- String form: 1

In [None]:
gen

### id_queueing
**id_queueing**: queueing IDs  
- Type: list  
- String form: [5, 6, 7, 8, 9]  

In [None]:
id_queueing

### id_running
**id_running**: running IDs  
- Type: list  
- String form: [0, 1, 2, 3, 4]  

In [None]:
id_running

## EA_data.pkl

In [None]:
elite_struc, elite_fitness, ea_info, ea_origin = load_data('EA_data.pkl')

### elite_struc
**elite_struc**: elite structure data  
- Type: dict
  + The keys are elite structure IDs
  + The values are elite structure data in pymatgen format
- String form: {0: struc0, 4: struc4, ...}

In [None]:
elite_struc

### elite_fitness
**elite_fitness**: fitness of elite structures  
- Type: dict  
  + The keys are elite structure IDs
  + The values are fitness of elite structures
- String form: {4: -4.101055417556523, 0: -4.061872594010355}

In [None]:
elite_fitness

### ea_info
**ea_info**: information on generational changes  
- Type: DataFrame  
- String form:  
```
 Gen  Population  Crossover  Permutation  Strain  Random  Elite crs_func crs_lat slct_func
   1          10          0            0       0      10      0       OP   equal       TNM
   2          10          5            0       3       2      2       OP   equal       TNM
```

In [None]:
ea_info

### ea_origin
**ea_origin**: information on origins (parents)  
- Type: DataFrame  
- String form:  
```
 Gen  Struc_ID  Operation  Parent
   1         0     random    None
   1         1     random    None
   1         2     random    None
   1         3     random    None
   1         4     random    None
   1         5     random    None
   1         6     random    None
   1         7     random    None
   1         8     random    None
   1         9     random    None
   2        10  crossover  (9, 5)
   2        11  crossover  (9, 4)
   2        12  crossover  (7, 4)
   2        13  crossover  (4, 5)
   2        14  crossover  (9, 7)
   2        15     strain    (0,)
   2        16     strain    (4,)
   2        17     strain    (9,)
   2        18     random    None
   2        19     random    None
   2         4      elite   elite
   2         0      elite   elite
```

In [None]:
ea_origin

# *_step_data.pkl

## energy_step_data.pkl
**energy_step_data**: energy-step data  
- Type: dict  
  + The keys are structure ID
  + The values are energy-step numpy.ndarray
- String form: {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  

Example: energy of ID 7, stage 2, step 5
``` python
# energy_step_data[ID][stage][step]
# energy_step_data[ID][0] <-- stage 1
# energy_step_data[ID][1] <-- stage 2
#
# in LAQA
# energy_step_data[ID][0] <-- 1st selection
# energy_step_data[ID][1] <-- 2nd selection
energy_step_data[7][2-1][5-1]
```

In [None]:
energy_step_data = load_data('energy_step_data.pkl')
#energy_step_data = load_data_gz('energy_step_data.pkl.gz')

In [None]:
# ---------- energy-step data of ID 7, stage 2
print(energy_step_data[7][2-1])

## struc_step_data.pkl
**struc_step_data**: structure-step data  
- Type: dict  
  + The keys are structure ID  
  + The values are structure-step list
- String form: {0:  [ [ (stage1, step1), (stage1, step2), ...], [(stage2, step1), (stage2, step2), ...], ...]}  

Example: structure of ID 7, stage 2, step 5
``` python
# struc_step_data[ID][stage][step]
# struc_step_data[ID][0] <-- stage 1
# struc_step_data[ID][1] <-- stage 2
#
# in LAQA
# struc_step_data[ID][0] <-- 1st selection
# struc_step_data[ID][1] <-- 2nd selection
struc_step_data[7][2-1][5-1]
```

In [None]:
# Load the uncompressed pickle with load_data, like the other loader cells in this notebook.
# The gzip variant below stays disabled: load_data_gz and the gzip import are
# commented out near the top of the notebook, so calling it raises NameError
# until both are enabled.
struc_step_data = load_data('struc_step_data.pkl')
#struc_step_data = load_data_gz('struc_step_data.pkl.gz')

In [None]:
# ---------- structure-step data of ID 7, stage 2
print(struc_step_data[7][2-1])

## fs_step_data.pkl

In [None]:
force_step_data, stress_step_data = load_data('fs_step_data.pkl')
#force_step_data, stress_step_data = load_data_gz('fs_step_data.pkl.gz')

### force_step_data
**force_step_data**: force-step  data  
- Type: dict  
  + The keys are structure ID  
  + The values are force-step numpy.ndarray
- String form: {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  

Example: force of ID 7, stage 2, step 5
``` python
# force_step_data[ID][stage][step]
# force_step_data[ID][0] <-- stage 1
# force_step_data[ID][1] <-- stage 2
#
# in LAQA
# force_step_data[ID][0] <-- 1st selection
# force_step_data[ID][1] <-- 2nd selection
force_step_data[7][2-1][5-1]
```

In [None]:
print(force_step_data[7][2-1][5-1])

### stress_step_data
**stress_step_data**: stress-step  data  
- Type: dict  
  + The keys are structure ID  
  + The values are stress-step numpy.ndarray
- String form: {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  

Example: stress of ID 7, stage 2, step 5
``` python
# stress_step_data[ID][stage][step]
# stress_step_data[ID][0] <-- stage 1
# stress_step_data[ID][1] <-- stage 2
#
# in LAQA
# stress_step_data[ID][0] <-- 1st selection
# stress_step_data[ID][1] <-- 2nd selection
stress_step_data[7][2-1][5-1]
```

In [None]:
print(stress_step_data[7][2-1][5-1])