# Import

In [None]:
# Show the current working directory; the data files below are opened by relative name.
!pwd

In [None]:
#---------- for python2
from __future__ import print_function
from __future__ import division

In [None]:
#---------- import
import gzip
import matplotlib.pyplot as plt
import numpy as np
import pickle

# Load function

In [None]:
def load_data(filename):
    """Unpickle and return the object stored in ``filename``.

    The file is opened in binary mode and read with ``pickle.load``; the
    return value is whatever object was pickled into the file.

    Caution: unpickling can execute arbitrary code, so only load files
    that come from a trusted source.
    """
    with open(filename, 'rb') as pkl_file:
        loaded = pickle.load(pkl_file)
    return loaded

In [None]:
#  ---------- for gzip
def load_data_gz(filename):
    """Unpickle and return the object stored in a gzip-compressed file.

    ``filename`` is opened with ``gzip.open`` in binary mode and the
    decompressed stream is read with ``pickle.load``.

    Caution: unpickling can execute arbitrary code, so only load files
    that come from a trusted source.
    """
    with gzip.open(filename, 'rb') as gz_file:
        loaded = pickle.load(gz_file)
    return loaded

# Structure data

## init_struc_data.pkl
**init_struc_data**: initial structure data  
Type: dict  
String form: {0: struc0, 1: struc1, ...}  
The keys are structure ID  
The values are structure data in pymatgen format  

Example: Initial structure data of ID 7
``` python
init_struc_data[7]
```

In [None]:
# Load the initial structures: a dict keyed by structure ID (values in pymatgen format).
init_struc_data = load_data('init_struc_data.pkl')
# Alternative when the file is gzip-compressed:
#init_struc_data = load_data_gz('init_struc_data.pkl.gz')

## opt_struc_data.pkl
**opt_struc_data**: optimized structure data  
Type: dict  
String form: {0: struc0, 1: struc1, ...}  
The keys are structure ID  
The values are structure data in pymatgen format  

Example: Optimized structure data of ID 7
``` python
opt_struc_data[7]
```

In [None]:
# Load the optimized structures: a dict keyed by structure ID (values in pymatgen format).
opt_struc_data = load_data('opt_struc_data.pkl')
# Alternative when the file is gzip-compressed:
#opt_struc_data = load_data_gz('opt_struc_data.pkl.gz')

# rslt_data.pkl
**rslt_data**: result data  
Type: DataFrame  
String form: Struc_ID Spg_num Spg_sym Spg_num_opt Spg_sym_opt Energy Magmom

In [None]:
# Load the result table (documented above as a DataFrame with an 'Energy' column).
rslt_data = load_data('rslt_data.pkl')
# ---------- sort by Energy and display the first 10 rows of the sorted table
# (presumably a pandas DataFrame, in which case the default order is ascending,
# i.e. lowest energy first -- confirm)
rslt_data.sort_values(by=['Energy']).head(10)

# kpt_data.pkl
**kpt_data**: k-point data  
Type: dict  
String form: {0: [[2, 2, 2], [4, 4, 4], [6, 6, 6], ...], 1: ...}  
The keys are structure ID  
The values are lists of the k-mesh used in each stage  

Example: k-mesh of ID 7 stage 2
``` python
# kpt_data[ID][stage]
# kpt_data[ID][0] <-- stage 1
# kpt_data[ID][1] <-- stage 2
kpt_data[7][2-1]
```

In [None]:
# Load the k-point data: a dict keyed by structure ID.
kpt_data = load_data('kpt_data.pkl')
# Display the per-stage k-mesh list of ID 7 (index 0 is stage 1)
kpt_data[7]

# Random Search

## RS_id_data.pkl
**next_id**: next structure ID to calculate  
Type: int  
String form: 5  


**id_done**: finished structure ID  
Type: 1d array  
String form: [0 1 2 3 4]  

In [None]:
# Random-search bookkeeping: the pickle holds two objects, unpacked as (next_id, id_done).
# NOTE: id_done is also assigned by the BO and LAQA load cells further down, so after
# running the whole notebook it refers to whichever of those cells ran last.
next_id, id_done = load_data('RS_id_data.pkl')

# Bayesian Optimization

## BO_id_data.pkl
**gen**: generation  
Type: int  
String form: 1  

**non_error_id**: non-error structure ID  
Type: 1d array  
String form: [0 1 2 3 4 5 6 7 8 9]  

**id_to_calc**: structure ID to calculate in the current generation  
Type: 1d array  
String form: [8 6 4 1 2]

**id_done**: finished structure ID  
Type: 1d array  
String form: [0 9 3 5 7]

In [None]:
# Bayesian-optimization bookkeeping: the pickle holds four objects, unpacked in stored order.
# NOTE: id_to_calc and id_done are also assigned by other sections' load cells
# (RS_id_data.pkl / LAQA_id_data.pkl); re-run this cell before using them for BO data.
gen, non_error_id, id_to_calc, id_done = load_data('BO_id_data.pkl')

## BO_data.pkl

In [None]:
# Load the BO data: the pickle holds two objects, unpacked as (descriptors, targets).
descriptors, targets = load_data('BO_data.pkl')

### descriptors
**descriptors**: descriptor data  
Type: 2d array  
String form: [[descriptor array of ID 0], [descriptor array of ID 1], [descriptor array of ID 3], ....]  
len(``descriptors``) = len(``non_error_id``)  
If the calculation for ID 2 failed, the descriptor data of ID 2 is removed, as in this example.  

Example: descriptor of ID 3
``` python
# descriptors[3] does not always correspond to the data of ID 3! 
descriptors[np.where(non_error_id == 3)[0][0]]
```

In [None]:
# ---------- plot the descriptor of one structure ID
cID = 3
# Rows of descriptors follow the order of non_error_id, so the row position
# has to be looked up from the ID rather than used directly.
indx = np.where(non_error_id == cID)[0]    # array of matching positions
if len(indx) > 0:
    indx = indx[0]    # first match as a scalar index
    plt.plot(descriptors[indx])
else:
    print('No data of structure ID {}'.format(cID))

### targets
**targets**: target(=energy) data  
Type: 1d array  
String form: [-10.45, -8.789, ....]  
len(``targets``) = len(``id_done``)  
The order of ID in ``targets`` follows the order of ``id_done``

e.g.,  
id_done = [7, 3, 0, 1, ...]  
targets = [energy of ID 7, energy of ID 3, energy of ID 0, energy of ID 1, ...]  

Example: target of ID 3
``` python
# targets[3] does not always correspond to the data of ID 3!
targets[np.where(id_done == 3)[0][0]]
```

# LAQA

## LAQA_id_data.pkl
**id_to_calc**: structure ID to calculate in the current selection  
Type: list  
String form: [8, 6, 4, 1, 2]

**id_select_hist**: history of ID selection  
Type: list  
String form: [[5, 3, 9, 0, 7], ..., [8, 6, 4, 1, 2]]  
[[list of first selection], [list of second selection], ...]

**id_done**: finished structure ID  
Type: list  
String form: [5, 3, 9, 0, 7]  

In [None]:
# LAQA bookkeeping: the pickle holds three objects, unpacked in stored order.
# NOTE: this rebinds id_to_calc and id_done, which the RS/BO load cells above also assign.
id_to_calc, id_select_hist, id_done = load_data('LAQA_id_data.pkl')

In [None]:
# Number of selections recorded so far (id_select_hist has one inner list per selection)
len(id_select_hist)

## LAQA_data.pkl

In [None]:
# Load the LAQA history: the pickle holds six objects, unpacked in stored order.
tot_step_select, LAQA_step, LAQA_struc, LAQA_energy, LAQA_bias, LAQA_score = load_data('LAQA_data.pkl')
# Alternative when the file is gzip-compressed:
#tot_step_select, LAQA_step, LAQA_struc, LAQA_energy, LAQA_bias, LAQA_score = load_data_gz('LAQA_data.pkl.gz')

### tot_step_select
**tot_step_select**: total number of optimization steps in each selection  
Type: list  
String form: [2000, 200, 200, ...]  
len(``tot_step_select``) = len(``id_select_hist``) + 1  
``tot_step_select`` includes 0th selection  
[0th, 1st, 2nd, ...]  

In [None]:
# ---------- total number of optimization steps (summed over all selections)
print('Total steps: {}'.format(sum(tot_step_select)))
# ---------- steps up to and including the 5th selection.
# tot_step_select[0] is the 0th selection, hence the slice end of 5+1.
print('Number of steps up to 5 selection: {}'.format(sum(tot_step_select[:5+1])))

### LAQA_step
**LAQA_step**: number of optimization steps in each ID  
Type: dict  
String form: {0: [20, 7], 1:[20, 20, 20, 5], ...}  
The keys are structure ID  
The values are list of number of optimization steps  

Example: list of optimization steps in ID 7
``` python
LAQA_step[7]
```

In [None]:
# ---------- list of optimization-step counts recorded for ID 7
print(LAQA_step[7])
# ---------- total steps in ID 7 (sum over that list)
print('Total number of optimization steps in ID 7: {}'.format(sum(LAQA_step[7])))

### LAQA_struc
**LAQA_struc**: list of structure data in each ID  
Type: dict  
String form: {0: [list of structures], 1:[list of structures], ...}  
The keys are structure ID  
The values are list of structure data in pymatgen format  
len(LAQA_struc[7]) == len(LAQA_step[7])  
If the optimization finished, LAQA_struc[7][-1] is equal to opt_struc_data[7]  

Example: list of structure data in ID 7
``` python
LAQA_struc[7]
```

In [None]:
# ---------- latest structure of ID 7 ([-1] picks the last entry of the per-ID list)
print(LAQA_struc[7][-1])

### LAQA_energy
**LAQA_energy**: list of energy data in each ID  
Type: dict  
String form: {0: [-3.287, -3.330], 1:[-3.105, -3.194, -3.233, -3.347], ...}  
The keys are structure ID  
The values are list of energy data  
len(LAQA_energy[7]) == len(LAQA_step[7])  

Example: list of energy data in ID 7
``` python
LAQA_energy[7]
```

In [None]:
# ---------- full energy list of ID 7
print(LAQA_energy[7])
# ---------- latest energy of ID 7 (last entry of the list)
print(LAQA_energy[7][-1])

### LAQA_bias
**LAQA_bias**: list of bias data in each ID  
Type: dict  
String form: {0: [0.059, 0.003], 1:[0.501, 0.210, 0.984, 0.758], ...}  
The keys are structure ID  
The values are list of bias data  
len(LAQA_bias[7]) == len(LAQA_step[7])

Example: list of bias data in ID 7
``` python
LAQA_bias[7]
```

In [None]:
# ---------- full bias list of ID 7
print(LAQA_bias[7])
# ---------- latest bias of ID 7 (last entry of the list)
print(LAQA_bias[7][-1])

### LAQA_score
**LAQA_score**: list of score data in each ID  
Type: dict  
String form: {0: [inf, 3.346, -inf], 1:[3.606, 3.404, 4.217, -inf], ...}  
The keys are structure ID  
The values are list of score data  
len(LAQA_score[7]) == len(LAQA_step[7]) + 1  
``LAQA_score`` includes 0th score (= plus infinity)  
If the optimization finished, -inf is appended to the score list  

Example: list of score data in ID 7
``` python
LAQA_score[7]
```

In [None]:
# ---------- full score list of ID 7 (starts with the 0th score)
print(LAQA_score[7])
# ---------- latest score of ID 7; this is -inf once the optimization has finished
print(LAQA_score[7][-1])

# *_step_data.pkl

## energy_step_data.pkl
**energy_step_data**: energy-step data  
Type: dict  
String form: {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  
The keys are structure ID  
The values are energy-step array

Example: energy of ID 7, stage 2, step 8
``` python
# energy_step_data[ID][stage][step]
# energy_step_data[ID][0] <-- stage 1
# energy_step_data[ID][1] <-- stage 2
energy_step_data[7][2-1][8-1]
```

In [None]:
# Load the energy-step data; the gzip-compressed file is read here.
# Alternative when the file is not compressed:
#energy_step_data = load_data('energy_step_data.pkl')
energy_step_data = load_data_gz('energy_step_data.pkl.gz')

In [None]:
# ---------- energy-step data of ID 7, stage 2
# (stage index is zero-based, so stage 2 is index 2-1)
print(energy_step_data[7][2-1])

## struc_step_data.pkl
**struc_step_data**: structure-step data  
Type: dict  
String form: {0:  [ [ (stage1, step1), (stage1, step2), ...], [(stage2, step1), (stage2, step2), ...], ...]}  
The keys are structure ID  
The values are structure-step list

Example: structure of ID 7, stage 2, step 8
``` python
# struc_step_data[ID][stage][step]
# struc_step_data[ID][0] <-- stage 1
# struc_step_data[ID][1] <-- stage 2
struc_step_data[7][2-1][8-1]
```

In [None]:
# Load the structure-step data; the gzip-compressed file is read here.
# Alternative when the file is not compressed:
#struc_step_data = load_data('struc_step_data.pkl')
struc_step_data = load_data_gz('struc_step_data.pkl.gz')

In [None]:
# ---------- structure-step data of ID 7, stage 2
# (stage index is zero-based, so stage 2 is index 2-1)
print(struc_step_data[7][2-1])

## fs_step_data.pkl

In [None]:
# Load the force/stress step data from the gzip-compressed pickle;
# it holds two objects, unpacked as (force_step_data, stress_step_data).
force_step_data, stress_step_data = load_data_gz('fs_step_data.pkl.gz')

### force_step_data
**force_step_data**: force-step  data  
Type: dict  
String form: {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  
The keys are structure ID  
The values are force-step array

Example: force of ID 7, stage 2, step 8
``` python
# force_step_data[ID][stage][step]
# force_step_data[ID][0] <-- stage 1
# force_step_data[ID][1] <-- stage 2
force_step_data[7][2-1][8-1]
```

In [None]:
# ---------- force of ID 7, stage 2, step 8 (stage and step indices are zero-based)
print(force_step_data[7][2-1][8-1])

### stress_step_data
**stress_step_data**: stress-step  data  
Type: dict  
String form: {0:  [ [array(stage1, step1), array(stage1, step2), ...], [array(stage2, step1), array(stage2, step2), ...], ... ]}  
The keys are structure ID  
The values are stress-step array

Example: stress of ID 7, stage 2, step 8
``` python
# stress_step_data[ID][stage][step]
# stress_step_data[ID][0] <-- stage 1
# stress_step_data[ID][1] <-- stage 2
stress_step_data[7][2-1][8-1]
```

In [None]:
# ---------- stress of ID 7, stage 2, step 8 (stage and step indices are zero-based)
print(stress_step_data[7][2-1][8-1])