# Merge and Reduce Data

In [28]:
import numpy as np
import matplotlib.pyplot as plt
import re,glob,os
import dgutils
from pimcscripts import pimchelp
from collections import defaultdict
import linecache


%config InlineBackend.figure_format = 'svg'
%matplotlib inline

colors = dgutils.colors.get_linear_colors('Spectral',10)

from hg_utils import *

In [31]:
# ΔR values to process; each one maps onto a directory named `dR_eq_<value>`.
ΔR = [0.0, 2.0, 3.0, 4.0]

# Space-separated directory names, interpolated after `:::` in the
# `parallel` merge/reduce commands.
dirNames = ' '.join(f'dR_eq_{cΔR:3.1f}' for cΔR in ΔR)

# Switches that gate the individual processing cells of this notebook.
run = dict(
    delete_merge=True,
    merge=True,
    reduce=True,
    serial=False,
    copy=True,
)

## Delete the Merged Directory and Re-Generate

In [17]:
delete_merge = True

if run['delete_merge']:
    for cΔR in ΔR:
        dirName = f'{base_dir(cΔR,cylinder=False)}'
        print(f'Deleting ΔR = {cΔR}: {dirName}')
        ! rm -rf $dirName

Deleting ΔR = 0.0: /lustre/isaac/scratch/agdelma/Projects/HourGlass/w_eq_3.0/dR_eq_0.0/OUTPUT/MERGED
Deleting ΔR = 2.0: /lustre/isaac/scratch/agdelma/Projects/HourGlass/w_eq_3.0/dR_eq_2.0/OUTPUT/MERGED
Deleting ΔR = 3.0: /lustre/isaac/scratch/agdelma/Projects/HourGlass/w_eq_3.0/dR_eq_3.0/OUTPUT/MERGED
Deleting ΔR = 4.0: /lustre/isaac/scratch/agdelma/Projects/HourGlass/w_eq_3.0/dR_eq_4.0/OUTPUT/MERGED


## Merge QMC Results over Seeds

In [19]:
%%time

if run['merge']:
    merge_command = f"parallel {pimc_bin_path}/merge.py {data_dir}/{{}}/OUTPUT/ ::: {dirNames}"
    stream = os.popen(merge_command)
    output = stream.read()
    print(output)

Merged data files:
gce-estimator-02.000-100.000--007.200-0.00400-52acb562-1996-4b7e-a2b6-240bba1b9334.dat     42240
gce-position-02.000-100.000--007.200-0.00400-52acb562-1996-4b7e-a2b6-240bba1b9334.dat  15813251
gce-estimator-02.000-100.000--007.200-0.00400-52acb562-1996-4b7e-a2b6-240bba1b9334.dat     42433
gce-linedensity-02.000-100.000--007.200-0.00400-52acb562-1996-4b7e-a2b6-240bba1b9334.dat     42433
gce-linepotential-02.000-100.000--007.200-0.00400-52acb562-1996-4b7e-a2b6-240bba1b9334.dat     42433
Merged data files:
gce-estimator-02.000-100.000--007.200-0.00400-b733d35f-d1da-408f-a2b5-debd7acdbbcf.dat    126143
gce-position-02.000-100.000--007.200-0.00400-05c02280-a810-46e5-bff7-84cw3p0dR4p0.dat  15813251
gce-estimator-02.000-100.000--007.200-0.00400-b733d35f-d1da-408f-a2b5-debd7acdbbcf.dat    144667
gce-linedensity-02.000-100.000--007.200-0.00400-b733d35f-d1da-408f-a2b5-debd7acdbbcf.dat    144667
gce-linepotential-02.000-100.000--007.200-0.00400-b733d35f-d1da-408f-a2b5-debd7acdb

## Reduce QMC Results over Temperature

In [20]:
%%time

if run['reduce']:
    reduce_command = f"parallel {pimc_bin_path}/reduce-one.py -r T -L 100 -u -7.2 {data_dir}/{{}}/OUTPUT/MERGED/CYLINDER/ ::: {dirNames}"
    stream = os.popen(reduce_command)
    output = stream.read()
    print(output)

Reduced estimator over 1 Temperature value(s).
Reduced linedensity over 1 Temperature value(s).
Reduced linepotential over 1 Temperature value(s).
Reduced estimator over 1 Temperature value(s).
Reduced linedensity over 1 Temperature value(s).
Reduced linepotential over 1 Temperature value(s).
Reduced estimator over 1 Temperature value(s).
Reduced linedensity over 1 Temperature value(s).
Reduced linepotential over 1 Temperature value(s).
Reduced estimator over 1 Temperature value(s).
Reduced linedensity over 1 Temperature value(s).
Reduced linepotential over 1 Temperature value(s).

CPU times: user 1.75 ms, sys: 1.83 ms, total: 3.58 ms
Wall time: 7.05 s


## Copy Reduced Data into Repository

In [35]:
# Copy the reduced data files of every ΔR from the scratch data directory into
# the repository's ../data tree. Skipped unless run['copy'] is set.
if run['copy']:
    for cΔR in ΔR:
        # lab() is defined outside this cell; its result is used as the
        # per-ΔR directory name on both the source and destination side.
        dirName = lab(cΔR)
        # IPython expands $data_dir and $dirName from the Python namespace
        # before handing the line to the shell; the glob selects files whose
        # names contain `-reduce-`. The destination directory must already
        # exist, since cp does not create it.
        !cp $data_dir/$dirName/OUTPUT/MERGED/CYLINDER/*-reduce-*.dat ../data/$dirName/CYLINDER/ 

## Serial Alternative: Delete, Merge, and Reduce One ΔR at a Time

In [6]:
%%time
# Serial fallback: performs delete -> merge -> reduce for one ΔR after another
# inside the notebook kernel instead of via `parallel`. Skipped unless
# run['serial'] is set.
if run['serial']:
    for cΔR in ΔR:
        print(f'Processing ΔR = {cΔR}')

        # delete: remove the directory returned by base_dir(..., cylinder=False)
        # so that the merge below starts from a clean state
        dirName = f'{base_dir(cΔR,cylinder=False)}'
        ! rm -rf $dirName

        # merge: run merge.py on this ΔR's OUTPUT/ directory
        dirName = f'{data_dir}/{lab(cΔR)}/OUTPUT/'
        %run $pimc_bin_path/merge.py $dirName

        # reduce: run reduce-one.py (same options as the parallel cell) on the
        # directory returned by base_dir with its default arguments
        dirName = f'{base_dir(cΔR)}'
        %run $pimc_bin_path/reduce-one.py -r T -L 100 -u -7.2 $dirName

## Load and Compress Position Data

In [30]:
# Matches an optionally negative decimal number with an optional exponent whose
# sign is itself optional (e.g. `3`, `-0.25`, `1.5E-02`, `2e5`). A raw string is
# used so the backslash reaches the regex engine without Python escape warnings.
match_number = re.compile(r'-? *[0-9]+\.?[0-9]*(?:[Ee] *[-+]? *[0-9]+)?')

for cΔR in ΔR:
    # Locate the merged position file for this ΔR. Sorting makes the choice
    # deterministic should more than one file match the pattern.
    position_files = sorted(glob.glob(f'{base_dir(cΔR,cylinder=False)}/gce-position-*.dat'))
    if not position_files:
        raise FileNotFoundError(
            f'No gce-position-*.dat file found in {base_dir(cΔR,cylinder=False)}')
    position_file = position_files[0]

    # linecache keeps file contents in memory; drop a stale entry in case the
    # file was regenerated earlier in this kernel session.
    linecache.checkcache(position_file)

    # The second line of the file carries the discretization info.
    est_info = linecache.getline(position_file, 2)

    # get the discretization info: the first three numbers on the line are
    # stored as δ, the last one is the number of grid separations
    nums = match_number.findall(est_info)
    if not nums:
        raise ValueError(
            f'No numeric discretization info on line 2 of {position_file}')
    _δ = np.array([float(num) for num in nums[:3]])
    _num_grid_sep = int(nums[-1])

    # load the density and reshape into a cubic grid with _num_grid_sep
    # entries along each of the three axes
    _ρ = np.loadtxt(position_file).reshape([_num_grid_sep]*3)

    # save as a compressed .npz file (numpy appends the .npz extension)
    np.savez_compressed(f'../data/{lab(cΔR)}/gce-position-{lab(cΔR)}', δ=_δ, ρ=_ρ, num_grid_sep=_num_grid_sep)