In [4]:
# Reload edited project modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# The preamble reports importing Path, os, Munch, pickle, logging, itertools.count and
# matplotlib.colors, and binding the aliases plt, mpl and np; the code below relies on
# np, Path, pickle and count coming from it.
%run ~/FKMC/notebooks/notebook_preamble.py
%matplotlib inline
# Silence NumPy floating-point underflow warnings for the rest of the session.
np.seterr(under = 'ignore')

from munch import munchify
from operator import mul
from functools import reduce
from itertools import zip_longest
import logging
import multiprocessing as mp
from FKMC.general import index_histogram_array, sort_IPRs, smooth, shapes, normalise_IPR
from FKMC.import_funcs import shape_hints, timefmt
from FKMC.import_funcs import  mean_over_MCMC, IPRandDOS, extract, get_data_funcmap_chain_ext, extractStates
from FKMC.import_funcs import incremental_get_data_funcmap_chain_ext, incremental_load
from scipy.stats import sem
from FKMC.plotting import spread
from FKMC.general import scaling_dimension

def interpolate_IPR(E_bins, unsmoothed_DOS, IPR, dIPR):
    """Fill IPR values at empty energy bins by linear interpolation, in place.

    All three data arrays are treated as a stack of 1D spectra along their last
    axis. For each spectrum, the bins where ``unsmoothed_DOS > 0`` are taken as
    the known samples and ``IPR`` / ``dIPR`` are re-evaluated on every bin with
    ``np.interp`` through those samples (values outside the occupied range are
    held at the nearest known sample, which is what ``np.interp`` does).
    Spectra without a single occupied bin are set to NaN.

    Parameters
    ----------
    E_bins : 1D array
        Bin coordinates; ``E_bins[1:]`` is used as the x value of each bin, so
        it needs ``IPR.shape[-1] + 1`` entries. ``np.interp`` expects them to
        be increasing.
    unsmoothed_DOS : ndarray
        Density of states with the same number of elements as ``IPR``.
    IPR, dIPR : ndarray of float
        Overwritten with the interpolated values.

    Returns
    -------
    None
        The result is delivered by mutating ``IPR`` and ``dIPR``.
    """
    n_bins = IPR.shape[-1]
    flat_shape = (IPR.size // n_bins, n_bins)
    _DOS = unsmoothed_DOS.reshape(flat_shape)
    _IPR = IPR.reshape(flat_shape)
    _dIPR = dIPR.reshape(flat_shape)
    energies = E_bins[1:]

    for i, DOS in enumerate(_DOS):
        occupied = DOS > 0
        if occupied.any():
            x_known = energies[occupied]
            # Boolean indexing copies, so the known samples are read out in
            # full before the row is overwritten.
            _IPR[i] = np.interp(energies, x_known, _IPR[i][occupied])
            _dIPR[i] = np.interp(energies, x_known, _dIPR[i][occupied])
        else:
            # Nothing to interpolate from: mark the whole spectrum as missing.
            _IPR[i] = np.nan
            _dIPR[i] = np.nan

    # reshape() returns a copy for some non-contiguous inputs, in which case
    # the loop above edited the copy only; push the result back to the caller.
    if not np.shares_memory(_IPR, IPR):
        IPR[...] = _IPR.reshape(IPR.shape)
    if not np.shares_memory(_dIPR, dIPR):
        dIPR[...] = _dIPR.reshape(dIPR.shape)

def fit_errors(X, Y, dY):
    """Weighted straight-line fit ``Y = m * X + c``.

    Each point is weighted by ``1 / dY``, the weighting ``polyfit`` documents
    for Gaussian uncertainties. Masked entries are dropped by
    ``np.ma.polyfit`` before fitting.

    Parameters
    ----------
    X, Y : 1D arrays of equal length
    dY : 1D array
        Uncertainty on each ``Y`` value.

    Returns
    -------
    (m, c, dm, dc)
        Slope, intercept and their standard errors, taken as the square root
        of the diagonal of the covariance matrix returned by ``polyfit``
        (which, with ``cov=True``, is scaled by the reduced chi-squared).
        Four NaNs are returned when the fit cannot be done: the linear algebra
        fails (``LinAlgError``) or ``polyfit`` rejects the input with
        ``ValueError``, e.g. because there are too few points to scale the
        covariance.
    """
    try:
        (m, c), cov = np.ma.polyfit(X, Y, deg = 1, cov=True, w = 1 / dY)
    except (np.linalg.LinAlgError, ValueError):
        # One failed fit must not abort a whole batch; report it as missing.
        return np.nan, np.nan, np.nan, np.nan
    dm, dc = np.sqrt(np.diag(cov))
    return m, c, dm, dc

def fit_no_errors(X, Y):
    """Unweighted straight-line fit ``Y = m * X + c``.

    Masked entries are dropped by ``np.ma.polyfit`` before fitting.

    Parameters
    ----------
    X, Y : 1D arrays of equal length

    Returns
    -------
    (m, c, dm, dc)
        Slope, intercept and their standard errors, taken as the square root
        of the diagonal of the covariance matrix returned by ``polyfit``.
        Four NaNs are returned when the fit cannot be done: the linear algebra
        fails (``LinAlgError``) or ``polyfit`` rejects the input with
        ``ValueError``, e.g. because there are too few points to scale the
        covariance.
    """
    try:
        (m, c), cov = np.ma.polyfit(X, Y, deg = 1, cov=True)
    except (np.linalg.LinAlgError, ValueError):
        # One failed fit must not abort a whole batch; report it as missing.
        return np.nan, np.nan, np.nan, np.nan
    dm, dc = np.sqrt(np.diag(cov))
    return m, c, dm, dc

def scaling_dimension_multidim(Ns, IPR, dIPR, use_true_errors = True, n_processes = 16):
    """Fit ``log(IPR) = m * log(N) + c`` independently for every trailing index.

    ``IPR`` is flattened to ``(IPR.shape[0], -1)`` and one straight-line fit
    against ``log(Ns)`` is run per column, distributed over a process pool.

    Parameters
    ----------
    Ns : 1D array
        System sizes; ``log(Ns)`` must broadcast against axis 0 of ``IPR``.
    IPR, dIPR : ndarray
        Values and their errors, with axis 0 indexing system size. ``dIPR``
        must have the same number of elements as ``IPR``.
    use_true_errors : bool
        If true, fit with ``fit_errors`` weighted by the relative error of
        ``IPR`` (floored at 5%); otherwise use the unweighted ``fit_no_errors``.
    n_processes : int
        Number of worker processes handed to ``multiprocessing.Pool``.

    Returns
    -------
    (m, c, dm, dc)
        Slope, intercept and their errors, each of shape ``IPR.shape[1:]``.

    Notes
    -----
    The fit functions are sent to the workers by reference.
    NOTE(review): with the ``spawn`` start method, functions defined only in
    the notebook may not be importable by the workers - confirm on the target
    platform.
    """
    original_shape = IPR.shape
    newshape = (IPR.shape[0], IPR.size // IPR.shape[0])
    finalshape = IPR.shape[1:]
    IPR = IPR.reshape(newshape)
    dIPR = dIPR.reshape(newshape)
    print(original_shape, newshape, finalshape)

    # After the transpose each row holds one fit: (n_fits, n_system_sizes).
    Y = np.log(IPR).T
    # The error on log(IPR) is the relative error dIPR / IPR; floor it at 5%
    # so that no single point gets an arbitrarily large weight.
    dY = np.maximum(dIPR.T / IPR.T, 5/100)
    X = np.broadcast_to(np.log(Ns), Y.shape)

    # starmap unpacks each row of `args` into the positional arguments of
    # `fit`, so the number of stacked arrays must match its signature.
    if use_true_errors:
        args = np.stack([X, Y, dY], axis = 1)
        fit = fit_errors
    else:
        args = np.stack([X, Y], axis = 1)
        fit = fit_no_errors
    print(args.shape)

    with mp.Pool(n_processes) as pool:
        m, c, dm, dc = np.array(pool.starmap(fit, args, chunksize = 1000)).T

    return m.reshape(finalshape), c.reshape(finalshape), dm.reshape(finalshape), dc.reshape(finalshape)

from FKMC.general import scaling_dimension

def interpolate_and_smooth(o):
    """Post-process a loaded observables object and attach scaling fits.

    Steps, all applied to ``o`` in place:

    1. ``interpolate_IPR`` fills ``o.IPR`` / ``o.dIPR`` at bins where
       ``o.DOS`` is not positive.
    2. ``o.IPR`` and ``o.DOS`` are replaced by their mean over axis 1, and
       ``o.dIPR`` / ``o.dDOS`` by the standard error of that mean
       (presumably axis 1 indexes independent repeats - TODO confirm).
    3. ``o.IPR``, ``o.dIPR`` and ``o.DOS`` are passed through ``smooth`` with
       ``scale = 0.5`` along the last axis.
    4. ``o.m, o.c, o.dm, o.dc`` are set from ``scaling_dimension``; if that
       raises, the per-element ``scaling_dimension_multidim`` is used instead.

    Because step 2 removes axis 1, calling this twice on the same object
    does not give the same result; reload ``o`` before re-running.

    Returns
    -------
    The same object ``o`` that was passed in.
    """
    interpolate_IPR(o.E_bins, unsmoothed_DOS=o.DOS, IPR=o.IPR, dIPR=o.dIPR)

    # The errors must be computed before the means overwrite the raw samples.
    o.dIPR = sem(o.IPR, axis = 1)
    o.IPR = np.mean(o.IPR, axis = 1)
    o.dDOS = sem(o.DOS, axis = 1)
    o.DOS = np.mean(o.DOS, axis = 1)

    o.IPR = smooth(o.IPR, scale = 0.5, axis = -1)
    o.dIPR = smooth(o.dIPR, scale = 0.5, axis = -1)
    o.DOS = smooth(o.DOS, scale = 0.5, axis = -1)

    try:
        o.m, o.c, o.dm, o.dc = scaling_dimension(o.Ns, o.IPR, o.dIPR, use_true_errors = True)
    except Exception:
        # Only real failures trigger the fallback; a bare except would also
        # trap KeyboardInterrupt / SystemExit and start the pool regardless.
        print('Scaling dimension fit failed on at least one value, falling back to loop')
        o.m, o.c, o.dm, o.dc = scaling_dimension_multidim(o.Ns, o.IPR, o.dIPR, use_true_errors = True)

    return o

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Imported pathlib::Path, os, munch::Munch, pickle, logging, itertools::count, matplotlib::colors
Names are pyplot = plt, matplotlib = mpl, numpy = np


In [11]:
# %%time
# Load the '1_single_small_system' run, post-process it and cache the result as a pickle.
functions = [mean_over_MCMC('Mf_moments', N_error_bins=10), IPRandDOS(), extractStates()]

o = incremental_load(
    folder='~/HPC_data/LSR_figures/1_single_small_system',
    functions=functions,
    force_reload=False,
)
o = interpolate_and_smooth(o)

pickle_path = Path('~/HPC_data/pickled_data/1_single_small_system.pickle').expanduser()
with pickle_path.open('wb') as pickle_file:
    pickle.dump(o, pickle_file)

DEBUG:FKMC.import_funcs:todo: []
INFO:FKMC.import_funcs:########################################################################

INFO:FKMC.import_funcs:Observables has keys: dict_keys(['flat', 'hints', 'processed_task_ids', 'this_run', 'datapath', 'codepath', 'py_script', 'batch_params', 'structure_names', 'structure_dims', 'N_tasks', 'chains', 'N_chains', 'original_N_steps', 'thin', 'N_steps', 'max_MC_step', 'E_bins', 'Ns', 'parameters', 'MCMC_params', 'structure_dimensions', 'chain_id', 'task_id', 'allow_pickle', 'desc', 'Rs', 'Mf_moments', 'DOS', 'IPR', 'dDOS', 'dIPR', 'state', 'time', 'accept_rates', 'proposal_rates', 'functions'])
INFO:FKMC.import_funcs:    Completed jobs:?
    MCMC Steps: 1 chains of 1000 for 1000 with thinning = 1 for 1000 recorded steps
    Burn in: 1000
    Structure_names: {'Rs': 2}
    Ns = [30 60 90]
    Runtimes: 
        Average: 3.0 minutes
        Min: 3.0 minutes
        Max: 3.0 minutes
        Total: 7.0 minutes
    
    


In [None]:
# %%time
# Same load / post-process / pickle sequence for '1_single_small_system' as the cell
# before it; this copy has no execution count.
functions = [mean_over_MCMC('Mf_moments', N_error_bins=10), IPRandDOS(), extractStates()]

o = incremental_load(
    folder='~/HPC_data/LSR_figures/1_single_small_system',
    functions=functions,
    force_reload=False,
)
o = interpolate_and_smooth(o)

pickle_path = Path('~/HPC_data/pickled_data/1_single_small_system.pickle').expanduser()
with pickle_path.open('wb') as pickle_file:
    pickle.dump(o, pickle_file)

In [13]:
# %%time
# Load the '2_correlations' run, post-process it and cache the result as a pickle.
functions = [mean_over_MCMC('Mf_moments', N_error_bins=10), IPRandDOS(), extractStates()]

o = incremental_load(
    folder='~/HPC_data/LSR_figures/2_correlations',
    functions=functions,
    force_reload=False,
)
o = interpolate_and_smooth(o)

pickle_path = Path('~/HPC_data/pickled_data/2_correlations.pickle').expanduser()
with pickle_path.open('wb') as pickle_file:
    pickle.dump(o, pickle_file)

INFO:FKMC.import_funcs:looking in /Users/tom/HPC_data/LSR_figures/2_correlations
DEBUG:FKMC.import_funcs:structure_names = ['Rs']
DEBUG:FKMC.import_funcs:structure_dims = (10,)
DEBUG:FKMC.import_funcs:Missing jobs, should all be up to 2
DEBUG:FKMC.import_funcs:task_id: chain_ids
INFO:FKMC.import_funcs:Expected number of tasks 10
INFO:FKMC.import_funcs:Measured number of tasks 10
INFO:FKMC.import_funcs:Expected number of chains None
INFO:FKMC.import_funcs:Shortest Chain 3
INFO:FKMC.import_funcs:Longest Chain 3
INFO:FKMC.import_funcs:Using chain length 3
INFO:FKMC.import_funcs:Logger keys: ['A', 'Fc', 'Ff', 'IPRs', 'Mf_moments', 'N_cumulants', 'N_sites', 'N_steps', 'Nc', 'Nf', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subc

[PosixPath('/Users/tom/HPC_data/LSR_figures/2_correlations/code/2_correlations.py')]
Tasks per chain: 3, Each doing 1000 steps,

3000 total chain length,

3000 samples,


len(config_product) = 10


DEBUG:FKMC.import_funcs:todo: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


0 1 2 3 4 5 6 7 8 

INFO:FKMC.import_funcs:########################################################################

INFO:FKMC.import_funcs:Observables has keys: dict_keys(['functions', 'flat', 'hints', 'processed_task_ids', 'this_run', 'datapath', 'codepath', 'py_script', 'batch_params', 'structure_names', 'structure_dims', 'N_tasks', 'chains', 'N_chains', 'original_N_steps', 'thin', 'N_steps', 'max_MC_step', 'E_bins', 'Ns', 'parameters', 'MCMC_params', 'structure_dimensions', 'chain_id', 'task_id', 'allow_pickle', 'desc', 'Rs', 'Mf_moments', 'DOS', 'IPR', 'dDOS', 'dIPR', 'state', 'time', 'accept_rates', 'proposal_rates'])
INFO:FKMC.import_funcs:    Completed jobs:?
    MCMC Steps: 3 chains of 1000 for 3000 with thinning = 1 for 3000 recorded steps
    Burn in: 1000
    Structure_names: {'Rs': 10}
    Ns = [150]
    Runtimes: 
        Average: 17.0 minutes
        Min: 13.0 minutes
        Max: 21.0 minutes
        Total: 2.0 hours
    


9 Scaling dimension fit failed on at least one value, falling back to loop
(1, 2000) (1, 2000) (2000,)


  (m, c), cov = np.ma.polyfit(X, Y, deg = 1, cov=True, w = 1 / dY)


(2000, 3, 1)




In [None]:
# %%time
# Load the '3_different_update_steps' run, post-process it and cache the result as a pickle.
functions = [mean_over_MCMC('Mf_moments', N_error_bins=10), IPRandDOS(), extractStates()]

o = incremental_load(
    folder='~/HPC_data/LSR_figures/3_different_update_steps',
    functions=functions,
    force_reload=False,
)
o = interpolate_and_smooth(o)

pickle_path = Path('~/HPC_data/pickled_data/3_different_update_steps.pickle').expanduser()
with pickle_path.open('wb') as pickle_file:
    pickle.dump(o, pickle_file)

INFO:FKMC.import_funcs:looking in /Users/tom/HPC_data/LSR_figures/3_different_update_steps
DEBUG:FKMC.import_funcs:structure_names = ['Algo_is', 'Rs', 'Ts']
DEBUG:FKMC.import_funcs:structure_dims = (6, 5, 3)
DEBUG:FKMC.import_funcs:Missing jobs, should all be up to 2
DEBUG:FKMC.import_funcs:task_id: chain_ids
INFO:FKMC.import_funcs:Expected number of tasks 90
INFO:FKMC.import_funcs:Measured number of tasks 90
INFO:FKMC.import_funcs:Expected number of chains None
INFO:FKMC.import_funcs:Shortest Chain 3
INFO:FKMC.import_funcs:Longest Chain 3
INFO:FKMC.import_funcs:Using chain length 3
INFO:FKMC.import_funcs:Logger keys: ['A', 'Fc', 'Ff', 'IPRs', 'Mf_moments', 'N_cumulants', 'N_sites', 'N_steps', 'Nc', 'Nf', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '

[PosixPath('/Users/tom/HPC_data/LSR_figures/3_different_update_steps/code/3_different_update_steps.py')]
Tasks per chain: 3, Each doing 1000 steps,

3000 total chain length,

3000 samples,


len(config_product) = 90


DEBUG:FKMC.import_funcs:observables.flat['state'] = [np.array(shape = (N_jobs, observables.max_MC_step, N), dtype = float64)] approx size: 0.00Gb
DEBUG:FKMC.import_funcs:observables.flat['time'] = np.array(shape = (1, 90), dtype = float64) approx size: 0.00Gb
DEBUG:FKMC.import_funcs:observables.flat['accept_rates'] = np.array(shape = (1, 90), dtype = float64) approx size: 0.00Gb
DEBUG:FKMC.import_funcs:observables.flat['proposal_rates'] = np.array(shape = (1, 90), dtype = float64) approx size: 0.00Gb
DEBUG:FKMC.import_funcs:todo: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89]


0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 

INFO:FKMC.import_funcs:########################################################################

INFO:FKMC.import_funcs:Observables has keys: dict_keys(['functions', 'flat', 'hints', 'processed_task_ids', 'this_run', 'datapath', 'codepath', 'py_script', 'batch_params', 'structure_names', 'structure_dims', 'N_tasks', 'chains', 'N_chains', 'original_N_steps', 'thin', 'N_steps', 'max_MC_step', 'E_bins', 'Ns', 'parameters', 'MCMC_params', 'structure_dimensions', 'Algo_names', 'Algos', 'chain_id', 'task_id', 'allow_pickle', 'desc', 'Algo_is', 'Rs', 'Ts', 'Mf_moments', 'DOS', 'IPR', 'dDOS', 'dIPR', 'state', 'time', 'accept_rates', 'proposal_rates'])
INFO:FKMC.import_funcs:    Completed jobs:?
    MCMC Steps: 3 chains of 1000 for 3000 with thinning = 1 for 3000 recorded steps
    Burn in: 0
    Structure_names: {'Algo_is': 6, 'Rs': 5, 'Ts': 3}
    Ns = [150]
    Runtimes: 
        Average: 15.0 minutes
        Min: 1.0 minutes
        Max: 1.0 hours
        Total: 23.0 hours
    


89 Scaling dimension fit failed on at least one value, falling back to loop
(1, 5, 3, 2000) (1, 30000) (5, 3, 2000)
(30000, 3, 1)


