**Testing of data generation (resampling) methods.**

In [None]:
### imports

# external modules
import sys
import os
import matplotlib.pyplot as plt
import importlib

# local modules
# (the helper modules are imported from ../utils, relative to the working directory,
#  so that directory is appended to the module search path first)
sys.path.append('../utils')
import hist_utils as hu
import dataframe_utils as dfu
import generate_data_utils as gdu
import plot_utils as pu
# reload the local modules so that edits made to them after the first import
# are picked up when this cell is re-run, without restarting the kernel
importlib.reload(hu)
importlib.reload(dfu)
importlib.reload(gdu)
importlib.reload(pu)
# same approach for the DataLoader module, which is imported from ../src
sys.path.append('../src')
import DataLoader
importlib.reload(DataLoader)

In [None]:
### load the data

# note: this cell expects a csv file at the location configured below,
#       holding only histograms of the type given by histname;
#       the tutorial read_and_write_data shows how to create such files!

datadir = '../data'
histname = 'chargeInner_PXLayer_2'
filename = 'DF2017_'+histname+'.csv'

# read the dataframe, print the shape of the raw input and prepare all histograms
dloader = DataLoader.DataLoader()
inputfile = os.path.join(datadir, filename)
df = dloader.get_dataframe_from_file( inputfile )
rawshape = dfu.get_hist_values(df)[0].shape
print('raw input data shape: {}'.format( rawshape ))
allhists = hu.preparedatafromdf(df)

# note: the 'good' and 'bad' runs listed below are tied to one type of histogram;
#       for another type they might not be good or bad at all!
#       you will need to find a set of clearly good and bad runs for your type(s) of histogram by eye.
#       good runs are rather easy to find, as there are many
#       (and almost(!) everything in the golden json is good for all types of histograms);
#       for bad runs, start from the run registry or other prior knowledge,
#       or go purely visual using the plot_histograms_loop tutorial!
goodruns = ("297056", "297177", "301449")
badruns = ("297287", "297288", "297289", "299316", "299324")

# every run is mapped to the lumisection range [[-1]]
# (presumably meaning 'all lumisections of this run' - confirm in dataframe_utils.select_runsls)
goodrunsls = {'2017': {run: [[-1]] for run in goodruns}}
badrunsls = {'2017': {run: [[-1]] for run in badruns}}

# select the good and bad runs from the dataframe and turn them into normalized histograms
goodselection = dfu.select_runsls(df, goodrunsls['2017'])
goodhists = hu.preparedatafromdf(goodselection, donormalize=True)
badselection = dfu.select_runsls(df, badrunsls['2017'])
badhists = hu.preparedatafromdf(badselection, donormalize=True)

# plot some together
pu.plot_sets(
    [goodhists, badhists],
    colorlist=['b', 'r'],
    labellist=['"good" histograms', '"bad" histograms'],
)

In [None]:
### alternatively loading full set and selecting a seed on the run

# read the full dataframe again (datadir and filename are taken from the previous cell)
dloader = DataLoader.DataLoader()
inputfile = os.path.join(datadir, filename)
df = dloader.get_dataframe_from_file( inputfile )
rawshape = dfu.get_hist_values(df)[0].shape
print('raw input data shape: {}'.format( rawshape ))

# seed: selection with lumisection range [100,100] in run 297056
# (presumably a single lumisection - confirm in dataframe_utils.select_runsls)
seedrunsls = {"297056": [[100, 100]]}
seed = dfu.select_runsls(df, seedrunsls)

# test: selection of run 297056 (not used in the cells below)
testruns = [297056]
test = dfu.select_runs(df, testruns)

# note: this overwrites allhists from the previous cell, now with donormalize=True
allhists = hu.preparedatafromdf(df, donormalize=True)
seedhist = hu.preparedatafromdf(seed, donormalize=True)

# show the first entry of the seed histograms
plt.figure()
plt.plot(seedhist[0,:])
plt.title('seed')

In [None]:
### testing section

# note: each call below is made with doplot=True.
#       the result variables (reshists, greshists, breshists) are overwritten several times;
#       only the last assignment of each is still available after this cell has run.
#       each call's return value is unpacked as three items, of which only the first is kept.

# fourier_noise_on_mean, applied to all histograms
(reshists,_,_) = gdu.fourier_noise_on_mean(allhists, nresamples=10, nonnegative=True, doplot=True)

# fourier_noise, applied to the good and the bad histograms separately
(greshists,_,_) = gdu.fourier_noise(goodhists,nresamples=10,nonnegative=True, doplot=True)
(breshists,_,_) = gdu.fourier_noise(badhists,nresamples=9,nonnegative=True,stdfactor=3., doplot=True)

# resample_bin_per_bin, applied to all histograms
(reshists,_,_) = gdu.resample_bin_per_bin(allhists,nresamples=10,nonnegative=True,smoothinghalfwidth=0, doplot=True)

# resample_similar_bin_per_bin, with all histograms as first and the good / bad histograms as second argument
(reshists,_,_) = gdu.resample_similar_bin_per_bin(allhists,goodhists,nresamples=3,nonnegative=True,keeppercentage=0.005, doplot=True)
(reshists,_,_) = gdu.resample_similar_bin_per_bin(allhists,badhists,nresamples=3,nonnegative=True,keeppercentage=0.003, doplot=True)

# resample_similar_fourier_noise, same argument pattern as above
(greshists,_,_) = gdu.resample_similar_fourier_noise(allhists,goodhists,nresamples=3,nonnegative=True,keeppercentage=0.001, doplot=True)
(breshists,_,_) = gdu.resample_similar_fourier_noise(allhists,badhists,nresamples=3,nonnegative=True,keeppercentage=0.001, doplot=True)

# resample_similar_lico; for the good histograms, the result is additionally passed through fourier_noise.
# this is the final value of greshists, which is used in the plotting cell below.
(greshists,_,_) = gdu.resample_similar_lico(allhists,goodhists,nresamples=10,nonnegative=True,keeppercentage=0.1, doplot=True)
(greshists,_,_) = gdu.fourier_noise(greshists,nresamples=1,nonnegative=True,stdfactor=8., doplot=True)
# note: unlike the other calls, this one uses nonnegative=False
(breshists,_,_) = gdu.resample_similar_lico(allhists,badhists,nresamples=1,nonnegative=False,keeppercentage=0.001, doplot=True)

# mc_sampling, starting from the seed histogram selected in the previous cell
(reshists,_,_) = gdu.mc_sampling(seedhist,nresamples=10,nMC=10000, doplot=True)

# white_noise on the bad histograms.
# this is the final value of breshists, which is used in the plotting cell below.
(breshists,_,_) = gdu.white_noise(badhists,stdfactor=3., doplot=True)

In [None]:
# plot the last resampled good and bad sets from the testing section in one figure

resampledsets = [greshists, breshists]
setcolors = ['b', 'r']
setlabels = ['"good" histograms', '"bad" histograms']
pu.plot_sets(resampledsets, colorlist=setcolors, labellist=setlabels)