In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.ndimage import convolve
from scipy import signal
import h5py
import glob
import itertools
from ast import literal_eval
from scipy.interpolate import interp1d


def bug_array_fix(df, column):
    df_out = df[column]
    df_out = df_out.str.replace('\n',',').str.replace('  ',',').str.replace('[','').str.replace(']','').str.replace(' ',',').str.split(',')
    return df_out.apply(lambda x: (np.array(x)[np.array(x)!='']).astype(np.float))

def get_roc_params(dfx,dfy, sg_eff = 0.99):
    roc_x = np.array(np.matrix(dfx.tolist()))
    roc_y = np.array(np.matrix(dfy.tolist()))
    auc = abs(np.diff(np.array(roc_x))*np.array(roc_y)[:,:-1]).sum(axis = 1)
    bg_fix = []
    for i in range(0,roc_x.shape[0]):
        x = np.array(roc_x[i,:])
        y = np.array(roc_y[i,:])
        f = interp1d(x,y,fill_value = 'extrapolate')
        bg_fix.append(f(sg_eff))
    return auc, bg_fix, roc_y[:,index]


def roc_params(dfx, dfy, thresholds):
  bound_sup = 4
  bound_inf = -4
  basis = np.linspace(bound_inf, bound_sup, 70)
  roc_x = np.array(np.matrix(dfx.tolist()))
  roc_y = np.array(np.matrix(dfy.tolist()))
  auc = abs(np.diff(np.array(roc_x))*np.array(roc_y)[:,:-1]).sum(axis = 1)
  signal_eff = np.zeros((roc_x.shape[0], len(thresholds)))
  bg_eff = np.zeros((roc_x.shape[0], len(thresholds)))
  roc_point = np.zeros((roc_x.shape[0], len(thresholds)))
  for (n, t) in list(enumerate(thresholds)):
    for i in range(0,roc_x.shape[0]):
      x = np.array(roc_x[i,:])
      y = np.array(roc_y[i,:])
      fx = interp1d(basis, x, fill_value = 'extrapolate')
      fy = interp1d(basis, y, fill_value = 'extrapolate')
      fxy = interp1d(x, y, fill_value = 'extrapolate')
      signal_eff[i, n] = fx(t)
      bg_eff[i, n] = fy(t)
      roc_point[i, n] = fxy(t)
   
  return auc, signal_eff, bg_eff, roc_point 

In [2]:
df = pd.read_csv('result_5.csv')
df.head(1)

Unnamed: 0,SNR,w_size,Energy,Area,filter,window,ROCx,ROCy,Energy Estimated
0,-55.833883,1.0,2539.0,561.0,wierner,1.0,[1. 1. 1. 1. 1...,[0.00000000e+00 0.00000000e+00 0.00000000e+00 ...,[2.60119056e+07 2.60119056e+07 2.60119056e+07 ...


In [3]:
df['ROCx'] = bug_array_fix(df,'ROCx')
df['ROCy'] = bug_array_fix(df,'ROCy')

In [4]:
t_values    = [1,1.5,2]
new_sg_columns = ['sg_eff_'+str(i) for i in t_values]
new_bg_columns = ['bg_eff_'+str(i) for i in t_values] 
auc,sg,bg,_ = roc_params(df['ROCx'], df['ROCy'],[1,1.5,2])
efficiency_df = pd.DataFrame(np.append(sg,bg,axis = 1), columns=new_sg_columns+new_bg_columns)
df = pd.concat([df,efficiency_df],axis = 1)

  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]
  slope = (y_hi - y_lo) / (x_hi - x_lo)[:, None]


In [5]:
index = list(range(0,100))
n_size = df.shape[0]//100
indexes = n_size*index
df['image_num'] = indexes
df.head(3)

Unnamed: 0,SNR,w_size,Energy,Area,filter,window,ROCx,ROCy,Energy Estimated,sg_eff_1,sg_eff_1.5,sg_eff_2,bg_eff_1,bg_eff_1.5,bg_eff_2,image_num
0,-55.833883,1.0,2539.0,561.0,wierner,1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[2.60119056e+07 2.60119056e+07 2.60119056e+07 ...,0.0,0.0,0.0,0.999861,1.0,1.0,0
1,-54.941974,1.0,1683.0,677.0,wierner,1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[2.60088814e+07 2.60088814e+07 2.60088814e+07 ...,0.0,0.0,0.0,0.999858,1.0,1.0,1
2,-51.341407,1.0,4110.0,925.0,wierner,1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[2.60109443e+07 2.60109443e+07 2.60109443e+07 ...,0.000946,0.0,0.0,0.999874,1.0,1.0,2


In [6]:
deltas = ['delta_' + st for st in list(efficiency_df.columns) + ['SNR']]
cumulatives = [i + '_cumulative' for i in deltas]
columns = list(df.columns) + deltas + cumulatives
dfObj = pd.DataFrame(columns=columns)
dfObj

Unnamed: 0,SNR,w_size,Energy,Area,filter,window,ROCx,ROCy,Energy Estimated,sg_eff_1,...,delta_bg_eff_1.5,delta_bg_eff_2,delta_SNR,delta_sg_eff_1_cumulative,delta_sg_eff_1.5_cumulative,delta_sg_eff_2_cumulative,delta_bg_eff_1_cumulative,delta_bg_eff_1.5_cumulative,delta_bg_eff_2_cumulative,delta_SNR_cumulative


In [10]:
values = list(df.groupby(by=['image_num','filter']))
data = pd.DataFrame([],columns = values[0][1].columns)

In [12]:
data = dict((k,[]) for k in deltas+cumulatives)
data = pd.DataFrame([],columns = aux.columns)
for i in range(0,len(values)):
    aux = values[i][1]
    for key1,key2 in zip(deltas, cumulatives):
        diff = np.pad(np.diff(aux[key1.split('delta_')[1]]),(1,0),'constant')
        diff_cum = diff.cumsum()
        #data[key1] = np.append(data[key1],diff)
        #data[key2] = np.append(data[key2],diff_cum)
        aux[key1] = diff
        aux[key2] = diff_cum
        #data.columns = aux.columns
    data = data.append(aux)
    #aux = aux[aux.columns]    
    #dfObj = pd.concat([dfObj,aux],axis = 0)    
#dfObj.head(3)        

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


In [13]:
df = data
df.head()

Unnamed: 0,Area,Energy,Energy Estimated,ROCx,ROCy,SNR,bg_eff_1,bg_eff_1.5,bg_eff_2,delta_SNR,...,delta_sg_eff_1_cumulative,delta_sg_eff_2,delta_sg_eff_2_cumulative,filter,image_num,sg_eff_1,sg_eff_1.5,sg_eff_2,w_size,window
100,561.0,2539.0,[25995018.59074217 25993497.3838523 25991125....,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.000672826598, 0.000737815531, 0.00083721036...",-58.244543,0.785472,0.891431,0.950832,0.0,...,0.0,0.0,0.0,gauss,0,0.695187,0.568182,0.451426,1.0,1.0
400,561.0,2539.0,[2.59679701e+07 2.59662873e+07 2.59649060e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00169354, 0.00175852, 0.00181204, 0.0018961...",-44.448274,0.931679,0.981147,0.993949,13.796269,...,0.007353,-0.043672,-0.043672,gauss,0,0.70254,0.558712,0.407754,3.0,3.0
700,561.0,2539.0,[2.59721104e+07 2.59705256e+07 2.59690491e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00153297, 0.00159414, 0.00165148, 0.0017164...",-39.102234,0.973538,0.995806,0.999244,5.34604,...,0.005125,-0.06016,-0.103832,gauss,0,0.700312,0.503899,0.347594,5.0,5.0
1000,561.0,2539.0,[2.59768338e+07 2.59758365e+07 2.59743584e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0013533, 0.00139153, 0.00144887, 0.00151004...",-36.50931,0.990539,0.999233,0.999872,2.592925,...,-0.041667,-0.079768,-0.183601,gauss,0,0.65352,0.43037,0.267825,7.0,7.0
200,561.0,2539.0,[25995018.59074217 25993497.3838523 25991125....,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.000672826598, 0.000737815531, 0.00083721036...",-58.244543,0.785472,0.891431,0.950832,0.0,...,0.0,0.0,0.0,mean,0,0.695187,0.568182,0.451426,1.0,1.0


In [14]:
df['Intensity mean'] = df['Energy']/df['Area']
df.head(10)

Unnamed: 0,Area,Energy,Energy Estimated,ROCx,ROCy,SNR,bg_eff_1,bg_eff_1.5,bg_eff_2,delta_SNR,...,delta_sg_eff_2,delta_sg_eff_2_cumulative,filter,image_num,sg_eff_1,sg_eff_1.5,sg_eff_2,w_size,window,Intensity mean
100,561.0,2539.0,[25995018.59074217 25993497.3838523 25991125....,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.000672826598, 0.000737815531, 0.00083721036...",-58.244543,0.785472,0.891431,0.950832,0.0,...,0.0,0.0,gauss,0,0.695187,0.568182,0.451426,1.0,1.0,4.525847
400,561.0,2539.0,[2.59679701e+07 2.59662873e+07 2.59649060e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00169354, 0.00175852, 0.00181204, 0.0018961...",-44.448274,0.931679,0.981147,0.993949,13.796269,...,-0.043672,-0.043672,gauss,0,0.70254,0.558712,0.407754,3.0,3.0,4.525847
700,561.0,2539.0,[2.59721104e+07 2.59705256e+07 2.59690491e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00153297, 0.00159414, 0.00165148, 0.0017164...",-39.102234,0.973538,0.995806,0.999244,5.34604,...,-0.06016,-0.103832,gauss,0,0.700312,0.503899,0.347594,5.0,5.0,4.525847
1000,561.0,2539.0,[2.59768338e+07 2.59758365e+07 2.59743584e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0013533, 0.00139153, 0.00144887, 0.00151004...",-36.50931,0.990539,0.999233,0.999872,2.592925,...,-0.079768,-0.183601,gauss,0,0.65352,0.43037,0.267825,7.0,7.0,4.525847
200,561.0,2539.0,[25995018.59074217 25993497.3838523 25991125....,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.000672826598, 0.000737815531, 0.00083721036...",-58.244543,0.785472,0.891431,0.950832,0.0,...,0.0,0.0,mean,0,0.695187,0.568182,0.451426,1.0,1.0,4.525847
500,561.0,2539.0,[2.59623259e+07 2.59606492e+07 2.59588664e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00191144, 0.00197643, 0.00204524, 0.0021102...",-43.976256,0.928842,0.978401,0.992666,14.268287,...,-0.054813,-0.054813,mean,0,0.700535,0.553587,0.396613,3.0,3.0,4.525847
800,561.0,2539.0,[2.59730341e+07 2.59717418e+07 2.59704451e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00149857, 0.00154827, 0.00159796, 0.0016361...",-39.201473,0.976214,0.996843,0.999495,4.774783,...,-0.085561,-0.140374,mean,0,0.69385,0.473819,0.311052,5.0,5.0,4.525847
1100,561.0,2539.0,[2.59764468e+07 2.59744573e+07 2.59730518e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00136859, 0.00144505, 0.00149857, 0.0015979...",-37.090509,0.992771,0.999406,0.999877,2.110964,...,-0.081996,-0.222371,mean,0,0.594029,0.371881,0.229055,7.0,7.0,4.525847
0,561.0,2539.0,[2.60119056e+07 2.60119056e+07 2.60119056e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",-55.833883,0.999861,1.0,1.0,0.0,...,0.0,0.0,wierner,0,0.0,0.0,0.0,1.0,1.0,4.525847
300,561.0,2539.0,[2.60119056e+07 2.60119056e+07 2.60119056e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",-54.133355,0.999875,1.0,1.0,1.700528,...,0.0,0.0,wierner,0,0.0,0.0,0.0,3.0,3.0,4.525847


In [15]:
## filtrando dados para visualizacao
filtro = ['mean']   ## somente filtros listados
window = [1,3,5,7]              ## somente nestas janelas

df_filt    = df[df.window.isin(window)]
pattern    = '|'.join(filtro)
df_filt    = df_filt[df_filt['filter'].str.contains(pattern)]
new_column = df_filt['filter']  + ', window = ' + df_filt['window'].astype('str') 
df_filt.drop(['filter','window'],axis = 1,inplace = True)
df_filt['ftype']  = new_column
df_filt.head(3)

Unnamed: 0,Area,Energy,Energy Estimated,ROCx,ROCy,SNR,bg_eff_1,bg_eff_1.5,bg_eff_2,delta_SNR,...,delta_sg_eff_1_cumulative,delta_sg_eff_2,delta_sg_eff_2_cumulative,image_num,sg_eff_1,sg_eff_1.5,sg_eff_2,w_size,Intensity mean,ftype
200,561.0,2539.0,[25995018.59074217 25993497.3838523 25991125....,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.000672826598, 0.000737815531, 0.00083721036...",-58.244543,0.785472,0.891431,0.950832,0.0,...,0.0,0.0,0.0,0,0.695187,0.568182,0.451426,1.0,4.525847,"mean, window = 1.0"
500,561.0,2539.0,[2.59623259e+07 2.59606492e+07 2.59588664e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00191144, 0.00197643, 0.00204524, 0.0021102...",-43.976256,0.928842,0.978401,0.992666,14.268287,...,0.005348,-0.054813,-0.054813,0,0.700535,0.553587,0.396613,3.0,4.525847,"mean, window = 3.0"
800,561.0,2539.0,[2.59730341e+07 2.59717418e+07 2.59704451e+07 ...,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...","[0.00149857, 0.00154827, 0.00159796, 0.0016361...",-39.201473,0.976214,0.996843,0.999495,4.774783,...,-0.001337,-0.085561,-0.140374,0,0.69385,0.473819,0.311052,5.0,4.525847,"mean, window = 5.0"


## Rejeição de background 1 sigma

In [None]:
import matplotlib
key_analysis = ['Intensity mean','delta_bg_eff_1_cumulative']
fig         = plt.figure(figsize=(12,8))
x           = df_filt[key_analysis[0]]
y           = df_filt[key_analysis[1]]
label_name  = df_filt.ftype

## enconding labels
from sklearn import preprocessing
le          = preprocessing.LabelEncoder()
le.fit(label_name)
label       = le.transform(label_name)

## selecionando cores para plot de forma randomica
np.random.seed(10)
colors = np.random.rand(len(np.unique(label)),3)

## scatter plots pelas cores de cada classe (w)
plt.scatter(x, 100*y, c=label, alpha=0.3,cmap=matplotlib.colors.ListedColormap(colors))

## ajustes do plot e barra de exibicao
plt.xlabel('Energy mean',fontsize = 14)
plt.ylabel('Background detection gain (%)',fontsize = 12)
plt.title('Filter Analysis')
plt.grid()
cb = plt.colorbar()
loc = np.arange(0,max(label),max(label)/float(len(colors)))
cb.set_ticks(loc+0.25)
## criando labels na colorbar
ticklabels = []
for i in np.unique(label_name):
    ticklabels.append(i)

cb.set_ticklabels(ticklabels)

## Rejeição de background 1.5 sigma

In [None]:
import matplotlib
key_analysis = ['Intensity mean','delta_bg_eff_1.5_cumulative']
fig         = plt.figure(figsize=(12,8))
x           = df_filt[key_analysis[0]]
y           = df_filt[key_analysis[1]]
label_name  = df_filt.ftype

## enconding labels
from sklearn import preprocessing
le          = preprocessing.LabelEncoder()
le.fit(label_name)
label       = le.transform(label_name)

## selecionando cores para plot de forma randomica
np.random.seed(10)
colors = np.random.rand(len(np.unique(label)),3)

## scatter plots pelas cores de cada classe (w)
plt.scatter(x, 100*y, c=label, alpha=0.3,cmap=matplotlib.colors.ListedColormap(colors))

## ajustes do plot e barra de exibicao
plt.xlabel('Energy mean',fontsize = 14)
plt.ylabel('Background detection gain (%)',fontsize = 12)
plt.title('Filter Analysis')
plt.grid()
cb = plt.colorbar()
loc = np.arange(0,max(label),max(label)/float(len(colors)))
cb.set_ticks(loc+0.25)
## criando labels na colorbar
ticklabels = []
for i in np.unique(label_name):
    ticklabels.append(i)

cb.set_ticklabels(ticklabels)

## Rejeição de background 2 sigma

In [None]:
import matplotlib
key_analysis = ['Intensity mean','delta_bg_eff_2_cumulative']
fig         = plt.figure(figsize=(12,8))
x           = df_filt[key_analysis[0]]
y           = df_filt[key_analysis[1]]
label_name  = df_filt.ftype

## enconding labels
from sklearn import preprocessing
le          = preprocessing.LabelEncoder()
le.fit(label_name)
label       = le.transform(label_name)

## selecionando cores para plot de forma randomica
np.random.seed(10)
colors = np.random.rand(len(np.unique(label)),3)

## scatter plots pelas cores de cada classe (w)
plt.scatter(x, 100*y, c=label, alpha=0.3,cmap=matplotlib.colors.ListedColormap(colors))

## ajustes do plot e barra de exibicao
plt.xlabel('Energy mean',fontsize = 14)
plt.ylabel('Background detection gain (%)',fontsize = 12)
plt.title('Filter Analysis')
plt.grid()
cb = plt.colorbar()
loc = np.arange(0,max(label),max(label)/float(len(colors)))
cb.set_ticks(loc+0.25)
## criando labels na colorbar
ticklabels = []
for i in np.unique(label_name):
    ticklabels.append(i)

cb.set_ticklabels(ticklabels)

## Sintetizando resultados

In [18]:
for ii,t in list(enumerate(t_values)):
    filters = df_filt['ftype'].unique()
    mean_output = np.zeros((len(filters),len(t_values)))
    std_output = np.zeros_like(mean_output)
    for jj,f in list(enumerate(filters)):
        filt_df = df_filt[df_filt['ftype'] == f]['delta_bg_eff_'+ str(t)+'_cumulative']
        mean_output[jj,ii] = filt_df.mean()
        std_output[jj,ii] = filt_df.std()
    

In [19]:
mean_output

array([[0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.04085456],
       [0.        , 0.        , 0.0474581 ],
       [0.        , 0.        , 0.04783097]])

In [None]:
#df_filt[['ROCx','ROCy']]
index = abs(threshold-0).argmin()
plt.plot(df_filt['ROCx'][3],df_filt['ROCy'][3],'b', label = 'window = 1')
plt.plot(df_filt['ROCx'][4],df_filt['ROCy'][4],'r', label = 'window = 3')
plt.plot(df_filt['ROCx'][5],df_filt['ROCy'][5],'g', label = 'window = 5')
plt.plot(df_filt['ROCx'][3][index]*np.ones((70,)),df_filt['ROCy'][3],'--k',label = 'sigma = 1.5')
plt.grid()
plt.title('Intensity mean = ' + str(df_filt['Intensity mean'][3]))
plt.legend()