In [None]:
from extra_data import RunDirectory 

from scipy import stats

import h5py
import sys 

import numpy as np
import matplotlib.pyplot as plt 

In [None]:
# Run selection: open the raw run and report how many pulse trains it contains.
run_sel = 195
# Zero-pad the run number to four digits instead of hard-coding an 'r0' prefix,
# so that 1-, 2- and 4-digit run numbers resolve to the right folder as well.
run = RunDirectory(path=f'/gpfs/exfel/d/raw/SPB/202202/p003046/r{run_sel:04d}')

trainIDs_array = np.array(run.train_ids)
printIDFull = False  # set to True to print every train ID of the run

if printIDFull:
    print(trainIDs_array)
print("Number of pulse trains:", trainIDs_array.shape[0])

# XGM source(s)
xgm_src1 = 'SA1_XTD2_XGM/XGM/DOOCS:output' # before attenuator
xgm_src2 = 'SPB_XTD9_XGM/XGM/DOOCS:output' # after attenuator

In [None]:
# Loading radial integrals
directory = '/gpfs/exfel/u/scratch/SPB/202202/p003046/data'

# NOTE(review): assumes processed files use a zero-padded 4-digit run number
# (r0195_proc_radavg.h5); identical to the previous 'r0' prefix for 3-digit runs.
radavg_file = f'{directory}/r{run_sel:04d}_proc_radavg.h5'

# Open explicitly read-only: without a mode, older h5py versions open the file
# in append mode, which can create or modify files on disk.
with h5py.File(radavg_file, 'r') as rad:
    entry = rad['entry_1']
    radavg = entry['radialavg'][:]
    trainIds = entry['trainId'][:]
    q = entry['q'][:]

In [None]:
# Choose which part of the radial average is compared against the XGM signal.
num_pulses = 202 # maximum number of pulses
integrateQ = True # integrate the radial average over a certain q-range

# q_sel / q_max are indices into the q axis, not q values.
q_sel, q_max = 50, 60

if not integrateQ:
    # Single q-bin: q_sel stays a scalar index.
    print(f'Integrating at {q[q_sel]} 1/Å')
else:
    # Index range [q_sel, q_max); the upper index q_max itself is excluded.
    q_sel = np.arange(q_sel,q_max)
    print(f'Integrating between {q[q_sel.min()]}-{q[q_sel.max()]} 1/Å')

In [None]:
# Multi-train correlations
#
# For every train, pair the per-pulse AGIPD radial signal with the per-pulse XGM
# intensity and collect the pairs both stacked over all trains and per train.


def _stack_trains(per_train):
    """Return the per-train pulse arrays as one NumPy array.

    Trains with equal pulse counts give a regular 2-D array (train, pulse).
    Trains with differing pulse counts give a 1-D object array holding one
    array per train, because recent NumPy versions refuse to build an array
    from ragged nested sequences implicitly.
    """
    if len({len(pulses) for pulses in per_train}) <= 1:
        return np.array(per_train)
    ragged = np.empty(len(per_train), dtype=object)
    for i, pulses in enumerate(per_train):
        ragged[i] = np.asarray(pulses)
    return ragged


agipd_pulses, xgm_pulses = [], []
agipd_per_train, xgm_per_train = [], []

n_good, n_bad = 0, 0
hasnoPulse, hasPulse = [], []

xgm = np.array(run.get_array(xgm_src2,'data.intensitySa1TD'))
n_pulses = np.zeros(shape=(xgm.shape[0],),dtype=int)  # usable pulses per train (0 = train skipped)

for t in range(xgm.shape[0]):

    # NOTE(review): assumes radavg stores num_pulses consecutive rows per train, in the
    # same train order as the XGM data; trainIds is loaded but not cross-checked - confirm.
    # The first row of every train is ignored, and only the selected q-bin(s) are kept.
    agipd_train = radavg[t*num_pulses:(1+t)*num_pulses][1:,q_sel]
    if integrateQ:
        agipd_train = agipd_train.sum(axis=1)

    # ignore "pulses" that are place-holders (those with values of 1.)
    xgm_train = xgm[t][xgm[t]>1.]

    # Keep only pulses present in BOTH signals. Truncating just one side would let the
    # stacked AGIPD and XGM arrays drift apart whenever their lengths differ.
    n_common = min(agipd_train.shape[0], xgm_train.shape[0])

    if n_common == 0:
        # no XGM pulses at all, or no radial averages available for this train
        n_bad += 1
        hasnoPulse.append(t)
        continue

    n_good += 1
    hasPulse.append(t)
    n_pulses[t] = n_common

    agipd_sel = agipd_train[:n_common]
    xgm_sel = xgm_train[:n_common]

    agipd_pulses.extend(agipd_sel)
    xgm_pulses.extend(xgm_sel)

    agipd_per_train.append(agipd_sel)
    xgm_per_train.append(xgm_sel)

# Saving all pulses stacked together
agipd_pulses = np.array(agipd_pulses)
xgm_pulses = np.array(xgm_pulses)

# Saving all pulses per train separately (entry i belongs to train hasPulse[i])
agipd_per_train = _stack_trains(agipd_per_train)
xgm_per_train = _stack_trains(xgm_per_train)

# Saving trains with/without pulses
hasPulse = np.array(hasPulse)
hasnoPulse = np.array(hasnoPulse)

print(f'{n_good} trains left for correlation analysis')
print(f'{n_bad} trains removed from correlation analysis')
print(f'Trains have these numbers of pulses: {np.unique(n_pulses)}')

In [None]:
# Plotting selected train for XGM and AGIPD
# t is a position in the per-train lists, which only contain trains that have pulses.
t = 900

# Use the per-train entries directly. Slicing the stacked arrays with
# t*n_p:(1+t)*n_p is only right if every earlier train had exactly n_p pulses.
agipd_t = np.asarray(agipd_per_train[t])
xgm_t = np.asarray(xgm_per_train[t])
n_p = len(agipd_t)

fig_handle = plt.figure(1,constrained_layout = True,dpi=150)
fig_handle.patch.set_facecolor('white')
spec_handle = fig_handle.add_gridspec(nrows = 2, ncols = 2)

# Top panel: AGIPD radial signal per pulse
ax_i = fig_handle.add_subplot(spec_handle[0,:2])
im_i = ax_i.plot(agipd_t,'b')
ax_i.set_xlim([0,n_p])
ax_i.set_xticks([0,n_p],minor=True)
ax_i.set_title(f'Radial signal - (train {t})',fontsize=7)
ax_i.set_xlabel('Pulse #')

# Bottom panel: XGM intensity per pulse
ax_i = fig_handle.add_subplot(spec_handle[1,:2])
im_i = ax_i.plot(xgm_t,'r')
ax_i.set_xlim([0,n_p])
ax_i.set_xticks([0,n_p],minor=True)
ax_i.set_title(f'XGM signal - (train {t})',fontsize=7)
ax_i.set_xlabel('Pulse #');

In [None]:
# Single-train correlation
# Take train t from the per-train lists; slicing the stacked arrays with a fixed
# stride would select the wrong pulses when trains differ in pulse count.
sel_agipd = np.asarray(agipd_per_train[t])
sel_xgm = np.asarray(xgm_per_train[t])

fit_hypothesis = 'two-sided'

fit_1 = stats.linregress(sel_agipd, sel_xgm,alternative=fit_hypothesis)

# Multi-train correlation
# Trains min_train (inclusive) to max_train (exclusive); a max_train beyond the
# number of trains is clipped by the slicing.
min_train,max_train = 0, 3100
train_range = n_pulses[min_train:max_train]

# The stacked arrays are filled train by train, so the running sum of n_pulses gives
# the offset of each train. Skipped trains have n_pulses == 0 and add no offset.
# Assumes agipd_pulses and xgm_pulses both hold n_pulses[i] entries for train i.
first_pulse = int(n_pulses[:min_train].sum())
last_pulse = first_pulse + int(train_range.sum())

# One slice per signal; every pulse appears exactly once.
agipd_sel = np.asarray(agipd_pulses[first_pulse:last_pulse])
xgm_sel = np.asarray(xgm_pulses[first_pulse:last_pulse])

fit_2 = stats.linregress(agipd_sel, xgm_sel,alternative=fit_hypothesis)

In [None]:
def _plot_correlation(ax, agipd_signal, xgm_signal, fit, title, marker):
    """Draw an XGM-versus-AGIPD scatter plot with its linear fit on ``ax``.

    ``fit`` is indexed as (slope, intercept, r-value, ...), which is how the
    results of ``stats.linregress`` are read in this notebook. Returns the
    scatter artist.
    """
    agipd_signal = np.asarray(agipd_signal)
    xgm_signal = np.asarray(xgm_signal)
    slope, intercept, r_value = fit[0], fit[1], fit[2]

    points = ax.scatter(agipd_signal, xgm_signal,s=3,c='b',marker=marker)
    ax.plot(agipd_signal, slope * agipd_signal + intercept, "r", linewidth = 2)
    ax.set_title(title,fontsize=7)
    ax.set_xlabel('AGIPD signal')
    ax.set_ylabel('XGM signal')
    ax.annotate("$R^2$= " + f"{r_value**2:.5f}", xy = (0.05, 0.90), xycoords = "axes fraction", weight = "bold", size = 10)
    return points


fig_handle = plt.figure(2,constrained_layout = True,dpi=150)
fig_handle.patch.set_facecolor('white')
spec_handle = fig_handle.add_gridspec(nrows = 1, ncols = 2)

# Left panel: pulses of the single selected train
ax_i = fig_handle.add_subplot(spec_handle[0,0])
im_i = _plot_correlation(ax_i, sel_agipd, sel_xgm, fit_1,
                         f'Correlation of XGM versus AGIPD - (train {t})', 'o')

# Right panel: pulses of all trains in the selected train range
ax_i = fig_handle.add_subplot(spec_handle[0,1])
im_i = _plot_correlation(ax_i, agipd_sel, xgm_sel, fit_2,
                         f'Correlation of XGM versus AGIPD - ({max_train-min_train} trains)', 'x');