In [None]:
import sys
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import numpy.matlib as matlib
from extra_data import RunDirectory
from scipy import stats

In [None]:
# Run selection and raw data access
run_sel = 193
# Zero-pad the run number to four digits instead of hard-coding a single
# leading '0', so runs below 100 or above 999 resolve to the right folder
# (identical path for run 193).
run = RunDirectory(path=f'/gpfs/exfel/d/raw/SPB/202202/p003046/r{run_sel:04d}')

# Train IDs present in the raw run
trainIDs_array = np.array(run.train_ids)

# XGM source(s)
xgm_src1 = 'SA1_XTD2_XGM/XGM/DOOCS:output' # before attenuator
xgm_src2 = 'SPB_XTD9_XGM/XGM/DOOCS:output' # after attenuator

xgm_src = xgm_src2

# Motor source(s)
motor_src = 'SPB_IRU_MOTORS/MDL/DATA_SELECT'

# Loading motor positions
motor_x = np.array(run.get_array(motor_src,'SPB_IRU_INJMOV_MOTOR_X.actualPosition.value'))
motor_y = np.array(run.get_array(motor_src,'SPB_IRU_INJMOV_MOTOR_Y.actualPosition.value'))

# Loading radial integrals
directory = '/gpfs/exfel/u/scratch/SPB/202202/p003046/data'

# NOTE(review): the processed file name keeps the original 'r0<run>' pattern;
# confirm how the producer names files for runs outside 100-999.
# Open read-only explicitly: nothing is written here, and older h5py versions
# default to a read/write mode when no mode is given.
with h5py.File(f'{directory}/r0{run_sel}_proc_radavg.h5', 'r') as rad:
    radavg = rad['entry_1']['radialavg'][:]
    trainIds = rad['entry_1']['trainId'][:]
    q = rad['entry_1']['q'][:]

# XGM intensities and the train IDs they were recorded for
xgm = np.array(run.get_array(xgm_src,'data.intensitySa1TD'))
agipd_tids = trainIds
xgm_tids = np.array(run.get_array(xgm_src,'data.trainId'))

# Print mismatch between AGIPD and XGM signals
unique_agipd = np.unique(agipd_tids)
unique_xgm = np.unique(xgm_tids)

# Train IDs that occur in the AGIPD radial averages but not in the XGM data
# (sorted, unique).
# NOTE(review): only this direction is checked; XGM trains that are missing
# from the AGIPD file are not detected here - confirm this cannot happen.
mismatch = np.setdiff1d(unique_agipd, unique_xgm)
print(f'Train IDs missing in XGM data: {mismatch}')
print(f'{mismatch.shape[0]} trains are missing in the XGM data')

# Train ID mask: 1 for trains to keep, 0 for trains listed in `mismatch`.
# Vectorised membership test instead of a Python loop over every train; kept
# as a float 1/0 array because it is compared with `== 1` further down.
good_train_mask = (~np.isin(trainIDs_array, mismatch)).astype(float)

# Remove data from AGIPD not in XGM data (one mask entry per radial-average row)
good_pulse_mask = (~np.isin(agipd_tids, mismatch)).astype(float)

# Analysis settings: pulses per train and the q bin(s) to evaluate
num_pulses = 202 # maximum number of pulses
integrateQ = True # integrate the radial average over a certain q-range

# First q index and (exclusive) end index of the integration window
q_sel, q_max = 10, 150

if not integrateQ:
    # Single q bin: q_sel stays a plain integer index
    print(f'Integrating at {q[q_sel]} 1/Å')
else:
    # q_sel becomes the array of all q indices inside the window
    q_sel = np.arange(q_sel,q_max)
    print(f'Integrating between {q[q_sel.min()]}-{q[q_sel.max()]} 1/Å')
    
# Mask out data in AGIPD radial average and train IDs not matching XGM
agipd = radavg[good_pulse_mask==1]
agipd_tids = agipd_tids[good_pulse_mask==1]

pulse_tids = trainIds[good_pulse_mask==1] # use to color the pulses

# Mask out data where train not matched between XGM and AGIPD
# NOTE(review): assumes the motor arrays hold one value per entry of
# trainIDs_array - confirm against the motor source.
motor_x = motor_x[good_train_mask==1]
motor_y = motor_y[good_train_mask==1]

# Give every pulse slot of a train that train's motor position: each value is
# repeated num_pulses times in place, producing one flat per-pulse array.
# np.repeat replaces the deprecated numpy.matlib.repmat + flatten with the
# same element order.
motor_x = np.repeat(motor_x, num_pulses)
motor_y = np.repeat(motor_y, num_pulses)

In [None]:
# Multi-train correlations: pair the XGM pulses of every train with values
# from that train's AGIPD radial average.

# Upper limit on XGM pulses kept per train; also used to derive the stride
# with which AGIPD pulses are sub-sampled (value from the original analysis).
max_xgm_pulses = 176

agipd_pulses, xgm_pulses = [], []
agipd_per_train, xgm_per_train = [], []

n_good, n_bad = 0, 0
hasnoPulse, hasPulse = [], []

# Number of paired pulses per train (stays 0 for trains that are skipped)
n_pulses = np.zeros(shape=(xgm.shape[0],),dtype=int)
pulse_tid = []

mot_x, mot_y = [],[]

for t in range(xgm.shape[0]):

    # Rows of the flat per-pulse arrays that belong to train t
    train_rows = slice(t*num_pulses, (1+t)*num_pulses)

    # Drop the first pulse of the train and pick the selected q bin(s);
    # with integrateQ the selected bins are summed into one value per pulse.
    agipd_train = agipd[train_rows][1:,q_sel]
    if integrateQ:
        agipd_train = agipd_train.sum(axis=1)

    tid_train = pulse_tids[train_rows][1:]
    mx_train = motor_x[train_rows][1:]
    my_train = motor_y[train_rows][1:]

    # Keep XGM entries above 1, capped at max_xgm_pulses
    # (presumably values <= 1 mark empty pulse slots - TODO confirm)
    sel = xgm[t]>1.
    xgm_train = xgm[t][sel][:max_xgm_pulses]

    if xgm_train.shape[0] > 0:
        # Sub-sample the AGIPD pulses with a stride derived from the XGM count
        step = max_xgm_pulses//xgm_train.shape[0]
        agipd_train = agipd_train[::step]

    # Truncate everything to a common length so the flat arrays stay aligned
    # pulse-by-pulse even if one source is short for this train. In the
    # regular case this equals the XGM pulse count.
    n_sel = min(xgm_train.shape[0], agipd_train.shape[0], tid_train.shape[0],
                mx_train.shape[0], my_train.shape[0])

    if n_sel == 0:
        # Nothing to pair for this train
        n_bad+=1
        hasnoPulse.append(t)
        continue

    n_good+=1
    hasPulse.append(t)
    n_pulses[t] = n_sel

    # Put train IDs
    id_sel = tid_train[:n_sel]
    pulse_tid.extend(id_sel.flatten())

    # Put motor positions
    mx_sel,my_sel = mx_train[:n_sel],my_train[:n_sel]
    mot_x.extend(mx_sel.flatten())
    mot_y.extend(my_sel.flatten())

    # Select pulses present in both XGM and radial averages
    agipd_sel = agipd_train[:n_sel]
    xgm_sel = xgm_train[:n_sel]

    agipd_pulses.extend(agipd_sel.flatten())
    xgm_pulses.extend(xgm_sel.flatten())

    agipd_per_train.append(list(agipd_sel))
    xgm_per_train.append(list(xgm_sel))

# Saving all pulses stacked together
agipd_pulses = np.array(agipd_pulses)
xgm_pulses = np.array(xgm_pulses)
pulse_tid = np.array(pulse_tid)
mot_x = np.array(mot_x)
mot_y = np.array(mot_y)

# Saving all pulses per train separately
# Trains may hold different numbers of pulses. A ragged nested list can only
# be stored as a 1-D object array (recent NumPy raises ValueError otherwise),
# so request dtype=object in that case; equally long trains still give the
# regular 2-D numeric array.
agipd_ragged = len({len(pulses) for pulses in agipd_per_train}) > 1
xgm_ragged = len({len(pulses) for pulses in xgm_per_train}) > 1
agipd_per_train = np.array(agipd_per_train, dtype=object if agipd_ragged else None)
xgm_per_train = np.array(xgm_per_train, dtype=object if xgm_ragged else None)

# Saving trains with/without pulses
hasPulse = np.array(hasPulse)
hasnoPulse = np.array(hasnoPulse)

print(f'{n_good} trains left for correlation analysis')
print(f'{n_bad} trains removed from correlation analysis')
print(f'Number of pulses after removal: {agipd_pulses.shape[0]}')
print(f'Trains have these numbers of pulses: {np.unique(n_pulses)}')

In [None]:
# Plotting AGIPD/XGM signals for single-train
checkSignal = False
# Index into the per-train lists, i.e. counted over the trains that were kept
# for the correlation analysis.
t = 510
n_p = len(agipd_per_train[t]) # number of pulses in the selected train
if checkSignal:
    # Plotting selected train for XGM and AGIPD
    fig_handle = plt.figure(1,constrained_layout = True,dpi=150)
    fig_handle.patch.set_facecolor('white')
    spec_handle = fig_handle.add_gridspec(nrows = 2, ncols = 2)

    # Take the train's pulses from the per-train arrays: slicing the flat
    # array with t*n_p is only correct when every train has n_p pulses.
    ax_i = fig_handle.add_subplot(spec_handle[0,:2])
    im_i = ax_i.plot(agipd_per_train[t],'b')
    ax_i.set_xlim([0,n_p])
    ax_i.set_xticks([0,n_p],minor=True)
    ax_i.set_title(f'Radial signal - (train {t})',fontsize=7)
    ax_i.set_xlabel('Pulse #')

    ax_i = fig_handle.add_subplot(spec_handle[1,:2])
    im_i = ax_i.plot(xgm_per_train[t],'r')
    ax_i.set_xlim([0,n_p])
    ax_i.set_xticks([0,n_p],minor=True)
    ax_i.set_title(f'XGM signal - (train {t})',fontsize=7)
    ax_i.set_xlabel('Pulse #');

In [None]:
# Locating trains inside the flat per-pulse arrays.
# agipd_pulses, xgm_pulses, pulse_tid, mot_x and mot_y are concatenations of
# the trains that contributed pulses (n_pulses > 0), in train order. Train k
# of that list therefore starts at the summed pulse counts of the trains
# before it; multiplying the train index by a single pulse count is only
# right when every train has the same count and no train was skipped.
good_counts = n_pulses[n_pulses > 0]
offsets = np.concatenate(([0], np.cumsum(good_counts)))

# Single-train correlation (t indexes the list of kept trains)
sel_agipd = agipd_pulses[offsets[t]:offsets[t+1]]
sel_xgm = xgm_pulses[offsets[t]:offsets[t+1]]

fit_1 = stats.linregress(sel_agipd, sel_xgm)

# Multi-train correlation over kept trains min_train .. max_train-1
min_train,max_train = 0,n_good
train_range = good_counts[min_train:max_train]
start, stop = offsets[min_train], offsets[max_train]

# Copies, so the filtering below never touches the full flat arrays
agipd_sel = agipd_pulses[start:stop].copy()
xgm_sel = xgm_pulses[start:stop].copy()
id_sel = pulse_tid[start:stop].copy()
mot_x_sel = mot_x[start:stop].copy()
mot_y_sel = mot_y[start:stop].copy()

# Check if XGM/AGIPD contains NaNs or Infs
# isfinite returns True if element is good and False if NaN or +/-Inf, so we
# invert the result to flag the bad pulses
agipdCheck = ~np.isfinite(agipd_sel)
xgmCheck = ~np.isfinite(xgm_sel)

# Report which signal is affected; the combined case is tested first so it
# is actually reachable.
if agipdCheck.any() and xgmCheck.any():
    print('Both AGIPD and XGM have NaNs!')
elif agipdCheck.any():
    print('Only AGIPD signal has NaNs!')
elif xgmCheck.any():
    print('Only XGM signal has NaNs!')
else:
    print('No NaNs present!')

# Drop every pulse that is non-finite in either signal. A single boolean mask
# keeps all five arrays aligned and also covers NaNs at different positions
# in the two signals.
badPulses = agipdCheck | xgmCheck
if badPulses.any():
    print(f'There are {np.count_nonzero(badPulses)} pulses with NaNs!')
    goodPulses = ~badPulses
    agipd_sel = agipd_sel[goodPulses]
    xgm_sel = xgm_sel[goodPulses]
    id_sel = id_sel[goodPulses]
    mot_x_sel = mot_x_sel[goodPulses]
    mot_y_sel = mot_y_sel[goodPulses]

# Performing linear fit on the NaN-free multi-train selection
fit_2 = stats.linregress(agipd_sel, xgm_sel)
          
# Saving the linear regression to a text file
# The prompt is passed positionally: the built-in input() takes no keyword
# arguments, so `prompt=` only works under the Jupyter kernel. The answer may
# be 1/0 or Y/N as the prompt advertises; anything else means "do not save".
answer = input('Save linear regression results: Y/N (1/0):').strip().lower()
saveText = answer in ('1', 'y', 'yes')
if saveText:
    # Create the output folder on first use instead of failing in open()
    out_dir = Path('linear_regression')
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f'lin_fit_run_{run_sel}_trains_{min_train}-{max_train}.txt'
    with open(out_file,'w') as handle:
        handle.write(f'number of trains used: {max_train-min_train}\n')
        handle.write(f'fitting results\n')
        handle.write(f'slope: {fit_2.slope}\n')
        handle.write(f'intercept: {fit_2.intercept}\n')
        handle.write(f'R^2: {fit_2.rvalue**2}\n')
        handle.write(f'R: {fit_2.rvalue}\n')
        handle.write(f'pvalue: {fit_2.pvalue}\n')
        handle.write(f'stderr: {fit_2.stderr}\n')
        handle.write(f'intercept_stderr: {fit_2.intercept_stderr}\n')
        
# Plotting the scatter plots: single-train fit (left) and multi-train fit (right)
fig_handle = plt.figure(2,constrained_layout = True,dpi=150)
fig_handle.patch.set_facecolor('white')
spec_handle = fig_handle.add_gridspec(nrows = 1, ncols = 2)

# Select a way to color points
# Prompts are passed positionally: the built-in input() takes no keyword
# arguments, so `prompt=` only works under the Jupyter kernel.
selectMotor = bool(int(input('Color pulses using motor (1) or train ID (0):')))
if selectMotor:
    motorXY = bool(int(input('Motor X (1) or motor Y (0):')))
    if motorXY:
        c_points = mot_x_sel # uses motor X to color individual pulses
    else:
        c_points = mot_y_sel # uses motor Y to color individual pulses
    label = 'motor position'
    fmt = '%.3f'
else:
    c_points = id_sel # uses train ID to color individual pulses
    label = 'train ID'
    fmt = '%.0f'

# Plotting selected single-train AGIPD/XGM correlation
# Drawing goes through the Axes object rather than the pyplot "current axes"
# state, so each artist lands on the intended panel.
ax_i = fig_handle.add_subplot(spec_handle[0,0])
im_i = ax_i.scatter(sel_agipd, sel_xgm,s=3,c='b',marker='o')
ax_i.plot(sel_agipd, (fit_1.slope * sel_agipd + fit_1.intercept) , "r", linewidth = 2)
ax_i.set_title(f'Correlation of XGM versus AGIPD - (train {t})',fontsize=7)
ax_i.set_xlabel('AGIPD signal')
ax_i.set_ylabel('XGM signal')
ax_i.annotate("$R^2$= " + str("%0.5f" % fit_1.rvalue**2), xy = (0.05, 0.90),
              xycoords = "axes fraction", weight = "bold", size = 10);

# Plotting selected multi-train AGIPD/XGM correlation, colored by c_points
ax_i = fig_handle.add_subplot(spec_handle[0,1])
im_i = ax_i.scatter(agipd_sel,xgm_sel,s=0.1,c=c_points,cmap='jet',marker='x',alpha=0.5)
ax_i.plot(agipd_sel, (fit_2.slope * agipd_sel + fit_2.intercept) , "r", linewidth = 1)
ax_i.set_title(f'Correlation of XGM versus AGIPD - ({max_train-min_train} trains)',fontsize=7)
ax_i.set_xlabel('AGIPD signal')
ax_i.set_ylabel('XGM signal')
ax_i.annotate("$R^2$= " + str("%0.5f" % fit_2.rvalue**2), xy = (0.05, 0.90),
              xycoords = "axes fraction", weight = "bold", size = 10)
# Colorbar with ticks only at the extremes of the coloring quantity
c_bar_i = fig_handle.colorbar(im_i,ax=ax_i,ticklocation = 'top',orientation='horizontal',format=fmt)
c_bar_i.set_ticks([c_points.min(),c_points.max()])
c_bar_i.set_label(label,fontsize=8);

In [None]:
# Plotting clusters of pulses based on motor positions (dependent on the scan direction)