# Nightly Look at Data

This notebook is a nightly look at the data. It is run and committed to <github> every night. Maybe even push it to a Slack channel? Make sure to set the HERA_DATA_DIR environment variable to the directory that contains the nightly data.

In [None]:
import numpy as np, capo.omni as omni, sys,os, optparse, glob, capo, aipy as a, capo.zsa as zsa, capo.plot as plot
%matplotlib inline
import matplotlib.pyplot as p
import seaborn as sns
# Global plot styling for the whole notebook.
sns.set_context('poster')
sns.set_style('darkgrid')
p.rcParams['figure.figsize'] = (12, 8)

# Shared palette / marker cycle and the polarizations examined below.
colors = [
    "#006BA4", "#FF7F0E", "#2CA02C", "#D61D28", "#9467BD",
    "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF",
]
symbols = ["o", "v", "^", "<", ">", "*"]
pols = ['xx', 'yy']
# Raises KeyError if HERA_DATA_DIR is not set in the environment.
data_dir = os.environ['HERA_DATA_DIR']

# Build the redundancy info from the calfile and turn every redundant group
# into a baseline string.
aa = a.cal.get_aa('hsa7458_v000', np.array([.150]))
info = omni.aa_to_info(aa, fcal=True)
antstrs = [capo.zsa.list2str(k) for k in info.get_reds()]
# The single redundant group used by the cross-correlation cells below.
antstr = '9_88,10_81,22_105,31_53,43_89,53_64,96_104,97_112,20_9,89_20,81_22,65_31,112_72,104_80'

# Useful Information
### Baselines in each redundant group.
     '31_97,53_112,64_72,96_10,80_22,104_81',
     '72_80,97_96,112_104',
     '88_97,80_43,64_10',
     '22_112,81_97,9_22,20_81,31_89,88_105,104_31,80_53,96_65,105_72,64_9,89_10,53_20,65_43',
     '10_88,43_64',
     '31_72,65_112,104_105,96_22',
     '10_20,20_64,22_88,31_80,65_104,81_9,43_31,89_53,97_22,112_105',
     '43_104,10_53,81_64,97_9,89_80,112_88',
     '9_65,22_43,64_96,88_31,105_89,72_10',
     '65_105,43_72,96_88',
     '10_104,97_53,112_64,81_80',
     '43_53,81_88,65_80,97_105,10_9,89_64',
     '9_31,20_65,22_89,53_96,64_104,72_81,105_20,81_43,88_53,112_10',
     '10_105,43_9,65_64,89_88',
     '31_81,64_105,9_72,20_112,89_97,80_9,104_20,96_89,65_10,53_22',
     '9_88,10_81,22_105,31_53,43_89,53_64,96_104,97_112,20_9,89_20,81_22,65_31,112_72,104_80',
     '22_104,81_96,112_31,72_53,97_65,105_80',
     '96_112,104_72',
     '9_105,20_22,64_88,65_89,10_97,22_72,81_112,96_31,104_53,80_64,53_9,43_10,31_20,89_81',
     '53_105,65_81,20_72,43_97,80_88,89_112,104_9,96_20,31_22',
     '9_112,20_97,53_81,80_20,96_43,88_72,104_89,31_10,64_22',
     '10_80,97_64',
     '10_22,20_88,31_64,81_105,89_9,43_20,65_53,97_72,96_80',
     '80_112,104_97',
     '31_105,43_112,96_9,89_72,104_88,65_22',
     '65_72,96_105',
     '43_105,65_88'

# Auto Correlations

In [None]:
# Load the auto-correlations of one randomly chosen xx file and its yy twin.
files = glob.glob('{0}/zen.*.*.xx.HH.uvc'.format(data_dir))
if not files:
    # Fail with a readable message instead of randint's "low >= high".
    raise IOError('No zen.*.*.xx.HH.uvc files found in {0}'.format(data_dir))
randomfilexx = files[np.random.randint(0,len(files))]
# Swap only the last '.xx.' token: a plain replace('xx','yy') would also
# rewrite any 'xx' that happens to appear in the directory part of the path.
randomfileyy = '.yy.'.join(randomfilexx.rsplit('.xx.', 1))
_, autos, _ = capo.miriad.read_files([randomfilexx,randomfileyy], 'auto', ','.join(pols))

In [None]:
# Draw one small panel per antenna showing log10|auto| for a single
# integration, with both polarizations overlaid.
ant_keys = autos.keys()
nants = len(ant_keys)
nc = 6
# Enough rows to hold every antenna at nc panels per row.
nr = int(np.ceil(nants / float(nc)))
integration = 20
polcolors = ['r', 'b']
for i, k in enumerate(ant_keys):
    ax = p.subplot(nr, nc, i + 1)
    for ip, pol in enumerate(autos[k].keys()):
        spectrum = np.log10(np.abs(autos[k][pol][integration]))
        p.plot(spectrum, color=polcolors[ip], label=pol[0])
        # Colored text tag in place of a legend.
        p.text(ip * 128, 2.3, pol[0], color=polcolors[ip], size=12)
    ax.get_xaxis().set_visible(False)
    ax.tick_params(axis='both', which='major', labelsize=10)
    p.xlim(0, 1024)
    p.ylim(-3, 3)
    p.title(k[0])
    # Only the left-most panel of each row carries a y label.
    if i % nc == 0:
        p.ylabel('Log(Jy)', size=10)
p.tight_layout()

In [1]:
# Polarization key of the first entry in `autos`.  Indexing values()/keys()
# only works where they return lists (Python 2) -- assumes `autos` is a dict.
POL = autos.values()[0].keys()[0]
def plot_rms(data, POL='xx'):
    """Plot per-antenna histograms of sqrt(auto / 2) on the current figure.

    One subplot is drawn per antenna.  Channels 0..1023 are split into
    consecutive CHUNK-wide bands and each band's histogram is filled in its
    own color; black shading marks the bins below 2**0 and above 2**2.

    Parameters
    ----------
    data : mapping keyed by antenna-pair tuples.  ``data[(i, i)][POL]`` is
        sliced as a 2-D array along its second axis (presumably
        integration x channel -- confirm against the reader).
    POL : key of the polarization to plot.
    """
    if not data:
        return  # nothing to draw; also avoids a 0/0 in the grid computation
    CHUNK = 256
    NCHAN = 1024
    BINS = 24
    rmscolors = 'bgrcmy'
    # subplot() expects integer grid dimensions; np.ceil returns floats.
    N = int(np.ceil(np.sqrt(len(data))))
    M = int(np.ceil(len(data) / float(N)))
    bins = np.logspace(-2,4,BINS,base=2.)
    ants = sorted(i for (i,_) in data)
    for cnt,i in enumerate(ants):
        ax = p.subplot(N,M,cnt+1)
        for j,ch in enumerate(range(0,NCHAN,CHUNK)):
            d = data[(i,i)][POL][:,ch:ch+CHUNK].flatten()
            h,b = np.histogram(np.sqrt(d/2),bins)
            # Floor the counts at 0.1 so empty bins survive the log y-axis.
            h = h + .1
            b = 0.5*(b[1:] + b[:-1])  # bin centers
            ax.fill_between(np.log2(b), h, .1, where=h>.1, color=rmscolors[j], alpha=.5)
        # d is the last band here; its size sets the height of the shaded
        # out-of-range regions and the upper y limit.
        bounds = np.where(bins < 2**0, d.size, np.where(bins > 2**2, d.size, 0))
        ax.fill_between(np.log2(bins), bounds, .1, where=bounds>.1, color='black', alpha=.6)
        ax.set_yscale('log')
        p.xlim(-2,3)
        p.ylim(d.size/1e2, d.size)
        p.title(str(i)+POL)
        ax.get_yaxis().set_visible(False)
        # Tick labels / axis label only on the last row of the grid.
        if cnt < (N-1)*M:
            ax.get_xaxis().set_ticklabels([])
        else: p.xlabel(r'$V_{\rm rms}$ [bits]')
        p.grid()
    p.tight_layout()

# One figure per polarization key found on the first entry of `autos`.
for ip, pol in enumerate(autos.values()[0].keys()):
    p.figure(num=ip)
    plot_rms(autos, pol)

NameError: name 'autos' is not defined

In [None]:
# Drop the autos so their memory can be reclaimed before the next load.
del autos

## Firstcal delay solutions

In [None]:
%%bash
# Run firstcal on every xx and yy file when REDO_FIRSTCAL > 0.
# An unset/empty REDO_FIRSTCAL counts as 0 instead of breaking the test.
if [ "${REDO_FIRSTCAL:-0}" -gt 0 ]; then
    # FIRSTCAL_SCRIPT is left unquoted on purpose so it may carry arguments.
    for f in "${HERA_DATA_DIR}"/zen.*.*.xx.HH.uvc; do
        [ -e "${f}" ] || continue   # glob matched nothing
        ${FIRSTCAL_SCRIPT} -C hsa7458_v000 -p xx --ex_ants="${EX_ANTS_X}" "${f}" > /dev/null
    done
    for f in "${HERA_DATA_DIR}"/zen.*.*.yy.HH.uvc; do
        [ -e "${f}" ] || continue   # glob matched nothing
        ${FIRSTCAL_SCRIPT} -C hsa7458_v000 -p yy --ex_ants="${EX_ANTS_Y}" "${f}" > /dev/null
    done
fi

In [None]:
def load_dmv(filestring, offsets=False, verbose=False):
    """Gather per-key solutions from all npz files matching a glob pattern.

    Arrays stored under keys starting with 'd' (or 'o' when ``offsets`` is
    true) are concatenated across files, flattened, and summarized.

    Parameters
    ----------
    filestring : glob pattern of the npz files to read.
    offsets : select keys starting with 'o' instead of 'd'.
    verbose : print each file name as it is read.

    Returns
    -------
    (delays, means, medians, variances) : four dicts sharing the same keys;
        ``delays`` holds the flattened arrays, the others their mean, median
        and variance.  All four are empty when nothing matches.
    """
    prefix = 'o' if offsets else 'd'
    delays = {}
    # glob order is filesystem dependent; sort so the concatenated series is
    # reproducible and follows file-name order.
    for f in sorted(glob.glob(filestring)):
        if verbose:
            print('Reading %s' % f)
        # Close the archive once its arrays have been copied out.
        with np.load(f) as npz:
            for key in npz.files:
                if not key.startswith(prefix):
                    continue
                if key in delays:
                    delays[key] = np.hstack((delays[key], npz[key]))
                else:
                    delays[key] = npz[key]

    means, medians, variances = {}, {}, {}
    for k in delays:
        delays[k] = np.array(delays[k]).flatten()
        means[k] = np.mean(delays[k])
        medians[k] = np.median(delays[k])
        variances[k] = np.var(delays[k])

    return delays, means, medians, variances
    

# Firstcal delay series and their summary statistics, keyed by polarization.
delays, means, medians, vars = {}, {}, {}, {}
for pol in pols:
    pattern = '{0}/zen.*.{1}.HH.uvc.fc.npz'.format(data_dir, pol)
    (delays[pol], means[pol],
     medians[pol], vars[pol]) = load_dmv(pattern, offsets=False)

In [None]:
# Delay solution of every antenna versus integration, one panel per
# polarization, with a shaded band of +/- one standard deviation.
fig = p.figure(figsize=(15,20))
axes = []
for ip, pol in enumerate(delays.keys()):
    # Panels after the first share their x axis with the first one.
    share = {'sharex': axes[0]} if axes else {}
    axes.append(p.subplot(2, 1, 1 + ip, **share))
    for i, k in enumerate(delays[pol].keys()):
        shade = colors[i % 10]
        spread = vars[pol][k]**.5
        p.plot(delays[pol][k], '.', color=shade, label=k[1:] + pol[0])
        p.fill_between(np.arange(delays[pol][k].size),
                       means[pol][k] - spread,
                       means[pol][k] + spread,
                       alpha=.5, color=shade)
    # Leave 20% extra room on the right (sized from the last key plotted).
    p.xlim(0, delays[pol][k].size + delays[pol][k].size*.2)
    p.ylabel('Delay (ns)')
    p.legend()
p.setp(axes[0].get_xticklabels(), visible=False)
p.subplots_adjust(hspace=.02)
p.xlabel('Integration')

axes[0].set_title('Firstcal Delay Solutions vs. Integration')

In [None]:
def gaussian(x,mu,std,scale):
    """Return ``scale`` times the normal density N(mu, std**2) evaluated at x.

    The prefactor is 1 / (sqrt(2*pi) * std); the previous sqrt(2*pi*std) was
    only right for std == 1.  To overlay the curve on a count histogram, pass
    scale = number of samples * bin width.
    """
    return scale/(np.sqrt(2*np.pi)*std) * np.exp(-.5*(x-mu)**2/(std**2))
    
# Histogram of every antenna's delay solutions, one figure per polarization.
# `pol` here is whatever the previous cell left it bound to.
nants = len(delays[pol].keys())
nc = 4
nr = int(np.ceil(nants / float(nc)))

for ip, pol in enumerate(delays.keys()):
    fig = p.figure(ip, figsize=(16,16))
    for i, k in enumerate(delays[pol].keys()):
        mean = means[pol][k]
        median = medians[pol][k]
        p.subplot(nr, nc, i + 1)
        h = p.hist(delays[pol][k], bins=30)
        p.tick_params(labelsize=10)
        p.title(k[1:] + pol + ' ;mean:%1.2f' % mean)
        p.grid(1)
        mx = np.max(h[0])
        # Gaussian overlay evaluated at the bin edges.
        p.plot(h[1], gaussian(h[1], mean, vars[pol][k]**.5, scale=mx))
        # Dashed: mean.  Dash-dot red: median.  Solid: mean -/+ one sigma.
        p.vlines(mean, 0, mx, linestyle='--')
        p.vlines(median, 0, mx, linestyle='-.', color='red')
        for sign in (-1, 1):
            p.vlines(mean + sign*np.sqrt(vars[pol][k]), 0, mx)
        p.ylim(0, mx)
    p.tight_layout(pad=1.0)
    # Figure-level axis labels shared by all panels.
    fig.text(.01, .5, 'Counts', ha='center', va='center', rotation='vertical')
    fig.text(.5, .001, 'Delays (ns)', ha='center', va='center')

# Rewriting solutions with median delays



In [None]:
%%bash 
# Unset/empty REDO_* flags count as 0 instead of breaking the numeric test.
if [ "${REDO_REWRITE:-0}" -gt 0 ]; then
    #make median files
    for pol in xx yy; do write_median_firstcal_files.py "${HERA_DATA_DIR}"/zen.*.${pol}.HH.uvc.fc.npz > /dev/null; done
fi
if [ "${REDO_FIRSTCAL_APPLY:-0}" -gt 0 ]; then
    #apply solutions
    # Stop rather than run the apply script from the wrong directory.
    cd "${HERA_DATA_DIR}" || exit 1
    # OMNI_APPLY_SCRIPT is left unquoted on purpose so it may carry arguments.
    for pol in xx yy; do ${OMNI_APPLY_SCRIPT} -p "${pol}" --omnipath="%s.HH.uvc.median.fc.npz" zen.*${pol}.HH.uvc --firstcal > /dev/null; done
fi

In [None]:
# Release the firstcal delay series and their statistics.
del delays, means, medians, vars

In [1]:
# Read the 'uvcF' files (sorted by name) for the baselines listed in antstr.
fcal_files = np.sort(glob.glob('{0}/zen.*.*.HH.uvcF'.format(data_dir)))
_, fdata, _ = capo.miriad.read_files(fcal_files, antstr, ','.join(pols))
# Conjugate so that all baselines point in the same direction.
fdata = zsa.order_data(fdata, info)
# The same steps on the '.uvc' files would give the pre-firstcal data:
#_, pdata, _ = capo.miriad.read_files(np.sort(glob.glob('{0}/zen.*.*.HH.uvc'.format(data_dir))), antstr, ','.join(pols))
#pdata = zsa.order_data(pdata,info)

NameError: name 'capo' is not defined

In [None]:
# Number of baselines read for the selected redundant group.
nbls = len(fdata)

In [None]:
# Phase versus channel of every baseline for one integration; one panel per
# polarization.
p.figure(figsize=(12,9))
integration_number = 12
for k in fdata.keys():
    for ip, pol in enumerate(fdata[k].keys()):
        p.subplot(2, 1, 1 + ip)
        phase = np.angle(fdata[k][pol][integration_number])
        p.plot(phase, label=str(k) + pol)
        p.ylabel('Phase (radians)')
        p.xlabel('Channel Number')
# Add the legends once everything has been drawn.
for i in range(len(pols)):
    p.subplot(2, 1, 1 + i)
    p.legend(fontsize='xx-small')
p.subplot(2, 1, 1)
p.title('Phase for Firstcal\'d Redundant Baselines')


In [None]:
# Phase of V_a * conj(V_b) for every pair of baselines in the redundant
# group, after firstcal.

# Materialize the keys so they can be indexed on Python 2 and 3 alike, and
# count them here so the cell does not depend on an earlier one.
bls = list(fdata.keys())
nbls = len(bls)
crosses = [(bls[k], bls[i]) for k in range(nbls) for i in range(k+1, nbls)]
ncross = len(crosses)

# Grid: nc columns and enough rows for ncross panels per polarization.
nratios = (nbls * (nbls-1)) // 2
nc = 3
div,mod = divmod(nratios,nc)
# Divide by nc itself (was a literal 3.) so the layout follows nc.
nr = int(div + np.ceil(mod/float(nc))) * len(pols)

fig = p.figure(figsize=(16,150))
for ip,pol in enumerate(pols):
    for i,k in enumerate(crosses):
        ax = p.subplot(nr,nc,i+1+ncross*ip)
        g = 1.0  # unit gain factor applied to the product
        capo.plot.waterfall(fdata[k[0]][pol]*np.conj(fdata[k[-1]][pol])*g, mode='phs', cmap='jet', mx=np.pi, drng=2*np.pi)
        p.grid(0)
        ax.yaxis.set_visible(False)
        ax.xaxis.set_visible(False)
        p.title(str(k)+pol,color='black', fontweight='bold', fontsize=16)

p.xlabel('Channel Number')
p.ylabel('Integration')
p.suptitle('Phase Ratios of Redundant Visibilities: Post Firstcal', fontsize=26)

# Single horizontal colorbar in a dedicated axes at the bottom of the figure.
cax = fig.add_axes([0.2, 0.02, 0.6, 0.01])
p.colorbar(cax=cax, orientation='horizontal')
p.tight_layout(rect=(0,.022,1,.975))

In [None]:
# Release the firstcal'd visibilities.
del fdata

## Looking at omnical'd data

In [None]:
%%bash
# Unset/empty REDO_* flags count as 0 instead of breaking the numeric test.
# Each stage runs in a subshell so its cd cannot leak into the next stage
# (a relative HERA_DATA_DIR would otherwise fail on the second cd).
# The *_SCRIPT variables are left unquoted on purpose so they may carry arguments.
if [ "${REDO_OMNIRUN:-0}" -gt 0 ]; then
    #do a loop for each polarization
    (
        cd "${HERA_DATA_DIR}" || exit 1
        for f in zen.*xx.HH.uvc; do
            [ -e "${f}" ] || continue   # glob matched nothing
            ${OMNI_RUN_SCRIPT} -C "${HERA_CAL_FILE}" -p xx --ba="${EX_ANTS_X}" "${f}" --fc2="${f}.median.fc.npz" > /dev/null
        done
        for f in zen.*yy.HH.uvc; do
            [ -e "${f}" ] || continue   # glob matched nothing
            ${OMNI_RUN_SCRIPT} -C "${HERA_CAL_FILE}" -p yy --ba="${EX_ANTS_Y}" "${f}" --fc2="${f}.median.fc.npz" > /dev/null
        done
    )
fi
if [ "${REDO_OMNIAPPLY:-0}" -gt 0 ]; then
    (
        cd "${HERA_DATA_DIR}" || exit 1
        ${OMNI_APPLY_SCRIPT} -p xx --omnipath="%s.npz" zen.*xx.HH.uvc > /dev/null
        ${OMNI_APPLY_SCRIPT} -p yy --omnipath="%s.npz" zen.*yy.HH.uvc > /dev/null
    )
fi

In [None]:
# Read the 'uvcO' files (sorted by name) for the baselines listed in antstr,
# then reorder/conjugate them with the redundancy info.
ocal_files = np.sort(glob.glob('{0}/zen.*.*.HH.uvcO'.format(data_dir)))
_, data, _ = capo.miriad.read_files(ocal_files, antstr, ','.join(pols))
data = zsa.order_data(data, info)

In [None]:
# Phase of V_a * conj(V_b) for every pair of baselines in the redundant
# group, after omnical.

# Count the baselines of *this* dataset: nbls was previously inherited from
# the firstcal cell and could disagree with `data`.  list() keeps the keys
# indexable on Python 2 and 3 alike.
bls = list(data.keys())
nbls = len(bls)
crosses = [(bls[k], bls[i]) for k in range(nbls) for i in range(k+1, nbls)]
ncross = len(crosses)

# Grid: nc columns and enough rows for ncross panels per polarization.
nratios = (nbls * (nbls-1)) // 2
nc = 3
div,mod = divmod(nratios,nc)
# Divide by nc itself (was a literal 3.) so the layout follows nc.
nr = int(div + np.ceil(mod/float(nc))) * len(pols)

fig = p.figure(figsize=(16,150))
for ip,pol in enumerate(pols):
    for i,k in enumerate(crosses):
        ax = p.subplot(nr,nc,i+1+ncross*ip)
        g = 1.0  # unit gain factor applied to the product
        capo.plot.waterfall(data[k[0]][pol]*np.conj(data[k[-1]][pol])*g, mode='phs', cmap='jet', mx=np.pi, drng=2*np.pi)
        p.grid(0)
        ax.yaxis.set_visible(False)
        ax.xaxis.set_visible(False)
        p.title(str(k)+pol,color='black', fontweight='bold', fontsize=16)

p.xlabel('Channel Number')
p.ylabel('Integration')
p.suptitle('Phase Ratios of Redundant Visibilities: Post Omnical', fontsize=26)
# Single horizontal colorbar in a dedicated axes at the bottom of the figure.
cax = fig.add_axes([0.2, 0.02, 0.6, 0.01])
p.colorbar(cax=cax, orientation='horizontal')
p.tight_layout(rect=(0,.022,1,.975))

In [None]:
# Release the omnical'd visibilities.
del data

In [None]:
# Pick one random xx file (and its yy twin) and read the cross-correlations
# at three stages: raw, firstcal'd ('F' suffix) and omnical'd ('O' suffix).
files=glob.glob('{0}/zen.*.*.xx.HH.uvc'.format(data_dir))
if not files:
    # Fail with a readable message instead of randint's "low >= high".
    raise IOError('No zen.*.*.xx.HH.uvc files found in {0}'.format(data_dir))
filexx = files[np.random.randint(0,len(files))]
# Swap only the last '.xx.' token: a plain replace('xx','yy') would also
# rewrite any 'xx' that happens to appear in the directory part of the path.
fileyy = '.yy.'.join(filexx.rsplit('.xx.', 1))
# Single pre-formatted argument: prints the same text on Python 2 and 3.
print('%s %s' % (filexx, fileyy))
_, view_data, _ = capo.miriad.read_files([filexx,fileyy], 'cross', ','.join(pols) ,verbose=False)
_, view_data_fcal, _ = capo.miriad.read_files([filexx+'F',fileyy+'F'], 'cross', ','.join(pols) ,verbose=False)
_, view_data_ocal, _ = capo.miriad.read_files([filexx+'O', fileyy+'O'], 'cross', ','.join(pols) ,verbose=False)
view_data = zsa.order_data(view_data,info)
view_data_fcal = zsa.order_data(view_data_fcal,info)
view_data_ocal = zsa.order_data(view_data_ocal,info)

In [None]:
# 2x3 grid: one row per polarization, columns = raw / firstcal / omnical.
fig = p.figure(figsize=(15,8))
for ip, pol in enumerate(pols):
    base = 231 + 3*ip        # first panel of this polarization's row
    first_row = (ip == 0)    # only the top row gets column titles

    p.subplot(base)
    plot.omni_view(info.get_reds(), view_data, pol, int=10, chan=425, cursor=False)
    if first_row:
        p.title('Pre Calibration')

    p.subplot(base + 1)
    plot.omni_view(info.get_reds(), view_data_fcal, pol, int=10, chan=425, cursor=False)
    if first_row:
        p.title('Post Firstcal')

    p.subplot(base + 2)
    plot.omni_view(info.get_reds(), view_data_ocal, pol, int=10, chan=425, cursor=False)
    if first_row:
        p.title('Post Omnical')
p.tight_layout(rect=(0,0,1,1))

In [None]:
# Release the three copies of the cross-correlation data.
del view_data, view_data_fcal, view_data_ocal

### See omnical notebook for a look at the solutions

In [None]:
# m,g,v,x = omni.from_npz(glob.glob('{0}/zen.*.*.npz'.format(data_dir)))

In [None]:
# def plot_chisq(pol='x'):
#     naxes = 19 # 1 bad antenna
#     r = np.ceil(np.sqrt(naxes)) + 1
#     c = np.ceil(naxes / float(r))
#     i=0
#     print r,c
#     for k in m:
#         if k.startswith('c') and k.endswith(pol):
#             ax = p.subplot(r,c,i+1)
#             p.title(k)
#             p.grid(0)
#             im = capo.plot.waterfall(m[k], mx =0, drng = 5, cmap='jet')
#             if divmod(i,c)[-1] != 0:  ax.yaxis.set_visible(False) 
#             else: p.ylabel('Integration')
#     #        if divmod(i,c)[0] != r-1: ax.xaxis.set_visible(False)
#             if i < (r-1)*c:
#                 ax.get_xaxis().set_ticklabels([])
#             else: p.xlabel('Frequency Bins')
#             i+=1
#     #rect=0.2, 0.02, 0.6, 0.01
#     #cax = fig.add_axes(rect)
#     #p.colorbar(im,cax=cax,orientation='horizontal')
#     p.tight_layout(rect=(0,.022,1,.975))

# for ip,pol in enumerate('xy'):
#     fig = p.figure(ip,figsize=(16,16))
#     plot_chisq(pol=pol)
    


In [None]:
# p.figure(figsize=(12,10))
# for ant in g['x'].keys():
#     p.subplot(211)
#     p.semilogy(range(1024),np.abs(g['x'][ant][10]),label=str(ant))
#     p.ylim(.05,5)
#     p.title('Omnical Gains (single integration)')
#     p.ylabel('Log Amplitude')
#     p.subplot(212)
#     p.title('Omnical Gains in Phase(single integration)')
#     p.xlabel('Channel Number')
#     p.ylabel('Phase (radians)')
#     p.plot(np.angle(g['x'][ant][10]),label=str(ant))

In [None]:
# p.figure(figsize=(11,8))
# fqs = np.linspace(.1,.2,1024)
# for k in g['x']:
#     p.plot(fqs[100:-100],np.unwrap(np.angle(g['x'][k][10,100:-100])))
        
# p.ylabel('Phase (radians)')
# p.xlabel('Frequency (GHz)')
# p.title('Unwrapped gain solutions in phase')

In [None]:
# #pick some random baselines
# bls = []
# for k in info.get_reds():
#     bls+=k
# bls = np.array(bls)
# plot_these = bls[np.random.randint(0,len(bls),5)]

# #get the mappings of visibilities to the stored mdl-visibilities in the 
# redmapping = {}
# for k in v['xx'].keys():
#     for gp in info.get_reds():
#         if k in gp:
#             for kk in gp : redmapping[kk] = k
# #get the baselines in string format.                
# s = ''
# for k in plot_these:
#     s += str(k[0])+'_'+str(k[1])+','
# s = s[:-1]

In [None]:
#stack = np.concatenate([data[k]['xx'] for k in data if not 81 in k]).reshape(-1,840,1024)

In [None]:
# fig = p.figure(figsize=(16,20))
# for i,st in enumerate(stack):
#     res = st - v['xx'][redmapping[keys[i]]]
#     ax = p.subplot(4,5,i+1)
#     p.title('{0}'.format(keys[i]))
#     plot.waterfall(np.vstack((st,res)), mode='phs', cmap='jet', mx=np.pi, drng=2*np.pi)
#     ax.get_xaxis().set_visible(False)
#     ax.get_yaxis().set_visible(False)
    
# p.tight_layout(rect=(0,0,1,.96))    
# p.suptitle('Deviations From Redundancy', fontsize=26)