In [1]:
from datetime import datetime
import traceback
import functools
import multiprocessing
from multiprocessing import Pool

import time
import os
import sys
from pathlib import Path

import numpy as np
import scipy
import pandas as pd
import ipympl
import matplotlib
#matplotlib.use('nbagg')
from matplotlib import style
style.use('ggplot')
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

from astropy import stats

import pandas as pd

import astropy.units as u
from astropy.io import fits
from mmtwfs.wfs import WFSFactory

In [2]:
# instantiate all of the WFS systems...
wfs_keys = ['f9', 'newf9', 'f5', 'mmirs', 'binospec']
wfs_systems = {}
wfs_names = {}
for w in wfs_keys:
    wfs_systems[w] = WFSFactory(wfs=w)
    wfs_names[w] = wfs_systems[w].name
plt.close('all')

# give mmirs a default
wfs_systems['mmirs'].default_mode = 'mmirs1'

# map f9 to oldf9
wfs_systems['oldf9'] = wfs_systems['f9']

In [3]:
def hist_compare(df1, df2, column, bins=100, mini=None, maxi=None):
    f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
    if mini is None:
        mini = df1[column].min()
    if maxi is None:
        maxi = df1[column].max()
    df1.hist(column=column, bins=bins, range=(mini, maxi), alpha=0.6, ax=ax1)
    df2.hist(column=column, bins=bins, range=(mini, maxi), alpha=0.6, ax=ax2)
    plt.xlabel(column)
    plt.ylabel("N")
    f.show()

In [4]:
def seeing(df, wfs):
    """
    Given a sigma derived from a gaussian fit to a WFS spot, deconvolve the systematic width from the reference image
    and relate the remainder to r_0 and thus a seeing FWHM.
    """
    # the effective wavelength of the WFS imagers is about 600-650 nm. we use 650 nm to maintain consistency
    # with the value used by the old SHWFS system.
    wave = 650 * u.nm
    wave = wave.to(u.m).value  # r_0 equation expects meters so convert

    mode = wfs.default_mode
    
    # calculate the physical size of each aperture.
    ref = wfs.modes[mode]['reference']
    apsize_pix = np.max((ref.xspacing, ref.yspacing))
    d = wfs.telescope.diameter * apsize_pix / wfs.pup_size
    d = d.to(u.m).value  # r_0 equation expects meters so convert

    # we need to deconvolve the instrumental spot width from the measured one to get the portion of the width that
    # is due to spot motion
    ref_sigma = stats.funcs.gaussian_fwhm_to_sigma * ref.fwhm
    sigma = stats.funcs.gaussian_fwhm_to_sigma * df['gauss_fwhm']
    
    corr_sigma = np.sqrt(sigma**2 - ref_sigma**2)
    corr_sigma *= wfs.pix_size.to(u.rad).value  # r_0 equation expects radians so convert

    # this equation relates the motion within a single aperture to the characteristic scale size of the
    # turbulence, r_0.
    r_0 = ( 0.179 * (wave**2) * (d**(-1/3))/corr_sigma**2 )**0.6

    # this equation relates the turbulence scale size to an expected image FWHM at the given wavelength.
    raw_seeing = u.Quantity(u.rad * 0.98 * wave / r_0, u.arcsec)

    # correct seeing to zenith
    seeing = raw_seeing / df['AIRMASS']**0.6

    return raw_seeing, seeing

In [5]:
columns = ['filename', 'obstime', 'wfskey', 'AIRMASS', 'AZ', 'EL', 'ellipticity', 'gauss_rms', 'moffat_rms', 'gauss_fwhm', 'moffat_fwhm', 'gauss_amplitude',
           'moffat_amplitude', 'moffat_alpha', 'moffat_gamma']
rootdir = Path("/Volumes/Seagate2TB/spot_analysis")
dirs = sorted(list(rootdir.glob("20*")))  # pathlib, where have you been all my life!
dataframes = []
for d in dirs:
    if d.is_dir():
        mean_df = pd.read_csv(d / "spot_reduce_mean.csv")
        wfs_df = pd.read_csv(d / "wfskeys.csv")
        df = pd.merge(mean_df, wfs_df, on='filename')
        dataframes.append(df)
data = pd.concat(dataframes, sort=True)

In [6]:
g = data.groupby('wfskey')
g.wfskey.describe()

Unnamed: 0_level_0,count,unique,top,freq
wfskey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
binospec,16996,1,binospec,16996
f5,100654,1,f5,100654
mmirs,49707,1,mmirs,49707
newf9,1803,1,newf9,1803
oldf9,54128,1,oldf9,54128


In [7]:
# re-calculate the seeing using updated spot FWHM for binospec. filter out any NaN values...
fixed_list = []
for wfskey, group in g:
    wfs = wfs_systems[wfskey]
    group = g.get_group(wfskey)
    #group.apply(seeing, axis=1, args=(wfs,))
    group['raw_seeing'], group['seeing'] = seeing(group, wfs)
    fixed_list.append(group)
fixed = pd.concat(fixed_list)
fixed = fixed[np.isfinite(fixed['seeing'])]
fixed = fixed[fixed['gauss_amplitude'] > 10.]
#fixed = fixed[fixed['ellipticity'] < 0.2]
fixed = fixed[~fixed['filename'].str.contains("Ref")]
fixed = fixed[~fixed['filename'].str.contains("ref")]
fixed = fixed[~fixed['filename'].str.contains("back.fits")]
fixed = fixed[~fixed['filename'].str.contains("tmp.fits")]
fixed = fixed[fixed['seeing'] > 0.2]  # trim out reference or bogus images
fixed = fixed[fixed['seeing'] < 5.]  # spot finding doesn't work for bad seeing where motion fills the aperture

fixed = fixed[fixed['peak'] > 2500.]  # use only bright spots

# create a date-time index so we can group and analyze the data by timestamps
fixed = fixed.set_index(pd.DatetimeIndex(fixed['obstime'], name='ut'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [8]:
fixed.hist(column='ellipticity', bins=100, range=(0.0, 1.0), alpha=0.6)
plt.title("Ellipticity")
plt.xlabel("e")
plt.ylabel("N")
plt.show()

FigureCanvasNbAgg()

In [9]:
# make raw histogram of the zenith-corrected seeing column
fixed.hist(column='seeing', bins=100, range=(0.0, 5.0), alpha=0.6)
plt.title("March 2003 through May 2018")
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
plt.show()

FigureCanvasNbAgg()

In [109]:
toogood = fixed[fixed['seeing'] < 0.25]

In [98]:
toogood.columns

Index(['ABS_HUM', 'ACTUALX', 'ACTUALY', 'ADCBUSYE', 'ADCBUSYW', 'ADCCPRME',
       'ADCCPRMW', 'ADCELEV', 'ADCEPRSE', 'ADCEPRSW',
       ...
       'xcentroid_1', 'xcentroid_2', 'xmax', 'xmin', 'ycentroid_1',
       'ycentroid_2', 'ymax', 'ymin', 'raw_seeing', 'seeing'],
      dtype='object', length=355)

In [102]:
hist_compare(fixed, toogood, 'sharpness')

FigureCanvasNbAgg()

In [110]:
len(fixed)

121483

In [128]:
plt.close('all')

In [11]:
# fit a log-normal distribution to the seeing data
sigma, loc, exp_mu = scipy.stats.lognorm.fit(fixed['seeing'])
print(sigma, loc, exp_mu)
x = np.arange(0.0, 4.0, 0.01)
p = scipy.stats.lognorm.pdf(x, sigma, loc=loc, scale=exp_mu)
mu = np.log(exp_mu)
mode = np.exp(mu - sigma**2) + loc
median = np.nanmedian(fixed['seeing'])
fit_median = exp_mu + loc

0.452597022072 0.128774229134 0.801117555299


In [13]:
# plot normalized histogram with the fit and the median/mode calculated from the fit.
plt.hist(fixed['seeing'], density=True, bins=100, range=(0.0, 4.0), label="hist", alpha=0.6)
logp = plt.plot(x, p, label="logp")
plt.xlabel("Seeing (arcsec)")
plt.ylabel("Number Density")
plt.title("March 2003 through May 2018")
plt.legend(["median=%.2f\", mode=%.2f\"" % (fit_median, mode), "median=%.2f\"" % (
    median
)])
plt.savefig("all_seeing.pdf")
plt.show()

FigureCanvasNbAgg()

In [118]:
# panda time indexing makes it easy to group by month
months = {}
legends = []
months['January'] = fixed['2015']
months['February'] = fixed['2016']
months['March'] = fixed['2017']
for m in months:
    plt.hist(months[m]['seeing'], bins=100, range=(0.0, 4.0), label=m, alpha=0.6)
    legend = "%s: %.2f\"" % (m, np.median(months[m]['seeing']))
    legends.append(legend)
plt.legend(legends)
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
#plt.title("Data and median seeing values grouped by month")
plt.show()

FigureCanvasNbAgg()

In [14]:
yearly_median = []
for i in range(2003, 2019):
    year = str(i)
    med = fixed[year]['seeing'].median()
    print(f"Median seeing for {year} is {med}")
    yearly_median.append(med)

Median seeing for 2003 is 0.7568061254333635
Median seeing for 2004 is 0.9728764760515064
Median seeing for 2005 is 0.9226822041832713
Median seeing for 2006 is 0.9027455009634998
Median seeing for 2007 is 1.001953018834358
Median seeing for 2008 is 0.9263999222372663
Median seeing for 2009 is 0.8746192925405927
Median seeing for 2010 is 0.9820726135335525
Median seeing for 2011 is 0.9325165225074862
Median seeing for 2012 is 0.9089754251139952
Median seeing for 2013 is 0.9361045079194369
Median seeing for 2014 is 0.902367266143806
Median seeing for 2015 is 0.9149721663862299
Median seeing for 2016 is 0.9411731761554689
Median seeing for 2017 is 0.8656835580917264
Median seeing for 2018 is 1.0703111104464562


In [15]:
plt.scatter(range(2003, 2019), yearly_median)
plt.title("Median Seeing by Year")
plt.xlabel("Year")
plt.ylabel("Seeing (arcsec)")
plt.savefig("yearly_median.pdf")
plt.show()

FigureCanvasNbAgg()

In [16]:
# now use the between_time() method to split the data into first and second halves of the night.
first_half = fixed.between_time(start_time='00:00', end_time='07:00')
second_half = fixed.between_time(start_time='07:00', end_time='14:00')
#first_half = months['December'].between_time(start_time='00:00', end_time='07:00')
#second_half = months['December'].between_time(start_time='07:00', end_time='14:00')
med_1st = np.median(first_half['seeing'])
med_2nd = np.median(second_half['seeing'])

In [17]:
# plot the results and show that the seeing is better later in the night...
plt.hist(first_half['seeing'], bins=100, range=(0.0, 4.0), label="1st", alpha=0.6)
plt.hist(second_half['seeing'], bins=100, range=(0.0, 4.0), label="2nd", alpha=0.6)
plt.legend(["1st Half: %.2f\"" % med_1st, "2nd Half: %.2f\"" % med_2nd])
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
#plt.title("Data and median seeing values grouped by half of the night")
plt.savefig("half_nights.pdf")
plt.show()

FigureCanvasNbAgg()

In [18]:
months = ["{:02d}".format(m) for m in range(1, 13)]
years = ["{:d}".format(y) for y in range(2003, 2019)]
months, years

(['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12'],
 ['2003',
  '2004',
  '2005',
  '2006',
  '2007',
  '2008',
  '2009',
  '2010',
  '2011',
  '2012',
  '2013',
  '2014',
  '2015',
  '2016',
  '2017',
  '2018'])

In [42]:
by_month = fixed.groupby([fixed.index.month]).median()

In [43]:
by_month['seeing']

ut
1     0.964212
2     1.016021
3     0.917360
4     1.024068
5     0.919977
6     0.946994
7     0.879885
8     0.812653
9     0.852961
10    0.893412
11    0.867227
12    0.873610
Name: seeing, dtype: float64

In [53]:
month_sizes = [len(g[1]) for g in fixed.groupby([fixed.index.month]).groups.items()]

In [21]:
mnames = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
fig, ax = plt.subplots()
x = range(12)
ax.scatter(x, by_month['seeing'])
ax.set_xticks(x)
ax.set_xticklabels(mnames, rotation=45)
ax.set_ylabel("Median Seeing (arcsec)")
plt.savefig("monthly_median.pdf")
plt.show()

FigureCanvasNbAgg()

In [57]:
fig, ax = plt.subplots()
x = range(12)
plt.bar(x, month_sizes)
ax.set_xticks(x)
ax.set_xticklabels(mnames, rotation=45)
ax.set_ylabel("Samples per Month")
plt.savefig("monthly_samples.pdf")
plt.show()

FigureCanvasNbAgg()

In [22]:
bino1_dir = Path("/Users/tim/MMT/bino/GDR1")
bino2_dir = Path("/Users/tim/MMT/bino/GDR2")
dfs = []
for c in bino1_dir.glob("*.csv.gz"):
    df = pd.read_csv(c)
    dfs.append(df)
bino1 = pd.concat(dfs)

dfs = []
for c in bino2_dir.glob("*.csv.gz"):
    df = pd.read_csv(c)
    dfs.append(df)
bino2 = pd.concat(dfs)

In [23]:
bino1['airmass'] = 1.0 / np.cos((90. - bino1['EL']).data*u.deg)
bino2['airmass'] = 1.0 / np.cos((90. - bino2['EL']).data*u.deg)
bino1['seeing'] = bino1['gauss_fwhm'] / bino1['airmass']**0.6
bino2['seeing'] = bino2['gauss_fwhm'] / bino2['airmass']**0.6
bino1['mseeing'] = bino1['moffat_fwhm'] / bino1['airmass']**0.6
bino2['mseeing'] = bino2['moffat_fwhm'] / bino2['airmass']**0.6

  """Entry point for launching an IPython kernel.
  


In [24]:
#fixed = fixed.set_index(pd.DatetimeIndex(fixed['obstime'], name='ut'))
bino1_t = bino1.set_index(pd.DatetimeIndex(bino1['DATE-OBS'], name='ut'))
bino2_t = bino2.set_index(pd.DatetimeIndex(bino2['DATE-OBS'], name='ut'))

In [177]:
#bino1 = bino1[bino1['flux'] > 100.]
#bino2 = bino2[bino2['flux'] > 100.]

In [25]:
plt.hist(bino1_t['seeing'], bins=100, range=(0.0, 4.0), label="Gaussian", alpha=0.6)
plt.hist(bino1_t['mseeing'], bins=100, range=(0.0, 4.0), label="Moffat", alpha=0.6)
plt.legend(["Gaussian: %.2f\"" % np.nanmedian(bino1_t['seeing']), "Moffat: %.2f\"" % np.nanmedian(bino1_t['mseeing'])])
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
#plt.title("Data and median seeing values grouped by half of the night")
plt.savefig("gauss_v_moffat.pdf")
plt.show()

  keep = (tmp_a >= mn)
  keep &= (tmp_a <= mx)


FigureCanvasNbAgg()

In [26]:
plt.hist(bino1_t['mseeing'], bins=100, range=(0.0, 4.0), label="Camera #1", alpha=0.6)
plt.hist(bino2_t['mseeing'], bins=100, range=(0.0, 4.0), label="Camera #2", alpha=0.6)
plt.legend(["Camera #1: %.2f\"" % np.nanmedian(bino1_t['mseeing']), "Camera #2: %.2f\"" % np.nanmedian(bino2_t['mseeing'])])
#plt.legend()
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
#plt.title("Data and median seeing values grouped by half of the night")
plt.savefig("gdr1_v_gdr2.pdf")
plt.show()

  keep = (tmp_a >= mn)
  keep &= (tmp_a <= mx)


FigureCanvasNbAgg()

In [27]:
bino_wfs = fixed[fixed['wfskey'].str.contains('binospec')]
bino2018 = bino_wfs['2018']

In [28]:
plt.hist(bino2018['seeing'], bins=100, range=(0.0, 4.0), density=True, label="WFS", alpha=0.6)
plt.hist(bino2_t['mseeing'], bins=100, range=(0.0, 4.0), density=True, label="Guider", alpha=0.6)
plt.legend(["WFS: %.2f\"" % np.nanmedian(bino2018['seeing']), "Guider: %.2f\"" % np.nanmedian(bino2_t['mseeing'])])
#plt.legend()
plt.xlabel("Seeing (arcsec)")
plt.ylabel("Number Density")
#plt.title("Data and median seeing values grouped by half of the night")
plt.savefig("wfs_v_gdr.pdf")
plt.show()

  keep = (tmp_a >= mn)
  keep &= (tmp_a <= mx)


FigureCanvasNbAgg()

In [29]:
bino2018['EXPTIME'].mean(), bino2_t['EXPTIME'].mean()

(38.892494205740775, 1.5120392333396819)

In [35]:
# make a new data frame that resamples the WFS data daily into the median, min, and max seeing for each month.
fig, ax = plt.subplots()
daily = pd.DataFrame()
daily['seeing'] = fixed.seeing.resample('M').median()
daily['max'] = fixed.seeing.resample('M').max()
daily['min'] = fixed.seeing.resample('M').min()
lowb = daily['seeing'] - daily['min']
upb = daily['max'] - daily['seeing']
#plt.errorbar(daily.index, daily['seeing'], yerr=[lowb, upb], fmt='o')
plt.scatter(daily.index, daily['seeing'])
labels = ax.get_xticklabels()
plt.setp(labels, rotation=20, fontsize=8, ha='right')
plt.ylabel("Seeing (arcsec)")
plt.title("Monthly median seeing")
plt.savefig("monthly_median.pdf")
plt.show()

FigureCanvasNbAgg()

In [76]:
hourly_medians = []
times = []
hourly_sizes = []
for h in range(1, 13):
    stime = "{:02d}:00".format(h)
    mtime = "{:02d}:30".format(h)
    etime = "{:02d}:00".format(h+1)
    tslice = fixed.between_time(start_time=stime, end_time=etime)
    med = np.median(tslice['seeing'])
    hourly_medians.append(med)
    hourly_sizes.append(len(tslice))
    times.append(mtime)

In [77]:
hourly_medians, hourly_sizes

([0.93909991575243523,
  0.96894566659421411,
  0.93584462012493286,
  0.97538610412180982,
  0.94192780707025137,
  0.90825577335575924,
  0.91099219898087158,
  0.91116620071644372,
  0.90229965986625493,
  0.89923105494991185,
  0.88155961992149767,
  0.86716906170902464],
 [7665,
  11481,
  13754,
  11453,
  12736,
  12690,
  11525,
  11129,
  10511,
  9074,
  6191,
  2572])

In [78]:
fig, ax = plt.subplots()
x = range(12)
ax.scatter(x, hourly_medians)
ax.set_xticks(x)
ax.set_xticklabels(times, rotation=45)
ax.set_ylabel("Median Seeing (arcsec)")
ax.set_title("Median Seeing vs. UT")
plt.savefig("hourly_median.pdf")
plt.show()



FigureCanvasNbAgg()

In [79]:
fig, ax = plt.subplots()
x = range(12)
plt.bar(x, hourly_sizes)
ax.set_xticks(x)
ax.set_xticklabels(times, rotation=45)
ax.set_ylabel("Samples per Hour")
plt.savefig("hourly_samples.pdf")
plt.show()



FigureCanvasNbAgg()

In [80]:
len(data), len(fixed)

(223288, 121483)