In [1]:
from datetime import datetime

import time
import os
import sys
from pathlib import Path

import numpy as np
from scipy.stats import lognorm
import pandas as pd

from astropy import stats
import astropy.units as u

import matplotlib
matplotlib.use('nbagg')
from matplotlib import style
style.use('ggplot')
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

In [3]:
rootdir = Path("/Volumes/LaCie 8TB/wfsdat")

# The list file (generated beforehand, e.g. with `find`) holds one CSV path per
# line; each path is joined onto rootdir before being read.
with open(rootdir / "reanalyze_csvs_2019_q2.txt") as f:
    csvs = f.readlines()

# Read every listed CSV with pandas. Blank lines (for example a trailing empty
# line in the list file) are skipped: an empty name would resolve to rootdir
# itself and make pd.read_csv fail on a directory.
frames = [pd.read_csv(rootdir / name.rstrip()) for name in csvs if name.strip()]

# Concatenate the per-file frames into one big frame containing all of the data.
# ignore_index is not set, so each file's row labels are kept as-is.
data = pd.concat(frames)

In [4]:
# Group the rows by the 'wfs' column and describe that column per group,
# which gives the number of rows recorded for each value.
g = data.groupby('wfs')
g['wfs'].describe()

Unnamed: 0_level_0,count,unique,top,freq
wfs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
binospec,1448,1,binospec,1448
mmirs,1546,1,mmirs,1546
newf9,86,1,newf9,86


In [5]:
# Disabled: re-calculation of the seeing using updated spot FWHM for binospec.
# Kept for reference; it relies on `wfs_systems` and `seeing`, which are not
# defined in this notebook as shown.
#fixed_list = []
#for wfskey, group in g:
#    wfs = wfs_systems[wfskey]
#    group = g.get_group(wfskey)
#    #group.apply(seeing, axis=1, args=(wfs,))
#    group['fixed_raw_seeing'], group['fixed_seeing'] = seeing(group, wfs)
#    fixed_list.append(group)
#fixed = pd.concat(fixed_list)
#fixed.rename(index=str, columns={"seeing": "orig_seeing", "raw seeing": "orig_raw_seeing"}, inplace=True)
#fixed.rename(index=str, columns={"fixed_seeing": "seeing", "fixed_raw_seeing": "raw seeing"}, inplace=True)

# Keep only rows with a finite seeing strictly between 0 and 4, a positive
# fwhm, and a 'time' value that compares greater than "2019-01".
keep = (
    np.isfinite(data['seeing'])
    & (data['fwhm'] > 0.)
    & (data['seeing'] > 0.)
    & (data['seeing'] < 4.)
    & (data['time'] > "2019-01")
)
fixed = data[keep]

In [5]:
# Display the filtered rows ordered by their 'time' value (the result is not stored).
fixed.sort_values('time')

Unnamed: 0,time,wfs,file,exptime,airmass,xcen,ycen,seeing,raw_seeing,fwhm,wavefront_rms,residual_rms
0,2019-01-04T02:08:19.180000,newf9,f9wfs_20190103-190824.fits,10.0,1.5020,426.517553,435.550441,2.139837,2.731383,21.333279,7045.039543,4972.654954
1,2019-01-04T02:09:05.739000,newf9,f9wfs_20190103-190910.fits,10.0,1.5005,362.220343,444.515468,0.783837,0.999925,9.507565,8173.857441,1441.667878
2,2019-01-04T02:10:19.401000,newf9,f9wfs_20190103-191043.fits,30.0,1.5100,343.525468,454.939503,0.835492,1.069864,10.021074,4265.012526,1360.526855
3,2019-01-04T02:11:55.468000,newf9,f9wfs_20190103-191220.fits,30.0,1.5200,349.018938,451.828240,0.707429,0.909472,8.837626,5215.090135,1236.739441
4,2019-01-04T05:16:34.295000,newf9,f9wfs_20190103-221639.fits,10.0,1.4866,305.630941,545.175582,0.645472,0.818830,8.159638,8417.877004,2097.552270
5,2019-01-04T05:16:56.999000,newf9,f9wfs_20190103-221701.fits,10.0,1.4846,294.519151,550.836156,0.662956,0.840332,8.321069,8231.490693,1678.906758
6,2019-01-04T05:17:24.586000,newf9,f9wfs_20190103-221724.fits,5.0,1.4825,342.295053,551.647963,0.707715,0.896304,8.739546,7677.886769,2585.529470
7,2019-01-04T05:17:58.186000,newf9,f9wfs_20190103-221758.fits,5.0,1.4795,335.464908,460.697099,0.737730,0.933182,9.013877,6975.013298,1955.782863
8,2019-01-04T05:18:39.531000,newf9,f9wfs_20190103-221844.fits,10.0,1.4754,297.272015,462.272304,0.608087,0.767913,7.775878,7356.276536,1347.736857
9,2019-01-04T05:19:59.138000,newf9,f9wfs_20190103-222024.fits,30.0,1.4667,274.211160,473.457250,1.062021,1.336404,11.944018,4933.247568,1488.704949


In [6]:
# Build a DatetimeIndex named 'ut' from the 'time' column and use it as the
# row index, so the data can be grouped and analyzed by timestamp.
ut_index = pd.DatetimeIndex(fixed['time'], name='ut')
fixed = fixed.set_index(ut_index)

In [7]:
# Raw (un-normalized) histogram of the zenith-corrected seeing column:
# 100 bins spanning 0 to 4.
fixed.hist(column='seeing', range=(0.0, 4.0), bins=100, alpha=0.6)
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
plt.title("2019-01-01 through 2019-03-31")
plt.show()

<IPython.core.display.Javascript object>

In [11]:
# Fit a log-normal distribution to the seeing data. scipy returns the shape,
# location and scale parameters, in that order.
sigma, loc, exp_mu = lognorm.fit(fixed['seeing'])
print(sigma, loc, exp_mu)

# Evaluate the fitted PDF on a grid from 0 to 4 in steps of 0.01.
x = np.arange(0.0, 4.0, 0.01)
p = lognorm(sigma, loc=loc, scale=exp_mu).pdf(x)

# Median taken directly from the data (NaNs ignored) ...
median = np.nanmedian(fixed['seeing'])
# ... and median/mode of the fitted, shifted log-normal: the scale is exp(mu),
# and the location offset is added back on.
fit_median = exp_mu + loc
mu = np.log(exp_mu)
mode = np.exp(mu - sigma**2) + loc

0.3115855447455471 0.1678496391897341 0.8368189296940207


In [13]:
# Plot the normalized histogram with the fitted PDF, and report the
# median/mode calculated from the fit next to the median of the data.
hist_counts, hist_edges, hist_bars = plt.hist(
    fixed['seeing'], density=True, bins=100, range=(0.0, 4.0), label="hist", alpha=0.6
)
logp = plt.plot(x, p, label="logp")
plt.xlabel("Seeing (arcsec)")
plt.ylabel("Number Density")
plt.title("2019-01-01 through 2019-03-31")
# Pair every legend label with its artist explicitly. Passing only a list of
# labels leaves the pairing to matplotlib's internal ordering of the axes'
# artists, so the fit statistics could end up attached to the histogram.
plt.legend(
    [logp[0], hist_bars[0]],
    ["median=%.2f\", mode=%.2f\"" % (fit_median, mode), "median=%.2f\"" % median],
)
plt.savefig("2019q1_hist.png")
plt.show()

<IPython.core.display.Javascript object>

In [14]:
# pandas time indexing makes it easy to group by month: a partial date string
# selects every row whose timestamp falls inside that month. Row selection has
# to go through .loc -- plain fixed['2019-01'] is treated as a column lookup
# by current pandas and raises KeyError.
months = {
    'January': fixed.loc['2019-01'],
    'February': fixed.loc['2019-02'],
    'March': fixed.loc['2019-03'],
}
legends = []
for m in months:
    plt.hist(months[m]['seeing'], bins=100, range=(0.0, 4.0), label=m, alpha=0.6)
    # Legend text: month name followed by that month's median seeing.
    legends.append("%s: %.2f\"" % (m, np.median(months[m]['seeing'])))
plt.legend(legends)
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
#plt.title("Data and median seeing values grouped by month")
plt.savefig("2019q1_monthly.png")
plt.show()

<IPython.core.display.Javascript object>

In [15]:
# Number of rows in each monthly subset, in calendar order.
tuple(len(months[name]) for name in ('January', 'February', 'March'))

(1565, 2641, 2684)

In [16]:
# Split the data by time of day of the index into the first (00:00-07:00) and
# second (07:00-14:00) halves of the night using between_time().
# NOTE(review): between_time includes both end points by default, so a row
# stamped exactly 07:00:00 would land in both halves -- confirm this is acceptable.
first_half = fixed.between_time('00:00', '07:00')
second_half = fixed.between_time('07:00', '14:00')
# To restrict the comparison to one month, call between_time on that month's
# frame instead of `fixed`, e.g.:
#first_half = months['December'].between_time(start_time='00:00', end_time='07:00')
#second_half = months['December'].between_time(start_time='07:00', end_time='14:00')
med_1st, med_2nd = (np.median(half['seeing']) for half in (first_half, second_half))

In [17]:
# Overlay the seeing histograms of the two halves of the night; the legend
# carries each half's median so the two can be compared directly.
for half, tag in ((first_half, "1st"), (second_half, "2nd")):
    plt.hist(half['seeing'], bins=100, range=(0.0, 4.0), label=tag, alpha=0.6)
plt.legend(["1st Half: %.2f\"" % med_1st, "2nd Half: %.2f\"" % med_2nd])
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
#plt.title("Data and median seeing values grouped by half of the night")
plt.savefig("2019q1_1st2nd.png")
plt.show()

<IPython.core.display.Javascript object>

In [8]:
import matplotlib.dates as mdates

# Resample the seeing values into calendar-day bins and collect the median,
# max and min of every day in a new frame.
#fixed = fixed["2018-06-30":"2018-10-01"]
per_day = fixed.seeing.resample('D')
daily = pd.DataFrame({
    'seeing': per_day.median(),
    'max': per_day.max(),
    'min': per_day.min(),
})

# Asymmetric error bars: distance from the daily median down to the daily
# minimum and up to the daily maximum.
lowb = daily['seeing'] - daily['min']
upb = daily['max'] - daily['seeing']

fig, ax = plt.subplots()
ax.errorbar(daily.index, daily['seeing'], yerr=[lowb, upb], fmt='o')
# One major tick per month, labelled as month-day-year.
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter("%m-%d-%Y"))
fig.autofmt_xdate()
ax.set_ylabel("Seeing (arcsec)")
#plt.title("Nightly median seeing")
plt.savefig("2019q2_nightly.pdf")
plt.show()

<IPython.core.display.Javascript object>

In [9]:
# Collect the seeing values of every calendar day that actually has data.
# `daily` comes from a daily resample, so its index also contains days without
# any measurement; those produce empty selections and are left out.
date_list = list(daily.index.strftime('%Y-%m-%d'))
clean_data = {}
for date in date_list:
    # Select rows by partial date string through .loc; plain fixed[date] is a
    # column lookup in current pandas and raises KeyError. The values go into
    # their own name so the `data` frame loaded at the top of the notebook is
    # not overwritten (which would break re-running the filtering cell).
    seeing_for_date = np.array(fixed.loc[date]['seeing'])
    if len(seeing_for_date) > 0:
        clean_data[date] = seeing_for_date
# datetime.date objects matching the keys of clean_data, in the same order.
date_labels = [datetime.strptime(d, "%Y-%m-%d").date() for d in clean_data]

In [29]:
# One violin per day with data, positioned at that day's matplotlib date number.
fig, ax = plt.subplots(figsize=(10,5))
plt.violinplot(list(clean_data.values()), mdates.date2num(date_labels), points=50, widths=1.5,
               showextrema=True, showmedians=True, bw_method='silverman')

# ConciseDateFormatter takes the tick locator as its first argument, not a
# strftime pattern, so hand it the same locator that places the ticks. (The
# original passed "%m-%d-%Y" in that position, where it is not interpreted
# as a format.)
day_locator = mdates.DayLocator()
ax.xaxis.set_major_locator(day_locator)
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(day_locator))
fig.autofmt_xdate()
plt.ylabel("Seeing (arcsec)")
plt.savefig("2019q2_violin.pdf")
plt.show()

<IPython.core.display.Javascript object>

In [30]:
# Seeing against timestamp for the rows whose index falls on 2019-06-16.
# Rows are selected by date string through .loc; plain fixed['2019-06-16'] is
# a column lookup in current pandas and raises KeyError.
night = fixed.loc['2019-06-16']
plt.scatter(night.index, night['seeing'])
plt.show()

<IPython.core.display.Javascript object>

In [15]:
# List the column labels available in the filtered frame.
fixed.columns

Index(['time', 'wfs', 'file', 'exptime', 'airmass', 'az', 'el', 'osst', 'outt',
       'chamt', 'tiltx', 'tilty', 'transx', 'transy', 'focus', 'focerr',
       'cc_x_err', 'cc_y_err', 'xcen', 'ycen', 'seeing', 'raw_seeing', 'fwhm',
       'wavefront_rms', 'residual_rms'],
      dtype='object')

In [24]:
# Write `fixed` out as CSV; the path is relative, so the file lands in the
# current working directory.
fixed.to_csv("june2019_seeing.csv")

In [34]:
# Show the 'az' column for the rows whose index falls on 2019-06-16. Rows are
# selected by date string through .loc; plain fixed['2019-06-16'] is a column
# lookup in current pandas and raises KeyError.
fixed.loc['2019-06-16']['az']

ut
2019-06-16 02:55:31    309.385732
2019-06-16 02:56:03    309.321670
2019-06-16 02:56:36    309.259700
2019-06-16 03:00:09    308.863485
2019-06-16 03:01:11    308.752666
2019-06-16 03:02:13    308.644359
2019-06-16 03:30:47    280.241633
2019-06-16 03:32:14    280.371141
2019-06-16 03:33:25    280.479180
2019-06-16 03:34:36    280.582316
2019-06-16 03:35:47    280.690914
2019-06-16 03:37:01    280.798706
2019-06-16 03:38:13    280.910297
2019-06-16 03:39:24    281.011749
2019-06-16 03:41:05    281.166340
2019-06-16 03:42:07    281.259231
2019-06-16 03:43:41    281.395487
2019-06-16 03:44:43    281.488592
2019-06-16 03:45:50    281.590741
2019-06-16 03:46:53    281.684318
2019-06-16 03:48:32    281.828739
2019-06-16 03:49:35    281.921597
2019-06-16 03:50:37    282.013843
2019-06-16 03:51:40    282.108761
2019-06-16 03:52:42    282.200697
2019-06-16 03:53:45    282.294177
2019-06-16 03:54:47    282.386306
2019-06-16 03:55:50    282.479647
2019-06-16 03:56:52    282.572902
2019-06-16 