In [1]:
from datetime import datetime
from multiprocessing import Pool

import time
import os
import sys
from pathlib import Path

import numpy as np
import scipy
import scipy.stats
import pandas as pd
import matplotlib
matplotlib.use('nbagg')
from matplotlib import style
style.use('ggplot')
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

import astropy.units as u
from astropy.io import fits
from mmtwfs.wfs import WFSFactory

In [2]:
# build one WFS object per supported system, keyed by its short identifier,
# plus a parallel lookup from that identifier to the system's name attribute.
wfs_keys = ['newf9', 'f9', 'f5', 'mmirs']
wfs_systems = {key: WFSFactory(wfs=key) for key in wfs_keys}
wfs_names = {key: system.name for key, system in wfs_systems.items()}
# close every open matplotlib figure (presumably ones opened while instantiating -- TODO confirm)
plt.close('all')

In [3]:
def check_wfs(p):
    """
    Work out which WFS system was used on a given night from its directory contents.

    Looks inside directory `p` (a pathlib.Path) for an entry named "F9", "F5", or "MMIRS",
    in that order of priority, and returns the matching key ("f9", "f5", or "mmirs").
    Returns None when none of those entries exists.
    """
    markers = (("F9", "f9"), ("F5", "f5"), ("MMIRS", "mmirs"))
    for marker, key in markers:
        if (p / marker).exists():
            return key
    return None

def process_image(f):
    """
    Process FITS file, f, to get info we want from the header and then analyse it with the
    appropriate WFS instance. Return results in a comma-separated line that will be collected
    and saved in a CSV file.

    The WFS instance is looked up as wfs_systems[wfskey]. Both names are module-level globals
    that must be set before this is called; the signature takes only the file so the function
    can be handed straight to Pool.map().

    Parameters
    ----------
    f : pathlib.Path
        Path to the FITS image.

    Returns
    -------
    str or None
        A newline-terminated CSV row "time,wfs,file,airmass,seeing,raw seeing,fwhm,wavefront rms",
        or None when the file is skipped. A file is skipped when its name contains "ave", when the
        DATEOBS or UT header card is missing, when the timestamp cannot be parsed, or when
        measure_slopes() reports no slopes.

    Side effects
    ------------
    On success the fitted zernike solution is saved next to the image as "<stem>.zernike".
    """
    if "ave" in f.name:
        return None

    # only the header is needed here, so pull the values out and close the file before the
    # (much longer) WFS analysis opens the same path itself.
    with fits.open(f) as h:
        hdr = h[0].header
        airmass = hdr['AIRMASS'] if 'AIRMASS' in hdr else np.nan
        # we need to fix the headers in all cases to have a proper DATE-OBS entry with
        # properly formatted FITS timestamp.  in the meantime, this hack gets us what we need
        # for analysis in pandas.
        if 'DATEOBS' not in hdr or 'UT' not in hdr:
            return None
        dateobs = hdr['DATEOBS']
        ut = hdr['UT']

    timestring = "%s %s UTC" % (dateobs, ut)
    try:
        dtime = datetime.strptime(timestring, "%a %b %d %Y %H:%M:%S %Z")
    except ValueError:
        # a malformed timestamp in one image should drop that image, not abort the whole night
        return None
    obstime = dtime.isoformat()

    # being conservative here and only using data that has proper slope determination
    # and wavefront solution. also want to get statistics on the quality of the wavefront fits.
    wfs = wfs_systems[wfskey]
    results = wfs.measure_slopes(f, plot=False)
    if results['slopes'] is None:
        return None

    zresults = wfs.fit_wavefront(results, plot=False)
    line = "%s,%s,%s,%f,%f,%f,%f,%f\n" % (
        obstime,
        wfskey,
        f.name,
        airmass,
        results['seeing'].value,
        results['raw_seeing'].value,
        results['fwhm'],
        zresults['residual_rms'].value
    )
    zfile = f.parent / (f.stem + ".zernike")
    zresults['zernike'].save(filename=zfile)
    return line

In [None]:
# Walk every night directory under rootdir, run process_image() over its FITS files in a
# process pool, and write the collected rows to <night>/seeing_results.csv. Nights that
# already have that CSV are left alone, so the cell can be re-run to pick up where it stopped.
#rootdir = Path("/Users/tim/MMT/wfsdat/test")
rootdir = Path("/Volumes/LaCie/wfsdat")
dirs = sorted(rootdir.glob("*"))  # sorted so nights are visited in a reproducible order
csv_header = "time,wfs,file,airmass,seeing,raw seeing,fwhm,wavefront rms\n"
for d in dirs:
    if not d.is_dir():
        continue

    results_csv = d / "seeing_results.csv"
    if results_csv.exists():
        print("Already processed %s..." % d.name)
        continue

    # valid WFS directories are ints of the form YYYYMMDD. if not this form, int barfs.
    # the try covers ONLY this conversion so that a ValueError raised later, while images
    # are being analysed, is not misreported as an invalid directory name.
    try:
        night = int(d.name)
    except ValueError:
        print("Skipping %s..." % d.name)
        continue

    # wfskey has to stay a module-level name: process_image() reads it as a global.
    wfskey = check_wfs(d)
    if wfskey is None:
        print("No valid wfskey for %s..." % d)
        continue

    # MMIRS nights keep their images in a rawdata/ subdirectory; the others sit directly in d.
    # sorting makes the row order in the CSV deterministic.
    if wfskey == "mmirs":
        fitsfiles = sorted((d / "rawdata").glob("*.fits"))
    else:
        fitsfiles = sorted(d.glob("*.fits"))

    # F9 nights after 20170510 are analysed with the "newf9" configuration
    if wfskey == "f9" and night > 20170510:
        wfskey = "newf9"
    print("checking %d...  using %s." % (night, wfskey))

    with Pool(processes=8) as pool:  # my mac's i7 has 4 cores + hyperthreading so 8 virtual cores.
        plines = pool.map(process_image, fitsfiles)  # plines comes out in same order as fitsfiles!

    # drop the None entries returned for skipped images, then write header + rows
    lines = [csv_header] + [line for line in plines if line is not None]
    with open(results_csv, "w") as f:
        f.writelines(lines)

In [3]:
# csvs.txt lists one CSV path per line (use find to make this list)
with open("csvs.txt") as f:
    csvs = f.readlines()

# read each listed file with pandas, stripping the trailing newline/whitespace from its path
frames = [pd.read_csv(path.rstrip()) for path in csvs]

# stack the per-file frames into one big frame containing all of the data
data = pd.concat(frames)

In [11]:
# load pre-made and filtered data, discarding its 'ut' column up front
# so that name is free for the real time index built from the 'time' column
data = pd.read_csv("seeing_2017Q2.csv").drop('ut', axis=1)

In [12]:
# index the rows by timestamp (parsed from the 'time' column, index named 'ut')
# so we can group and analyze the data by time
ut_index = pd.DatetimeIndex(data['time'], name='ut')
timedata = data.set_index(ut_index)
# had some reference images sneak into analysis. keep only rows with seeing above 0.1 to ignore them...
has_real_seeing = timedata['seeing'] > 0.1
trimdata = timedata[has_real_seeing]

In [43]:
# raw histogram of the zenith-corrected seeing column: 100 bins spanning 0 to 2.5
trimdata.hist(column='seeing', bins=100, range=(0.0, 2.5), alpha=0.6)
# annotate the axes the histogram was just drawn on
ax = plt.gca()
ax.set_title("2017-04-01 through 2017-06-30")
ax.set_xlabel("Seeing (arcsec)")
ax.set_ylabel("N")
plt.show()

<IPython.core.display.Javascript object>

In [23]:
# fit a log-normal distribution to the seeing data.
# NOTE: this needs the scipy.stats submodule to be imported explicitly in the imports cell;
# a bare `import scipy` does not guarantee that scipy.stats is loaded.
# scipy's lognorm.fit returns (shape, loc, scale), used here as shape = sigma and
# scale = exp(mu), with loc shifting the whole distribution along the seeing axis.
seeing = trimdata['seeing']
sigma, loc, exp_mu = scipy.stats.lognorm.fit(seeing)
# evaluate the fitted pdf on a 0.01-spaced grid covering the plotted range
x = np.arange(0.0, 2.5, 0.01)
p = scipy.stats.lognorm.pdf(x, sigma, loc=loc, scale=exp_mu)
mu = np.log(exp_mu)
# mode and median of the fitted (shifted) log-normal
mode = np.exp(mu - sigma**2) + loc
fit_median = exp_mu + loc
# median taken directly from the data, for comparison with the fit
median = np.median(seeing)

In [44]:
# plot normalized histogram with the fit and the median/mode calculated from the fit.
# `density=True` is the supported spelling of the removed `normed=True` option.
# each legend text is attached to its own artist (data median on the histogram, fitted
# median/mode on the curve) so the pairing does not depend on the order in which
# matplotlib happens to collect legend handles.
plt.hist(
    trimdata['seeing'],
    density=True,
    bins=100,
    range=(0.0, 2.5),
    label="median=%.2f\"" % median,
    alpha=0.6,
)
logp = plt.plot(x, p, label="median=%.2f\", mode=%.2f\"" % (fit_median, mode))
plt.xlabel("Seeing (arcsec)")
plt.ylabel("Normalized N")
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

In [42]:
# pandas time indexing makes it easy to group by month.
# partial-string row selection goes through .loc; plain trimdata['2017-04'] is treated as a
# column lookup by current pandas and fails.
months = {
    'April': trimdata.loc['2017-04'],
    'May': trimdata.loc['2017-05'],
    'June': trimdata.loc['2017-06'],
}
legends = []
for m in months:
    plt.hist(months[m]['seeing'], bins=100, range=(0.0, 2.5), label=m, alpha=0.6)
    # legend entry: month name plus that month's median seeing
    legend = "%s: %.2f\"" % (m, np.median(months[m]['seeing']))
    legends.append(legend)
plt.legend(legends)
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
plt.title("Data and median seeing values grouped by month")
plt.show()

<IPython.core.display.Javascript object>

In [49]:
# took a lot of MMIRS data in april... show just the April rows whose wfs column is 'mmirs'
april = months['April']
april[april['wfs'] == 'mmirs']

Unnamed: 0_level_0,time,wfs,file,airmass,seeing,raw seeing,fwhm,wavefront rms
ut,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-04-05 02:59:07,2017-04-05T02:59:07,mmirs,mmirs_wfs_0001.fits,1.11,1.183844,1.260341,6.802361,356.986898
2017-04-05 02:59:38,2017-04-05T02:59:38,mmirs,mmirs_wfs_0002.fits,1.11,1.175116,1.251050,6.764644,395.365235
2017-04-05 03:00:10,2017-04-05T03:00:10,mmirs,mmirs_wfs_0003.fits,1.11,1.255137,1.336242,7.109579,290.441805
2017-04-05 03:00:43,2017-04-05T03:00:43,mmirs,mmirs_wfs_0004.fits,1.11,1.311149,1.395872,7.349820,268.493373
2017-04-05 03:10:26,2017-04-05T03:10:26,mmirs,mmirs_wfs_0005.fits,1.12,1.111469,1.189674,6.514906,174.516084
2017-04-05 03:10:59,2017-04-05T03:10:59,mmirs,mmirs_wfs_0006.fits,1.12,1.555976,1.665458,8.423618,181.448496
2017-04-05 03:11:31,2017-04-05T03:11:31,mmirs,mmirs_wfs_0007.fits,1.12,1.282974,1.373247,7.258782,184.120385
2017-04-05 03:12:03,2017-04-05T03:12:03,mmirs,mmirs_wfs_0008.fits,1.12,1.505212,1.611123,8.208797,176.189644
2017-04-05 03:28:35,2017-04-05T03:28:35,mmirs,mmirs_wfs_0010.fits,1.14,1.089706,1.178833,6.470686,202.403059
2017-04-05 03:29:07,2017-04-05T03:29:07,mmirs,mmirs_wfs_0011.fits,1.14,1.654728,1.790068,8.913291,578.622410


In [50]:
# total number of rows in the trimmed data set, for comparison:
# almost half the total amount of data was from MMIRS in april...
trimdata.shape[0]

13329

In [57]:
# split the data into first and second halves of the night by UT hour of the index:
#   first half  : 00:00 <= UT < 07:00
#   second half : 07:00 <= UT < 14:00
# the intervals are half-open so a frame stamped exactly 07:00:00 is counted once (in the
# second half) rather than in both halves, which is what two endpoint-inclusive
# between_time() calls would do.
ut_hour = np.asarray(trimdata.index.hour)
first_half = trimdata[ut_hour < 7]
second_half = trimdata[(ut_hour >= 7) & (ut_hour < 14)]
# median seeing within each half
med_1st = np.median(first_half['seeing'])
med_2nd = np.median(second_half['seeing'])

In [60]:
# overlay the seeing histograms for the two halves of the night; the legend carries each
# half's median so the two can be compared (the author's reading: seeing is better later
# in the night...)
halves = ((first_half, "1st"), (second_half, "2nd"))
for half, tag in halves:
    plt.hist(half['seeing'], bins=100, range=(0.0, 2.5), label=tag, alpha=0.6)
half_legends = ["1st Half: %.2f\"" % med_1st, "2nd Half: %.2f\"" % med_2nd]
plt.legend(half_legends)
plt.xlabel("Seeing (arcsec)")
plt.ylabel("N")
plt.title("Data and median seeing values grouped by half of the night")
plt.show()

<IPython.core.display.Javascript object>