In [24]:
import warnings
warnings.filterwarnings('ignore')

In [1]:
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
from scipy.interpolate import interp1d
from scipy.signal import gaussian, convolve
from statistics import mean, median
from astropy import stats
from scipy.optimize import curve_fit, least_squares
import collections
import os

import utils
from dl import queryClient as qc

C:\Users\kylem\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.JPIJNSWNNAN3CE6LLI5FWSPHUT2VXMTH.gfortran-win_amd64.dll
C:\Users\kylem\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll
  stacklevel=1)


In [2]:
# Create the output tree used by the plotting/fitting cells. makedirs builds the
# intermediate 'results' directory too, and exist_ok avoids the check-then-create race.
os.makedirs('results/plots', exist_ok=True)

In [3]:
def get_data(df,objname):
    """Split a measurement table into per-filter light curves and estimate a period.

    Parameters
    ----------
    df : table with 'filter', 'fwhm', 'mjd', 'mag_auto' and 'magerr_auto' columns
    objname : identifier forwarded to get_ls_period

    Returns
    -------
    crv : list of (N, 3) arrays with columns (mjd, mag_auto, magerr_auto), sorted by mjd,
          one per filter that kept at least 25 rows with fwhm <= 4.0
    period : average of the per-filter get_ls_period results (0 if no filter qualified)
    fltrs : names of the filters that were kept, in u, g, r, i, z order
    """
    light_curves = []
    bands_used = []
    band_periods = []
    for band in ('u','g','r','i','z'):
        in_band = (df['filter'] == band)
        sharp = (df['fwhm'] <= 4.0)
        keep = in_band & sharp
        times = df['mjd'][keep].values
        mags = df['mag_auto'][keep].values
        errs = df['magerr_auto'][keep].values
        # Filters with fewer than 25 usable rows are dropped entirely.
        if len(times) >= 25:
            band_periods.append(get_ls_period(times,mags,objname=objname))
            stacked = np.vstack((times,mags,errs)).T
            light_curves.append(stacked[np.argsort(stacked[:,0])])
            bands_used.append(band)
    # Accumulate p/len term by term, starting from 0.
    period = sum(p/len(band_periods) for p in band_periods)
    return light_curves, period, bands_used

def get_tmps(fltrs):
    """Load the per-filter template files matching 'templets/*<filter>.dat'.

    For every filter, each matching file is read with pandas (space separated),
    padded with a leading [0, 0] row and a trailing [1, 0] row, and stored in a
    (n_files, 501, 2) array.

    Returns
    -------
    tmps : list of (n_files, 501, 2) arrays, one per filter
    names : list of the matching file paths, one list per filter
    typs : list of type labels per filter; a label is only appended when the
           path length is 17 ('RRab') or 15 ('RRc')
    """
    tmps, names, typs = [], [], []
    for fltr in fltrs:
        paths = glob('templets/*{}.dat'.format(fltr))
        stack = np.zeros((len(paths),501,2))
        labels = []
        for k, path in enumerate(paths):
            body = np.array(pd.read_csv(path,sep=' '))
            stack[k] = np.concatenate((np.array([[0,0]]), body, np.array([[1,0]])))
            #adjust if filepath to templets changes
            path_len = len(path)
            if path_len == 17:
                labels.append('RRab')
            elif path_len == 15:
                labels.append('RRc')
        typs.append(labels)
        names.append(paths)
        tmps.append(stack)
    return tmps, names, typs

def double_tmps(tmps):
    """Repeat every template over two cycles.

    Each array in tmps is tiled twice along its second axis, and 1 is added to
    column 0 of the second copy. The input arrays are not modified.
    """
    doubled = []
    for tmp in tmps:
        two_cycles = np.tile(tmp,(2,1))
        half = int(len(two_cycles[0])/2)
        two_cycles[:,half:,0] += 1
        doubled.append(two_cycles)
    return doubled

def plot_periodogram(period,power,best_period=None,objname='',ax=None):
    """Plot a periodogram and save it to results/plots/<objname>_periodogram.png.

    Parameters
    ----------
    period, power : sequences plotted as x and y
    best_period : if given, marked with a red vertical line and a text label
    objname : used as the plot title and in the output file name
    ax : optional matplotlib Axes to draw on; a new 10x7 figure is created when omitted

    A figure created here is closed after saving. When the caller supplies `ax`,
    the figure that owns it is saved but left open, since the caller owns it.
    """
    own_figure = ax is None
    if own_figure:
        fig, ax = plt.subplots(figsize=(10,7))
    else:
        # Previously `fig` was undefined on this path and savefig raised NameError.
        fig = ax.figure

    ax.plot(period,power,lw=0.1)
    ax.set_xlabel('period (days)')
    ax.set_ylabel('relative power')
    ax.set_title(objname)

    if best_period is not None:
        ax.axvline(best_period,color='r')
        ax.text(0.03,0.93,'period = {:.3f} days'.format(best_period),transform=ax.transAxes,color='r')
    fig.savefig('results/plots/{}_periodogram.png'.format(objname))
    if own_figure:
        plt.close(fig)

def get_ls_period(t,y,min_freq=1./1.,max_freq=1./0.1,objname='_'):
    """Estimate a period from the peak of a Lomb-Scargle periodogram.

    The periodogram is evaluated between min_freq and max_freq, plotted and
    saved through plot_periodogram, and the period of maximum power is returned.
    """
    periodogram = stats.LombScargle(t, y)
    freqs, pwr = periodogram.autopower(minimum_frequency=min_freq,
                                       maximum_frequency=max_freq)
    trial_periods = 1./freqs  # period is the inverse of frequency
    peak_period = trial_periods[np.argmax(pwr)]

    plot_periodogram(trial_periods,pwr,peak_period,objname=objname)
    return peak_period

def get_pinit(crv,period):
    """Build the initial-guess tuple for tmpfit.

    Returns one (0.0, max - min of column 1, 0.0) triple per light curve in crv,
    followed by `period` as the final element.
    """
    guesses = tuple((0.0, max(lc[:,1]) - min(lc[:,1]), 0.0) for lc in crv)
    return guesses + (period,)

def update_pinit(pars,period):
    """Turn a fitted parameter array back into an initial-guess tuple.

    Every row of pars contributes a tuple of all its values except the last
    column; `period` is appended as the final element.
    """
    rows = tuple(tuple(pars[k,:-1]) for k in range(len(pars)))
    return rows + (period,)

def RemoveOutliers(crv,tmps,pars,period):
    """Drop points that lie far from the fitted template.

    For each light curve the template selected by the last column of pars is
    scaled by pars[:,1], offset by pars[:,2] and evaluated at the folded phase
    (time/period - pars[:,0]) mod 1. Rows whose absolute residual is not below
    5 * utils.mad(residuals) are removed.

    Returns a list of filtered light-curve arrays, one per input curve.
    """
    best = pars[:,-1].astype(int)
    kept = []
    for k, lc in enumerate(crv):
        template = tmps[k][best[k]]
        fitted = interp1d(template[:,0], template[:,1]*pars[k,1]+pars[k,2])
        folded = (lc[:,0]/period-pars[k,0]) %1
        resid = abs(lc[:,1]-fitted(folded))
        kept.append(lc[resid<utils.mad(resid)*5])
    return kept

def double_period(crv,pars,period):
    """Fold each light curve and repeat it over two cycles.

    Working on a copy of each curve: column 1 has pars[:,2] subtracted, column 0
    is replaced by (time/period - pars[:,0]) mod 1, the rows are duplicated with
    1 added to column 0 of the second copy, and the result is sorted by column 0.
    """
    folded = []
    for k, lc in enumerate(crv):
        one_cycle = lc.copy()
        one_cycle[:,1] -= pars[k,2]
        one_cycle[:,0] = (one_cycle[:,0]/period-pars[k,0])%1

        two_cycles = np.tile(one_cycle.T,2).T
        half = int(len(two_cycles)/2)
        two_cycles[half:,0] += 1
        folded.append(two_cycles[two_cycles[:,0].argsort()])
    return folded


In [4]:
def get_tmps(fltrs):
    """Return the Layden templates once per requested filter.

    The comma-separated file 'templets/LaydenTemplates.txt' is read with phase in
    column 0 and one template per remaining column (a1, a2, a3, b1, b2, b3, c).
    The same seven templates are returned for every filter.

    Parameters
    ----------
    fltrs : sequence of filter names; only its length matters

    Returns
    -------
    tmps : list of (7, n_phase, 2) arrays (phase, value), an independent copy per filter
    names : list of template-name lists, one per filter
    typs : list of type-label lists ('RRab' / 'RRc'), one per filter

    The file is only read when at least one filter is requested, and it is read
    once rather than once per filter.
    """
    typ = ['RRab','RRab','RRab','RRab','RRab','RRab','RRc']
    tempnames = ['a1','a2','a3','b1','b2','b3','c']

    tmps=[]
    typs =[]
    names=[]
    base = None
    for fltr in fltrs:
        if base is None:
            # Loop-invariant: parse the template file a single time.
            tmpmatrix = np.loadtxt('templets/LaydenTemplates.txt',delimiter=',')
            # Take the phase count from the file instead of hard-coding 51.
            base = np.zeros((len(tempnames),tmpmatrix.shape[0],2))
            base[:,:,0] = tmpmatrix[:,0]
            base[:,:,1] = tmpmatrix[:,1:].T

        # Fresh objects per filter so callers may modify one without touching the others.
        typs.append(list(typ))
        names.append(list(tempnames))
        tmps.append(base.copy())
    return tmps, names, typs

In [5]:
class tmpfitter:
    """Callable template model for curve_fit.

    `tmps` is indexed as tmps[filter][template, :, 0|1] where column 0 is the
    template's x grid and column 1 its value. The attributes `fltr` and `n`
    select which filter and which template `model` evaluates; callers set them
    before fitting.
    """
    def __init__ (self, tmps):
        self.tmps=tmps
        self.n=0
        self.fltr=0

    def model(self, t, t0, amplitude, yoffset):
        """Evaluate the selected template at (t - t0) mod 1.

        The template values are multiplied by `amplitude` and shifted by
        `yoffset`, then linearly interpolated onto the shifted inputs.
        """
        template = self.tmps[self.fltr][self.n]
        grid = template[:,0]
        scaled = template[:,1]*amplitude + yoffset
        shifted = (t - t0) %1
        curve = interp1d(grid,scaled)
        return curve(shifted)

def tmpfit(crv,tmps,pinit,w=.1,steps=21,n=1):
    """Grid-search the period while fitting every template to every filter.

    Parameters
    ----------
    crv : list of per-filter arrays; column 0 is divided by the trial period,
          column 1 is the fitted data and column 2 is passed to curve_fit as sigma
    tmps : per-filter template arrays, indexed the same way as crv
    pinit : pinit[f] is the curve_fit starting point (t0, amplitude, yoffset) for
            filter f; pinit[-1] is the period the search is centred on
    w : half-width of the period search window around pinit[-1]
    steps : number of trial periods
    n : modulus applied to time/period before fitting

    Returns
    -------
    pars : (len(tmps), 4) array; per filter the fitted (t0, amplitude, yoffset)
           and, in the last column, the index of the best template
    minp : trial period with the smallest summed chi-squared (0 if none was accepted)
    minsumx2/npoints : that chi-squared sum divided by the total number of rows in crv

    NOTE(review): npoints is 0 when crv holds no rows, so the final division
    would fail in that case - confirm callers never pass empty light curves.
    """
    fitter = tmpfitter(tmps)
    
    # Build the trial periods: pl covers [centre-w, centre], pr covers (centre, centre+w].
    # After flipping and interleaving, the list starts at the centre and alternates
    # between the two sides while moving outward.
    lsteps = int(steps/2+.5)
    rsteps = steps - lsteps
    pl = np.linspace(pinit[-1]-w,pinit[-1],lsteps)
    pr = np.linspace(pinit[-1]+w,pinit[-1],rsteps,endpoint=False)
    plist = np.zeros(pl.size+pr.size)
    plist[0::2] = np.flip(pl)
    plist[1::2] = np.flip(pr)
    # Discard non-positive trial periods.
    plist = plist[plist>0]
    
    pars = np.zeros((len(tmps),4))
    # 10**50 acts as "no fit found yet".
    minsumx2 = 10**50
    minp = 0
    for p in plist:
        sumx2=0
        ppars=np.zeros((len(tmps),4))
        for f in range(len(tmps)):
            fitter.fltr = f
            phase = crv[f][:,0]/p%n #1 for one period, 2 for two periods
            minx2 = 10**50
            # Try every template for this filter and keep the one with the lowest chi-squared.
            for i in range(len(tmps[f])):
                fitter.n = i
                try:
                    # Bounds: t0 in [-0.5, 0.5], amplitude in [0, 10], yoffset in [-50, 50].
                    tpars, cov = curve_fit(fitter.model, phase, crv[f][:,1], 
                                          bounds = ((-.5,0,-50),(.5,10,50)),
                                          sigma=crv[f][:,2], p0=pinit[f], maxfev=500)
                except RuntimeError:
                    # A fit that raises RuntimeError is skipped for this template.
                    #print('Error: Curve_fit failed on templet={}-{}, p={:.4}'.format(f,i,p))
                    continue
                
                # Chi-squared of the fitted template against the data.
                x2 = sum((fitter.model(phase,tpars[0],tpars[1],tpars[2])-crv[f][:,1])**2/crv[f][:,2]**2)
                if x2 < minx2:
                    ppars[f,:-1] = tpars
                    ppars[f,-1] = i
                    minx2 = x2
            
            sumx2 += minx2
            # Already worse than the best period so far: skip the remaining filters.
            if sumx2 > minsumx2:
                break
        if sumx2 < minsumx2:
            minsumx2 = sumx2
            minp = p
            pars = ppars
    # Total number of data rows over all filters, used to normalise the result.
    npoints=0
    for i in range(len(crv)):
        npoints += len(crv[i])
    return pars, minp, minsumx2/npoints

In [6]:
def fit_plot(objname,file):
    """Fit templates to one object, save a folded light-curve plot and log the fit.

    Parameters
    ----------
    objname : object id; used in the query, the plot title and the output names
    file : open, writable text file that receives the fit summary

    Steps: query the object's measurements, build per-filter light curves, fit
    templates over a coarse period grid, remove outliers, refit over a finer
    grid, then plot the two-cycle folded data with the best template per filter
    to results/plots/<objname>.png. Nothing is written when no filter has
    enough data.

    Written to `file`: one line "<objname>,<chi2>,<period>,", then one line per
    filter "<t0>,<amplitude/2>,<yoffset>,<template name>", then "---".
    """
    # NOTE(review): objname is formatted straight into the SQL text; only pass
    # trusted object ids here.
    star=qc.query(sql="""SELECT meas.* 
                     FROM nsc_dr2.meas
                     WHERE objectid='{:s}'""".format(objname),
              fmt='pandas',
              profile='db01')
    crv,period,fltrs = get_data(star,objname)
    if len(fltrs) == 0:
        return
    tmps, tmpnames, typs = get_tmps(fltrs)

    pinit = get_pinit(crv,period)
    pars, p, x2 = tmpfit(crv,tmps,pinit,w=.1,steps=25)
    crv_in = RemoveOutliers(crv,tmps,pars,p)
    pinit = update_pinit(pars,p)
    pars_in,p_in,x2 = tmpfit(crv_in,tmps, pinit,w=.01,steps=25)

    crv2 = double_period(crv,pars_in,p_in)
    tmps2= double_tmps(tmps)
    # Take the template index from the refined fit so it matches the amplitude,
    # offset and period that are plotted and written below (it previously came
    # from the first-pass fit).
    n = pars_in[:,-1].astype(int)

    colors = [f if f in ('r','g') else 'black' for f in fltrs]

    # The star gets a type only if every filter picked a template of the same type.
    best_types = {typs[i][n[i]] for i in range(len(typs))}
    typ = typs[0][n[0]] if len(best_types) == 1 else '???'

    fig, ax = plt.subplots(len(fltrs), figsize=(10,7.5), sharex=True, sharey=True)
    try:
        if len(fltrs) == 1:
            ax = [ax]
        for i in range(len(fltrs)):
            crvmean = mean(crv2[i][:,1])
            ax[i].scatter(crv2[i][:,0],crv2[i][:,1]-crvmean,c=colors[i])
            ax[i].plot(tmps2[i][n[i],:,0],tmps2[i][n[i],:,1]*pars_in[i,1]-crvmean,c='black')
            ax[i].set_ylabel(fltrs[i], fontsize=18)
        # The panels share their y axis, so inverting every panel toggles the
        # direction back and forth; invert exactly once (magnitudes: brighter is up).
        if not ax[0].yaxis_inverted():
            ax[0].invert_yaxis()

        ax[-1].set_xlabel('Phase', fontsize=16)
        ax[0].set_title("Object: {}    Period: {:.3f} d    Type: {}".format(objname,p_in,typ), fontsize=20)
        fig.savefig('results/plots/{}.png'.format(objname))
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

    file.write("{},{:.3f},{:.3f},\n".format(objname,x2,p_in))
    for i in range(len(fltrs)):
        file.write("{:.3f},{:.3f},{:.3f},{}\n".format(pars_in[i][0],pars_in[i][1]/2,pars_in[i][2],tmpnames[i][n[i]]))
    file.write("---\n")

In [71]:
from astropy.table import Table

In [12]:
# Build the paths with os.path.join: the old 'goldsample\golden_...' literals relied on
# the invalid escape sequence '\g' and only resolved on Windows.
gldorig = np.loadtxt(os.path.join('goldsample','golden_original.txt'),delimiter=',',dtype=str)
gldrrab = np.loadtxt(os.path.join('goldsample','golden_RRab.txt'),delimiter=',',dtype=str)

In [48]:
# Result table: one row per gold-sample id, pre-filled with sentinel values.
# Overall columns first, then the same five columns for each band in u, g, r, i, z order:
# <band>typ (label), <band>prob, <band>flag, <band>ndat (number of points), <band>prd (period).
t=Table([gldrrab],names=['id'])
t['period'] = -99.99
t['type']   = '          '
for band in 'ugriz':
    for suffix, default in (('typ', '          '),
                            ('prob', -99.99),
                            ('flag', -1),
                            ('ndat', 0),
                            ('prd', -99.99)):
        t[band + suffix] = default

In [50]:
t[:5]

id,period,type,utyp,uprob,uflag,undat,uprd,gtyp,gprob,gflag,gndat,gprd,rtyp,rprob,rflag,rndat,rprd,ityp,iprob,iflag,indat,iprd,ztyp,zprob,zflag,zndat,zprd
str13,float64,str10,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64
100047_2267,-99.99,,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
100555_10583,-99.99,,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
100828_28520,-99.99,,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
101825_934,-99.99,,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
102849_2399,-99.99,,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99


In [22]:
def get_data(objname):
    """Query one object's measurements and split them into per-filter light curves.

    Returns
    -------
    crv : list of (N, 3) arrays with columns (mjd, mag_auto, magerr_auto), sorted by mjd,
          one per filter that kept at least 25 rows with fwhm <= 4.0
    period : average of the per-filter get_ls_period results (0 if no filter qualified)
    fltrs : names of the filters that were kept, in u, g, r, i, z order
    """
    df=qc.query(sql="""SELECT meas.* 
                     FROM nsc_dr2.meas
                     WHERE objectid='{:s}'""".format(objname),
              fmt='pandas',
              profile='db01')
    light_curves = []
    bands_used = []
    band_periods = []
    for band in ('u','g','r','i','z'):
        in_band = (df['filter'] == band)
        sharp = (df['fwhm'] <= 4.0)
        keep = in_band & sharp
        times = df['mjd'][keep].values
        mags = df['mag_auto'][keep].values
        errs = df['magerr_auto'][keep].values
        # Filters with fewer than 25 usable rows are dropped entirely.
        if len(times) >= 25:
            band_periods.append(get_ls_period(times,mags,objname=objname,f=band))
            stacked = np.vstack((times,mags,errs)).T
            light_curves.append(stacked[np.argsort(stacked[:,0])])
            bands_used.append(band)
    # Accumulate p/len term by term, starting from 0.
    period = sum(p/len(band_periods) for p in band_periods)
    return light_curves, period, bands_used

def get_ls_period(t,y,min_freq=1./1.,max_freq=1./0.1,objname='_',f=''):
    """Estimate a period from the peak of a Lomb-Scargle periodogram.

    The periodogram is evaluated between min_freq and max_freq and the period of
    maximum power is returned. Periodogram plotting is disabled in this version,
    so `objname` and `f` are accepted but not used.
    """
    periodogram = stats.LombScargle(t, y)
    freqs, pwr = periodogram.autopower(minimum_frequency=min_freq,
                                       maximum_frequency=max_freq)
    trial_periods = 1./freqs  # period is the inverse of frequency
    return trial_periods[np.argmax(pwr)]

import upsilon

-------------------------------
-------------------------------


In [26]:
# Classify the first available band of the first gold-sample object as a smoke test.
crv,p_est,fltrs = get_data(gldrrab[0])
# Load the classifier once; it was previously loaded twice in this cell.
rf_model = upsilon.load_rf_model()
i=0
efeat = upsilon.ExtractFeatures(crv[i][:,0],crv[i][:,1],crv[i][:,2])
efeat.run()
feats = efeat.get_features()
label, probability, flag = upsilon.predict(rf_model, feats)

In [72]:
# Offset of each band's five-column group (typ, prob, flag, ndat, prd) after the
# three leading columns (id, period, type) of t.
fdict = {'u':0,'g':5,'r':10,'i':15,'z':20}
bands = ['u','g','r','i','z']

# Load the classifier a single time; it was previously reloaded twice for every
# band of every object (presumably a costly read of the model from disk).
rf_model = upsilon.load_rf_model()

for n in range(len(gldrrab)):
    crv,p_est,fltrs = get_data(gldrrab[n])
    if len(fltrs) == 0:
        # No band has enough data: keep the sentinel values instead of writing
        # the NaN that np.mean([]) would produce.
        continue

    periods = []
    for i in range(len(fltrs)):
        efeat = upsilon.ExtractFeatures(crv[i][:,0],crv[i][:,1],crv[i][:,2])
        efeat.run()
        feats = efeat.get_features()
        label, probability, flag = upsilon.predict(rf_model, feats)

        x = fdict[fltrs[i]]
        t[n][t.colnames[3+x]] = label
        t[n][t.colnames[4+x]] = probability
        t[n][t.colnames[5+x]] = flag
        t[n][t.colnames[6+x]] = len(crv[i])
        t[n][t.colnames[7+x]] = feats['period']
        periods.append(feats['period'])

    t[n]['period'] = (np.mean(periods))

    # The overall type is the label of the band with the highest probability.
    probs = [t[n][b + 'prob'] for b in bands]
    best_band = bands[int(np.argmax(probs))]
    t[n]['type'] = t[n][best_band + 'typ']

In [73]:
t

id,period,type,utyp,uprob,uflag,undat,uprd,gtyp,gprob,gflag,gndat,gprd,rtyp,rprob,rflag,rndat,rprd,ityp,iprob,iflag,indat,iprd,ztyp,zprob,zflag,zndat,zprd
str13,float64,str10,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64,str10,float64,int32,int32,float64
100047_2267,0.6221811344492266,RRL_ab,RRL_ab,0.86,1,70,0.6221598132668498,RRL_ab,0.95,0,258,0.6221890096889295,RRL_ab,0.96,0,256,0.6221758132967702,RRL_ab,0.97,0,267,0.6221889586416363,RRL_ab,1.0,0,235,0.6221920773519475
100555_10583,27.998177478696128,EB_ED,EB_ED,0.84,1,32,0.26383448636087176,EB_ED,0.6,1,43,0.3650679692453242,EB_ED,0.65,1,46,0.3650522258993856,EB_ED,0.6,1,56,0.3714944024749053,EB_ED,0.4,1,60,138.62543830950017
100828_28520,0.6283259392486608,RRL_ab,,-99.99,-1,0,-99.99,RRL_ab,0.8,1,91,0.6283259392486608,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
101825_934,0.6442550532084984,RRL_ab,,-99.99,-1,0,-99.99,RRL_ab,0.52,1,1130,0.6442550532084984,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
102849_2399,0.6488362241200318,RRL_ab,,-99.99,-1,0,-99.99,RRL_ab,0.69,1,1129,0.6488362241200318,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
102924_1422,1.7620586722694107,T2CEPH,,-99.99,-1,0,-99.99,DSCT,0.33,1,296,0.19415561818921295,,-99.99,-1,0,-99.99,T2CEPH,0.51,1,30,3.3299617263496084,,-99.99,-1,0,-99.99
105919_7718,1.2853866776261773,T2CEPH,,-99.99,-1,0,-99.99,T2CEPH,0.56,1,595,1.2853866776261773,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
106429_89,0.622850487439848,RRL_ab,,-99.99,-1,0,-99.99,RRL_ab,0.89,1,590,0.622850487439848,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
106432_8993,0.6359748424018525,RRL_ab,,-99.99,-1,0,-99.99,RRL_ab,0.96,1,1017,0.6359748424018525,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99
107453_1174,0.548078420090605,RRL_ab,,-99.99,-1,0,-99.99,RRL_ab,0.73,1,562,0.548078420090605,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99,,-99.99,-1,0,-99.99


In [74]:
import os
# overwrite=True replaces an existing file in one step (no separate exists/remove).
t.write("res.fits",format='fits',overwrite=True)

In [78]:
collections.Counter(t['type'])

Counter({'RRL_ab': 446,
         'EB_ED': 10,
         'T2CEPH': 33,
         'EB_EC': 17,
         'DSCT': 16,
         'CEPH_F': 18,
         'CEPH_Other': 17,
         'RRL_c': 15,
         'CEPH_1O': 5,
         'RRL_d': 2,
         'EB_ESD': 16})

In [79]:
# Fraction of the sample classified as RRL_ab, computed from the table rather than
# from numbers copied by hand out of the Counter output.
np.mean(t['type'] == 'RRL_ab')

0.7495798319327731

In [7]:
def

SyntaxError: invalid syntax (<ipython-input-7-7b18d017f89f>, line 1)

In [None]:
# Build the paths with os.path.join: the old 'goldsample\golden_...' literals relied on
# the invalid escape sequence '\g' and only resolved on Windows.
gldorig = np.loadtxt(os.path.join('goldsample','golden_original.txt'),delimiter=',',dtype=str)
gldrrab = np.loadtxt(os.path.join('goldsample','golden_RRab.txt'),delimiter=',',dtype=str)

# Fit every object that is in the original sample but not in the RRab sample.
# sorted() makes the processing order reproducible; `with` closes each file even
# when fit_plot raises.
for name in sorted(set(gldorig)-set(gldrrab)):
    with open("results/{}_parameters.csv".format(name),'a') as file:
        fit_plot(name,file)

In [None]:
names = ['150536_22075','150023_1179','151047_5422','150537_4644']
# `with` flushes and closes the file when the loop ends or fails; the handle was
# previously left open by this cell.
with open("results/parameters.csv",'a') as file:
    for name in names:
        fit_plot(name,file)
        print(name)

In [None]:
# `with` flushes and closes the file even if the fit fails; the handle was
# previously left open by this cell.
with open("results/parameters.csv",'a') as file:
    fit_plot('77516_8215',file)

In [None]:
file.close()

In [None]:
# Fetch the ids of candidate objects from nsc_dr2.object: rows with variable10sig=1,
# 0.1 < gmag-rmag < 0.5 and ndet > 100. The result is requested in 'table' format.
reslist=qc.query(sql="""SELECT id FROM nsc_dr2.object
                        WHERE variable10sig=1 AND
                        gmag-rmag>0.1 AND gmag-rmag<0.5
                        AND ndet>100""",
              fmt='table',
              profile='db01')

In [None]:
from tqdm import tqdm

# Number of candidates to process; raise towards len(reslist) for a full run.
N_OBJECTS = 20

# `with` closes the file even when a fit raises; min() keeps the loop inside
# reslist when the query returns fewer than N_OBJECTS rows.
with open("results/parameters.csv",'a') as file:
    for i in tqdm(range(min(N_OBJECTS, len(reslist)))):
        fit_plot(reslist[i][0],file)

In [None]:
file.close()

In [None]:
#res = qc.query(sql="""SELECT * from nsc_dr2.meas
#                      JOIN nsc_dr2.object as obj
#                      ON meas.objectid=obj.id
#                      where obj.variable10sig=1 and 
#                      obj.gmag-obj.rmag>.1 and 
#                      obj.gmag-obj.rmag<0.5 and
#                      obj.ndet>100""",
#              fmt='table')

In [None]:
a="templets/103g.dat"
a

In [None]:
a[9:]