In [2]:
import parsnip
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import glob
import json
import lcdata
import os
import pickle
import astropy.io
from astropy.io import fits
from astropy.table import Table, Column, join, hstack, vstack, unique, setdiff
import h5py
from scipy import stats

`add_prob` goes through the classifications table (referred to here as the confusion matrix table) row by row, picks out the most likely transient type for each object, and writes that type and its probability into the predictions table. Call it as `add_prob(predictions, classifications)`.

In [3]:
#This code goes through a confusion matrix table, picking out the most likely transient type and adding it to the predictions table
def add_prob(predictions, classifications):
    """Write the most probable transient type of every object into ``predictions``.

    ``predictions`` is modified in place: for each row index of
    ``classifications`` the class column holding the largest value is looked
    up, and that class name and value are stored in ``predictions['type']``
    and ``predictions['probability']`` at the same row index.

    Parameters
    ----------
    predictions : table
        Must have a ``'type'`` column and support ``colnames`` and
        ``add_column`` (presumably an astropy ``Table`` -- confirm). A first
        ``'type'`` entry of ``'Unknown'`` is taken to mean that the table has
        not been labelled yet.
    classifications : table
        Must contain one column per class name listed in ``columns`` below.

    Returns
    -------
    None or str
        ``None`` after labelling (or when ``predictions`` is empty); the
        message ``'this function has already been run for these predictions'``
        when the first ``'type'`` entry is no longer ``'Unknown'``.
    """
    columns = ['KN', 'SLSN-I', 'SNII', 'SNIa', 'SNIa-91bg', 'SNIax', 'SNIbc', 'TDE']

    # An empty table has no first row to inspect and nothing to label.
    if len(predictions) == 0:
        return None

    if predictions['type'][0] != 'Unknown':
        return 'this function has already been run for these predictions'

    # Only create the column when it is missing, so an earlier partial run
    # does not make add_column fail on a duplicate name. The 1.05 fill value
    # is overwritten for every row labelled by the loop below.
    if 'probability' not in predictions.colnames:
        predictions.add_column([1.05], index=4, name='probability')

    # NOTE(review): if 'type' is a fixed-width string column sized for
    # 'Unknown', longer labels such as 'SNIa-91bg' may be shortened on
    # assignment -- confirm against the real predictions table.
    for row in range(len(classifications)):
        best_value = 0
        best_type = None
        for c in columns:
            value = classifications[c][row]
            # '>=' means that on a tie the class listed later wins.
            if value >= best_value:
                best_value = value
                best_type = c
        if best_type is None:
            # No class value compared >= 0 (e.g. all NaN): keep the existing
            # entries instead of writing a placeholder label.
            continue
        predictions['type'][row] = best_type
        predictions['probability'][row] = best_value
    return None

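This is the function that changes the band names of a dataset to LSST names (`lsstr`, `lsstg`, `lsstz`) or PS1 names (`ps1::r`, `ps1::g`, `ps1::z`). If the band is currently just the letter (e.g. `z`), pass `'none_lsst'` as the model to convert to LSST names or `'none_ps1'` to convert to PS1 names. If the bands already carry a prefix, pass `'plasticc'` to convert to LSST names or `'ps1'` to convert to PS1 names. Give it the dataset and the model.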

In [4]:
def band_swap(dataset, model, path='./bandswap_dataset.h5'):
    """Return ``dataset[:]`` with the r/g/z band labels of its light curves renamed.

    Parameters
    ----------
    dataset : object with ``light_curves``, ``len()`` and slicing
        Each light curve must have a ``'band'`` column.
    model : str
        ``'ps1'``       -- any band containing r/g/z becomes ``ps1::r/g/z``.
        ``'plasticc'``  -- any band containing r/g/z becomes ``lsstr/g/z``.
        ``'none_lsst'`` -- a band equal to r/g/z becomes ``lsstr/g/z``.
        ``'none_ps1'``  -- a band equal to r/g/z becomes ``ps1::r/g/z``.
        Any other value leaves the labels as they are (the band column is
        still converted to ``U6``).
    path : str
        If nothing exists at ``path``, ``dataset`` is first written there
        with ``astropy.io.misc.hdf5.write_table_hdf5``.

    Notes
    -----
    The letters are tested in the order r, g, z and every match overwrites
    the previous one, so in the "containing" modes a label holding several of
    these letters ends up with the last matching one.

    NOTE(review): band values are read from ``dataset.light_curves`` and
    written into the light curves of ``dataset[:]``; whether the slice shares
    its tables with ``dataset`` is not visible here -- confirm if the input
    must stay untouched.
    """
    ps1_names = (('r', 'ps1::r'), ('g', 'ps1::g'), ('z', 'ps1::z'))
    lsst_names = (('r', 'lsstr'), ('g', 'lsstg'), ('z', 'lsstz'))
    # (model name, match by substring?, letter -> new label pairs)
    rules = (
        ('ps1', True, ps1_names),
        ('plasticc', True, lsst_names),
        ('none_lsst', False, lsst_names),
        ('none_ps1', False, ps1_names),
    )
    active_rules = [rule for rule in rules if model == rule[0]]

    if not os.path.exists(path):
        astropy.io.misc.hdf5.write_table_hdf5(dataset, path, overwrite=True)

    swapped = dataset[:]
    label_dtype = np.dtype('U6')
    for idx in range(len(dataset)):
        target = swapped.light_curves[idx]
        # Widen the column first so the longest new label ('ps1::r') fits.
        target['band'] = target['band'].astype(label_dtype)
        source = dataset.light_curves[idx]
        for _, by_substring, renames in active_rules:
            for pos in range(len(source)):
                current = source['band'][pos]
                for letter, new_label in renames:
                    if by_substring:
                        matched = letter in current
                    else:
                        matched = current == letter
                    if matched:
                        target['band'][pos] = new_label
    return swapped

In [9]:
def band_swap2(dataset, model, path='./bandswap_dataset.h5'):
    """Return ``dataset[:]`` with r/g/z band labels renamed; nothing is written to disk.

    Parameters
    ----------
    dataset : object with ``light_curves``, ``len()`` and slicing
        Each light curve must have a ``'band'`` column.
    model : str
        ``'ps1'`` / ``'plasticc'``: a band *containing* r, g or z is renamed
        to ``ps1::<letter>`` / ``lsst<letter>``.
        ``'none_ps1'`` / ``'none_lsst'``: a band *equal to* r, g or z is
        renamed to ``ps1::<letter>`` / ``lsst<letter>``.
        Other values only convert the band column to ``U6``.
    path : str
        Accepted but not used by this function.

    Notes
    -----
    Letters are checked in the order r, g, z without short-circuiting, so the
    last matching letter decides the final label.

    NOTE(review): labels are read from ``dataset.light_curves`` and written
    to the light curves of ``dataset[:]``; whether the two share table
    objects cannot be told from this cell -- confirm.
    """
    letters = ('r', 'g', 'z')
    ps1_labels = ('ps1::r', 'ps1::g', 'ps1::z')
    lsst_labels = ('lsstr', 'lsstg', 'lsstz')

    def _relabel(source, target, labels, by_substring):
        # Visit every row; later letters overwrite earlier matches.
        for row in range(len(source)):
            old = source['band'][row]
            for letter, label in zip(letters, labels):
                found = (letter in old) if by_substring else (old == letter)
                if found:
                    target['band'][row] = label

    result = dataset[:]
    n_objects = len(dataset)
    for obj in range(n_objects):
        out_lc = result.light_curves[obj]
        # 'U6' is wide enough for the longest replacement label.
        out_lc['band'] = out_lc['band'].astype(np.dtype('U6'))
        in_lc = dataset.light_curves[obj]
        if model == 'ps1':
            _relabel(in_lc, out_lc, ps1_labels, True)
        if model == 'plasticc':
            _relabel(in_lc, out_lc, lsst_labels, True)
        if model == 'none_lsst':
            _relabel(in_lc, out_lc, lsst_labels, False)
        if model == 'none_ps1':
            _relabel(in_lc, out_lc, ps1_labels, False)
    return result

Here is the stats table creator. Give it a predictions table that includes the type and probability columns (in other words, run the `add_prob` function first) as well as a `count` column, plus the probability threshold (e.g. inputting 0.85 will result in a stats table that counts the transients with a probability greater than or equal to 0.85).

In [5]:
def create_stats_table(predictions, prob_threshold):
    """Summarise probabilities and data-point counts per predicted type.

    Parameters
    ----------
    predictions : table
        Must provide the columns ``'type'``, ``'probability'`` and
        ``'count'`` and support boolean-mask row selection.
    prob_threshold : float
        Objects whose probability is greater than or equal to this value are
        counted in the third column (the column header itself reads
        ``'Objects with prob > <threshold>'``).

    Returns
    -------
    Table
        One row per type that occurs in ``predictions``; types without any
        object are left out. Columns: type, total number of objects, number
        at or above the threshold, highest / lowest / mean / median
        probability (rounded to 3 decimals), and max / min / rounded average
        of ``'count'``.
    """
    # (label reported in the table, spellings counted under that label)
    # NOTE(review): 'SNIa-91' looks like a shortened 'SNIa-91bg' (the label
    # add_prob writes); both spellings are accepted so the class is not
    # dropped when the full name is stored -- confirm which one occurs.
    type_groups = [
        ('KN', ('KN',)),
        ('SLSN-I', ('SLSN-I',)),
        ('SNII', ('SNII',)),
        ('SNIa', ('SNIa',)),
        ('SNIa-91', ('SNIa-91', 'SNIa-91bg')),
        ('SNIax', ('SNIax',)),
        ('SNIbc', ('SNIbc',)),
        ('TDE', ('TDE',)),
    ]
    dtypes = ('str','int64','int64','float64','float64','float64','float64','int64','int64','int64')
    colnames = ('Type', 'Total', 'Objects with prob > ' + str(prob_threshold), 'Highest Prob', 'Lowest Prob', 'Mean Prob', 'Median Prob', 'Max Data pts', 'Min Data pts', 'Ave Data pts')

    stats_table = Table(names = colnames, dtype = dtypes)
    for label, spellings in type_groups:
        selected = np.zeros(len(predictions), dtype=bool)
        for spelling in spellings:
            selected |= np.asarray(predictions['type'] == spelling, dtype=bool)
        current_table = predictions[selected]
        if len(current_table) == 0:
            continue

        probs = np.asarray(current_table['probability'], dtype=float)
        counts = np.asarray(current_table['count'])
        stats_table.add_row([
            label,
            len(current_table),
            int(np.count_nonzero(probs >= prob_threshold)),
            float(np.round(probs.max(), 3)),
            float(np.round(probs.min(), 3)),
            float(np.round(probs.mean(), 3)),
            # np.median replaces statistics.median: the statistics module is
            # not imported in this notebook's import cell.
            float(np.round(np.median(probs), 3)),
            int(counts.max()),
            int(counts.min()),
            # The target column is int64, so store the rounded mean as an int.
            int(np.round(counts.mean())),
        ])
    return stats_table

In [34]:
# Scratch arithmetic: evaluates to the tuple (1226, 447.0).
# NOTE(review): nothing else in the visible notebook uses these numbers -- confirm whether this cell is still needed.
1268 - 42, 894/2

(1226, 447.0)

In [None]:
# Read the DCDE2 transient file into `dataset3`.
# NOTE(review): the name `dataset3` holds the DCDE2 file here and is rebound to a DCDE4 file in a later cell -- confirm the intended naming.
dataset3 = lcdata.read_hdf5('./DCDE2_535_transients_USING_MAG_APERc_photoz_if_NoZSPEC.h5')

In [29]:
# Read the DCDE3 transient file.
# NOTE(review): it is stored in `dataset2` although the file name says DCDE3 -- the numbering is swapped relative to the DCDE2 cell.
dataset2 = lcdata.read_hdf5('./DCDE3_1086_transients_USING_MAG_APERc_photoz_if_NoZSPEC.h5')

In [28]:
# Read the DCDE4 transient file into `dataset4`.
dataset4 = lcdata.read_hdf5('./DCDE4_1027_transients_USING_MAG_APERc_photoz_if_NoZSPEC.h5')

In [40]:
# Read the DCDE5 transient file into `dataset5`.
dataset5 = lcdata.read_hdf5('./DCDE5_446_transients_USING_MAG_APERc_photoz_if_NoZSPEC.h5')

In [41]:
# Show the first light curve before relabelling; in the output below the band column is bytes1 with single-letter values (g, r, z).
dataset5.light_curves[0]

time,flux,fluxerr,band
float64,float32,float32,bytes1
60154.39107831684,2.8722458,0.086541906,g
60154.39223208428,2.2464502,0.0856049,r
60154.39360457863,3.058464,0.17637336,z
60157.38934947671,2.9424887,0.10745901,g
60157.39048598198,2.1224566,0.09690996,r
60157.39186177869,3.264048,0.16409469,z
60159.29809133346,2.14698,0.20454116,r
...,...,...,...
60211.29245272114,3.3374898,0.15335995,z
60214.23198786449,2.7859821,0.4225915,g


In [42]:
# Rename the single-letter bands of dataset5 to their lsst names (mode 'none_lsst' matches bands equal to r, g or z).
test5 = band_swap2(dataset5,'none_lsst')

In [43]:
# Show the first relabelled light curve; in the output below the band column is str6 with values lsstg, lsstr and lsstz.
test5.light_curves[0]

time,flux,fluxerr,band
float64,float32,float32,str6
60154.39107831684,2.8722458,0.086541906,lsstg
60154.39223208428,2.2464502,0.0856049,lsstr
60154.39360457863,3.058464,0.17637336,lsstz
60157.38934947671,2.9424887,0.10745901,lsstg
60157.39048598198,2.1224566,0.09690996,lsstr
60157.39186177869,3.264048,0.16409469,lsstz
60159.29809133346,2.14698,0.20454116,lsstr
...,...,...,...
60211.29245272114,3.3374898,0.15335995,lsstz
60214.23198786449,2.7859821,0.4225915,lsstg


In [27]:
# Write `test4` to a new file whose name carries an `_lsst` suffix.
# NOTE(review): `test4` is not defined in any cell shown here (presumably band_swap2(dataset4, 'none_lsst')) -- confirm, otherwise this cell fails on a fresh kernel.
test4.write_hdf5('./DCDE4_1027_transients_USING_MAG_APERc_photoz_if_NoZSPEC_lsst.h5')

In [44]:
# Write the relabelled DCDE5 dataset to a new file whose name carries an `_lsst` suffix.
test5.write_hdf5('./DCDE5_446_transients_USING_MAG_APERc_photoz_if_NoZSPEC_lsst.h5')

In [22]:
# Read the DCDE4 file whose name ends in `_ps1`.
# NOTE(review): this rebinds `dataset3`, which another cell uses for the DCDE2 file -- a distinct name would avoid depending on execution order.
dataset3 = lcdata.read_hdf5('./DCDE4_1027_transients_photoz_if_NoZSPEC_ps1.h5')

In [23]:
# Display every light curve of `dataset3`; the output below is an array of tables whose band column is bytes6 with ps1:: labels.
dataset3.light_curves

array([<Table length=11>
              time           flux      fluxerr     band
            float64        float32     float32    bytes6
       ------------------ ---------- ------------ ------
       59977.094642265656 0.07368149    5.0517325 ps1::g
        59977.09554925231 0.08198115    5.9342055 ps1::r
       59977.096704828575 0.23100257     5.333844 ps1::z
       59983.100461171714  192.22896 0.0070629823 ps1::g
        59983.10137284168 0.08198115    5.9342055 ps1::r
        59983.10252000991 0.23100257     5.333844 ps1::z
       59986.066317629295 0.07368149    5.0517325 ps1::g
        59986.06725174275 0.08198115    5.9342055 ps1::r
        59986.06841339386   384.9952  0.005770153 ps1::z
        60139.42183215845 0.07368149    5.0517325 ps1::g
        60139.42389003495 0.23100264    5.3338437 ps1::z,
       <Table length=12>
              time           flux      fluxerr     band
            float64        float32     float32    bytes6
       ------------------ ---------- --