In [1]:
# This program finds bad nights in a year

# The algorithm goes like this
# Create a list for stars to process, call is `stars_to_process`
# Add stars 1-1000 to stars_to_process
# Throw out variable stars from `stars_to_process`
# Throw out stars that have <80% attendance for the year from `stars_to_process`

# Create a dictionary, call it `season_mean_signal` to hold mean signals of stars for the entire data season
#   Note that the mean signal for a season for a stars is calculated from set of the internight 
#   normalized signal on each night of the season for the star.
# For each night, create a dictionary to hold data for the night, call it `nights_data_dict`
#   Create a entry in the dictionary with the night_date as key and the value as the list containing:
#   The ratio of signal_of_star_on_night[night] / season_mean_signal[star_i] for each star
# For each night, create a dict to hold std of values in `nights_data_dict`, call this dict `nights_signal_std_dict`
# (note that at this point, we have a standard deviation value for each night)
# We mark nights that have this value above a threshold (example: 0.035 for old camera, 0.03 for new camera) as bad nights for the year

In [1]:
import numpy as np
from typing import Iterable

from trout.stars import get_star, Star
from trout.stars.utils import STAR_START, STAR_END
from trout.nights.year_nights import get_nights_in_a_year

In [2]:
date_format = "%Y-%m-%d"

In [3]:
# We exclude bad stars and LPVs
stars_to_exclude = [
    1,16,41,69,82,100,115,138,166,168,195,245,255,281,285,294,317,332,
    338,356,357,366,377,410,414,441,465,466,504,533,539,592,597,600,
    628,635,664,672,685,697,703,722,736,753,755,777, 788,790, 814,824,
    842,850,852,870,877,879,888,892,904,908,912,929,950,958,981,
                   
    1007,1048,1052,1054,1065,1103,1113,1131,1143,1144,1191,1195,1197,
    1219,1223,1276,1369, 1426,1475,1495,1529,1539,1654,1687,1693,1702,
    1716,1843,1856,1873,1887,2237,2251, 2252,2502,2509,2510
                   ]
stars_to_include = list(range(1, 1000))
stars_to_include = list(filter(lambda x : x not in stars_to_exclude, stars_to_include))

In [4]:
def calculate_nights_badness_value(
    year : int, 
    stars_to_use : Iterable[int], 
    attendance_threshold : float,
    is_primary: bool = True
):
    """
    Returns a dictionary of the nights with their badness value for a year by looking 
    at the data from stars in `stars_to_use`
    
    param: year: Year to analyze
    param: stars_to_user: 
        The list of stars to use for calculation of bad nights
        Note that you should not provide the variable stars in this list
    param: attendance_threshold:
        Stars with less this stated threshold will not be considered when doing
        calcualtion
    param: Whether to calculate badness value for the primary or the secondary dataset
    """
    all_nights = get_nights_in_a_year(year, is_primary)
    
    season_mean_signal = {}
    nightly_ratio = {}
    
    # Filter out star numbers outside our stars numbers range
    stars_to_user = filter(lambda x: STAR_START <= x <= STAR_END, stars_to_use)
    
    for star in stars_to_use:
        star_data = get_star(star, is_primary)
        
        # Skip the star if it doesn't pass attendance threshold
        if star_data.attendance(year) < attendance_threshold:
            continue
            
        # Select only the data for the year
        star_data.select_year(year,
            exclude_bad_nights=False, # Important to include all nights
            exclude_zeros=True
        )
        
        # Save mean for the star for the year
        season_mean_signal[star] = star_data.mean() 
        
        for _, flux, date in star_data.selected_data: # Note this is different from star_selected_data
            night_name = date.strftime(date_format)
            ratio_for_star_for_night = flux / season_mean_signal[star]
            
            # Add this ratio to the nightly_ratio
            nightly_ratio.setdefault(night_name, [ratio_for_star_for_night]).append(ratio_for_star_for_night)
    
    nightly_ratios_std = {}
    for night, ratios in nightly_ratio.items():
        ratios = np.array(ratios)
        nightly_ratios_std[night] = np.std(ratios)
        
    return nightly_ratios_std

In [28]:
def find_bad_nights(year: int, 
                    stars_to_use: Iterable[int], 
                    attendance_threshold: float, 
                    badness_threshold : float,
                    is_primary: bool = True
                   ) -> Iterable[str]:
    """
    Returns the list of bad night in a given year
    param: year: Year
    param: stars_to_user: Stars to use
    param: attendance_threshold: 
        Stars with attendance less than this value (betn 0 and 1) won't be used for analysis
    param: badness_threshold:
        Nights with badness value more than the threshold will be returned
    param: is_primary:
        Whether to use the primary dataset or the secondary
    return: List of bad nights
    """
    bad_nights_list = []
    badness_data = calculate_nights_badness_value(year, stars_to_use, attendance_threshold, is_primary)
    for night, badness in badness_data.items():
        if badness > badness_threshold:
            bad_nights_list.append(night)
    return bad_nights_list

In [29]:
def get_ltpr_threshold(year:int):
    """
    Returns the LTPR aka. badness threshold to use
    for the given year
    """
    if 2003 <= year <= 2006:
        return 0.035
    elif 2007 <= year <= 2008:
        return 0.045
    else:
        return 0.035

# All code is above. What's below is just the usage of the functions above.

In [30]:
calculate_nights_badness_value(2020, stars_to_include, .80, is_primary=False)

{'2020-04-07': 0.05877222378536171,
 '2020-02-28': 0.02373561998974805,
 '2020-03-03': 0.01857613343987197,
 '2020-03-05': 0.017727650250659664,
 '2020-03-06': 0.027780464719466106,
 '2020-03-09': 0.02110252045537795,
 '2020-03-12': 0.022550450880855076,
 '2020-03-29': 0.017139890629109363,
 '2020-03-30': 0.014636119865923319,
 '2020-04-08': 0.021942716604006915,
 '2020-04-09': 0.018693460436127823,
 '2020-04-14': 0.01618540037866043,
 '2020-04-15': 0.01807115004295244,
 '2020-04-17': 0.014376801125693475,
 '2020-04-19': 0.014762658775291542,
 '2020-04-20': 0.013663760648415154,
 '2020-04-29': 0.014651058234539443,
 '2020-05-02': 0.019730873528431437,
 '2020-05-03': 0.03388673584913696,
 '2020-05-06': 0.019910436470127663,
 '2020-05-11': 0.028924921862437225,
 '2020-05-28': 0.0146853146697403,
 '2020-05-29': 0.01707298956925007,
 '2020-05-30': 0.01593220889074782,
 '2020-06-01': 0.015588805947891709,
 '2020-06-10': 0.015602483136915372,
 '2020-06-11': 0.013219832315300549,
 '2020-06-16

In [31]:
calculate_nights_badness_value(2020, stars_to_include, .80, is_primary=True)

{'2020-04-07': 0.04675734179011069,
 '2020-04-08': 0.014663169949568212,
 '2020-04-09': 0.015277124614857786,
 '2020-04-14': 0.013686149362823391,
 '2020-04-15': 0.012236367401182106,
 '2020-04-17': 0.013085579520524134,
 '2020-04-19': 0.010959018117412622,
 '2020-04-20': 0.011734883593433885,
 '2020-04-29': 0.011042602300077852,
 '2020-05-02': 0.018451374055875807,
 '2020-05-03': 0.04945523315020606,
 '2020-05-06': 0.01592840706217632,
 '2020-05-11': 0.026199455668914795,
 '2020-05-28': 0.009272073124242405,
 '2020-05-29': 0.011494488246342659,
 '2020-05-30': 0.009435565296800225,
 '2020-06-01': 0.009880087724367085,
 '2020-06-10': 0.009090964113664638,
 '2020-06-11': 0.011115312503860874,
 '2020-06-16': 0.009419251126656746,
 '2020-06-17': 0.00940995856233593,
 '2020-06-24': 0.01925502914311211,
 '2020-07-05': 0.04010626583189631,
 '2020-07-07': 0.016717185169387555,
 '2020-07-15': 0.013965191878136827,
 '2020-07-16': 0.0138392426936734,
 '2020-07-22': 0.013097513816468734,
 '2020-07

In [32]:
find_bad_nights(2020, stars_to_include, .80, get_ltpr_threshold(2020), is_primary=True)

['2020-04-07', '2020-05-03', '2020-07-05']

In [25]:
find_bad_nights(2020, stars_to_include, .80, get_ltpr_threshold(2020), is_primary=False)

['2020-04-07', '2020-07-05', '2020-07-19', '2020-10-08']

In [33]:
is_primary=True
years = range(2003, 2023)

for year in years:
    print(f"Bad nights for {year}")
    try:
        print(find_bad_nights(year, stars_to_include, .80, get_ltpr_threshold(year), is_primary=is_primary))
    except ValueError:
        print(f"Perhaps year {year} has no data")
    print("\n\n")

Bad nights for 2003
['2003-08-12', '2003-08-29']



Bad nights for 2004
Perhaps year 2004 has no data



Bad nights for 2005
[]



Bad nights for 2006
['2006-04-04', '2006-05-06', '2006-06-29', '2006-07-06', '2006-09-05']



Bad nights for 2007
['2007-04-08', '2007-06-08', '2007-06-18', '2007-06-23', '2007-09-14']



Bad nights for 2008
['2008-02-22', '2008-02-23', '2008-03-09', '2008-03-10', '2008-04-15', '2008-05-11', '2008-05-17', '2008-05-20', '2008-06-30', '2008-07-05', '2008-08-18']



Bad nights for 2009
['2009-04-23', '2009-05-10', '2009-05-13', '2009-05-28', '2009-07-04']



Bad nights for 2010
['2010-03-17', '2010-03-20', '2010-04-27', '2010-05-26', '2010-05-27', '2010-06-28']



Bad nights for 2011
['2011-05-16', '2011-09-07']



Bad nights for 2012
['2012-05-28', '2012-07-31']



Bad nights for 2013
['2013-05-13']



Bad nights for 2014
['2014-02-17', '2014-02-18', '2014-02-22']



Bad nights for 2015
['2015-06-01', '2015-07-03']



Bad nights for 2016
['2016-05-20', '2016-

In [34]:
is_primary=False
years = range(2003, 2023)

for year in years:
    print(f"Bad nights for {year}")
    try:
        print(find_bad_nights(year, stars_to_include, .80, get_ltpr_threshold(year), is_primary=is_primary))
    except ValueError:
        print(f"Perhaps year {year} has no data")
    print("\n\n")

Bad nights for 2003
Perhaps year 2003 has no data



Bad nights for 2004
Perhaps year 2004 has no data



Bad nights for 2005
Perhaps year 2005 has no data



Bad nights for 2006
Perhaps year 2006 has no data



Bad nights for 2007
Perhaps year 2007 has no data



Bad nights for 2008
Perhaps year 2008 has no data



Bad nights for 2009
Perhaps year 2009 has no data



Bad nights for 2010
Perhaps year 2010 has no data



Bad nights for 2011
Perhaps year 2011 has no data



Bad nights for 2012
Perhaps year 2012 has no data



Bad nights for 2013
Perhaps year 2013 has no data



Bad nights for 2014
Perhaps year 2014 has no data



Bad nights for 2015
Perhaps year 2015 has no data



Bad nights for 2016
Perhaps year 2016 has no data



Bad nights for 2017
Perhaps year 2017 has no data



Bad nights for 2018
[]



Bad nights for 2019
['2019-10-06']



Bad nights for 2020
['2020-04-07', '2020-07-05', '2020-07-19', '2020-10-08']



Bad nights for 2021
Perhaps year 2021 has no data



Bad nigh