In [1]:
# This program finds bad nights in a year

# The algorithm goes like this
# Create a list for stars to process, call is `stars_to_process`
# Add stars 1-1000 to stars_to_process
# Throw out variable stars from `stars_to_process`
# Throw out stars that have <80% attendance for the year from `stars_to_process`

# Create a dictionary, call it `season_mean_signal` to hold mean signals of stars for the entire data season
#   Note that the mean signal for a season for a stars is calculated from set of the internight 
#   normalized signal on each night of the season for the star.
# For each night, create a dictionary to hold data for the night, call it `nights_data_dict`
#   Create a entry in the dictionary with the night_date as key and the value as the list containing:
#   The ratio of signal_of_star_on_night[night] / season_mean_signal[star_i] for each star
# For each night, create a dict to hold std of values in `nights_data_dict`, call this dict `nights_signal_std_dict`
# (note that at this point, we have a standard deviation value for each night)
# We mark nights that have this value above a threshold (example: 0.035 for old camera, 0.03 for new camera) as bad nights for the year

In [13]:
import numpy as np
from typing import Iterable

from trout.stars import get_star, Star
from trout.stars.utils import STAR_START, STAR_END
from trout.nights.year_nights import get_nights_in_a_year

In [21]:
date_format = "%m-%d-%Y"

In [22]:
def calculate_nights_badness_value(year : int, stars_to_use : Iterable[int], attendance_threshold : float):
    """
    Returns a dictionary of the nights with their badness value for a year by looking 
    at the data from stars in `stars_to_use`
    
    param: year: Year to analyze
    param: stars_to_user: 
        The list of stars to use for calculation of bad nights
        Note that you should not provide the variable stars in this list
    param: attendance_threshold:
        Stars with less this stated threshold will not be considered when doing
        calcualtion
    """
    all_nights = get_nights_in_a_year(year)
    
    season_mean_signal = {}
    nightly_ratio = {}
    
    # Filter out star numbers outside our stars numbers range
    stars_to_user = filter(lambda x: STAR_START <= x <= STAR_END, stars_to_use)
    
    for star in stars_to_use:
        star_data = get_star(star)
        
        # Skip the star if it doesn't pass attendance threshold
        if star_data.attendance(year) >= attendance_threshold:
            continue
            
        # Select only the data for the year
        star_data.select_year(year,
            exclude_bad_nights=False, # Important to include all nights
            exclude_zeros=True
        )
        
        # Save mean for the star for the year
        season_mean_signal[star] = star_data.mean() 
        
        for _, flux, date in star_data.selected_data: # Note this is different from star_selected_data
            night_name = date.strftime(date_format)
            ratio_for_star_for_night = flux / season_mean_signal[star]
            
            # Add this ratio to the nightly_ratio
            nightly_ratio.setdefault(night_name, [ratio_for_star_for_night]).append(ratio_for_star_for_night)
    
    nightly_ratios_std = {}
    for night, ratios in nightly_ratio.items():
        ratios = np.array(ratios)
        nightly_ratios_std[night] = np.std(ratios)
        
    return nightly_ratios_std

In [28]:
def find_bad_nights(year: int, 
                    stars_to_use: Iterable[int], 
                    attendance_threshold: float, 
                    badness_threshold : float) -> Iterable[str]:
    """
    Returns the list of bad night in a given year
    param: year: Year
    param: stars_to_user: Stars to use
    param: attendance_threshold: 
        Stars with attendance less than this value (betn 0 and 1) won't be used for analysis
    param: badness_threshold:
        Nights with badness value more than the threshold will be returned
    
    return: List of bad nights
    """
    bad_nights_list = []
    badness_data = calculate_nights_badness_value(year, stars_to_use, attendance_threshold)
    for night, badness in badness_data.items():
        if badness > badness_threshold:
            bad_nights_list.append(night)
    return bad_nights_list

In [29]:
find_bad_nights(2021, list(range(1, 1000)), .80, 0.035)

['06-04-2021',
 '03-01-2021',
 '03-07-2021',
 '03-11-2021',
 '03-12-2021',
 '03-31-2021',
 '04-20-2021',
 '06-21-2021',
 '07-18-2021',
 '07-25-2021',
 '09-22-2021']