### Imports

In [3]:
import os

import numpy as np
import pandas as pd

### Load data

In [4]:
# Light-curve tables keyed by survey name.
# NOTE(review): unpickling can execute arbitrary code, so only load .pkl files
# that come from a trusted source.
lc_datasets = {
    "lsst": pd.read_pickle("data/lsst_RRLyr.pkl"),
    "kepler": pd.read_csv("data/kepler_RRLyr.csv"),
}

### Params

In [5]:
# Column names of the LSST table that the cells below use to pull out a light curve.
time_col = "expMidptMJD"  # column with the observation times
mag_col = "psfMag"  # column with the magnitudes
bands = "ugrizy"  # presumably one character per photometric band -- TODO confirm

### Class definitions

In [163]:
class Variable:
    """A variable object identified by ``obj_id`` that holds one light curve.

    The light curve lives in ``self.lc``, a dict of equally long NumPy arrays
    stored under the keys ``"mjd"`` and ``"mag"`` and, when errors were
    supplied, ``"mag_errs"``.
    """

    def __init__(self, obj_id):
        self.obj_id = obj_id
        self.lc = {"mjd": np.array([]), "mag": np.array([])}

    def add_observations(self, mjds, mags, mag_errs=None):
        """
        Sets the observations of the light curve.

        Previously stored observations are replaced, not extended. The input
        is converted and validated before anything is stored, so a failing
        call leaves the existing light curve untouched.

        Args:
          mjds: A vector of Modified Julian Dates (x values).
          mags: A vector of magnitudes (y values).
          mag_errs: A vector of magnitude errors.

        Raises:
          ValueError: If an input has an unsupported type or the inputs have
            different lengths.
        """
        new_lc = {
            "mjd": self.convert_to_array(mjds),
            "mag": self.convert_to_array(mags),
        }
        if mag_errs is not None:
            new_lc["mag_errs"] = self.convert_to_array(mag_errs)
        self.compare_len(new_lc.values())
        # Update the existing dict in place (keeps references to ``self.lc``
        # valid) and drop keys from an earlier call, e.g. stale "mag_errs".
        self.lc.clear()
        self.lc.update(new_lc)
        return

    def convert_to_array(self, data):
        """Return ``data`` as a NumPy array with at least one dimension.

        Accepts NumPy arrays, lists, tuples, pandas Series and single numbers
        (Python or NumPy scalars).

        Raises:
          ValueError: If ``data`` is of any other type.
        """
        if isinstance(data, np.ndarray):
            # A 0-d array has no len(); promote it to a one-element vector.
            return np.atleast_1d(data)
        if isinstance(data, (list, tuple, pd.Series)):
            return np.array(data)
        if isinstance(data, (int, float, np.number)):
            return np.array([data])
        raise ValueError("The data type of the input is incorrect!")

    def compare_len(self, arrs):
        """Raise ``ValueError`` unless all arrays in ``arrs`` have the same length."""
        lens = {len(arr) for arr in arrs}
        if len(lens) > 1:
            raise ValueError(
                "Passed timestamps and mags or mag_errs arrays have different lengths!"
            )
        return

    @property
    def mean_mag(self):
        """Mean of the stored magnitudes."""
        return np.mean(self.lc["mag"])

    def __str__(self):
        return str(self.obj_id)

    def __len__(self):
        """Number of stored observations."""
        return len(self.lc["mjd"])

### Using the class

In [164]:
# Pick one object to experiment with: the entry at index 7 of the unique
# objectId values in the LSST table (no reason for this index is given here).
obj_id = lc_datasets["lsst"]["objectId"].unique()[7]

In [165]:
b = "g"

In [166]:
# Boolean row mask: rows of the LSST table that belong to `obj_id` AND were
# taken in band `b`.
filt_band_obj = lc_datasets["lsst"]["objectId"].eq(obj_id) & lc_datasets["lsst"][
    "band"
].eq(b)

In [167]:
obj_obs = lc_datasets["lsst"][filt_band_obj]

In [168]:
star = Variable(obj_id)

In [169]:
print(star.obj_id)

1405624461041897445


In [170]:
star.add_observations(obj_obs[time_col], obj_obs[mag_col])

In [171]:
print(star)
print(star.lc)

1405624461041897445
{'mjd': array([60559.2973682, 59791.3473572, 60559.2978172, 61017.0665232,
       60281.1630512, 59840.2103322, 60560.2654012, 61298.2853162,
       60610.1669992, 60881.3838612, 61269.3333562, 59841.2948172,
       59840.2107802, 60555.2837972, 60880.4114602, 61109.0454832,
       59841.3286042, 60962.2788762, 60260.1801602, 60145.4057402,
       60610.1014732, 60962.2669892, 61329.2779102, 60588.1944462,
       61329.2769632, 59791.3582372, 59840.2367152, 60993.0741872,
       60880.3849662, 60993.0622822, 59958.1258722, 61109.0305462,
       60286.0827232, 60962.2656102, 60962.1967542, 60962.1836422,
       59840.2284672, 60610.1253832, 60588.2046822, 61356.1581872,
       60610.1010252, 60962.3061112, 61084.1131352, 60530.3755112]), 'mag': array([17.0270149 , 17.9296522 , 17.02132289, 17.35962392, 17.64018619,
       18.37622276, 17.42335279, 18.38669221, 18.38521324, 18.38482772,
       18.38450526, 17.25162572, 18.37937516, 17.88716136, 18.25681606,
       18.

In [172]:
len(star)

44

In [173]:
star.mean_mag

18.03180312045771

### Composition

In [29]:
class Lightcurve:
    """A single-band light curve.

    The data live in ``self.lc``, a dict of equally long NumPy arrays stored
    under the keys ``"mjds"`` and ``"mags"`` and, when errors were supplied,
    ``"mag_errs"``. The dict is empty until observations are added.
    """

    def __init__(self, mjds=None, mags=None, mag_errs=None):
        """Create a light curve; observations are stored only if ``mjds`` is given."""
        self.lc = {}
        if mjds is not None:
            self.add_observations(mjds, mags, mag_errs)

    def add_observations(self, mjds, mags, mag_errs=None):
        """Set the observations of the light curve and return ``self.lc``.

        Previously stored observations are replaced, not extended. The input
        is converted and validated before anything is stored, so a failing
        call leaves the existing light curve untouched.

        Args:
          mjds: A vector of Modified Julian Dates (x values).
          mags: A vector of magnitudes (y values).
          mag_errs: A vector of magnitude errors.

        Raises:
          ValueError: If an input has an unsupported type or the inputs have
            different lengths.
        """
        new_lc = {
            "mjds": self.convert_to_array(mjds),
            "mags": self.convert_to_array(mags),
        }
        if mag_errs is not None:
            new_lc["mag_errs"] = self.convert_to_array(mag_errs)
        self.compare_len(new_lc.values())
        # Update the existing dict in place (keeps references to ``self.lc``
        # valid) and drop keys from an earlier call, e.g. stale "mag_errs".
        self.lc.clear()
        self.lc.update(new_lc)
        return self.lc

    def convert_to_array(self, data):
        """Return ``data`` as a NumPy array with at least one dimension.

        Accepts NumPy arrays, lists, tuples, pandas Series and single numbers
        (Python or NumPy scalars).

        Raises:
          ValueError: If ``data`` is of any other type.
        """
        if isinstance(data, np.ndarray):
            # A 0-d array has no len(); promote it to a one-element vector.
            return np.atleast_1d(data)
        if isinstance(data, (list, tuple, pd.Series)):
            return np.array(data)
        if isinstance(data, (int, float, np.number)):
            return np.array([data])
        raise ValueError("The data type of the input is incorrect!")

    def compare_len(self, arrs):
        """Raise ``ValueError`` unless all arrays in ``arrs`` have the same length."""
        lens = {len(arr) for arr in arrs}
        if len(lens) > 1:
            raise ValueError(
                "Passed timestamps and mags or mag_errs arrays have different lengths!"
            )
        return

    @property
    def mean_mag(self):
        """Mean of the stored magnitudes; NaN when no observations were added."""
        if "mags" not in self.lc:
            return np.nan
        return np.mean(self.lc["mags"])

    def __len__(self):
        """Number of stored observations (0 for a light curve without data)."""
        # bool() and len() on a fresh instance must not fail with KeyError.
        return len(self.lc.get("mjds", ()))

In [30]:
class Variable:
    """A variable object that owns one Lightcurve per band (composition)."""

    def __init__(self, obj_id):
        # Light curves keyed by band; filled through add_lc().
        self.mband_lc = {}
        self.obj_id = obj_id

    def __str__(self):
        return str(self.obj_id)

    def add_lc(self, band, mjds, mags, mag_errs=None):
        """Build a Lightcurve from the given data and store it under ``band``.

        An existing entry for ``band`` is overwritten.

        :returns: the dict of all light curves, keyed by band.
        """
        lightcurve = Lightcurve(mjds, mags, mag_errs)
        self.mband_lc[band] = lightcurve
        return self.mband_lc

In [31]:
# Relies on `obj_id`, `b` and `obj_obs` from the "Using the class" cells; the
# NameError recorded below shows those cells had not been run in that session.
star2 = Variable(obj_id)
star2.add_lc(band=b, mjds=obj_obs[time_col], mags=obj_obs[mag_col])
# Look the light curve up under the same band key it was stored with.
print(star2.mband_lc[b].mean_mag)

NameError: name 'obj_id' is not defined

### Inheritance

In [32]:
class Variable:
    """Base class: an object id plus a dict of per-band light curves."""

    def __init__(self, obj_id):
        self.obj_id, self.mband_lc = obj_id, {}

    def add_lc(self, band, mjds, mags, mag_errs=None):
        """Store a new Lightcurve for ``band`` and return the per-band dict.

        A light curve already stored for ``band`` is replaced.
        """
        band_lc = Lightcurve(mjds, mags, mag_errs)
        self.mband_lc[band] = band_lc
        return self.mband_lc

    def __str__(self):
        # The string form is just the object id.
        label = str(self.obj_id)
        return label

In [33]:
class RRLyrae(Variable):
    """Variable subclass for RR Lyrae stars; adds a ``period`` attribute."""

    def __init__(self, obj_id):
        super().__init__(obj_id)
        # Unknown until period_determination() has been called.
        self.period = None

    def period_determination(self, period_range=(0.1, 3)):
        """Set ``self.period``.

        Placeholder implementation: ``period_range`` is accepted but not used,
        and the period is always set to the fixed value 0.3.
        """
        placeholder_period = 0.3
        self.period = placeholder_period

In [34]:
rr_lyrae = RRLyrae(obj_id)

NameError: name 'obj_id' is not defined

In [35]:
print(rr_lyrae.mband_lc)
print(rr_lyrae.period)

NameError: name 'rr_lyrae' is not defined

In [207]:
rr_lyrae.period_determination()

In [208]:
rr_lyrae.period

0.3

### Data table class

In [39]:
class Survey:
    """A survey table with per-object, per-band access to its rows.

    The column names used for lookups are stored on the instance
    (``id_col``, ``band_col``, ``time_col``, ``mag_col``).
    """

    def __init__(self, filename):
        """Load the table in ``filename`` and cache its unique object ids."""
        self.id_col = "objectId"
        self.band_col = "band"
        self.time_col = "expMidptMJD"
        self.mag_col = "psfMag"
        self.data = self.load_table(filename)
        self.unique_objects = self.data[self.id_col].unique()

    def load_table(self, filename):
        """Load a table from a CSV or pickle file.

        :param filename: Path of the ``.csv`` or ``.pkl`` file to load
            (a string or a path-like object).
        :returns: pd.DataFrame with the data from the file.
        :raises ValueError: If the file extension is neither ``.csv`` nor ``.pkl``.
        """
        path = str(filename)
        if path.endswith(".csv"):
            return pd.read_csv(filename)
        if path.endswith(".pkl"):
            # NOTE(review): unpickling can execute arbitrary code; only load
            # pickle files that come from a trusted source.
            return pd.read_pickle(filename)
        raise ValueError(
            f"Unsupported file type for {path!r}: expected a .csv or .pkl file."
        )

    def get_obj_band_df(self, obj_id, band):
        """Return the rows of the table for object ``obj_id`` in band ``band``."""
        filt_band_obj = (self.data[self.id_col] == obj_id) & (
            self.data[self.band_col] == band
        )
        return self.data[filt_band_obj]

    def get_lc(self, obj_id, band):
        """Return the light-curve dict of object ``obj_id`` in band ``band``.

        :returns: the ``lc`` dict of a Lightcurve built from the time and
            magnitude columns of the matching rows.
        """
        df = self.get_obj_band_df(obj_id, band)
        # Use the instance's column names, not same-named notebook globals.
        lc = Lightcurve(mjds=df[self.time_col], mags=df[self.mag_col])
        return lc.lc

In [46]:
lsst = Survey("data/lsst_RRLyr.pkl")

In [47]:
lsst.get_lc(1251384969897480052, "g")

{'mjds': array([59876.1486552, 59840.2376442, 61298.2857732, 61329.2497022,
        61298.3073452, 61378.1931902, 60993.0627302, 61269.3148392,
        60555.2806752, 61378.1712402, 60286.0813702, 60962.2793252,
        60679.1080032, 60679.0854702, 60993.0649582, 60260.2356992,
        61269.3338092, 60555.2943272, 60286.0800202, 59841.2943612,
        61358.0823242, 59876.1473062, 59791.3514662, 61084.1135872,
        61109.0278642, 61384.1322352, 61109.0427072, 60962.1831932,
        61017.0816122, 61356.1775552, 61017.0811632, 60177.3163482,
        60962.1845452, 60962.1760522, 60177.3321762, 61050.1563862,
        60610.1441012, 60286.0663042, 61329.2703862, 59958.1358992,
        60588.2042292, 59841.3281512, 59876.1665182, 60260.2130452]),
 'mags': array([26.65128046, 25.95542808, 26.29130909, 25.56378835, 25.95958066,
        24.98153458, 25.51662685, 28.47945744, 26.46714184, 25.70013033,
        25.86521625, 26.19794681, 26.48325015, 27.72604482, 24.59013262,
        25.2603

In [44]:
lc_datasets["kepler"].columns

Index(['time', 'flux', 'flux_err', 'quality', 'timecorr', 'centroid_col',
       'centroid_row', 'cadenceno', 'sap_flux', 'sap_flux_err', 'sap_bkg',
       'sap_bkg_err', 'pdcsap_flux', 'pdcsap_flux_err', 'sap_quality',
       'psf_centr1', 'psf_centr1_err', 'psf_centr2', 'psf_centr2_err',
       'mom_centr1', 'mom_centr1_err', 'mom_centr2', 'mom_centr2_err',
       'pos_corr1', 'pos_corr2'],
      dtype='object')

In [1]:
import pandas as pd

In [3]:
pd.__doc__

