# Test Development Notebook

### Imports

In [16]:
import pandas as pd

import lcanalyzer.models as models

### Params

In [95]:
bands = "ugrizy"

### Load the data

In [119]:
lc_datasets = {}
lc_datasets["lsst"] = pd.read_pickle("data/lsst_RRLyr.pkl")
lc_datasets["kepler"] = pd.read_csv("data/kepler_RRLyr.csv")

### Inspect the data

In [182]:
lc_datasets['lsst'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11177 entries, 0 to 11176
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   band         11177 non-null  object 
 1   ccdVisitId   11177 non-null  int64  
 2   coord_ra     11177 non-null  float64
 3   coord_dec    11177 non-null  float64
 4   objectId     11177 non-null  int64  
 5   psfFlux      11177 non-null  float64
 6   psfFluxErr   11177 non-null  float64
 7   psfMag       10944 non-null  float64
 8   ccdVisitId2  11177 non-null  int64  
 9   band2        11177 non-null  object 
 10  expMidptMJD  11177 non-null  float64
 11  zeroPoint    11177 non-null  float32
dtypes: float32(1), float64(6), int64(3), object(2)
memory usage: 1004.3+ KB


In [120]:
lc_datasets["kepler"].head()

Unnamed: 0,time,flux,flux_err,quality,timecorr,centroid_col,centroid_row,cadenceno,sap_flux,sap_flux_err,...,psf_centr1,psf_centr1_err,psf_centr2,psf_centr2_err,mom_centr1,mom_centr1_err,mom_centr2,mom_centr2_err,pos_corr1,pos_corr2
0,131.512404,10180609.0,78.926155,128,0.00141,653.37247,51.053028,1105,10129629.0,79.18698,...,,,,,653.37247,6e-06,51.053028,6.3e-05,0.011782,-0.010195
1,131.532839,10013518.0,78.23377,128,0.001411,653.372292,51.053872,1106,9949931.0,78.459984,...,,,,,653.372292,6e-06,51.053872,6.2e-05,0.011726,-0.010246
2,131.553273,9852474.0,77.67316,128,0.001412,653.372167,51.044559,1107,9783633.0,77.8272,...,,,,,653.372167,6e-06,51.044559,6.2e-05,0.011575,-0.010089
3,131.573707,9722936.0,77.10971,128,0.001413,653.371408,51.045081,1108,9651452.0,77.31359,...,,,,,653.371408,6e-06,51.045081,6.2e-05,0.011366,-0.009939
4,131.594142,9717073.0,77.10355,0,0.001414,653.372167,51.052828,1109,9646289.0,77.262634,...,,,,,653.372167,6e-06,51.052828,6.2e-05,0.011526,-0.010702


In [98]:
len(lc_datasets["lsst"]["objectId"].unique())

25

### Select a single light curve

In [99]:
# Pick an object
obj_id = lc_datasets["lsst"]["objectId"].unique()[4]

In [183]:
### Get all the observations for this obj_id for each band
# Create an empty dict
lc = {}
# Define the bands names
bands = 'ugrizy'
# For each band create a bool array that indicates
# that this observation belongs to a certain object and is made in a
# certain band
for b in bands:
    filt_band_obj = (lc_datasets["lsst"]["objectId"] == obj_id) & (
        lc_datasets["lsst"]["band"] == b
    )
    # Select the observations and store in the dict 'lc'
    lc[b] = lc_datasets["lsst"][filt_band_obj]

In [190]:
models.max_mag(lc["g"], "psfMag")

19.183367224358136

In [102]:
# max_tab_by_groupby = lc_datasets["lsst"].groupby(["objectId", "band"])["psfMag"].max()
# max_tab_by_groupby.reset_index()

## Developing new function for getting statistics for all bands

In [126]:
mag_col = "psfMag"

In [127]:
def calc_stat(lc, bands, mag_col):
    stat = {}
    for b in bands:
        stat[b + "_max"] = models.max_mag(lc[b], mag_col)
    return stat

In [129]:
calc_stat(lc, bands, mag_col)

{'u_max': 21.14104463470903,
 'g_max': 19.183367224358136,
 'r_max': 18.88863489356158,
 'i_max': 18.827676664867155,
 'z_max': 18.869036901698408,
 'y_max': 18.854156410782494}

## Experimenting with testing

In [208]:
test_input = pd.DataFrame(data=[[1, 5, 3], [7, 8, 9], [3, 4, 1]], columns=list("abc"))
test_output = 7

In [207]:
assert models.max_mag(test_input, "a") == test_output

AssertionError: 

In [221]:
test_input = pd.DataFrame(data=[[-7, -7, -3], [-4, -3, -1], [-1, -5, -3]], columns=list("abc"))

In [222]:
test_input

Unnamed: 0,a,b,c
0,-7,-7,-3
1,-4,-3,-1
2,-1,-5,-3


In [226]:
models.min_mag(test_input, "b")

-7

In [199]:
df1 = pd.DataFrame(data=[[1, 5, 3], [7, 8, 9], [3, 4, 1]], columns=list("abc"))
df2 = pd.DataFrame(data=[[7, 3, 2], [8, 4, 2], [5, 6, 4]], columns=list("abc"))
df3 = pd.DataFrame(data=[[2, 6, 3], [1, 3, 6], [8, 9, 1]], columns=list("abc"))
test_input = {"df1": df1, "df2": df2, "df3": df3}

In [200]:
calc_stat(test_input, ["df1", "df2", "df3"], "b")

{'df1_max': 8, 'df2_max': 6, 'df3_max': 9}

In [201]:
test_output = {"df1_max": 8, "df12_max": 6, "df3_max": 8}

In [202]:
test_output == calc_stat(test_input, ["df1", "df2", "df3"], "b")

False

In [189]:
lc.keys()

dict_keys(['u', 'g', 'r', 'i', 'z', 'y'])

In [184]:
import unittest

In [186]:
unittest.TestCase.assertDictEqual(calc_stat(test_input, ["df1", "df2", "df3"], "b"), test_output)

TypeError: TestCase.assertDictEqual() missing 1 required positional argument: 'd2'

In [163]:
import numpy as np

In [164]:
df1 = pd.DataFrame(data=[[1, 5, 3], [7, np.NaN, 9], [3, 4, 1]], columns=list("abc"))
df2 = pd.DataFrame(data=[[7, 3, 2], [8, 4, 2], [5, 6, 4]], columns=list("abc"))
df3 = pd.DataFrame(data=[[2, 6, 3], [1, 3, 6], [8, 9, 1]], columns=list("abc"))
test_input = {"df1": df1, "df2": df2, "df3": df3}

In [165]:
calc_stat(test_input, ["df1", "df2", "df3"], "b")

{'df1_max': 5.0, 'df2_max': 6, 'df3_max': 9}

In [167]:
df1

Unnamed: 0,a,b,c
0,1,5.0,3
1,7,,9
2,3,4.0,1


In [227]:
np.random.randint(1,5,(3,3))

array([[2, 2, 2],
       [2, 2, 2],
       [2, 4, 4]])

In [228]:
np.random.rand(4,5)

array([[0.74388382, 0.63958656, 0.91144613, 0.68235806, 0.62163621],
       [0.58642309, 0.48093582, 0.48578257, 0.97574531, 0.30148308],
       [0.86746741, 0.77709938, 0.28548563, 0.77155226, 0.35750828],
       [0.94201326, 0.90645773, 0.29164586, 0.14766958, 0.35175889]])

In [231]:
test_df = pd.DataFrame(data=[[1, 5, 3], 
                            [7, 8, 9], 
                            [3, 4, 1]],
                      columns=list("abc"))

In [247]:
models.mean_mag(test_df,'a')

3.6666666666666665

In [233]:
import pytest

In [250]:
assert models.mean_mag(test_df,'a') == pytest.approx(3.66,0.01)

AssertionError: 