# CARMENES RV CURVES SAMPLING PATTERN POOL

In this notebook we generate the pool of sampling patterns from the **all GTO** available CARMENES RV curves that we will use to modify the basic synthetic dataset to get a modeled dataset as similar as possible to the real CARMENES dataset.

## Modules and configuration

### Modules

In [2]:
import pandas as pd
import numpy as np
import json


### Configuration

In [3]:
GTO_FILE = "../data/GTO_objects_with_PG.csv"
DIST_FILES_FOLDER = "./DIST_FILES/"

RV_PATTERN_POOL_FILE = "RV_All_GTO_sampling_patterns.csv"

### Functions

In [4]:
def rv_extract_sampling(rv_filename):
    '''Reads a CARMENES RV curve file and returns the sampling pattern as a list of time differences,
    referred to the time of the initial sample point'''
    try:
        rv_ts = pd.read_csv(rv_filename, sep=' ', decimal='.', names=['time', 'rv', 'rv_error'])
        ref_time = rv_ts['time'].min()
        sampling_pattern = (rv_ts['time'] - ref_time).tolist()
    except Exception as e:
        print("***ERROR: an error happened with file %s. Error: %s" %(rv_filename, str(e)))
        sampling_pattern = None
    return sampling_pattern

## Load the GTO file

In [5]:
gto = pd.read_csv(GTO_FILE, sep=',', decimal='.')
gto.head()

Unnamed: 0,Karmn,Name,Comp,GJ,RA_J2016_deg,DE_J2016_deg,RA_J2000,DE_J2000,l_J2016_deg,b_J2016_deg,...,T0_PG_TESS,e_T0_PG_TESS,offset_PG_TESS,e_offset_PG_TESS,FAP_PG_TESS,valid_PG_TESS,error_PG_TESS,elapsed_time_PG_TESS,fits_file_TESS,fig_file_TESS
0,J23585+076,Wolf 1051,AB,4383.0,359.63642,7.656947,23:58:32.65,+07:39:30.1,100.839686,-52.931049,...,,,,,,0.0,Not recognized as a supported data product:\nn...,0.002199,,
1,J23556-061,GJ 912,AB,912.0,358.913617,-6.144283,23:55:39.78,-06:08:33.4,88.129933,-65.175491,...,,,,,,0.0,Not recognized as a supported data product:\nn...,0.000974,,
2,J23548+385,RX J2354.8+3831,-,,358.713658,38.52634,23:54:51.46,+38:31:36.2,110.941908,-23.024449,...,1764.609498,0.010704,8262.219751,1.365667,1.0,1.0,,344.002685,../data/CARMENES_GTO_TESS_PGs/J23548+385_TESS_...,../data/CARMENES_GTO_TESS_PGs/figures/J23548+3...
3,J23505-095,LP 763-012,-,4367.0,357.634705,-9.560964,23:50:31.64,-09:33:32.7,80.777067,-67.303426,...,1354.108815,0.001261,7767.134654,0.094298,0.064148,1.0,,473.533042,../data/CARMENES_GTO_TESS_PGs/J23505-095_TESS_...,../data/CARMENES_GTO_TESS_PGs/figures/J23505-0...
4,J23492+024,BR Psc,-,908.0,357.306604,2.396918,23:49:12.53,+02:24:04.4,93.567467,-56.885396,...,,,,,,0.0,Not recognized as a supported data product:\nn...,0.000984,,


### Clean the object list

To be on the safe side, we remove the objects that gave trouble during the RV periodogram calculations:

In [6]:
gto.loc[gto['valid_PG_RV'] == 0, 'Karmn']

92     J17572+707
99     J17198+417
115    J16102-193
260    J07051-101
270    J06318+414
273    J06103+821
309    J04173+088
344    J01352-072
Name: Karmn, dtype: object

In [7]:
gto = gto[gto['valid_PG_RV'] == 1].reset_index().copy()
gto.head()

Unnamed: 0,index,Karmn,Name,Comp,GJ,RA_J2016_deg,DE_J2016_deg,RA_J2000,DE_J2000,l_J2016_deg,...,T0_PG_TESS,e_T0_PG_TESS,offset_PG_TESS,e_offset_PG_TESS,FAP_PG_TESS,valid_PG_TESS,error_PG_TESS,elapsed_time_PG_TESS,fits_file_TESS,fig_file_TESS
0,0,J23585+076,Wolf 1051,AB,4383.0,359.63642,7.656947,23:58:32.65,+07:39:30.1,100.839686,...,,,,,,0.0,Not recognized as a supported data product:\nn...,0.002199,,
1,1,J23556-061,GJ 912,AB,912.0,358.913617,-6.144283,23:55:39.78,-06:08:33.4,88.129933,...,,,,,,0.0,Not recognized as a supported data product:\nn...,0.000974,,
2,2,J23548+385,RX J2354.8+3831,-,,358.713658,38.52634,23:54:51.46,+38:31:36.2,110.941908,...,1764.609498,0.010704,8262.219751,1.365667,1.0,1.0,,344.002685,../data/CARMENES_GTO_TESS_PGs/J23548+385_TESS_...,../data/CARMENES_GTO_TESS_PGs/figures/J23548+3...
3,3,J23505-095,LP 763-012,-,4367.0,357.634705,-9.560964,23:50:31.64,-09:33:32.7,80.777067,...,1354.108815,0.001261,7767.134654,0.094298,0.064148,1.0,,473.533042,../data/CARMENES_GTO_TESS_PGs/J23505-095_TESS_...,../data/CARMENES_GTO_TESS_PGs/figures/J23505-0...
4,4,J23492+024,BR Psc,-,908.0,357.306604,2.396918,23:49:12.53,+02:24:04.4,93.567467,...,,,,,,0.0,Not recognized as a supported data product:\nn...,0.000984,,


In [8]:
gto.shape

(355, 249)

## CARMENES RV sampling patterns pool

### Populate the pool

In [9]:
# Initialize the results array
rv_patterns = pd.DataFrame(columns=['sampling_delta_RV'])
rv_patterns

Unnamed: 0,sampling_delta_RV


In [10]:
# Populate the patterns:
#for i in range(0,5): # TEST
for i in range(0,len(gto)):
    new_pattern = rv_extract_sampling(gto.loc[i, 'rv_file'])
    rv_patterns = rv_patterns.append({'sampling_delta_RV': json.dumps(new_pattern)}, ignore_index=True)
rv_patterns.tail()

Unnamed: 0,sampling_delta_RV
350,"[0.0, 10.991649999748915, 17.91978000011295, 2..."
351,"[0.0, 13.99055999983102, 18.00107999984175, 19..."
352,"[0.0, 1.0492699998430908, 2.0221199998632073, ..."
353,"[0.0, 0.999199999962002, 11.033569999970496, 1..."
354,"[0.0, 20.975219999905676, 188.3961300002411, 2..."


In [11]:
# See the last pattern added:
print(new_pattern)

[0.0, 20.975219999905676, 188.3961300002411, 205.38048000028357, 209.3837600001134, 210.3894600002095, 211.33719999995083, 220.15363000007346, 233.30207000020891, 236.28709000023082, 245.3053999999538, 247.20803000032902, 248.36134999990463, 249.32451000018045, 253.23772000009194, 257.2442999999039, 278.12988999998197, 285.1992100002244, 288.181510000024, 295.1172899999656, 305.06492999987677, 311.0662299999967, 510.30483999988064, 536.335820000153, 537.331430000253, 539.2210099999793, 546.3397200000472, 551.2010599998757, 554.1786000002176, 556.2090199999511, 560.3323099999689, 561.3038400001824, 562.3307000002824, 564.3055000002496, 565.3272500000894, 568.3514499999583, 570.3849400002509, 571.3390899999067, 577.1731400000863, 587.300980000291, 597.2429400002584, 600.2649199999869, 601.2674799999222, 602.3093000003137, 605.2399100000039, 607.2822199999355, 609.2366400002502, 630.1764000002295, 658.1371500003152, 659.1520799999125, 660.1032300000079, 693.2164799999446]


### Save the pool to a file

In [12]:
# Save the file:
rv_patterns.to_csv(DIST_FILES_FOLDER + RV_PATTERN_POOL_FILE, sep=',', decimal='.', index=False)

### Test the access to the saved file and patterns

In [13]:
# Check file re-reading:
reloaded_patterns = pd.read_csv(DIST_FILES_FOLDER + RV_PATTERN_POOL_FILE, sep=',', decimal='.')
reloaded_patterns.tail()

Unnamed: 0,sampling_delta_RV
350,"[0.0, 10.991649999748915, 17.91978000011295, 2..."
351,"[0.0, 13.99055999983102, 18.00107999984175, 19..."
352,"[0.0, 1.0492699998430908, 2.0221199998632073, ..."
353,"[0.0, 0.999199999962002, 11.033569999970496, 1..."
354,"[0.0, 20.975219999905676, 188.3961300002411, 2..."


In [14]:
# Check pattern access:
recovered_pattern = reloaded_patterns.iloc[238,0]
recovered_pattern

'[0.0, 353.87464999966323, 353.95542999962345, 401.82480999967083, 467.70063999993727, 482.88781999982893, 495.69307000003755, 501.62616999959573, 541.754229999613, 560.7125699999742, 1180.5783999999985]'

In [15]:
# Notice that we need to convert this string to an array:
rec_pattern_array = json.loads(recovered_pattern)
print(rec_pattern_array)

[0.0, 353.87464999966323, 353.95542999962345, 401.82480999967083, 467.70063999993727, 482.88781999982893, 495.69307000003755, 501.62616999959573, 541.754229999613, 560.7125699999742, 1180.5783999999985]


In [16]:
type(rec_pattern_array)

list

In [17]:
# Check access to a given pattern position:
rec_pattern_array[4]

467.70063999993727

## Summary

**CONCLUSIONS:**
- We have generated a pool of sampling patterns for all GTO CARMENES RV curves (but for the 8 problematic records) that will be later used to affect the benchmark synthetic database with irregular sampling in a random way.