# ML SUBSAMPLE CURVES PREPROCESSING FOR `LSTperiod`

This notebook preprocesses the CARMENES VIS time series (corrected by NZPs with _SERVAL_) to generate the proper datafiles for `LSTperiodogram`.

It takes the files under the folder containing the CARMENES VIS time series files and retrieves the proper ones, depending on the file extension pattern specified at the beginning, and on their pertaining or not to the PG subsample. The files are already in the format needed for _LSTperiod_.

It also stores the list of available star names in a new file.

## Modules and configuration

### Modules

In [16]:
import pandas as pd

import shutil

import os

### Configuration

In [22]:
# FILES AND FOLDERS:
PG_SUBSAMPLE_FILE = "../data/SELECTION_for_PG_CARM_VIS_objects_with_PG.csv"
CARM_VIS_FOLDER = "../data/CARM_VIS_RVs/"
FILEEXT_PATTERN = ".avc.dat"

TESS_FOLDER = "./GTO_TESS_LC_LSTperiod/"
FILEEXT_PATTERN_TESS = ".dat"

TARGET_FOLDER = "./PG_subsample_RV_LSTperiod/"
TARGET_FOLDER_TESS = "./PG_subsample_TESS_LSTperiod/"


## Set the list of stars that belong to the ML subsample

In [23]:
pg_df = pd.read_csv(PG_SUBSAMPLE_FILE, sep=',', decimal='.')
pg_df.head()

Unnamed: 0,Karmn,Name,Comp,GJ,RA_J2016_deg,DE_J2016_deg,RA_J2000,DE_J2000,l_J2016_deg,b_J2016_deg,...,WF_offset_PG_TESS,WF_e_offset_PG_TESS,WF_FAP_PG_TESS,WF_valid_PG_TESS,WF_error_PG_TESS,WF_elapsed_time_PG_TESS,WF_plain_file_TESS,WF_fig_file_TESS,PG_file_RV,PG_file_TESS
0,J23548+385,RX J2354.8+3831,-,,358.713658,38.52634,23:54:51.46,+38:31:36.2,110.941908,-23.024449,...,999.999756,2.151008e-06,1.0,1.0,,94.758838,../data/CARM_VIS_TESS_WinFunc_PGs/WF_J23548+38...,../data/CARM_VIS_TESS_WinFunc_PGs/figures/WF_J...,../data/CARM_VIS_RVs_PGs/J23548+385_RV_PG.dat,../data/CARM_VIS_TESS_PGs/J23548+385_RV_PG.dat
1,J23505-095,LP 763-012,-,4367.0,357.634705,-9.560964,23:50:31.64,-09:33:32.7,80.777067,-67.303426,...,1000.000122,9.022946e-07,1.0,1.0,,132.607176,../data/CARM_VIS_TESS_WinFunc_PGs/WF_J23505-09...,../data/CARM_VIS_TESS_WinFunc_PGs/figures/WF_J...,../data/CARM_VIS_RVs_PGs/J23505-095_RV_PG.dat,../data/CARM_VIS_TESS_PGs/J23505-095_RV_PG.dat
2,J23431+365,GJ 1289,-,1289.0,355.781509,36.53631,23:43:06.31,+36:32:13.1,107.922839,-24.336479,...,999.999512,4.306074e-06,1.0,1.0,,97.939914,../data/CARM_VIS_TESS_WinFunc_PGs/WF_J23431+36...,../data/CARM_VIS_TESS_WinFunc_PGs/figures/WF_J...,../data/CARM_VIS_RVs_PGs/J23431+365_RV_PG.dat,../data/CARM_VIS_TESS_PGs/J23431+365_RV_PG.dat
3,J23381-162,G 273-093,-,4352.0,354.532687,-16.236514,23:38:08.16,-16:14:10.2,61.845437,-69.82522,...,1000.000122,9.022946e-07,1.0,1.0,,136.603404,../data/CARM_VIS_TESS_WinFunc_PGs/WF_J23381-16...,../data/CARM_VIS_TESS_WinFunc_PGs/figures/WF_J...,../data/CARM_VIS_RVs_PGs/J23381-162_RV_PG.dat,../data/CARM_VIS_TESS_PGs/J23381-162_RV_PG.dat
4,J23245+578,BD+57 2735,-,895.0,351.126628,57.853057,23:24:30.51,+57:51:15.5,111.552287,-3.085183,...,999.999512,3.720858e-06,1.0,1.0,,131.327304,../data/CARM_VIS_TESS_WinFunc_PGs/WF_J23245+57...,../data/CARM_VIS_TESS_WinFunc_PGs/figures/WF_J...,../data/CARM_VIS_RVs_PGs/J23245+578_RV_PG.dat,../data/CARM_VIS_TESS_PGs/J23245+578_RV_PG.dat


In [24]:
pg_objects = pg_df['Karmn'].unique().tolist()
print(pg_objects)

['J23548+385', 'J23505-095', 'J23431+365', 'J23381-162', 'J23245+578', 'J22532-142', 'J22468+443', 'J22298+414', 'J22252+594', 'J22231-176', 'J22137-176', 'J22114+409', 'J22102+587', 'J22057+656', 'J22020-194', 'J22012+283', 'J21474+627', 'J21466+668', 'J21463+382', 'J21348+515', 'J21152+257', 'J20533+621', 'J20451-313', 'J20450+444', 'J20336+617', 'J20305+654', 'J20260+585', 'J20198+229', 'J20109+708', 'J19511+464', 'J19251+283', 'J19242+755', 'J19216+208', 'J19206+731S', 'J19084+322', 'J19025+754', 'J18427+596S', 'J18427+596N', 'J18419+318', 'J18363+136', 'J18356+329', 'J18353+457', 'J18346+401', 'J18319+406', 'J18224+620', 'J18189+661', 'J18180+387E', 'J18174+483', 'J18131+260', 'J18022+642', 'J17578+465', 'J17378+185', 'J17364+683', 'J17355+616', 'J17338+169', 'J17198+417', 'J17115+384', 'J17071+215', 'J17033+514', 'J16581+257', 'J16578+473', 'J16462+164', 'J16343+571', 'J16313+408', 'J16254+543', 'J16167+672S', 'J16167+672N', 'J16028+205', 'J15583+354', 'J15499+796', 'J15412+759',

## Copy the relevant files into the RV _LSTperiod_ folder

In [27]:
star_list = []
files = os.listdir(CARM_VIS_FOLDER)
for file_name in files:
    full_file_name = os.path.join(CARM_VIS_FOLDER, file_name)
    file_ext = file_name[-len(FILEEXT_PATTERN):]
    star_name = file_name[:-len(FILEEXT_PATTERN)]
    print(file_ext)
    print(star_name)
    if file_ext == FILEEXT_PATTERN and star_name in pg_objects:
        if os.path.getsize(full_file_name) == 0:
            print("Skipping file %s, as it is empty" %file_name)
        else:
            shutil.copy2(full_file_name, TARGET_FOLDER)
            star_list.append(file_name[:-len(FILEEXT_PATTERN)])
    else:
        pass
print("Num stars (RV): %d" %len(star_list))
print(star_list)


lues.csv
CARMENES_ML_subsample_RMS_va
lues.csv
CARMENES_RV_RMS_va
eans.png
gtoc_vis_night_m
zero.txt
gtoc_vis_night_
1120.txt
gtoc_vis_night_zero_2017
hist.png
gtoc_vis_RVstd_
.avc.dat
J00051+457
avcn.dat
J00051+457.
.avc.dat
J00067-075
avcn.dat
J00067-075.
.avc.dat
J00162+198E
avcn.dat
J00162+198E.
.avc.dat
J00162+198W
avcn.dat
J00162+198W.
.avc.dat
J00183+440
avcn.dat
J00183+440.
.avc.dat
J00184+440
avcn.dat
J00184+440.
.avc.dat
J00286-066
avcn.dat
J00286-066.
.avc.dat
J00389+306
avcn.dat
J00389+306.
.avc.dat
J00403+612
avcn.dat
J00403+612.
.avc.dat
J00570+450
avcn.dat
J00570+450.
.avc.dat
J01013+613
avcn.dat
J01013+613.
.avc.dat
J01019+541
avcn.dat
J01019+541.
.avc.dat
J01025+716
avcn.dat
J01025+716.
.avc.dat
J01026+623
avcn.dat
J01026+623.
.avc.dat
J01033+623
avcn.dat
J01033+623.
.avc.dat
J01048-181
avcn.dat
J01048-181.
.avc.dat
J01056+284
avcn.dat
J01056+284.
.avc.dat
J01066+192
avcn.dat
J01066+192.
.avc.dat
J01125-169
avcn.dat
J01125-169.
.avc.dat
J01339-176
avcn.dat
J01339-176.


avcn.dat
J14152+450.
.avc.dat
J14155+046
avcn.dat
J14155+046.
.avc.dat
J14173+454
avcn.dat
J14173+454.
.avc.dat
J14251+518
avcn.dat
J14251+518.
.avc.dat
J14257+236E
avcn.dat
J14257+236E.
.avc.dat
J14257+236W
avcn.dat
J14257+236W.
.avc.dat
J14294+155
avcn.dat
J14294+155.
.avc.dat
J14307-086
avcn.dat
J14307-086.
.avc.dat
J14310-122
avcn.dat
J14310-122.
.avc.dat
J14321+081
avcn.dat
J14321+081.
.avc.dat
J14342-125
avcn.dat
J14342-125.
.avc.dat
J14524+123
avcn.dat
J14524+123.
.avc.dat
J14544+355
avcn.dat
J14544+355.
.avc.dat
J15013+055
avcn.dat
J15013+055.
.avc.dat
J15095+031
avcn.dat
J15095+031.
.avc.dat
J15100+193
avcn.dat
J15100+193.
.avc.dat
J15194-077
avcn.dat
J15194-077.
.avc.dat
J15218+209
avcn.dat
J15218+209.
.avc.dat
J15305+094
avcn.dat
J15305+094.
.avc.dat
J15369-141
avcn.dat
J15369-141.
.avc.dat
J15412+759
avcn.dat
J15412+759.
.avc.dat
J15474-108
avcn.dat
J15474-108.
.avc.dat
J15499+796
avcn.dat
J15499+796.
.avc.dat
J15583+354
avcn.dat
J15583+354.
.avc.dat
J15598-082
avcn.dat
J15

## Copy the relevant files into the TESS _LSTperiod_ folder

In [31]:
star_list = []
files = os.listdir(TESS_FOLDER)
for file_name in files:
    full_file_name = os.path.join(TESS_FOLDER, file_name)
    file_ext = file_name[-len(FILEEXT_PATTERN_TESS):]
    star_name = file_name[:-len(FILEEXT_PATTERN_TESS)].replace("TESS-","")
    #print(file_ext)
    #print(star_name)
    if file_ext == FILEEXT_PATTERN_TESS and star_name in pg_objects:
        if os.path.getsize(full_file_name) == 0:
            print("Skipping file %s, as it is empty" %file_name)
        else:
            shutil.copy2(full_file_name, TARGET_FOLDER_TESS)
            star_list.append(file_name[:-len(FILEEXT_PATTERN_TESS)].replace("TESS-",""))
    else:
        pass
print("Num stars (TESS): %d" %len(star_list))
print(star_list)


Num stars (TESS): 269
['J00051+457', 'J00183+440', 'J00286-066', 'J00389+306', 'J00403+612', 'J00570+450', 'J01013+613', 'J01019+541', 'J01025+716', 'J01026+623', 'J01033+623', 'J01048-181', 'J01066+192', 'J01125-169', 'J01339-176', 'J01352-072', 'J01518+644', 'J01550+379', 'J02015+637', 'J02070+496', 'J02088+494', 'J02123+035', 'J02222+478', 'J02336+249', 'J02362+068', 'J02442+255', 'J02486+621', 'J02489-145E', 'J02489-145W', 'J02560-006', 'J02565+554W', 'J02573+765', 'J03090+100', 'J03133+047', 'J03142+286', 'J03181+382', 'J03217-066', 'J03230+420', 'J03463+262', 'J03473+086', 'J03473-019', 'J03531+625', 'J04153-076', 'J04167-120', 'J04198+425', 'J04225+105', 'J04311+589', 'J04343+430', 'J04376+528', 'J04376-110', 'J04406-128', 'J04520+064', 'J04538-177', 'J04588+498', 'J05019+011', 'J05019-069', 'J05033-173', 'J05062+046', 'J05084-210', 'J05280+096', 'J05314-036', 'J05337+019', 'J05348+138', 'J05360-076', 'J05365+113', 'J05366+112', 'J05394+406', 'J05415+534', 'J05421+124', 'J06000+

# Summary

**OBSERVATIONS AND CONCLUSIONS:**
- We gathered (just chosen the right format ones) the 2 x 269 files of the CARMENES RV curves and TESS light curves in the proper format for `LSTperiod` and in new, separate folders.