# Reading data

In [None]:
# !pip install /content/imt_lightcurve-1.2-py3-none-any.whl --force-reinstall
# !pip install /content/imt_lightcurve-1.2-py3-none-any.whl

In [None]:
# Importing packages

from imt_lightcurve.models.lightcurve import LightCurve

import pandas as pd
import numpy as np

In [None]:
# Identifier of the light curve to analyse (the CSV file stem in the repository).
# Alternative target: 'RESAMPLED_0101086161_20070516T060226'
LIGHTCURVE = 'RESAMPLED_0102890318_20070206T133547'

# Fetch the resampled CSV for that light curve straight from GitHub.
data = pd.read_csv(
    f'https://raw.githubusercontent.com/Guilherme-SSB/IC-CoRoT_Kepler/main/resampled_files/{LIGHTCURVE}.csv'
)
time = data['DATE'].to_numpy()
flux = data['WHITEFLUX'].to_numpy()

# Wrap the two columns in a LightCurve object and draw it.
curve = LightCurve(time=time, flux=flux)
curve.plot()

# General view of the problem

# Get the data

# Discover and visualize the data to gain insights

## Feature: Periodograms

---



### Spectrum


In [None]:
#@title MultiLinePlot function
from bokeh.plotting import figure, show, output_file
from bokeh.io import output_notebook
from bokeh.models import Legend, LegendItem
from bokeh.models import ColumnDataSource, LassoSelectTool, HoverTool

# https://docs.bokeh.org/en/latest/docs/user_guide/interaction/legends.html#userguide-interaction-legends
def multi_line_plot(
        x_data=None,
        y1_data=None,
        y2_data=None,
        label_y1='y1 Data',
        label_y2='y2 Data',
        title='Multi-Line Plot',
        x_axis='x-axis',
        y_axis='y-axis',
        x_axis_type='auto',
        y_axis_type='auto',
        x_range=(10**-7, 10**-3)):
    """Draw two series that share the same x values on one Bokeh figure.

    Both curves are rendered by a single ``multi_line`` glyph (first curve
    blue, second curve red) and each one gets its own legend entry. Lasso
    selection and hover tools are added, then the figure is shown.

    Parameters
    ----------
    x_data : sequence
        x values used for both curves.
    y1_data, y2_data : sequence
        y values of the first (blue) and second (red) curve.
    label_y1, label_y2 : str
        Legend labels of the first and second curve.
    title : str
        Figure title.
    x_axis, y_axis : str
        Axis labels.
    x_axis_type, y_axis_type : str
        Axis types forwarded to ``bokeh.plotting.figure`` (e.g. ``'log'``).
    x_range : tuple
        ``(start, end)`` window of the x axis, forwarded to
        ``bokeh.plotting.figure``. The default keeps the window that used to
        be hard-coded in this helper.

    Returns
    -------
    None
        The figure is displayed through ``show``.
    """
    p = figure(title=title,
               x_axis_type=x_axis_type,
               y_axis_type=y_axis_type,
               plot_width=650, plot_height=400,
               x_range=x_range,
               background_fill_color='#fefefe')

    p.xaxis[0].axis_label = x_axis
    p.yaxis[0].axis_label = y_axis

    # Both curves share the same abscissa.
    xs = [x_data, x_data]
    ys = [y1_data, y2_data]

    r = p.multi_line(xs, ys, color=['blue', 'red'], line_width=2)

    # One renderer draws both lines, so each legend item selects its line
    # through `index` (position inside xs / ys).
    legend = Legend(items=[
        LegendItem(label=label_y1, renderers=[r], index=0),
        LegendItem(label=label_y2, renderers=[r], index=1)
    ])

    p.add_layout(legend)
    p.add_tools(LassoSelectTool())
    p.add_tools(HoverTool())

    show(p)

In [None]:
# Smallest gap between consecutive time stamps, scaled by 86400
# (presumably days -> seconds; confirm the unit of curve.time).
time_sampling = 86400 * pd.Series(curve.time).diff().min()

# The sampling frequency is the reciprocal of that step.
frequency_sampling = 1 / time_sampling

print(frequency_sampling)

0.003185505581548652


In [None]:
import warnings
# Silence every RuntimeWarning for the rest of the session.
warnings.filterwarnings("ignore", category=RuntimeWarning)

import scipy.signal as ssg
from imt_lightcurve.visualization.data_viz import line_plot

# Earlier trials with scaling='density', kept for reference together with the
# values the author noted next to them:
#   detrend='linear'   -> 4e+11
#   detrend='constant' -> 5e+11

# Spectrum of the flux after removing a linear trend ...
X, Y = ssg.periodogram(
    curve.flux,
    fs=frequency_sampling,
    detrend='linear',
    scaling='spectrum',
)
# ... and after removing only the mean ("raw" curve).
X, Y_raw = ssg.periodogram(
    curve.flux,
    fs=frequency_sampling,
    detrend='constant',
    scaling='spectrum',
)

multi_line_plot(
    x_data=X,
    y1_data=Y_raw,
    y2_data=Y,
    label_y1='Raw LC',
    label_y2='Detrend LC',
    x_axis_type='log',
)

### Algorithm

In [None]:
from imt_lightcurve.models.lightcurve import LightCurve
import os
import numpy as np
import pandas as pd
import scipy.signal as ssg

# Dataset flags, all initialised to False. Nothing in the visible part of
# this notebook reads them.
confirmed_exoplanets = eclipsing_binaries = nothing_detected = False

def compute_periodogram_feature(DATA_DIR: str, label: int = None) -> pd.DataFrame:
    """Build a table of periodogram spectra, one row per light-curve CSV.

    ``DATA_DIR`` is walked recursively. For every ``.csv`` file found, the
    ``DATE`` and ``WHITEFLUX`` columns are wrapped in a ``LightCurve`` and the
    flux is passed to ``scipy.signal.periodogram`` (linear detrend,
    ``'spectrum'`` scaling). Each spectrum becomes one row of the result, and
    a constant ``label`` column is appended.

    Parameters
    ----------
    DATA_DIR : str
        Root directory that contains the light-curve CSV files.
    label : int
        Class written to the ``label`` column:
        0 = confirmed exoplanets, 1 = eclipsing binaries, 2 = nothing detected.

    Returns
    -------
    pd.DataFrame
        One row per CSV file (spectrum values in integer-named columns) plus
        the ``label`` column. Files are processed in sorted order so the row
        order is reproducible.

    Raises
    ------
    ValueError
        If ``label`` is not 0, 1 or 2 (including the default ``None``).
    """
    # Validate up front: a bad label used to surface only after all files were
    # processed, and as an unbound-variable error instead of a ValueError.
    if label not in (0, 1, 2):
        raise ValueError('Label not available')

    spectra = []
    for root_dir_path, sub_dirs, files in os.walk(DATA_DIR):
        # Sorting in place makes os.walk descend in a deterministic order.
        sub_dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith('.csv'):
                continue

            data = pd.read_csv(os.path.join(root_dir_path, file_name))
            time = data.DATE.to_numpy()
            flux = data.WHITEFLUX.to_numpy()
            curve = LightCurve(time, flux)

            # Sampling frequency from the smallest time step, scaled by 86400
            # (presumably days -> seconds; confirm the unit of DATE).
            time_sampling = pd.Series(curve.time).diff().min() * 86400
            frequency_sampling = 1 / time_sampling

            # Only the spectrum is kept; the frequency axis is discarded.
            _, spec = ssg.periodogram(curve.flux, frequency_sampling,
                                      detrend='linear', scaling='spectrum')
            spectra.append(pd.Series(spec))

    # Build the frame once instead of appending row by row (quadratic, and
    # DataFrame.append no longer exists in pandas 2.x).
    DF = pd.DataFrame(spectra).reset_index(drop=True)
    DF['label'] = np.full((DF.shape[0],), label, dtype='int')

    return DF

The correct way to use this function is:



```Python
periodograms = compute_periodogram_feature(>>> PATH_TO_CONFIRMED_EXOPLANETS_DATASET:str, label:int <<<)

# Saving feature
periodograms.to_csv(>>> WHERE_TO_SAVE_PATH:str <<<, index=False)
```



In [None]:
# Input folders passed to compute_periodogram_feature, one per class.
# NOTE(review): these are absolute paths that look like a Colab Google Drive
# mount, so the notebook only runs where that folder layout exists.
PATH_TO_CONFIRMED_EXOPLANETS_DATA = '/content/drive/MyDrive/01 - Iniciação Científica/IC-CoRoT_Kepler/resampled_files'
PATH_TO_ECLIPSING_BINARIES_DATA = '/content/drive/MyDrive/01 - Iniciação Científica/02 - Datasets/eclipsing_binaries'

In [None]:
# Periodograms of the confirmed-exoplanet light curves (label 0).
CE_periodogram_feature = compute_periodogram_feature(PATH_TO_CONFIRMED_EXOPLANETS_DATA, label=0)

# Saving feature
CE_periodogram_feature.to_csv('confirmed_exoplanets_periodograms.csv', index=False)

# Preview: must be the last expression of the cell, otherwise nothing is rendered.
CE_periodogram_feature.head()

In [None]:
# Periodograms of the eclipsing-binary light curves (label 1).
EB_periodogram_feature = compute_periodogram_feature(PATH_TO_ECLIPSING_BINARIES_DATA, label=1)

# Saving feature
EB_periodogram_feature.to_csv('eclipsing_binaries_periodograms.csv', index=False)

# Preview: must be the last expression of the cell, otherwise nothing is rendered.
EB_periodogram_feature.head()

Now that all the periodograms have been extracted (from the confirmed-exoplanets and eclipsing-binaries datasets), we are going to merge both labeled periodogram tables and finally save the result as `periodogram_feature.csv`.

In [None]:
# Locations of the two per-class periodogram tables saved earlier.
PERIODOGRAM_CONFIRMED_EXOPLANETS_PATH = '/content/confirmed_exoplanets_periodograms.csv'
PERIODOGRAM_ECLIPSING_BINARIES_PATH = '/content/eclipsing_binaries_periodograms.csv'

# Read each table back into its own DataFrame.
periodogram_CE = pd.read_csv(filepath_or_buffer=PERIODOGRAM_CONFIRMED_EXOPLANETS_PATH)
periodogram_EB = pd.read_csv(filepath_or_buffer=PERIODOGRAM_ECLIPSING_BINARIES_PATH)

Just to make sure the two tables can be stacked, let's check that both have the same number of columns (the row counts differ, since each row is one light curve).

In [None]:
# (rows, columns) of each table, one per line.
print(periodogram_CE.shape, periodogram_EB.shape, sep='\n')

(33, 7527)
(98, 7527)


Perfect! Now, let's merge them

In [None]:
# Stack both labeled tables. ignore_index=True renumbers the rows 0..n-1;
# without it the row labels of the two inputs are kept and repeat, which
# makes label-based lookups (.loc) ambiguous.
periodogram_feature = pd.concat([periodogram_CE, periodogram_EB], ignore_index=True)
periodogram_feature.sample(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,7487,7488,7489,7490,7491,7492,7493,7494,7495,7496,7497,7498,7499,7500,7501,7502,7503,7504,7505,7506,7507,7508,7509,7510,7511,7512,7513,7514,7515,7516,7517,7518,7519,7520,7521,7522,7523,7524,7525,label
70,3.351984e-21,464431.821815,86301.233084,64660.695217,43221.717795,66383.014966,28931.722692,5420.248726,1956.324483,65303.503969,14188.690963,17510.962169,20975.679952,5010.947015,17051.355315,37133.818786,18290.447416,5762.884065,29621.515416,1483.200404,4197.855399,4694.197296,5430.57263,903.184705,27279.290955,11160.142208,2488.443864,20441.690061,6821.032662,4397.219561,14104.186234,8990.159763,1812.24173,19044.45476,3153.234756,3139.816275,14596.069893,22354.485618,1383.647443,18586.635666,...,2.662037,2.154102,5.967441,4.36354,9.966903,2.178706,2.938641,0.403986,6.859427,3.653691,4.271633,3.470198,1.371506,1.839115,5.086221,2.111682,6.934027,5.573846,3.402828,0.634058,1.528502,4.120737,1.67216,3.708579,4.506172,0.027153,18.737193,1.938899,0.65383,2.216671,0.492524,0.35184,0.895148,10.490376,4.516829,12.064425,10.447576,0.649647,0.0008384866,1
37,1.857629e-22,316.201946,1260.056889,249.166919,167.81277,10.630384,234.053656,47.826147,203.925275,162.70604,82.835774,620.090717,131.118498,119.774802,262.409657,37.286177,112.689252,47.908595,70.461274,413.437135,151.082858,104.356152,131.355512,72.034455,35.825651,75.013576,163.333323,90.674001,180.468139,182.809547,91.654046,181.431473,48.894377,94.54512,260.332624,179.888364,172.407902,54.01861,156.831696,114.875781,...,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.000231,0.0001153684,1
64,2.6907060000000004e-23,870.32953,4374.014382,241.297285,1091.18443,902.154421,317.156397,474.677574,1212.002323,107.308339,143.650961,437.107047,365.776411,135.046159,102.039933,121.264515,73.096038,12.809064,122.507477,141.070735,30.948384,84.338729,47.881403,27.193505,2.64981,9.414482,26.971884,3.364537,158.812306,7.129188,92.659425,11.933758,19.705526,35.733975,14.822395,7.020242,23.448927,23.21488,63.517796,4.175995,...,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,0.00018,9.001917e-05,1
87,1.2150040000000002e-25,762.53459,6290.271667,1407.078476,7448.033061,357.51828,346.241948,220.731693,530.650159,155.834513,425.307345,493.475203,109.148458,75.240359,311.81072,313.131109,171.408819,107.355372,221.842152,631.123441,66.01251,219.962882,146.900998,132.639975,32.069796,211.824918,73.981253,38.812138,107.187756,38.297537,41.939486,37.570212,108.31203,228.776123,151.866243,73.272661,166.384671,61.692175,64.33082,20.130667,...,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,0.000138,6.900793e-05,1
63,7.685049000000001e-23,3101.992073,16567.55274,10223.714951,4865.127026,123.019369,220.750548,593.064053,366.631045,297.521278,1073.55846,503.27295,789.288964,499.028543,500.962398,53.860128,753.650651,893.469759,218.186344,694.59522,80.56456,201.985084,237.792701,2.816744,139.450255,15.19264,44.980317,71.531109,98.032323,106.821796,110.979065,28.845866,0.80953,238.554715,11.65007,8.023795,123.139615,49.798508,153.628274,7.088007,...,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,1e-06,5.058032e-07,1


Saving feature

In [None]:
# Write the merged table to disk without the row index.
periodogram_feature.to_csv(path_or_buf='periodogram_feature.csv', index=False)

# Prepare the data for Machine Learning algorithms

label