In [1]:
# INSTALLS
!pip install lightkurve
!pip install tslearn 
!pip install pytictoc

# IMPORTING NECESSARY LIBRARIES
import lightkurve as lk
import pandas as pd
import numpy as np
from tslearn.utils import to_time_series_dataset
from tslearn.preprocessing import TimeSeriesResampler
from pytictoc import TicToc

# LOAD THE KEPLER ID TABLE AND REDUCE IT TO ONE LABELLED ROW PER TARGET
finalData = pd.read_csv('finalData.csv')
finalData = (
    # keep only rows whose disposition text does not contain 'CANDIDATE';
    # comparing against False (instead of negating) also drops rows whose disposition is missing
    finalData[finalData['koi_disposition'].str.contains('CANDIDATE').eq(False)]
    # keep the first row per kepid ==> CONTROVERSIAL!!! to handle multiple planets
    # (0, 1, 1+) classification, the raw (non de-duplicated) data is needed
    .drop_duplicates(subset=['kepid'])
    # 'CONFIRMED' -> 1, 'FALSE POSITIVE' -> 0 (applied to every column of the frame)
    .replace(['CONFIRMED', 'FALSE POSITIVE'], [1, 0])
)

# `data` is the same table with a fresh 0..n-1 index; `finalData` keeps the original row labels
data = finalData.reset_index(drop=True)

initial_X1 = []  # one list of binned, phase-folded flux values per successfully processed target
initial_Y1 = []  # koi_disposition value of the matching entry in initial_X1
i = 0  # number of targets processed so far; printed as a progress indicator

# Positional slice 1001:2000 of the re-indexed table, i.e. 999 targets.
# NOTE(review): position 1000 is not part of this slice - confirm another batch covers it.
for kepid in data['kepid'].iloc[1001:2000]:
  t = TicToc()
  t.tic()  # start the per-target timer

  try:
    # DOWNLOADING THE DATA
    KIC = 'KIC ' + str(kepid)
    lcs = lk.search_lightcurve(KIC, author='kepler', cadence='long').download_all()

    # FINDING THE PERIOD, T0 AND DURATION FOR PROCESSING
    # row_number is an index *label* of finalData (which was not re-indexed), hence .loc
    row_number = finalData[finalData['kepid'] == kepid].index[0]
    period = finalData.loc[row_number, 'koi_period']
    t0 = finalData.loc[row_number, 'koi_time0bk']
    duration_hours = finalData.loc[row_number, 'koi_duration']
    label = finalData.loc[row_number, 'koi_disposition']

    # PROCESSING THE LIGHTKURVE DATA
    lc_raw = lcs.stitch()
    lc_clean = lc_raw.remove_outliers(sigma=20, sigma_upper=4)

    # Fold once on the unflattened curve only to locate the in-transit cadences:
    # everything within 1.5 transit durations (in phase units) of phase 0 is masked.
    temp_fold = lc_clean.fold(period, epoch_time=t0)
    fractional_duration = (duration_hours / 24.0) / period
    phase_mask = np.abs(temp_fold.phase.value) < (fractional_duration * 1.5)
    # np.isin replaces the deprecated np.in1d; both inputs are 1-D so the result is the same
    transit_mask = np.isin(lc_clean.time.value, temp_fold.time_original.value[phase_mask])

    # Flatten with the masked cadences passed as `mask`, then fold the flattened curve
    lc_flat, trend_lc = lc_clean.flatten(return_trend=True, mask=transit_mask)
    lc_fold = lc_flat.fold(period, epoch_time=t0)

    # CREATING THE GLOBAL VIEW
    # Bin, shift the normalized flux by -1, then rescale by |min flux|, times 2, plus 1
    lc_global = lc_fold.bin(time_bin_size=0.005).normalize() - 1
    lc_global = (lc_global / np.abs(lc_global.flux.min())) * 2.0 + 1

    # CONVERTING TO PANDAS DF
    lc_global = lc_global.to_pandas()
    flux_values = lc_global['flux'].tolist()

    # CREATE LISTS OF X (TIMESERIES DATA) AND Y (LABEL)
    # Both values are fully computed before either list is touched, so the
    # two lists can never get out of step with each other.
    initial_X1.append(flux_values)
    initial_Y1.append(label)

    print(i)
    i += 1

    # toc() prints "Elapsed time is ... seconds." itself; printing its return
    # value only added a "None" line after every target.
    t.toc()

  except Exception as exc:
    # Best effort: a target that cannot be downloaded or processed is skipped,
    # but the reason is reported instead of being silently discarded.
    # KeyboardInterrupt is not an Exception subclass, so interrupting the cell
    # now stops the loop instead of just skipping the current target.
    print(f'Skipping KIC {kepid}: {exc!r}')


# converting initial_X1 and initial_Y1 to a pandas df: one row per target,
# one column per flux bin, plus a final 'target' column holding the label
df1 = pd.DataFrame(initial_X1)
df1['target'] = initial_Y1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lightkurve
  Downloading lightkurve-2.0.11-py3-none-any.whl (247 kB)
[K     |████████████████████████████████| 247 kB 12.3 MB/s 
[?25hCollecting memoization>=0.3.1
  Downloading memoization-0.4.0.tar.gz (41 kB)
[K     |████████████████████████████████| 41 kB 139 kB/s 
Collecting astroquery>=0.3.10
  Downloading astroquery-0.4.6-py3-none-any.whl (4.5 MB)
[K     |████████████████████████████████| 4.5 MB 58.3 MB/s 
[?25hCollecting fbpca>=1.0
  Downloading fbpca-1.0.tar.gz (11 kB)
Collecting uncertainties>=3.1.4
  Downloading uncertainties-3.1.7-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 5.5 MB/s 
Collecting oktopus>=0.1.2
  Downloading oktopus-0.1.2.tar.gz (10 kB)
Collecting pyvo>=1.1
  Downloading pyvo-1.2.1-py3-none-any.whl (832 kB)
[K     |████████████████████████████████| 832 kB 50.3 MB/s 
[?25hCollecting keyring>=4.0
  Downloading key



465
Elapsed time is 32.571281 seconds.
None
466
Elapsed time is 55.158135 seconds.
None
467
Elapsed time is 13.357468 seconds.
None
468
Elapsed time is 25.170115 seconds.
None
469
Elapsed time is 19.069077 seconds.
None
470
Elapsed time is 83.291904 seconds.
None
471
Elapsed time is 27.182202 seconds.
None
472
Elapsed time is 86.216042 seconds.
None
473
Elapsed time is 17.096459 seconds.
None
474
Elapsed time is 83.077795 seconds.
None
475
Elapsed time is 87.550178 seconds.
None
476
Elapsed time is 21.813282 seconds.
None
477
Elapsed time is 17.775153 seconds.
None
478
Elapsed time is 34.109352 seconds.
None
479
Elapsed time is 30.581977 seconds.
None
480
Elapsed time is 35.851107 seconds.
None
481
Elapsed time is 24.211215 seconds.
None
482
Elapsed time is 21.684797 seconds.
None
483
Elapsed time is 36.586328 seconds.
None
484
Elapsed time is 22.430636 seconds.
None
485
Elapsed time is 21.363449 seconds.
None
486
Elapsed time is 11.879000 seconds.
None
487
Elapsed time is 18.558018 se



538
Elapsed time is 18.128617 seconds.
None
539
Elapsed time is 20.496837 seconds.
None
540
Elapsed time is 32.110498 seconds.
None
541
Elapsed time is 16.711850 seconds.
None
542
Elapsed time is 20.662602 seconds.
None
543
Elapsed time is 17.247784 seconds.
None
544
Elapsed time is 15.141007 seconds.
None
545
Elapsed time is 18.270512 seconds.
None
546
Elapsed time is 16.664525 seconds.
None
547
Elapsed time is 21.259652 seconds.
None
548
Elapsed time is 32.584320 seconds.
None




549
Elapsed time is 21.204735 seconds.
None
550
Elapsed time is 27.860697 seconds.
None
551
Elapsed time is 25.199993 seconds.
None
552
Elapsed time is 14.963010 seconds.
None
553
Elapsed time is 24.670264 seconds.
None
554
Elapsed time is 73.319906 seconds.
None
555
Elapsed time is 18.721663 seconds.
None
556
Elapsed time is 11.225872 seconds.
None
557
Elapsed time is 62.670700 seconds.
None
558
Elapsed time is 21.899047 seconds.
None
559
Elapsed time is 23.284774 seconds.
None
560
Elapsed time is 21.438524 seconds.
None
561
Elapsed time is 20.968272 seconds.
None
562
Elapsed time is 22.746139 seconds.
None
563
Elapsed time is 37.042577 seconds.
None
564
Elapsed time is 40.909526 seconds.
None
565
Elapsed time is 23.597375 seconds.
None
566
Elapsed time is 17.839578 seconds.
None
567
Elapsed time is 37.919061 seconds.
None
568
Elapsed time is 21.180620 seconds.
None
569
Elapsed time is 17.731112 seconds.
None
570
Elapsed time is 18.260447 seconds.
None
571
Elapsed time is 26.302986 se



809
Elapsed time is 20.434095 seconds.
None
810
Elapsed time is 14.669253 seconds.
None
811
Elapsed time is 20.690095 seconds.
None
812
Elapsed time is 20.757676 seconds.
None
813
Elapsed time is 18.814857 seconds.
None
814
Elapsed time is 17.417778 seconds.
None
815
Elapsed time is 22.893169 seconds.
None
816
Elapsed time is 107.158287 seconds.
None
817
Elapsed time is 21.836006 seconds.
None
818
Elapsed time is 21.028635 seconds.
None
819
Elapsed time is 17.346009 seconds.
None
820
Elapsed time is 23.203284 seconds.
None
821
Elapsed time is 22.951584 seconds.
None
822
Elapsed time is 54.720210 seconds.
None
823
Elapsed time is 20.261449 seconds.
None
824
Elapsed time is 19.717328 seconds.
None
825
Elapsed time is 12.745162 seconds.
None
826
Elapsed time is 18.837192 seconds.
None
827
Elapsed time is 29.798509 seconds.
None
828
Elapsed time is 24.580017 seconds.
None
829
Elapsed time is 24.083228 seconds.
None
830
Elapsed time is 20.520216 seconds.
None
831
Elapsed time is 80.495065 s

In [3]:
from google.colab import drive

# Mount Google Drive (re-mounting if it is already mounted) so the result can be saved there
drive.mount('/content/drive', force_remount=True)
path = '/content/drive/My Drive/GSOC/WEEK 7/FINALoutput1001_2000.csv'

# Write df1 (including its index column) as CSV with a UTF-8 byte-order mark.
# newline='' follows the pandas requirement for file objects handed to to_csv:
# it disables newline translation so line endings are written exactly as pandas emits them.
with open(path, 'w', encoding = 'utf-8-sig', newline='') as f:
  df1.to_csv(f)

Mounted at /content/drive
