In [1]:
!nvidia-smi

Wed Jul 20 11:55:47 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# INSTALLS
!pip install lightkurve
!pip install tslearn 
!pip install pytictoc

# IMPORTING NECESSARY LIBRARIES
import lightkurve as lk
import pandas as pd
import numpy as np
from tslearn.utils import to_time_series_dataset
from tslearn.preprocessing import TimeSeriesResampler
from pytictoc import TicToc

# LOAD THE KEPLER KOI TABLE AND REDUCE IT TO ONE LABELLED ROW PER TARGET
finalData = pd.read_csv('finalData.csv')

# True wherever the disposition text mentions 'CANDIDATE'
candidate_flag = finalData['koi_disposition'].str.contains('CANDIDATE')

finalData = (
    finalData
    .loc[candidate_flag.eq(False)]  # keep only rows that are not candidates
    # Keep the first row per kepid. CONTROVERSIAL: this collapses targets with
    # several KOIs into one row; a (0, 1, 1+) planet-count classification
    # would need the raw, un-deduplicated table instead.
    .drop_duplicates(subset=['kepid'])
    # Encode the labels numerically: 'CONFIRMED' -> 1, 'FALSE POSITIVE' -> 0
    .replace(['CONFIRMED', 'FALSE POSITIVE'], [1, 0])
)

# `data` is a re-indexed (0..n-1) copy; `finalData` keeps its original row labels
data = finalData.reset_index(drop=True)

# Accumulators for the training set: one folded/binned flux series per target
# (initial_X1) and the matching 0/1 disposition label (initial_Y1).
initial_X1 = []
initial_Y1 = []
i = 0 # number of targets processed successfully so far (progress indicator)

for kepid in data['kepid'].iloc[0:1000]:
  # Per-target stopwatch; toc() below prints "Elapsed time is ... seconds."
  t = TicToc()
  t.tic()

  try:
    # DOWNLOADING THE DATA
    KIC = 'KIC ' + str(kepid)
    lcs = lk.search_lightcurve(KIC, author='kepler', cadence='long').download_all()

    # FINDING THE PERIOD, T0 AND DURATION FOR PROCESSING
    # finalData keeps its original row labels, so look values up by label.
    row_number = finalData[finalData['kepid'] == kepid].index[0]
    period = finalData.loc[row_number, 'koi_period']
    t0 = finalData.loc[row_number, 'koi_time0bk']
    duration_hours = finalData.loc[row_number, 'koi_duration']

    # PROCESSING THE LIGHTKURVE DATA
    lc_raw = lcs.stitch()
    lc_clean = lc_raw.remove_outliers(sigma=20, sigma_upper=4)

    # Build a mask of in-transit cadences (within 1.5 transit durations of
    # phase 0) so that flattening does not remove the transit signal itself.
    temp_fold = lc_clean.fold(period, epoch_time=t0)
    fractional_duration = (duration_hours / 24.0) / period
    phase_mask = np.abs(temp_fold.phase.value) < (fractional_duration * 1.5)
    transit_mask = np.isin(lc_clean.time.value, temp_fold.time_original.value[phase_mask])
    lc_flat, trend_lc = lc_clean.flatten(return_trend=True, mask=transit_mask)
    lc_fold = lc_flat.fold(period, epoch_time=t0)

    # CREATING THE GLOBAL VIEW
    # Shift the normalized flux by -1, then rescale by the magnitude of the
    # minimum bin so that every target shares the same depth scale.
    lc_global = lc_fold.bin(time_bin_size=0.005).normalize() - 1
    lc_global = (lc_global / np.abs(lc_global.flux.min()) ) * 2.0 + 1

    # CONVERTING TO PANDAS DF
    lc_global = lc_global.to_pandas()

    # CREATE LISTS OF X (TIMESERIES DATA) AND Y (LABEL)
    initial_X1.append(lc_global['flux'].tolist())
    initial_Y1.append(finalData.loc[row_number, 'koi_disposition'])

    print(i)
    i += 1

    # toc() prints the elapsed time itself, so its result is not printed
    # again (doing so only emitted a stray "None" line per target).
    t.toc()

  except Exception as exc:
    # Best effort: skip targets that fail to download or process, but say so.
    # Catching Exception (not a bare except) lets Ctrl-C / kernel interrupt
    # stop this long-running loop.
    print(f'Skipping KIC {kepid}: {type(exc).__name__}: {exc}')


# converting initial_X and initial_Y to pandas df
# NOTE: the flux lists differ in length between targets, so shorter rows are
# padded with NaN up to the longest series.
df1 = pd.DataFrame(initial_X1)
df1['target'] = initial_Y1

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting lightkurve
  Downloading lightkurve-2.0.11-py3-none-any.whl (247 kB)
[K     |████████████████████████████████| 247 kB 4.3 MB/s 
Collecting astroquery>=0.3.10
  Downloading astroquery-0.4.6-py3-none-any.whl (4.5 MB)
[K     |████████████████████████████████| 4.5 MB 52.9 MB/s 
Collecting oktopus>=0.1.2
  Downloading oktopus-0.1.2.tar.gz (10 kB)
Collecting memoization>=0.3.1
  Downloading memoization-0.4.0.tar.gz (41 kB)
[K     |████████████████████████████████| 41 kB 187 kB/s 
[?25hCollecting fbpca>=1.0
  Downloading fbpca-1.0.tar.gz (11 kB)
Collecting uncertainties>=3.1.4
  Downloading uncertainties-3.1.7-py2.py3-none-any.whl (98 kB)
[K     |████████████████████████████████| 98 kB 8.0 MB/s 
Collecting keyring>=4.0
  Downloading keyring-23.7.0-py3-none-any.whl (34 kB)
Collecting pyvo>=1.1
  Downloading pyvo-1.2.1-py3-none-any.whl (832 kB)
[K     |████████████████████████████



62
Elapsed time is 44.354846 seconds.
None
63
Elapsed time is 31.620188 seconds.
None
64
Elapsed time is 38.736078 seconds.
None
65
Elapsed time is 31.141538 seconds.
None
66
Elapsed time is 31.166369 seconds.
None
67
Elapsed time is 39.773409 seconds.
None
68
Elapsed time is 37.534686 seconds.
None
69
Elapsed time is 97.446091 seconds.
None
70
Elapsed time is 36.136486 seconds.
None
71
Elapsed time is 38.983318 seconds.
None
72
Elapsed time is 33.956987 seconds.
None
73
Elapsed time is 38.843149 seconds.
None
74
Elapsed time is 45.316047 seconds.
None
75
Elapsed time is 30.180827 seconds.
None
76
Elapsed time is 44.778975 seconds.
None
77
Elapsed time is 34.063953 seconds.
None
78
Elapsed time is 39.079066 seconds.
None
79
Elapsed time is 33.379308 seconds.
None
80
Elapsed time is 37.533294 seconds.
None
81
Elapsed time is 39.645348 seconds.
None
82
Elapsed time is 45.781759 seconds.
None
83
Elapsed time is 40.348074 seconds.
None
84
Elapsed time is 35.169886 seconds.
None
85
Elapsed 



202
Elapsed time is 43.556741 seconds.
None
203
Elapsed time is 44.970474 seconds.
None
204
Elapsed time is 36.413693 seconds.
None
205
Elapsed time is 47.396463 seconds.
None
206
Elapsed time is 31.392415 seconds.
None
207
Elapsed time is 41.382104 seconds.
None




208
Elapsed time is 40.415977 seconds.
None
209
Elapsed time is 42.585931 seconds.
None
210
Elapsed time is 125.615770 seconds.
None
211
Elapsed time is 41.315304 seconds.
None
212
Elapsed time is 34.998143 seconds.
None
213
Elapsed time is 41.328317 seconds.
None




214
Elapsed time is 50.008334 seconds.
None
215
Elapsed time is 43.878957 seconds.
None
216
Elapsed time is 33.538816 seconds.
None
217
Elapsed time is 47.184947 seconds.
None
218
Elapsed time is 45.494830 seconds.
None
219
Elapsed time is 41.676714 seconds.
None
220
Elapsed time is 42.375888 seconds.
None
221
Elapsed time is 63.737497 seconds.
None
222
Elapsed time is 41.148870 seconds.
None
223
Elapsed time is 41.432856 seconds.
None
224
Elapsed time is 40.332207 seconds.
None
225
Elapsed time is 41.977147 seconds.
None
226
Elapsed time is 45.596426 seconds.
None
227
Elapsed time is 42.246104 seconds.
None
228
Elapsed time is 43.352597 seconds.
None
229
Elapsed time is 43.334925 seconds.
None
230
Elapsed time is 39.151955 seconds.
None
231
Elapsed time is 34.432014 seconds.
None
232
Elapsed time is 39.136692 seconds.
None
233
Elapsed time is 53.933105 seconds.
None
234
Elapsed time is 40.937242 seconds.
None
235
Elapsed time is 41.469715 seconds.
None
236
Elapsed time is 41.181543 se



278
Elapsed time is 42.003054 seconds.
None
279
Elapsed time is 41.563585 seconds.
None
280
Elapsed time is 44.117092 seconds.
None
281
Elapsed time is 65.641106 seconds.
None
282
Elapsed time is 38.914784 seconds.
None
283
Elapsed time is 42.826336 seconds.
None
284
Elapsed time is 49.022486 seconds.
None
285
Elapsed time is 46.022897 seconds.
None
286
Elapsed time is 55.523387 seconds.
None
287
Elapsed time is 48.656775 seconds.
None
288
Elapsed time is 72.069750 seconds.
None
289
Elapsed time is 41.351822 seconds.
None
290
Elapsed time is 35.680101 seconds.
None
291
Elapsed time is 43.721171 seconds.
None
292
Elapsed time is 36.607511 seconds.
None
293
Elapsed time is 50.483592 seconds.
None
294
Elapsed time is 34.949344 seconds.
None
295
Elapsed time is 41.175483 seconds.
None
296
Elapsed time is 33.746987 seconds.
None
297
Elapsed time is 33.700278 seconds.
None




298
Elapsed time is 30.082448 seconds.
None
299
Elapsed time is 42.317185 seconds.
None
300
Elapsed time is 42.086515 seconds.
None
301
Elapsed time is 41.717694 seconds.
None
302
Elapsed time is 28.596947 seconds.
None
303
Elapsed time is 42.255646 seconds.
None
304
Elapsed time is 101.293694 seconds.
None
305
Elapsed time is 27.443231 seconds.
None
306
Elapsed time is 38.104168 seconds.
None
307
Elapsed time is 46.113249 seconds.
None
308
Elapsed time is 41.788129 seconds.
None
309
Elapsed time is 43.933238 seconds.
None
310
Elapsed time is 41.164798 seconds.
None
311
Elapsed time is 42.426264 seconds.
None
312
Elapsed time is 42.490569 seconds.
None
313
Elapsed time is 45.248946 seconds.
None
314
Elapsed time is 93.288517 seconds.
None
315
Elapsed time is 45.281033 seconds.
None
316
Elapsed time is 46.559092 seconds.
None
317
Elapsed time is 57.648966 seconds.
None
318
Elapsed time is 37.079816 seconds.
None
319
Elapsed time is 36.022773 seconds.
None
320
Elapsed time is 78.314116 s



353
Elapsed time is 45.762053 seconds.
None
354
Elapsed time is 42.774814 seconds.
None
355
Elapsed time is 46.434238 seconds.
None
356
Elapsed time is 44.631080 seconds.
None
357
Elapsed time is 44.951947 seconds.
None
358
Elapsed time is 41.324143 seconds.
None
359
Elapsed time is 42.933176 seconds.
None
360
Elapsed time is 34.332416 seconds.
None
361
Elapsed time is 42.705858 seconds.
None
362
Elapsed time is 47.578855 seconds.
None
363
Elapsed time is 25.837115 seconds.
None
364
Elapsed time is 34.957216 seconds.
None
365
Elapsed time is 44.671229 seconds.
None
366
Elapsed time is 45.253152 seconds.
None
367
Elapsed time is 44.692285 seconds.
None
368
Elapsed time is 45.280169 seconds.
None
369
Elapsed time is 45.296512 seconds.
None
370
Elapsed time is 52.853969 seconds.
None
371
Elapsed time is 49.096045 seconds.
None
372
Elapsed time is 55.260470 seconds.
None
373
Elapsed time is 44.062408 seconds.
None
374
Elapsed time is 44.597244 seconds.
None
375
Elapsed time is 40.410131 se



400
Elapsed time is 49.852933 seconds.
None
401
Elapsed time is 36.303603 seconds.
None
402
Elapsed time is 65.049499 seconds.
None
403
Elapsed time is 52.636130 seconds.
None
404
Elapsed time is 44.909298 seconds.
None
405
Elapsed time is 49.232103 seconds.
None
406
Elapsed time is 52.462755 seconds.
None
407
Elapsed time is 53.783387 seconds.
None
408
Elapsed time is 53.938934 seconds.
None
409
Elapsed time is 64.386356 seconds.
None
410
Elapsed time is 60.431935 seconds.
None
411
Elapsed time is 69.416266 seconds.
None
412
Elapsed time is 64.044312 seconds.
None
413
Elapsed time is 61.915500 seconds.
None
414
Elapsed time is 50.674801 seconds.
None




415
Elapsed time is 58.757848 seconds.
None
416
Elapsed time is 59.908841 seconds.
None
417
Elapsed time is 123.648782 seconds.
None
418
Elapsed time is 59.341910 seconds.
None
419
Elapsed time is 60.513294 seconds.
None
420
Elapsed time is 61.960217 seconds.
None
421
Elapsed time is 60.806894 seconds.
None
422
Elapsed time is 70.216266 seconds.
None
423
Elapsed time is 70.660197 seconds.
None
424
Elapsed time is 69.815596 seconds.
None
425
Elapsed time is 56.240125 seconds.
None
426
Elapsed time is 66.715773 seconds.
None
427
Elapsed time is 71.589388 seconds.
None
428
Elapsed time is 64.280875 seconds.
None
429
Elapsed time is 84.186131 seconds.
None
430
Elapsed time is 41.902117 seconds.
None
431
Elapsed time is 75.410607 seconds.
None
432
Elapsed time is 34.412824 seconds.
None
433
Elapsed time is 37.161164 seconds.
None
434
Elapsed time is 40.878044 seconds.
None
435
Elapsed time is 42.091942 seconds.
None
436
Elapsed time is 35.658660 seconds.
None




437
Elapsed time is 40.316303 seconds.
None
438
Elapsed time is 44.027266 seconds.
None
439
Elapsed time is 43.252526 seconds.
None
440
Elapsed time is 42.273967 seconds.
None




441
Elapsed time is 31.725923 seconds.
None
442
Elapsed time is 41.780447 seconds.
None
443
Elapsed time is 40.443809 seconds.
None
444
Elapsed time is 41.157212 seconds.
None
445
Elapsed time is 34.986860 seconds.
None
446
Elapsed time is 92.475169 seconds.
None
447
Elapsed time is 61.538906 seconds.
None
448
Elapsed time is 67.860741 seconds.
None
449
Elapsed time is 33.916869 seconds.
None
450
Elapsed time is 41.302895 seconds.
None
451
Elapsed time is 109.901173 seconds.
None
452
Elapsed time is 47.018882 seconds.
None
453
Elapsed time is 40.563550 seconds.
None
454
Elapsed time is 49.254847 seconds.
None
455
Elapsed time is 41.077817 seconds.
None
456
Elapsed time is 41.062123 seconds.
None
457
Elapsed time is 50.250204 seconds.
None
458
Elapsed time is 42.642507 seconds.
None
459
Elapsed time is 43.275647 seconds.
None
460
Elapsed time is 43.675071 seconds.
None
461
Elapsed time is 42.509679 seconds.
None
462
Elapsed time is 34.280383 seconds.
None
463
Elapsed time is 43.626332 s

In [5]:
from google.colab import drive

# Mount Google Drive so the result survives the end of the Colab session.
drive.mount('/content/drive', force_remount=True)
path = '/content/drive/My Drive/GSOC/WEEK 7/FINALoutput0_1000.csv'

# Give pandas the path rather than an already-open text handle: pandas then
# opens the file itself with the newline handling the csv writer requires.
# 'utf-8-sig' is kept so the file still starts with a UTF-8 byte-order mark.
df1.to_csv(path, encoding='utf-8-sig')

Mounted at /content/drive


In [6]:
# Display the assembled table: one row per processed target, flux values in
# the numbered columns and the 0/1 label in the final 'target' column.
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,212841,212842,212843,212844,212845,212846,212847,212848,212849,target
0,0.919597,0.760238,1.101303,1.034527,0.837521,0.685986,0.688989,1.135959,1.092625,1.003870,...,,,,,,,,,,1
1,1.025398,1.016231,0.987951,1.038180,1.023081,1.001104,0.982287,1.011871,1.004704,1.016585,...,,,,,,,,,,0
2,1.071332,1.341602,1.092280,0.944699,0.984180,0.948657,0.896450,0.934901,0.752725,0.828430,...,,,,,,,,,,1
3,1.017824,1.004170,0.850218,1.162286,0.989298,0.738813,1.040777,1.142984,0.836422,1.045461,...,,,,,,,,,,1
4,0.939003,0.839082,1.148021,0.913888,0.880893,1.153466,0.585936,0.759167,0.643355,0.761305,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
981,,,,,,,,,,,...,,,,,,,,,,1
982,0.589711,1.038403,0.661607,1.456011,1.044161,1.346148,0.518640,1.113973,1.243550,1.054374,...,,,,,,,,,,1
983,1.279825,0.679833,1.039892,0.624641,1.049217,1.175793,0.704409,0.849909,0.844958,1.115324,...,,,,,,,,,,1
984,0.944225,0.903859,0.957781,1.069567,0.796505,1.001235,1.045407,1.054819,0.818677,0.770443,...,,,,,,,,,,1
