In [87]:
import pandas as pd
from datetime import datetime

## Load the data

In [88]:
# Folder the input CSV is read from. Paths are built by plain string
# concatenation, so the trailing slash is required.
data_path = "DATA/"
vex_boundary_df = pd.read_csv(f"{data_path}VEX-ICME-boundaries.csv")

# Preview the first rows of the loaded table.
display(vex_boundary_df.head())

Unnamed: 0,Year,DiscontinuityDOY,MCstartDOY,MCendDOY,FRstartDOY,FRendDOY,rH
0,2007,44.2,44.6113,45.3981,44.6113,45.3981,0.725
1,2007,117.01,117.37,117.674,117.37,117.674,0.719
2,2007,126.03,126.35,126.87,126.35,126.87,0.719
3,2007,144.8,145.15,145.833,145.15,145.833,0.721
4,2007,167.094,167.094,167.72,167.094,167.72,0.724


## Dates of Coronal Mass Ejection (CME)

### Get the start & end time of a CME
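start time = start of the discontinuity (`DiscontinuityDOY`)  
end time = end of the flux rope (FR); note that the code below reads `MCendDOY`, which equals `FRendDOY` in the rows displayed above  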

In [89]:
# Convert the year to a datetime (1 January of that year) so it can be used
# as the origin for the day-of-year offsets.
# The dtype guard makes this cell safe to re-run: parsing an already-converted
# datetime column with the '%Y' format would raise a ValueError.
if not pd.api.types.is_datetime64_any_dtype(vex_boundary_df['Year']):
    vex_boundary_df['Year'] = pd.to_datetime(vex_boundary_df['Year'].astype(str), format='%Y')

def get_datetime_from_dec(doy, year):
    """Convert a decimal day-of-year into a timestamp.

    Parameters
    ----------
    doy : float or str
        Decimal day of the year, 1-based: 1.0 is midnight on 1 January and
        1.5 is noon on 1 January.
    year : int, float, str, datetime-like
        Either a calendar year (e.g. ``2007``, ``2007.0`` or ``"2007"``) or a
        datetime-like value marking the start of the year. Anything that is
        not a plain year number is handed to ``pd.Timestamp`` unchanged.

    Returns
    -------
    pandas.Timestamp
        The origin derived from ``year`` shifted by ``doy - 1`` days.
    """
    is_whole_number = pd.api.types.is_integer(year) or (
        pd.api.types.is_float(year) and float(year).is_integer()
    )
    is_year_string = isinstance(year, str) and year.strip().isdigit()

    if is_whole_number or is_year_string:
        # A bare number passed as `origin` would be read by pandas as an
        # offset from 1970-01-01, not as a calendar year, so build the
        # 1 January timestamp explicitly.
        origin = pd.Timestamp(year=int(year), month=1, day=1)
    else:
        origin = pd.Timestamp(year)

    # The day of year is 1-based, hence the "- 1"; the offset is expressed in
    # seconds. The str round-trip is kept from the original implementation
    # (it is a no-op for Python floats and float64 values).
    seconds_into_year = (float(str(doy)) - 1) * 24 * 3600
    return pd.to_datetime(seconds_into_year, origin=origin, unit='s')

# Event start: the discontinuity day-of-year converted to a timestamp.
vex_boundary_df['start_time'] = vex_boundary_df.apply(
    lambda row: get_datetime_from_dec(row['DiscontinuityDOY'], row['Year']),
    axis=1,
)

# Event end: taken from the MCendDOY column.
# NOTE(review): the notebook text describes the end time as the end of the
# flux rope (FR), but this reads MCendDOY rather than FRendDOY - confirm
# which column is intended.
vex_boundary_df['end_time'] = vex_boundary_df.apply(
    lambda row: get_datetime_from_dec(row['MCendDOY'], row['Year']),
    axis=1,
)

In [90]:
# Only the CME start and end timestamps are carried forward.
columns_to_keep = ['start_time', 'end_time']

# Select the wanted columns; every other column is discarded.
vex_boundary_df = vex_boundary_df.filter(items=columns_to_keep)

# Preview the reduced table.
display(vex_boundary_df.head())

Unnamed: 0,start_time,end_time
0,2007-02-13 04:48:00.000,2007-02-14 09:33:15.840
1,2007-04-27 00:14:24.000,2007-04-27 16:10:33.600
2,2007-05-06 00:43:12.000,2007-05-06 20:52:48.000
3,2007-05-24 19:12:00.000,2007-05-25 19:59:31.200
4,2007-06-16 02:15:21.600,2007-06-16 17:16:48.000


### Exporting a CSV file with the start and end times of each CME

In [91]:
# Write the start/end timestamps next to the input data, leaving the
# DataFrame index out of the file.
csv_file_path = f"{data_path}VEX_CME_date_time.csv"
vex_boundary_df.to_csv(csv_file_path, index=False)