In [126]:
import pandas as pd
from datetime import datetime
import dateutil.parser

# VENUS EXPRESS

## Load the data

In [98]:
# Folder holding the Venus Express inputs. The trailing slash matters because
# file names are appended to it by plain string concatenation.
data_path = "DATA/VENUS_EXPRESS/"

# Read the ICME boundary table and show it
boundaries_csv = f"{data_path}VEX-ICME-boundaries.csv"
vex_boundary_df = pd.read_csv(boundaries_csv)

display(vex_boundary_df)

Unnamed: 0,Year,DiscontinuityDOY,MCstartDOY,MCendDOY,FRstartDOY,FRendDOY,rH
0,2007,44.200,44.6113,45.3981,44.6113,45.3981,0.725
1,2007,117.010,117.3700,117.6740,117.3700,117.6740,0.719
2,2007,126.030,126.3500,126.8700,126.3500,126.8700,0.719
3,2007,144.800,145.1500,145.8330,145.1500,145.8330,0.721
4,2007,167.094,167.0940,167.7200,167.0940,167.7200,0.724
...,...,...,...,...,...,...,...
62,2013,201.421,201.8820,202.7410,201.8820,202.7410,0.721
63,2013,261.487,261.4870,261.9390,261.4870,261.9390,0.728
64,2013,278.179,278.5230,279.6240,278.5230,279.6240,0.728
65,2013,334.188,334.5830,335.6670,334.5830,335.6670,0.723


## Dates of Coronal Mass Ejection (CME)

### Get the start & end time of a CME
start time = start of discontinuity  
end time = end of flux rope (FR)  

In [99]:
# convert the year to a datetime object
vex_boundary_df['Year'] = vex_boundary_df.Year.map(lambda t: datetime.strptime(str(t), '%Y'))

def get_datetime_from_dec(doy, year):
    """Turn a decimal day-of-year into a pandas timestamp.

    Parameters
    ----------
    doy : number or numeric string
        Decimal, 1-based day of the year; a value of 1 maps exactly onto `year`.
    year : datetime-like
        Reference point the offset is counted from; it is handed to
        ``pd.to_datetime`` as ``origin``.

    Returns
    -------
    pandas.Timestamp
        `year` shifted forward by ``doy - 1`` days.
    """
    # DOY is 1-based, so the first day of the year has a zero offset.
    days_since_origin = float(str(doy)) - 1
    # The data is not in a known date format, so express the offset as a
    # number of seconds and let pandas add it to the origin.
    seconds_since_origin = days_since_origin * 24 * 3600
    return pd.to_datetime(seconds_since_origin, unit='s', origin=year)

# Build the CME start/end timestamps row by row from the decimal day-of-year
# columns, using each row's 'Year' value as the origin.
vex_boundary_df['start_time'] = vex_boundary_df.apply(
    lambda row: get_datetime_from_dec(row['DiscontinuityDOY'], row['Year']),
    axis=1,
)
# NOTE(review): the notebook text says the end time is the end of the flux
# rope (FR), but the column read here is MCendDOY, not FRendDOY -- confirm
# which one is intended.
vex_boundary_df['end_time'] = vex_boundary_df.apply(
    lambda row: get_datetime_from_dec(row['MCendDOY'], row['Year']),
    axis=1,
)

In [100]:
# Reduce the frame to the CME start/end timestamps only
columns_to_keep = ['start_time', 'end_time']
unwanted_columns = [col for col in vex_boundary_df.columns if col not in columns_to_keep]
vex_boundary_df = vex_boundary_df.drop(columns=unwanted_columns)

display(vex_boundary_df.head())

Unnamed: 0,start_time,end_time
0,2007-02-13 04:48:00.000,2007-02-14 09:33:15.840
1,2007-04-27 00:14:24.000,2007-04-27 16:10:33.600
2,2007-05-06 00:43:12.000,2007-05-06 20:52:48.000
3,2007-05-24 19:12:00.000,2007-05-25 19:59:31.200
4,2007-06-16 02:15:21.600,2007-06-16 17:16:48.000


### Exporting the CSV file with start and end time of a CME

In [101]:
# Write the start/end times next to the input data, leaving out the row index
csv_file_path = f"{data_path}VEX_CME_date_time.csv"
vex_boundary_df.to_csv(path_or_buf=csv_file_path, index=False)

# ROSETTA

## Load the data

In [141]:
data_path = "DATA/ROSETTA/"

# Tab-separated event list. index_col=False stops pandas from using the first
# column as the index; the 'Event ID' and 'References' columns are dropped
# right after loading.
rosetta_events_file = f"{data_path}ROS_CME.txt"
ros_cme_df = pd.read_csv(rosetta_events_file, sep='\t', index_col=False).drop(columns=['Event ID', 'References'])

display(ros_cme_df)

  ros_cme_df = pd.read_csv(data_path+"ROS_CME.txt", sep='\t', index_col=False)


Unnamed: 0,Date-Time,Type,Instrument(s),Location info (Event or Sub-spacecraft),Lat.,Long.
0,2015-07-10T02:10:18,Outburst,NavCam,E,74,200
1,2015-07-19T03:38:09,Outburst,NAC,E,-24,296
2,2015-07-26T20:22:42,Outburst,NAC,E,−36,75
3,2015-07-27T00:14:29,Outburst,NAC,E,-31,333
4,2015-07-28T05:23:43,Outburst,WAC,E,-4,264
...,...,...,...,...,...,...
119,2015-12-17T19:15:11.0,Outburst,ALICE,S,-15.8,81.4
120,2016-01-01T06:00:18.0,Outburst,ALICE,S,-51.7,32.9
121,2016-01-02T07:33:02.0,Outburst,ALICE,S,-67.1,13.3
122,2016-01-02T12:59:09.0,Outburst,ALICE,S,-69.2,217


## Get the dates of CME

Only the CME start date and time are available, covering 2014 to 2016.

In [142]:
def try_parsing_date(text):
    """Parse a date string written in one of the four layouts found in the data.

    The layouts are tried in order and the first one that matches wins.

    Parameters
    ----------
    text : str
        Date string such as '2015-07-10T02:10:18' or '2016-01-02'.

    Returns
    -------
    datetime.datetime
        The parsed date.

    Raises
    ------
    ValueError
        If `text` matches none of the known layouts.
    """
    known_layouts = (
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%d',
    )
    for layout in known_layouts:
        try:
            parsed = datetime.strptime(text, layout)
        except ValueError:
            # Wrong layout for this string: move on to the next candidate.
            continue
        else:
            return parsed
    raise ValueError(f'no valid date format found for {text}')

# Parse every raw 'Date-Time' string into a datetime stored in a new column
ros_cme_df['CME_date'] = ros_cme_df['Date-Time'].map(try_parsing_date)
display(ros_cme_df)

Unnamed: 0,Date-Time,Type,Instrument(s),Location info (Event or Sub-spacecraft),Lat.,Long.,CME_date
0,2015-07-10T02:10:18,Outburst,NavCam,E,74,200,2015-07-10 02:10:18
1,2015-07-19T03:38:09,Outburst,NAC,E,-24,296,2015-07-19 03:38:09
2,2015-07-26T20:22:42,Outburst,NAC,E,−36,75,2015-07-26 20:22:42
3,2015-07-27T00:14:29,Outburst,NAC,E,-31,333,2015-07-27 00:14:29
4,2015-07-28T05:23:43,Outburst,WAC,E,-4,264,2015-07-28 05:23:43
...,...,...,...,...,...,...,...
119,2015-12-17T19:15:11.0,Outburst,ALICE,S,-15.8,81.4,2015-12-17 19:15:11
120,2016-01-01T06:00:18.0,Outburst,ALICE,S,-51.7,32.9,2016-01-01 06:00:18
121,2016-01-02T07:33:02.0,Outburst,ALICE,S,-67.1,13.3,2016-01-02 07:33:02
122,2016-01-02T12:59:09.0,Outburst,ALICE,S,-69.2,217,2016-01-02 12:59:09


## Create new CSV with the dates

In [143]:
# Keep only the parsed date column, sorted in chronological order
columns_to_keep = ['CME_date']
surplus_columns = [col for col in ros_cme_df.columns if col not in columns_to_keep]
ros_cme_df = ros_cme_df.drop(columns=surplus_columns).sort_values(by=['CME_date'])

# Export without the row index
csv_file_path = f"{data_path}ROS_CME_date_time.csv"
ros_cme_df.to_csv(csv_file_path, index=False)

# MARS EXPRESS

## Load the data

In [149]:
data_path = "DATA/MARS_EXPRESS/"

# Space-separated catalogue; the 'No.' column is dropped right after loading
mex_catalogue_file = f"{data_path}MEX_CME.txt"
mex_cme_df = pd.read_csv(mex_catalogue_file, sep=' ').drop(columns=['No.'])

print(mex_cme_df.columns)

display(mex_cme_df)

Index(['Start-UT(date)', 'Start-UT(time)', 'End-UT(date)', 'End-UT(time)',
       'Shock-Time(date)', 'Shock-Time(time)', 'Heliocentric-Distance(au)',
       'Bmean(nT)', 'Bmax(nT)', 'Nmean(cm−3)', 'Nmax(cm−3)', 'Vmean(km.s−1)',
       'Vmax(km.s−1)', 'Pdymean(nPa)', 'Pdymax(nPa)',
       'SEP-Peak-Time(Ions)(date)', 'SEP-Peak-Time(Ions)(time)',
       'SEP-Peak-Time(Electrons)(date)', 'SEP-Peak-Time(Electrons)(time)'],
      dtype='object')


Unnamed: 0,Start-UT(date),Start-UT(time),End-UT(date),End-UT(time),Shock-Time(date),Shock-Time(time),Heliocentric-Distance(au),Bmean(nT),Bmax(nT),Nmean(cm−3),Nmax(cm−3),Vmean(km.s−1),Vmax(km.s−1),Pdymean(nPa),Pdymax(nPa),SEP-Peak-Time(Ions)(date),SEP-Peak-Time(Ions)(time),SEP-Peak-Time(Electrons)(date),SEP-Peak-Time(Electrons)(time)
0,2014.12.18,14:24,2014.12.21,00:00,,,1.38,6.77,12.32,5.1,10.93,366.8,423.8,1.15,2.32,2014.12.19,04:51,2014.12.18,05:36
1,2015.02.27,06:40,2015.02.28,04:40,,,1.42,8.03,14.17,6.86,9.67,386.9,429.1,1.72,2.49,2015.02.27,11:41,2015.02.27,12:51
2,2015.03.04,04:50,2015.03.04,22:00,,,1.43,13.14,25.45,4.44,12.7,520.9,615.0,2.02,8.02,2015.03.03,20:05,2015.03.03,18:30
3,2015.03.07,04:50,2015.03.08,06:40,,,1.43,4.7,9.94,3.25,7.15,572.0,610.9,1.77,4.43,2015.03.07,06:54,2015.03.07,15:01
4,2015.03.09,01:00,2015.03.10,16:00,,,1.43,10.57,16.7,1.24,3.78,436.3,573.6,0.44,1.88,2015.03.08,15:22,2015.03.08,16:00
5,2015.07.06,18:24,2015.07.08,02:09,,,1.58,3.43,8.57,5.48,11.63,381.1,425.2,1.37,3.45,2015.07.06,18:30,2015.07.06,20:32
6,2015.08.28,05:00,2015.08.29,07:20,,,1.63,6.23,10.13,4.98,10.81,354.5,379.6,1.04,2.21,,,,
7,2015.10.06,17:00,2015.10.07,13:54,,,1.66,13.23,21.62,5.63,14.2,375.5,465.3,1.48,4.66,,,,
8,2015.11.08,06:27,2015.11.09,22:00,,,1.67,10.82,18.84,2.11,10.84,331.5,444.7,0.38,2.27,2015.11.07,11:47,,
9,2016.01.02,03:10,2016.01.03,04:20,,,1.66,9.25,15.82,8.26,16.5,467.7,534.1,2.96,6.95,,,,


## Get the dates of CME

CME start and end date-times from 2014 to 2018, as observed at Mars (not strictly by Mars Express itself).

In [160]:
# Layout of the combined "<date> <time>" strings, e.g. "2014.12.18 14:24".
MEX_DATETIME_FORMAT = '%Y.%m.%d %H:%M'

# Join the date and time columns and parse each combined column in a single
# vectorised call. Passing the format explicitly avoids per-row format
# guessing: every value is read the same way, and a value with a different
# layout raises instead of being silently misread.
mex_cme_df['start_time'] = pd.to_datetime(
    mex_cme_df['Start-UT(date)'] + ' ' + mex_cme_df['Start-UT(time)'],
    format=MEX_DATETIME_FORMAT,
)
mex_cme_df['end_time'] = pd.to_datetime(
    mex_cme_df['End-UT(date)'] + ' ' + mex_cme_df['End-UT(time)'],
    format=MEX_DATETIME_FORMAT,
)

## Create new CSV with the dates

In [162]:
# Only the parsed start/end timestamps go into the export
columns_to_keep = ['start_time', 'end_time']
mex_cme_df = mex_cme_df.drop(
    columns=[col for col in mex_cme_df.columns if col not in columns_to_keep]
)

display(mex_cme_df)

# Export without the row index
csv_file_path = f"{data_path}MEX_CME_date_time.csv"
mex_cme_df.to_csv(csv_file_path, index=False)

Unnamed: 0,start_time,end_time
0,2014-12-18 14:24:00,2014-12-21 00:00:00
1,2015-02-27 06:40:00,2015-02-28 04:40:00
2,2015-03-04 04:50:00,2015-03-04 22:00:00
3,2015-03-07 04:50:00,2015-03-08 06:40:00
4,2015-03-09 01:00:00,2015-03-10 16:00:00
5,2015-07-06 18:24:00,2015-07-08 02:09:00
6,2015-08-28 05:00:00,2015-08-29 07:20:00
7,2015-10-06 17:00:00,2015-10-07 13:54:00
8,2015-11-08 06:27:00,2015-11-09 22:00:00
9,2016-01-02 03:10:00,2016-01-03 04:20:00
