In [2]:
import numpy as np
import pandas as pd
from sunpy.physics.differential_rotation import rot_hpc
from astropy import units as u
from shapely.geometry import Polygon, Point
from shapely import wkt
import os
import datetime
from sunpy.time import parse_time

In [33]:
def associate_er(inputFile_fl, inputFile_er, temporal_sep_hr, spatial_sep, output2file=False, out_file = None):
    """Associate each flare with eruption events that start shortly after it and lie close to it.

    For every flare, eruptions whose start time falls in
    [flare start, flare start + temporal_sep_hr hours] are candidates. A candidate is
    associated when the arc-length separation between the flare's (hpc_x, hpc_y) point
    and the eruption's bounding polygon is <= spatial_sep. Of the associated eruptions,
    only the spatially closest one is recorded (ties go to the later row of the eruption file).

    Parameters
    ----------
    inputFile_fl : str
        Path to the flare CSV. Must provide event_starttime, event_peaktime,
        event_endtime, hpc_x and hpc_y columns.
    inputFile_er : str
        Path to the eruption CSV. Must provide event_starttime, hpc_bbox (WKT),
        SOL_standard and every column listed in 'keywords_eruption.csv'
        (after the '_er' renames applied below).
    temporal_sep_hr : number
        Maximum delay, in hours, between flare start and eruption start.
    spatial_sep : number
        Maximum separation, in the same units as the hpc coordinates
        (presumably arcseconds -- confirm against the input catalogues).
    output2file : bool, optional
        When true, the result is also written to a CSV file.
    out_file : str or None, optional
        Output path; when None the flare file name with its extension replaced by
        '_with_er.csv' is used.

    Returns
    -------
    pandas.DataFrame
        The flare table with added columns 'associated_er', 'num_associated_er',
        'dist_er', 'is_er' and one column per eruption keyword.

    Notes
    -----
    Reads 'keywords_eruption.csv' (and, when writing, 'keywords_flare_after_merge.csv')
    from the current working directory.
    """
    #import the flare and eruption records as DataFrames
    flare_set = pd.read_csv(inputFile_fl, delimiter = ',', header = 0)
    er_set = pd.read_csv(inputFile_er, delimiter = ',', header = 0)

    #solar radius, in the same units as the hpc coordinates (assumed arcsec); one value for all events
    r = 966.0

    #how many flare events we are working with
    length = flare_set.shape[0]

    #bookkeeping columns: closest associated eruption, how many were associated, and its distance
    #dist_er is created as float so that distances are not truncated to integers when stored
    flare_set['associated_er'] = pd.Series([None]*length, index=flare_set.index, dtype=object)
    flare_set['num_associated_er'] = 0
    flare_set['dist_er'] = 0.0

    #get the list of eruption keywords to copy onto the flare records
    #atleast_1d/ravel keeps this working when the file holds a single keyword or several rows
    er_keywords = [str(k) for k in
                   np.atleast_1d(np.genfromtxt('keywords_eruption.csv', delimiter=',', dtype=str)).ravel()]
    #rename eruption keywords already employed by the flare database
    er_set = er_set.rename(columns={'event_starttime':'starttime_er', 'event_endtime':'endtime_er',
                                    'hpc_bbox':'hpc_bbox_er', 'area_raw': 'area_raw_er',
                                    'area_unit':'area_unit_er', 'event_description':'event_description_er',
                                    'frm_name':'frm_name_er', 'obs_channelid': 'obs_channelid_er',
                                    'hpc_x':'hpc_x_er', 'hpc_y':'hpc_y_er'})

    #create an empty (None-filled) column for each eruption keyword
    for keyword in er_keywords:
        flare_set[keyword] = pd.Series([None]*length, index=flare_set.index, dtype=object)

    #convert start, peak, and end times to datetime objects
    #(list comprehensions rather than map(): on Python 3 map() returns an iterator, not a list)
    for time_col in ('event_starttime', 'event_peaktime', 'event_endtime'):
        flare_set[time_col] = [parse_time(t) for t in flare_set[time_col]]
    er_set['starttime_er'] = [parse_time(t) for t in er_set['starttime_er']]
    delta_t = datetime.timedelta(hours = temporal_sep_hr)

    #loop-invariant lookups
    er_starts = er_set['starttime_er']
    fl_x = flare_set['hpc_x'].values
    fl_y = flare_set['hpc_y'].values

    #iterate through flare start times; i is the positional row index
    for i, fl_start in enumerate(flare_set['event_starttime']):

        #report progress every 100 flares so the user has an idea of the remaining run time
        if (i+1)%100 == 0:
            print('%d / %d events' % ((i+1), length))

        #latest time at which an associated eruption may have started
        latest_er_start = fl_start+delta_t
        #temporal cut: eruption must begin at or after the flare start and no later than the cutoff
        in_window = (er_starts >= fl_start) & (er_starts <= latest_er_start)
        er_search = er_set[in_window]

        #nothing left after the temporal cut -> no association for this flare
        if er_search.empty:
            continue

        #create a shapely point object from the flare's mean coordinates
        fl_point = Point((fl_x[i], fl_y[i]))
        bboxes = er_search['hpc_bbox_er'].values

        min_s = spatial_sep
        event_index = None
        num_associated_er = 0
        #iterate through the properly timed eruption events to see whether they are spatially relevant
        for j in range(len(bboxes)):
            er_poly = wkt.loads(bboxes[j])
            #minimum 2D distance between the eruption's polygon and the flare's mean coordinate
            chord = fl_point.distance(er_poly)
            #arc length subtended by that chord on a sphere of radius r: s = 2*r*arcsin(chord/(2*r))
            #(chord > 2*r yields nan, which fails the comparison below and is therefore rejected)
            s = 2*r*np.arcsin(chord/(2*r))
            #determine whether the spatial distance between eruption and flare meets the set parameter
            if s <= spatial_sep:
                num_associated_er += 1
                #keep only the spatially closest of the temporally relevant eruptions
                if s <= min_s:
                    min_s = s
                    event_index = j

        #event_index is set exactly when at least one eruption passed the spatial cut
        if event_index is None:
            continue

        #record the closest eruption once, after all candidates have been examined
        row = flare_set.index[i]
        flare_set.at[row, 'associated_er'] = er_search['SOL_standard'].values[event_index]
        flare_set.at[row, 'num_associated_er'] = num_associated_er
        flare_set.at[row, 'dist_er'] = min_s
        for keyword in er_keywords:
            flare_set.at[row, keyword] = er_search[keyword].values[event_index]

    #0/1 flag to easily determine whether a flare is associated with an eruption
    flare_set['is_er'] = (flare_set['num_associated_er'] > 0).astype(int)

    #write dataframe to a csv file depending on initial parameters
    if output2file:
        if out_file is None:
            #create a generic name for the file if no file name was specified
            out_file = os.path.splitext(inputFile_fl)[0]+'_with_er.csv'
        #import which keywords to keep for exported data
        flare_keywords = [str(k) for k in
                          np.atleast_1d(np.genfromtxt('keywords_flare_after_merge.csv',
                                                      delimiter=',', dtype=str)).ravel()]
        #add to these keywords the descriptors of the associated eruption
        flare_keywords.extend(['is_er','associated_er', 'num_associated_er', 'dist_er'])
        flare_keywords.extend(er_keywords)
        #write to csv
        flare_set.to_csv(path_or_buf=out_file, columns = flare_keywords, index = False)

    return flare_set

In [34]:
# Run the flare/eruption association on the cleaned flare catalogue: eruptions must start
# within 2 hours after the flare and lie within a separation of 100 from it. out_file is left
# at its default (None), so the function chooses the output CSV name itself.
associate_er(inputFile_fl='flare_dataset_cleaned_30min_100arcsec_with_GOES_with_ar_with_sigmoid1.csv',
             inputFile_er='raw_er.csv',
             temporal_sep_hr=2,
             spatial_sep=100,
             output2file=True)

100 / 40293 events
200 / 40293 events
300 / 40293 events
400 / 40293 events
500 / 40293 events
600 / 40293 events
700 / 40293 events
800 / 40293 events
900 / 40293 events
1000 / 40293 events
1100 / 40293 events
1200 / 40293 events
1300 / 40293 events
1400 / 40293 events
1500 / 40293 events
1600 / 40293 events
1700 / 40293 events
1800 / 40293 events
1900 / 40293 events
2000 / 40293 events
2100 / 40293 events
2200 / 40293 events
2300 / 40293 events
2400 / 40293 events
2500 / 40293 events
2600 / 40293 events
2700 / 40293 events
2800 / 40293 events
2900 / 40293 events
3000 / 40293 events
3100 / 40293 events
3200 / 40293 events
3300 / 40293 events
3400 / 40293 events
3500 / 40293 events
3600 / 40293 events
3700 / 40293 events
3800 / 40293 events
3900 / 40293 events
4000 / 40293 events
4100 / 40293 events
4200 / 40293 events
4300 / 40293 events
4400 / 40293 events
4500 / 40293 events
4600 / 40293 events
4700 / 40293 events
4800 / 40293 events
4900 / 40293 events
5000 / 40293 events
5100 / 40

Unnamed: 0,SOL_standard,event_starttime,event_endtime,event_peaktime,hpc_bbox,hpc_coord,hpc_radius,hpc_x,hpc_y,is_associated_fl,...,intensunit,obs_channelid_er,outflow_length,outflow_lengthunit,outflow_speed,outflow_speedunit,outflow_transspeed,outflow_width,outflow_widthunit,is_er
0,SOL2010-06-11T20:27:24L105C069,2010-06-11 20:27:24,2010-06-11 21:02:48,2010-06-11 20:37:36,"POLYGON((614.4 307.2,691.2 307.2,691.2 384,614...",POINT(652.8 345.6),738.638748,652.8,345.6,1,...,,,,,,,,,,0
1,SOL2010-06-11T20:27:26L105C069,2010-06-11 20:27:26,2010-06-12 14:31:26,2010-06-12 01:00:14,"POLYGON((614.4 307.2,768 307.2,768 460.8,614.4...",POINT(652.8 345.6),738.638748,652.8,345.6,1,...,,,,,,,,,,0
2,SOL2010-06-12T05:20:39L100C069,2010-06-12 05:20:39,2010-06-12 05:27:03,2010-06-12 05:21:39,"POLYGON((614.4 307.2,691.2 307.2,691.2 384,614...",POINT(652.8 345.6),738.638748,652.8,345.6,1,...,km/sec,304,,,,,,,,1
3,SOL2010-06-12T09:04:03L098C069,2010-06-12 09:04:03,2010-06-12 09:29:27,2010-06-12 09:14:27,"POLYGON((614.4 307.2,768 307.2,768 460.8,614.4...",POINT(652.8 345.6),738.638748,652.8,345.6,1,...,km/sec,304,,,,,,,,1
4,SOL2010-06-13T05:35:35L128C115,2010-06-13 05:35:35,2010-06-13 05:50:47,2010-06-13 05:40:59,"POLYGON((844.8 -460.8,921.6 -460.8,921.6 -307....",POINT(883.2 -422.4),979.011747,883.2,-422.4,1,...,,,,,,,,,,0
5,SOL2010-06-13T05:54:48L105C069,2010-06-13 05:54:48,2010-06-13 05:58:24,2010-06-13 05:56:12,"POLYGON((768 307.2,844.8 307.2,844.8 384,768 3...",POINT(806.4 345.6),877.337062,806.4,345.6,1,...,km/s,304,5.12782e+09,cm,2.28004,km/s,-2.96001,1.11415e+09,cm,1
6,SOL2010-06-13T07:05:36L110C116,2010-06-13 07:05:36,2010-06-13 07:16:00,2010-06-13 07:08:48,"POLYGON((768 -460.8,844.8 -460.8,844.8 -384,76...",POINT(806.4 -422.4),910.331105,806.4,-422.4,1,...,,,,,,,,,,0
7,SOL2010-06-13T07:33:00L097C064,2010-06-13 07:33:00,2010-06-13 07:38:24,2010-06-13 07:34:48,"POLYGON((691.2 307.2,768 307.2,768 460.8,691.2...",POINT(729.6 422.4),843.052739,729.6,422.4,1,...,km/s,304,3.33379e+09,cm,4.56398,km/s,-2.16644,8.0624e+08,cm,1
8,SOL2010-06-13T08:09:39L104C069,2010-06-13 08:09:39,2010-06-13 08:17:51,2010-06-13 08:13:03,"POLYGON((768 307.2,844.8 307.2,844.8 384,768 3...",POINT(806.4 345.6),877.337062,806.4,345.6,1,...,km/s,304,2.16316e+09,cm,5.25099,km/s,1.24169,8.59221e+08,cm,1
9,SOL2010-06-13T09:39:26L103C069,2010-06-13 09:39:26,2010-06-13 09:56:26,2010-06-13 09:45:38,"POLYGON((768 307.2,844.8 307.2,844.8 384,768 3...",POINT(806.4 345.6),877.337062,806.4,345.6,1,...,km/s,304,2.16316e+09,cm,5.25099,km/s,1.24169,8.59221e+08,cm,1
