In [1]:
import numpy as np
import pandas as pd
from sunpy.physics.differential_rotation import rot_hpc
from astropy import units as u
from shapely.geometry import Polygon, Point
from shapely import wkt
import os
import datetime
from sunpy.time import parse_time

In [2]:
def unicode2polygon(bbox_array):
    """Parse WKT text into shapely geometry objects.

    Parameters
    ----------
    bbox_array : str or iterable of str
        Either a single WKT string or an iterable (list, array, pandas
        Series, ...) of WKT strings.

    Returns
    -------
    A single geometry when a single string is given, otherwise a list of
    geometries in the same order as the input.
    """
    # `basestring` only exists on Python 2; fall back to `str` on Python 3 so
    # the single-string case is recognised on both interpreters.
    try:
        text_types = basestring
    except NameError:
        text_types = str

    if isinstance(bbox_array, text_types):
        return wkt.loads(bbox_array)
    # Build a real list: on Python 3 `map` is lazy and cannot be stored in a
    # DataFrame column or indexed.
    return [wkt.loads(item) for item in bbox_array]

In [3]:
def pull_sample_flare_1(inputFile, begin_time, end_time, max_dist_frm_center, min_peak_flux):
    """Load flare events from a CSV file and keep those matching the search window.

    Parameters
    ----------
    inputFile : path or file-like object readable by ``pandas.read_csv``.
        Must contain the columns ``event_starttime``, ``event_endtime`` and
        ``sum_peakflux``.
    begin_time, end_time : datetime
        Only flares starting at/after ``begin_time`` and ending at/before
        ``end_time`` are kept.
    max_dist_frm_center :
        Currently unused: the distance-from-disk-centre filter is disabled
        (see the commented line below). Kept so the signature stays stable.
    min_peak_flux : number
        Minimum ``sum_peakflux`` a flare must have to be kept.

    Returns
    -------
    pandas.DataFrame
        The filtered flare table (original row index preserved) with the start
        and end times converted by ``parse_time``.
    """
    flare_set = pd.read_csv(inputFile, sep=',', header=0)
    # List comprehensions instead of `map`: on Python 3 `map` returns a lazy
    # iterator, which cannot be assigned to a DataFrame column.
    flare_set['event_starttime'] = [parse_time(t) for t in flare_set['event_starttime']]
    flare_set['event_endtime'] = [parse_time(t) for t in flare_set['event_endtime']]

    # Boolean masks replace the `.ix` indexer, which was removed in pandas 1.0.
    keep = ((flare_set['event_starttime'] >= begin_time) &
            (flare_set['event_endtime'] <= end_time) &
            (flare_set['sum_peakflux'] >= min_peak_flux))
    # Disabled spatial filter, kept for reference:
    #     keep &= (flare_set['dist_frm_center']/966) <= (0.01*max_dist_frm_center)

    # .copy() so callers can add columns without SettingWithCopyWarning.
    return flare_set.loc[keep].copy()

In [4]:
def pull_sample_ef(inputFile, begin_time, end_time, delta_t, min_max_b_strength):
    """Load EF events from a CSV file and keep those that could relate to the flares.

    Parameters
    ----------
    inputFile : path or file-like object readable by ``pandas.read_csv``.
        Must contain ``event_starttime``, ``event_endtime`` and
        ``maxmagfieldstrength``.
    begin_time, end_time : datetime
        Flare search window.
    delta_t : datetime.timedelta
        Look-back period; the window applied to the EF events is shifted
        earlier by this amount.
    min_max_b_strength : number
        Minimum ``maxmagfieldstrength`` an EF event must have to be kept.

    Returns
    -------
    pandas.DataFrame
        The filtered EF table with its time, position and identifier columns
        renamed with an ``ef_`` prefix so they do not clash with the flare
        columns of the same name.
    """
    ef_set = pd.read_csv(inputFile, sep=',', header=0)
    # List comprehensions instead of `map`: on Python 3 `map` returns a lazy
    # iterator, which cannot be assigned to a DataFrame column.
    ef_set['event_starttime'] = [parse_time(t) for t in ef_set['event_starttime']]
    ef_set['event_endtime'] = [parse_time(t) for t in ef_set['event_endtime']]

    # Disabled filter, kept for reference:
    #     ef_set['event_probability'] == 1
    # The field-strength threshold now honours `min_max_b_strength`; it used to
    # be hard-coded to 100 regardless of the argument.
    # NOTE(review): the upper time bound is shifted by delta_t as well, so EF
    # events ending within the last delta_t of the window are dropped --
    # confirm this is intended.
    keep = ((ef_set['maxmagfieldstrength'] >= min_max_b_strength) &
            (ef_set['event_starttime'] >= (begin_time - delta_t)) &
            (ef_set['event_endtime'] <= (end_time - delta_t)))

    # Boolean mask via .loc replaces the `.ix` indexer removed in pandas 1.0.
    return ef_set.loc[keep].rename(columns={
        'event_starttime': 'ef_starttime',
        'event_endtime': 'ef_endtime',
        'hpc_bbox': 'ef_hpc_bbox',
        'hpc_coord': 'ef_hpc_coord',
        'hpc_radius': 'ef_hpc_radius',
        'hpc_x': 'ef_hpc_x',
        'hpc_y': 'ef_hpc_y',
        'SOL_standard': 'ef_SOL_standard',
    })

In [9]:
def associate_ef2(inputFile_fl, inputFile_ef, begin_year, begin_month, begin_day, end_year, end_month, end_day,
                  max_dist_frm_center, min_peak_flux, min_max_b_strength, temporal_sep_hr, spatial_sep,
                  output2file=False, out_file=None):
    """Associate each flare with the EF event that most plausibly preceded it.

    For every flare returned by ``pull_sample_flare_1`` the EF events returned
    by ``pull_sample_ef`` are searched for candidates that ended within
    ``temporal_sep_hr`` hours before the flare started and whose bounding box
    lies within ``spatial_sep`` of the flare position. When several candidates
    qualify, the one with the largest association strength
    ``maxmagfieldstrength**2 / (separation * minutes_before_flare_peak)`` wins.

    Parameters
    ----------
    inputFile_fl, inputFile_ef : CSV inputs for the flare and EF tables.
    begin_year, begin_month, begin_day, end_year, end_month, end_day : int
        Bounds of the search window.
    max_dist_frm_center :
        Forwarded to ``pull_sample_flare_1`` and used in the default output
        file name.
    min_peak_flux : minimum flare ``sum_peakflux``.
    min_max_b_strength : forwarded to ``pull_sample_ef``.
    temporal_sep_hr : number
        How many hours before a flare's start an EF event may have ended.
    spatial_sep : number
        Maximum allowed separation, in the same units as the coordinates
        (the solar radius is taken as 966).
    output2file : bool
        When true, also write the result to CSV.
    out_file : str or None
        Output path; when None a name is built from the search parameters.

    Returns
    -------
    pandas.DataFrame
        The flare table extended with ``associated_ef``, ``num_associated_ef``,
        ``ef_association_strength``, ``is_ef`` and one column per keyword
        listed in ``keywords_ef_append_fl.csv``.
    """
    # convert the search window and look-back period to datetime objects
    delta_t = datetime.timedelta(hours=temporal_sep_hr)
    begin_time = datetime.datetime(begin_year, begin_month, begin_day)
    end_time = datetime.datetime(end_year, end_month, end_day)

    # pull the flare events matching the input parameters; work on an
    # independent copy because new columns are added below
    flare_set = pull_sample_flare_1(inputFile_fl, begin_time, end_time,
                                    max_dist_frm_center, min_peak_flux).copy()
    # pull the potentially related ef events
    ef_set = pull_sample_ef(inputFile_ef, begin_time, end_time, delta_t, min_max_b_strength)

    # radius of the sun in arc secs
    r = 966
    # number of flare events left after the initial cull
    length = flare_set.shape[0]

    # columns tracking the associated ef, how many candidates there were and
    # how strong the chosen association is
    flare_set['associated_ef'] = [None] * length
    flare_set['num_associated_ef'] = [0] * length
    flare_set['ef_association_strength'] = [0.0] * length

    # ef keywords to copy onto the flare rows; atleast_1d keeps a file holding
    # a single keyword from collapsing into a 0-d array
    ef_keywords = list(np.atleast_1d(
        np.genfromtxt('keywords_ef_append_fl.csv', delimiter=',', dtype=str)))
    for keyword in ef_keywords:
        flare_set[keyword] = [None] * length

    # row labels, in positional order, for label-based writes
    idx = flare_set.index.tolist()

    # convert start, peak, and end times to datetime objects
    for column in ('event_starttime', 'event_peaktime', 'event_endtime'):
        flare_set[column] = [parse_time(t) for t in flare_set[column]]

    ef_endtimes = ef_set['ef_endtime']
    for i, flare_start in enumerate(flare_set['event_starttime']):
        # progress report so the user has an idea of the remaining run time
        if (i + 1) % 10 == 0:
            print('%d / %d events' % (i + 1, length))

        # temporal cut: the ef must end no earlier than delta_t before the
        # flare starts and no later than the flare start itself
        earliest_ef_end = flare_start - delta_t
        ef_search = ef_set.loc[(ef_endtimes >= earliest_ef_end) & (ef_endtimes <= flare_start)]
        if ef_search.empty:
            continue

        # shapely point at the flare's mean coordinates (HPC units)
        fl_point = Point((flare_set['hpc_x'].values[i], flare_set['hpc_y'].values[i]))

        # spatial cut: collect (position in ef_search, separation) for every
        # candidate close enough to the flare
        matches = []
        for j, bbox in enumerate(ef_search['ef_hpc_bbox'].values):
            # minimum 2D distance between the ef's bounding box and the flare
            chord = fl_point.distance(wkt.loads(bbox))
            # distance along the sun's curved surface, same radius for all events
            # NOTE(review): the arc subtended by a chord c is 2*r*arcsin(c/(2r));
            # this expression yields half of that -- confirm before relying on
            # absolute values of spatial_sep.
            s = r * np.arcsin(chord / (2 * r))
            if s <= spatial_sep:
                matches.append((j, s))

        if not matches:
            continue

        if len(matches) == 1:
            # pass the distance belonging to the matching ef (not whatever the
            # last candidate in the loop happened to produce)
            j, s = matches[0]
            one_associated_ef(i, j, flare_set, ef_search, s, ef_keywords, idx)
            continue

        # several candidates: keep the one with the highest association strength
        flare_peak = flare_set['event_peaktime'].values[i]
        best_j = None
        best_strength = 0
        for j, s in matches:
            # a flare inside the ef's box has zero separation; use 1 to avoid
            # dividing by zero
            if s == 0:
                s = 1
            temporal_sep = pd.to_timedelta(flare_peak - ef_search['ef_endtime'].values[j])
            temporal_sep = temporal_sep.total_seconds() / 60
            strength = (ef_search['maxmagfieldstrength'].values[j] ** 2) / (s * temporal_sep)
            # `best_j is None` guarantees that a real match is always selected,
            # even if every strength compares false (e.g. NaN)
            if best_j is None or strength >= best_strength:
                best_j = j
                best_strength = strength

        row = idx[i]
        flare_set.loc[row, 'num_associated_ef'] = len(matches)
        flare_set.loc[row, 'associated_ef'] = ef_search['ef_SOL_standard'].values[best_j]
        for keyword in ef_keywords:
            flare_set.loc[row, keyword] = ef_search[keyword].values[best_j]
        flare_set.loc[row, 'ef_association_strength'] = best_strength

    # 0/1 flag to easily determine whether a flare is associated with an ef
    flare_set['is_ef'] = (flare_set['num_associated_ef'] > 0).astype(int)

    # write dataframe to a csv file depending on initial parameters
    if output2file:
        if out_file is None:
            # generic file name based on the search parameters
            out_file = ('flare_search_' + str(begin_time)[0:10] + '_' + str(end_time)[0:10] + '_' +
                        str(temporal_sep_hr) + '_' + str(max_dist_frm_center) + '_' +
                        str(min_peak_flux) + '_' + str(min_max_b_strength) + '.csv')
        # flare keywords to keep in the exported data
        flare_keywords = list(np.atleast_1d(
            np.genfromtxt('keywords_flare_after_merge.csv', delimiter=',', dtype=str)))
        # plus the descriptors of the associated ef
        flare_keywords.extend(['is_ef', 'associated_ef', 'num_associated_ef', 'ef_association_strength'])
        flare_keywords.extend(ef_keywords)
        flare_set.to_csv(path_or_buf=out_file, columns=flare_keywords, index=False)

    return flare_set

In [7]:
def one_associated_ef(flare_index, ef_index, fl_set, ef_set, s, ef_keywords, idx):
    """Record on a flare row the single ef event associated with it.

    Parameters
    ----------
    flare_index : int
        Positional index of the flare row in ``fl_set``.
    ef_index : int
        Positional index of the associated event in ``ef_set``.
    fl_set : pandas.DataFrame
        Flare table, modified in place. Must already contain the columns
        ``associated_ef``, ``num_associated_ef``, ``ef_association_strength``,
        ``event_peaktime`` and every name in ``ef_keywords``.
    ef_set : pandas.DataFrame
        Candidate ef table providing ``ef_SOL_standard``, ``ef_endtime``,
        ``maxmagfieldstrength`` and every name in ``ef_keywords``.
    s : number
        Separation between the flare and the ef; 0 is replaced by 1.
    ef_keywords : iterable of str
        Columns copied from the ef row onto the flare row.
    idx : list
        Row labels of ``fl_set`` in positional order.

    Returns
    -------
    None
    """
    # Write through .loc with the row label: assigning into `Series.values`
    # is not guaranteed to reach the DataFrame (and is read-only under pandas
    # copy-on-write), and the keyword columns are already written this way.
    row = idx[flare_index]
    fl_set.loc[row, 'associated_ef'] = ef_set['ef_SOL_standard'].values[ef_index]
    for keyword in ef_keywords:
        fl_set.loc[row, keyword] = ef_set[keyword].values[ef_index]
    # exactly one ef is associated with this flare
    fl_set.loc[row, 'num_associated_ef'] = 1

    # if the flare is contained in the ef, use a separation of 1 so as not to
    # divide by 0
    if s == 0:
        s = 1
    # minutes between the end of the ef and the peak of the flare
    temporal_sep = pd.to_timedelta(fl_set['event_peaktime'].values[flare_index]
                                   - ef_set['ef_endtime'].values[ef_index])
    temporal_sep = temporal_sep.total_seconds() / 60
    # association strength: B_max**2 / (separation * minutes)
    fl_set.loc[row, 'ef_association_strength'] = (
        (ef_set['maxmagfieldstrength'].values[ef_index] ** 2) / (s * temporal_sep))


In [10]:
# Run the flare / ef association for 2010-06-01 through 2016-06-01.
# Arguments are spelled out by keyword so each threshold is self-describing;
# out_file=None lets associate_ef2 build the CSV name from these parameters.
associate_ef2(inputFile_fl='flare_dataset_cleaned.csv',
              inputFile_ef='raw_ef.csv',
              begin_year=2010, begin_month=6, begin_day=1,
              end_year=2016, end_month=6, end_day=1,
              max_dist_frm_center=60,
              min_peak_flux=500,
              min_max_b_strength=100,
              temporal_sep_hr=12,
              spatial_sep=100,
              output2file=True,
              out_file=None)

10 / 16324 events
20 / 16324 events
30 / 16324 events
40 / 16324 events
50 / 16324 events
60 / 16324 events
70 / 16324 events
80 / 16324 events
90 / 16324 events
100 / 16324 events
110 / 16324 events
120 / 16324 events
130 / 16324 events
140 / 16324 events
150 / 16324 events
160 / 16324 events
170 / 16324 events
180 / 16324 events
190 / 16324 events
200 / 16324 events
210 / 16324 events
220 / 16324 events
230 / 16324 events
240 / 16324 events
250 / 16324 events
260 / 16324 events
270 / 16324 events
280 / 16324 events
290 / 16324 events
300 / 16324 events
310 / 16324 events
320 / 16324 events
330 / 16324 events
340 / 16324 events
350 / 16324 events
360 / 16324 events
370 / 16324 events
380 / 16324 events
390 / 16324 events
400 / 16324 events
410 / 16324 events
420 / 16324 events
430 / 16324 events
440 / 16324 events
450 / 16324 events
460 / 16324 events
470 / 16324 events
480 / 16324 events
490 / 16324 events
500 / 16324 events
510 / 16324 events
520 / 16324 events
530 / 16324 events
54

Unnamed: 0,SOL_standard,event_starttime,event_endtime,event_peaktime,fl_goescls,hpc_bbox,hpc_coord,hpc_radius,hpc_x,hpc_y,...,ef_aspectratio,ef_proximityratio,area_atdiskcenter,ef_hpc_bbox,ef_hpc_coord,ef_hpc_radius,ef_hpc_x,ef_hpc_y,event_probability,is_ef
0,SOL2010-06-11T20:27:24L105C069,2010-06-11 20:27:24,2010-06-11 21:02:48,2010-06-11 20:37:36,,"POLYGON((614.4 307.2,691.2 307.2,691.2 384,614...",POINT(652.8 345.6),738.638748,652.8,345.6,...,,,,,,,,,,0
1,SOL2010-06-11T20:27:26L105C069,2010-06-11 20:27:26,2010-06-12 14:31:26,2010-06-12 01:00:14,,"POLYGON((614.4 307.2,768 307.2,768 460.8,614.4...",POINT(652.8 345.6),738.638748,652.8,345.6,...,,,,,,,,,,0
2,SOL2010-06-12T05:20:39L100C069,2010-06-12 05:20:39,2010-06-12 05:27:03,2010-06-12 05:21:39,,"POLYGON((614.4 307.2,691.2 307.2,691.2 384,614...",POINT(652.8 345.6),738.638748,652.8,345.6,...,,,,,,,,,,0
3,SOL2010-06-12T09:04:03L098C069,2010-06-12 09:04:03,2010-06-12 09:29:27,2010-06-12 09:14:27,,"POLYGON((614.4 307.2,768 307.2,768 460.8,614.4...",POINT(652.8 345.6),738.638748,652.8,345.6,...,,,,,,,,,,0
4,SOL2010-06-13T05:35:35L128C115,2010-06-13 05:35:35,2010-06-13 05:50:47,2010-06-13 05:40:59,,"POLYGON((844.8 -460.8,921.6 -460.8,921.6 -307....",POINT(883.2 -422.4),979.011747,883.2,-422.4,...,,,,,,,,,,0
5,SOL2010-06-13T05:36:44L128C111,2010-06-13 05:36:44,2010-06-13 05:42:20,2010-06-13 05:39:44,,"POLYGON((844.8 -384,921.6 -384,921.6 -307.2,84...",POINT(883.2 -345.6),948.410038,883.2,-345.6,...,,,,,,,,,,0
6,SOL2010-06-13T05:54:48L105C069,2010-06-13 05:54:48,2010-06-13 05:58:24,2010-06-13 05:56:12,,"POLYGON((768 307.2,844.8 307.2,844.8 384,768 3...",POINT(806.4 345.6),877.337062,806.4,345.6,...,,,,,,,,,,0
7,SOL2010-06-13T07:05:36L110C116,2010-06-13 07:05:36,2010-06-13 07:16:00,2010-06-13 07:08:48,,"POLYGON((768 -460.8,844.8 -460.8,844.8 -384,76...",POINT(806.4 -422.4),910.331105,806.4,-422.4,...,,,,,,,,,,0
8,SOL2010-06-13T07:33:00L097C064,2010-06-13 07:33:00,2010-06-13 07:38:24,2010-06-13 07:34:48,,"POLYGON((691.2 307.2,768 307.2,768 460.8,691.2...",POINT(729.6 422.4),843.052739,729.6,422.4,...,,,,,,,,,,0
10,SOL2010-06-13T08:09:39L104C069,2010-06-13 08:09:39,2010-06-13 08:17:51,2010-06-13 08:13:03,,"POLYGON((768 307.2,844.8 307.2,844.8 384,768 3...",POINT(806.4 345.6),877.337062,806.4,345.6,...,,,,,,,,,,0
